fs-caching-server.js 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. #!/usr/bin/env node
  2. /**
  3. * A caching HTTP server/proxy that stores data on the local filesystem
  4. *
  5. * Author: Dave Eddy <dave@daveeddy.com>
  6. * Date: May 05, 2015
  7. * License: MIT
  8. */
  9. var fs = require('fs');
  10. var http = require('http');
  11. var url = require('url');
  12. var util = require('util');
  13. var accesslog = require('access-log');
  14. var getopt = require('posix-getopt');
  15. var mime = require('mime');
  16. var mkdirp = require('mkdirp');
  17. var path = require('path-platform');
  18. var uuid = require('node-uuid');
  19. var package = require('./package.json');
  /**
   * "Has own property": report whether `p` is a property defined directly on
   * `o` (not inherited), without relying on `o` having its own usable
   * `hasOwnProperty` method.
   *
   * @param {Object} o - object to inspect
   * @param {string} p - property name to look for
   * @returns {boolean} true when `o` itself defines `p`
   */
  function hap(o, p) {
    var hasOwn = Object.prototype.hasOwnProperty;
    return hasOwn.call(o, p);
  }
  // Headers that are never copied between the client and the upstream server
  // when proxying (in either direction).
  var NO_PROXY_HEADERS = [
    'date',
    'server',
    'host'
  ];

  // Only requests using these methods are eligible for the cache; requests
  // using any other method are proxied straight through.
  var CACHE_METHODS = [
    'GET',
    'HEAD'
  ];
  // Runtime configuration.  Each value starts out as its FS_CACHE_* environment
  // variable (or a built-in default) and may later be overridden by a command
  // line argument.
  var env = process.env;
  var opts = {
    host: env.FS_CACHE_HOST || '0.0.0.0',
    port: env.FS_CACHE_PORT || 8080,
    url: env.FS_CACHE_URL,
    regex: env.FS_CACHE_REGEX || '\\.(png|jpg|jpeg|css|html|js|tar|tgz|tar\\.gz)$',
    debug: env.FS_CACHE_DEBUG
  };
  // Help text printed for -h/--help (stdout) and for unknown options (stderr).
  // The defaults shown are the values of `opts` at the time this string is
  // built, i.e. environment overrides are already reflected.
  var usage = [
    'usage: fs-caching-server [options]',
    '',
    'options',
    ' -c, --cache-dir <dir> directory to use for caching data, defaults to CWD',
    ' -d, --debug enable debug logging to stderr',
    ' -H, --host <host> [env FS_CACHE_HOST] the host on which to listen, defaults to ' + opts.host,
    ' -h, --help print this message and exit',
    // this line used to repeat the --help description instead of describing the port
    ' -p, --port <port> [env FS_CACHE_PORT] the port on which to listen, defaults to ' + opts.port,
    ' -r, --regex <regex> [env FS_CACHE_REGEX] regex to match to cache files, defaults to ' + opts.regex,
    // the URL has no built-in default, so only mention one when the environment supplied it
    ' -U, --url <url> [env FS_CACHE_URL] URL to proxy to' + (opts.url ? ', defaults to ' + opts.url : ''),
    ' -u, --updates check npm for available updates',
    ' -v, --version print the version number and exit',
  ].join('\n');
  49. var options = [
  50. 'c:(cache-dir)',
  51. 'd(debug)',
  52. 'H:(host)',
  53. 'h(help)',
  54. 'p:(port)',
  55. 'r:(regex)',
  56. 'U:(url)',
  57. 'u(updates)',
  58. 'v(version)'
  59. ].join('');
  60. var parser = new getopt.BasicParser(options, process.argv);
  61. var option;
  62. while ((option = parser.getopt()) !== undefined) {
  63. switch (option.option) {
  64. case 'c': process.chdir(option.optarg); break;
  65. case 'd': opts.debug = true; break;
  66. case 'H': opts.host = option.optarg; break;
  67. case 'h': console.log(usage); process.exit(0); break;
  68. case 'p': opts.port = parseInt(option.optarg, 10); break;
  69. case 'r': opts.regex = option.optarg; break;
  70. case 'U': opts.url = option.optarg; break;
  71. case 'u': // check for updates
  72. require('latest').checkupdate(package, function(ret, msg) {
  73. console.log(msg);
  74. process.exit(ret);
  75. });
  76. return;
  77. case 'v': console.log(package.version); process.exit(0); break;
  78. default: console.error(usage); process.exit(1);
  79. }
  80. }
  // a proxy target is mandatory - refuse to start without one
  if (!opts.url) {
    console.error('url must be specified with `-U <url>` or as FS_CACHE_URL');
    process.exit(1);
  }

  // drop any trailing slashes so the request path can be appended verbatim
  opts.url = opts.url.replace(/\/*$/, '');

  // compile the cache-matching pattern - an invalid pattern throws here
  opts.regex = new RegExp(opts.regex);

  // start the server
  var server = http.createServer(onrequest);
  server.listen(opts.port, opts.host, listening);
  /**
   * Callback passed to `listen()`: print the effective configuration (listen
   * address, proxy target, cache pattern and cache directory) to stdout.
   */
  function listening() {
    var banner = [
      ['listening on http://%s:%d', opts.host, opts.port],
      ['proxying requests to %s', opts.url],
      ['caching matches of %s', opts.regex],
      ['caching to %s', process.cwd()]
    ];

    banner.forEach(function(args) {
      console.log.apply(console, args);
    });
  }
  // Downloads that are currently in progress, keyed by local cache filename.
  // Each value is an array of [req, res] pairs that arrived while the file was
  // being fetched.  This ensures only one connection is made to the upstream
  // server per file; the queued requests are answered by finish() once the
  // initial transfer is done.
  var inprogress = {};

  /**
   * Handle one incoming HTTP request.
   *
   * - Requests whose method is not in CACHE_METHODS, or whose path does not
   *   match opts.regex, are proxied to opts.url without touching the cache.
   * - Otherwise the path is mapped to a file below the current directory:
   *   an existing file is streamed from disk, a directory yields a 400, a
   *   download already in progress queues the request, and a miss is fetched
   *   from upstream while being written to disk and sent to the client.
   *
   * @param {http.IncomingMessage} req - client request
   * @param {http.ServerResponse} res - client response
   */
  function onrequest(req, res) {
    accesslog(req, res);

    var _id = uuid.v4();

    // debug logger - every line is prefixed with this request's unique id
    function log() {
      if (!opts.debug)
        return;
      var s = util.format.apply(util, arguments);
      return console.error('[%s] %s', _id, s);
    }

    // call set(name, value) for every header not listed in NO_PROXY_HEADERS
    function copyheaders(headers, set) {
      Object.keys(headers || {}).forEach(function(header) {
        if (NO_PROXY_HEADERS.indexOf(header) === -1)
          set(header, headers[header]);
      });
    }

    // build the http.request() options that mirror this request upstream
    function upstream() {
      var uri = url.parse(opts.url + parsed.path);
      uri.method = req.method;
      uri.headers = {};
      copyheaders(req.headers, function(header, value) {
        uri.headers[header] = value;
      });
      uri.headers.host = uri.host;
      return uri;
    }

    // relay the request to the upstream server without using the cache
    function proxy() {
      var oreq = http.request(upstream(), function(ores) {
        res.statusCode = ores.statusCode;
        copyheaders(ores.headers, res.setHeader.bind(res));
        ores.pipe(res);
      });
      oreq.on('error', function(e) {
        log('error with proxy request %s', e.message);
        res.statusCode = 500;
        res.end();
      });
      req.pipe(oreq);
    }

    log('INCOMING REQUEST - %s %s', req.method, req.url);

    // parse the URL and determine the filename; normalizing the absolute
    // pathname before prefixing '.' keeps the result below the cache directory
    var parsed = url.parse(req.url);
    var file;
    try {
      file = '.' + path.posix.normalize(decodeURIComponent(parsed.pathname));
    } catch (e) {
      log('failed to parse pathname - sending 400 to client -', e.message);
      res.statusCode = 400;
      res.end();
      return;
    }

    // If the request is not a HEAD or GET request, or if it does not match the
    // regex supplied, we simply proxy it without a cache.
    if (CACHE_METHODS.indexOf(req.method) < 0 || ! opts.regex.test(file)) {
      log('request will be proxied with no caching');
      proxy();
      return;
    }

    // A NUL byte can never name a real file, and current versions of Node throw
    // synchronously from fs calls when given one - reject it up front.
    if (file.indexOf('\0') !== -1) {
      log('pathname contains a null byte - sending 400 to client');
      res.statusCode = 400;
      res.end();
      return;
    }

    // check to see if the file exists
    fs.stat(file, function(err, stats) {
      // directory, give up
      if (stats && stats.isDirectory()) {
        log('%s is a directory - sending 400 to client', file);
        res.statusCode = 400;
        res.end();
        return;
      }

      // file exists, stream it locally
      if (stats) {
        log('%s is a file (cached) - streaming to client', file);
        streamfile(file, stats, req, res);
        return;
      }

      // another request is already proxying for this file, we wait
      if (hap(inprogress, file)) {
        log('%s download in progress - response queued', file);
        inprogress[file].push([req, res]);
        return;
      }

      // A HEAD response carries no body, so saving it would leave an empty
      // file in the cache that later GET requests would be served from.
      if (req.method === 'HEAD') {
        log('%s is not cached - proxying HEAD with no caching', file);
        proxy();
        return;
      }

      // not cached (stat failed) - fetch it from upstream and save it
      inprogress[file] = [];

      // Release the requests queued behind this download exactly once.  The
      // filename is always passed so finish() can find and remove the queue;
      // `ores` is only supplied when the file was saved successfully.
      var settled = false;
      function settle(ores) {
        if (settled)
          return;
        settled = true;
        finish(file, ores);
      }

      var uri = upstream();
      log('proxying %s to %s', uri.method, opts.url + parsed.path);

      // proxy it
      var oreq = http.request(uri, function(ores) {
        res.statusCode = ores.statusCode;
        copyheaders(ores.headers, res.setHeader.bind(res));

        // only successful responses are cached, anything else is relayed as-is
        if (res.statusCode !== 200) {
          ores.pipe(res);
          settle();
          return;
        }

        mkdirp(path.dirname(file), function(err) {
          if (err)
            log('failed to create directory for %s - %s', file, err.message);

          // write to a temporary name first so a partial download is never
          // mistaken for a cached file
          var tmp = file + '.in-progress';
          log('saving local file to %s', tmp);
          var ws = fs.createWriteStream(tmp);

          ws.on('finish', function() {
            fs.rename(tmp, file, function(err) {
              if (err) {
                log('failed to rename %s to %s', tmp, file);
                settle();
              } else {
                log('renamed %s to %s', tmp, file);
                settle(ores);
              }
            });
          });

          ws.on('error', function(e) {
            log('failed to save local file %s', e.message);
            ores.unpipe(ws);
            settle();
          });

          // NOTE(review): an upstream connection that drops mid-transfer never
          // ends `ws`, so the queue for this file is not released - confirm
          // whether that case needs handling.
          ores.pipe(ws);
          ores.pipe(res);
        });
      });

      oreq.on('error', function(e) {
        log('error with proxy request %s', e.message);
        settle();
        res.statusCode = 500;
        res.end();
      });

      oreq.end();
    });
  }
  /**
   * Answer every request that was queued in `inprogress[file]` while `file`
   * was being downloaded, then forget the queue.
   *
   * @param {string} [file] - local cache filename whose download ended
   * @param {http.IncomingMessage} [ores] - the upstream response; its status
   *   code and headers are replayed to the queued clients.  When `file` or
   *   `ores` is missing the download is treated as failed and every queued
   *   request is ended with a 400.
   *
   * On success the queued requests are served from the file on disk; if the
   * file turns out to be a directory they get a 400, and if it cannot be
   * stat'ed they get a 500.
   */
  function finish(file, ores) {
    // Take the queue out of `inprogress` up front: this tolerates a missing
    // entry (nothing queued, or no filename given) and guarantees the entry is
    // released on every path, including the successful one.
    var queued = inprogress[file] || [];
    delete inprogress[file];

    // end every queued response with the given status code
    function endall(code) {
      queued.forEach(function(o) {
        var res = o[1];
        res.statusCode = code;
        res.end();
      });
    }

    // the download failed
    if (!file || !ores) {
      endall(400);
      return;
    }

    fs.stat(file, function(err, stats) {
      // not found
      if (!stats) {
        endall(500);
        return;
      }

      // directory, give up
      if (stats.isDirectory()) {
        endall(400);
        return;
      }

      // file exists, stream it locally
      queued.forEach(function(o) {
        var req = o[0];
        var res = o[1];
        res.statusCode = ores.statusCode;
        Object.keys(ores.headers || {}).forEach(function(header) {
          if (NO_PROXY_HEADERS.indexOf(header) === -1)
            res.setHeader(header, ores.headers[header]);
        });
        streamfile(file, stats, req, res);
      });
    });
  }
  /**
   * Serve a file from the local cache.
   *
   * Sets Last-Modified, Content-Type and ETag (built from the file size and
   * modification time).  Replies 304 with no body when the request's
   * If-None-Match equals that ETag, sends headers only for HEAD requests, and
   * otherwise streams the file contents to the response.
   *
   * @param {string} file - path of the file to send
   * @param {fs.Stats} stats - result of fs.stat() for `file`
   * @param {http.IncomingMessage} req - client request
   * @param {http.ServerResponse} res - client response
   */
  function streamfile(file, stats, req, res) {
    var modified = stats.mtime;
    var etag = util.format('"%d-%d"', stats.size, modified.getTime());

    res.setHeader('Last-Modified', modified.toUTCString());
    res.setHeader('Content-Type', mime.lookup(file));
    res.setHeader('ETag', etag);

    // the client already holds this exact version - no body needed
    var unchanged = req.headers['if-none-match'] === etag;
    if (unchanged) {
      res.statusCode = 304;
      res.end();
      return;
    }

    res.setHeader('Content-Length', stats.size);

    // HEAD gets the headers only
    var headonly = req.method === 'HEAD';
    if (headonly) {
      res.end();
      return;
    }

    var rs = fs.createReadStream(file);
    rs.pipe(res);
    rs.on('error', function(e) {
      if (e.code === 'ENOENT') {
        res.statusCode = 404;
      } else {
        res.statusCode = 500;
      }
      res.end();
    });

    // stop reading from disk if the client goes away
    res.on('close', function() {
      rs.destroy();
    });
  }