fs-caching-server.js 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. #!/usr/bin/env node
  2. /**
  3. * A caching HTTP server/proxy that stores data on the local filesystem
  4. *
  5. * Author: Dave Eddy <dave@daveeddy.com>
  6. * Date: May 05, 2015
  7. * License: MIT
  8. */
  9. var fs = require('fs');
  10. var http = require('http');
  11. var url = require('url');
  12. var util = require('util');
  13. var accesslog = require('access-log');
  14. var getopt = require('posix-getopt');
  15. var mime = require('mime');
  16. var mkdirp = require('mkdirp');
  17. var path = require('path-platform');
  18. var uuid = require('node-uuid');
  19. var clone = require("readable-stream-clone");
  20. var package = require('./package.json');
  /**
   * "Has own property": report whether `p` is a property defined directly on
   * `o` (not inherited through its prototype chain).
   *
   * The check goes through Object.prototype so it still works when `o`
   * shadows or lacks its own `hasOwnProperty`.
   */
  function hap(o, p) {
    var ownCheck = Object.prototype.hasOwnProperty;
    return ownCheck.call(o, p);
  }
  24. // don't copy these headers when proxying request
  25. var NO_PROXY_HEADERS = ['date', 'server', 'host'];
  26. // these methods use the cache, everything is proxied
  27. var CACHE_METHODS = ['GET', 'HEAD'];
  28. // command line arguments
  29. var opts = {
  30. host: process.env.FS_CACHE_HOST || '0.0.0.0',
  31. port: process.env.FS_CACHE_PORT || 8080,
  32. url: process.env.FS_CACHE_URL,
  33. regex: process.env.FS_CACHE_REGEX || '\\.(png|jpg|jpeg|css|html|js|tar|tgz|tar\\.gz)$',
  34. debug: process.env.FS_CACHE_DEBUG,
  35. };
  36. var usage = [
  37. 'usage: fs-caching-server [options]',
  38. '',
  39. 'options',
  40. ' -c, --cache-dir <dir> directory to use for caching data, defaults to CWD',
  41. ' -d, --debug enable debug logging to stderr',
  42. ' -H, --host <host> [env FS_CACHE_HOST] the host on which to listen, defaults to ' + opts.host,
  43. ' -h, --help print this message and exit',
  44. ' -p, --port <port> [env FS_CACHE_PORT] the port on which to listen, defaults to ' + opts.port,
  45. ' -r, --regex <regex> [env FS_CACHE_REGEX] regex to match to cache files, defaults to ' + opts.regex,
  46. ' -U, --url <url> [env FS_CACHE_URL] URL to proxy to',
  47. ' -u, --updates check npm for available updates',
  48. ' -v, --version print the version number and exit',
  49. ].join('\n');
  50. var options = [
  51. 'c:(cache-dir)',
  52. 'd(debug)',
  53. 'H:(host)',
  54. 'h(help)',
  55. 'p:(port)',
  56. 'r:(regex)',
  57. 'U:(url)',
  58. 'u(updates)',
  59. 'v(version)'
  60. ].join('');
  61. var parser = new getopt.BasicParser(options, process.argv);
  62. var option;
  63. while ((option = parser.getopt()) !== undefined) {
  64. switch (option.option) {
  65. case 'c': process.chdir(option.optarg); break;
  66. case 'd': opts.debug = true; break;
  67. case 'H': opts.host = option.optarg; break;
  68. case 'h': console.log(usage); process.exit(0); break;
  69. case 'p': opts.port = parseInt(option.optarg, 10); break;
  70. case 'r': opts.regex = option.optarg; break;
  71. case 'U': opts.url = option.optarg; break;
  72. case 'u': // check for updates
  73. require('latest').checkupdate(package, function(ret, msg) {
  74. console.log(msg);
  75. process.exit(ret);
  76. });
  77. return;
  78. case 'v': console.log(package.version); process.exit(0); break;
  79. default: console.error(usage); process.exit(1);
  80. }
  81. }
  82. if (!opts.url) {
  83. console.error('url must be specified with `-U <url>` or as FS_CACHE_URL');
  84. process.exit(1);
  85. }
  86. // remove trailing slash
  87. opts.url = opts.url.replace(/\/*$/, '');
  88. // create the regex option - this may throw
  89. opts.regex = new RegExp(opts.regex);
  90. // start the server
  91. http.createServer(onrequest).listen(opts.port, opts.host, listening);
  /**
   * Callback passed to `listen()`: print the effective configuration (listen
   * address, upstream URL, cache regex and cache directory) to stdout.
   */
  function listening() {
    var banner = [
      ['listening on http://%s:%d', opts.host, opts.port],
      ['proxying requests to %s', opts.url],
      ['caching matches of %s', opts.regex],
      ['caching to %s', process.cwd()]
    ];

    banner.forEach(function(args) {
      console.log.apply(console, args);
    });
  }
  // Downloads that are currently in progress, keyed by local cache filename.
  // Each value is a list of [req, res] pairs that arrived while the first
  // request for that file was still being fetched: only 1 connection is made
  // to the upstream server per file, and the queued requests are answered by
  // finish() once the initial transfer is over.
  var inprogress = {};

  /**
   * Build the `http.request` options used to forward `req` upstream.
   *
   * @param {http.IncomingMessage} req - the client request being forwarded
   * @param {string} uristring - absolute upstream URL to request
   * @returns {object} parsed URL carrying `method` and the filtered `headers`
   */
  function proxyoptions(req, uristring) {
    var uri = url.parse(uristring);
    uri.method = req.method;
    uri.headers = {};
    Object.keys(req.headers || {}).forEach(function(header) {
      if (NO_PROXY_HEADERS.indexOf(header) === -1)
        uri.headers[header] = req.headers[header];
    });
    // the Host header must name the upstream server, not this proxy
    uri.headers.host = uri.host;
    return uri;
  }

  /**
   * Copy the status code and the forwardable headers of the upstream
   * response `ores` onto the client response `res`.
   */
  function copyhead(ores, res) {
    res.statusCode = ores.statusCode;
    Object.keys(ores.headers || {}).forEach(function(header) {
      if (NO_PROXY_HEADERS.indexOf(header) === -1)
        res.setHeader(header, ores.headers[header]);
    });
  }

  /**
   * Forward `req` upstream and relay the answer to `res` without touching
   * the cache.  A failed upstream request is answered with a 500.
   *
   * @param {http.IncomingMessage} req - client request (its body is piped)
   * @param {http.ServerResponse} res - client response
   * @param {object} parsed - result of `url.parse(req.url)`
   * @param {function} log - per-request debug logger
   */
  function passthrough(req, res, parsed, log) {
    var uri = proxyoptions(req, opts.url + parsed.path);
    var oreq = http.request(uri, function(ores) {
      copyhead(ores, res);
      ores.pipe(res);
    });
    oreq.on('error', function(e) {
      log('error with proxy request %s', e.message);
      res.statusCode = 500;
      res.end();
    });
    req.pipe(oreq);
  }

  /**
   * HTTP request handler.
   *
   * - Requests that are not GET/HEAD, or whose path does not match
   *   `opts.regex`, are proxied upstream untouched.
   * - Cacheable requests are served from the local file when it exists.
   * - Otherwise the first GET for a file downloads it (to the client and to
   *   `<file>.in-progress`, renamed when complete) while later requests for
   *   the same file are queued in `inprogress` and answered by finish().
   *
   * @param {http.IncomingMessage} req
   * @param {http.ServerResponse} res
   */
  function onrequest(req, res) {
    accesslog(req, res);

    // every debug line of this request is tagged with the same id
    var _id = uuid.v4();
    function log() {
      if (!opts.debug)
        return;
      var s = util.format.apply(util, arguments);
      return console.error('[%s] %s', _id, s);
    }
    log('INCOMING REQUEST - %s %s', req.method, req.url);

    // parse the URL and determine the filename; normalizing the absolute
    // pathname collapses any ".." so the result stays below the CWD
    var parsed = url.parse(req.url);
    var file;
    try {
      file = '.' + path.posix.normalize(decodeURIComponent(parsed.pathname));
    } catch (e) {
      log('failed to parse pathname - sending 400 to client -', e.message);
      res.statusCode = 400;
      res.end();
      return;
    }

    // a decoded NUL byte can never name a file and must not reach fs calls
    if (file.indexOf('\0') !== -1) {
      log('pathname contains a NUL byte - sending 400 to client');
      res.statusCode = 400;
      res.end();
      return;
    }

    // If the request is not a HEAD or GET request, or if it does not match the
    // regex supplied, we simply proxy it without a cache.
    if (CACHE_METHODS.indexOf(req.method) < 0 || !opts.regex.test(file)) {
      log('request will be proxied with no caching');
      passthrough(req, res, parsed, log);
      return;
    }

    // check to see if the file exists
    fs.stat(file, function(err, stats) {
      // directory, give up
      if (stats && stats.isDirectory()) {
        log('%s is a directory - sending 400 to client', file);
        res.statusCode = 400;
        res.end();
        return;
      }

      // file exists, stream it locally
      if (stats) {
        log('%s is a file (cached) - streaming to client', file);
        streamfile(file, stats, req, res);
        return;
      }

      // another request is already proxying for this file, we wait
      if (hap(inprogress, file)) {
        log('%s download in progress - response queued', file);
        inprogress[file].push([req, res]);
        return;
      }

      // Only a GET carries the body that belongs in the cache.  A HEAD
      // response has no body, so saving it would cache an empty file.
      if (req.method !== 'GET') {
        log('%s is not cached - proxying %s without caching', file, req.method);
        passthrough(req, res, parsed, log);
        return;
      }

      // error with stat, proxy it and populate the cache
      inprogress[file] = [];

      // Hand the queue for this file to finish() exactly once, always naming
      // the file so the entry is released.  Pass `ores` only when the file
      // was cached successfully.
      var released = false;
      function release(ores) {
        if (released)
          return;
        released = true;
        if (ores)
          finish(file, ores);
        else
          finish(file);
      }

      var uristring = opts.url + parsed.path;
      var uri = proxyoptions(req, uristring);
      log('proxying %s to %s', uri.method, uristring);

      // proxy it
      var oreq = http.request(uri, function(ores) {
        copyhead(ores, res);

        // anything but a plain 200 is relayed but never cached
        if (res.statusCode !== 200) {
          ores.pipe(res);
          release();
          return;
        }

        mkdirp(path.dirname(file), function(err) {
          if (err) {
            // cannot cache without the directory - still serve this client
            log('failed to create directory for %s - %s', file, err.message);
            ores.pipe(res);
            release();
            return;
          }

          // download under a temporary name so a partial transfer is never
          // mistaken for a cached file
          var tmp = file + '.in-progress';
          log('saving local file to %s', tmp);
          var ws = fs.createWriteStream(tmp);

          // one copy of the upstream body for the cache, one for the client
          var ores_ws = new clone(ores);
          var ores_res = new clone(ores);

          ws.on('finish', function() {
            fs.rename(tmp, file, function(err) {
              if (err) {
                log('failed to rename %s to %s', tmp, file);
                release();
              } else {
                log('renamed %s to %s', tmp, file);
                release(ores);
              }
            });
          });
          ws.on('error', function(e) {
            log('failed to save local file %s', e.message);
            // ws is fed by the clone, so that is the pipe to undo
            ores_ws.unpipe(ws);
            release();
          });

          ores_ws.pipe(ws);
          ores_res.pipe(res);
        });
      });
      oreq.on('error', function(e) {
        log('error with proxy request %s', e.message);
        release();
        res.statusCode = 500;
        res.end();
      });
      oreq.end();
    });
  }
  /**
   * Finish the requests queued in `inprogress[file]` and release the entry.
   *
   * @param {string} [file] - cache filename the queue is registered under.
   *   When it is missing, or nothing is registered under it, the call is a
   *   no-op instead of a crash.
   * @param {object} [ores] - upstream response of the completed download.
   *   When given, the cached file is streamed to every queued request with
   *   the upstream status code and headers.  When omitted, the download is
   *   considered failed and every queued request is answered with a 400.
   */
  function finish(file, ores) {
    // nothing to answer: no filename, or no queue registered for it
    if (!file || !inprogress[file])
      return;

    if (!ores) {
      // the download failed - fail everything that was waiting on it
      inprogress[file].forEach(function(o) {
        var res = o[1];
        res.statusCode = 400;
        res.end();
      });
      delete inprogress[file];
      return;
    }

    fs.stat(file, function(err, stats) {
      // the entry may have been released while stat was pending
      var waiting = inprogress[file] || [];

      if (stats && stats.isDirectory()) {
        // directory, give up
        waiting.forEach(function(o) {
          var res = o[1];
          res.statusCode = 400;
          res.end();
        });
      } else if (stats) {
        // file exists, stream it locally
        waiting.forEach(function(o) {
          var req = o[0];
          var res = o[1];
          res.statusCode = ores.statusCode;
          Object.keys(ores.headers || {}).forEach(function(header) {
            if (NO_PROXY_HEADERS.indexOf(header) === -1)
              res.setHeader(header, ores.headers[header]);
          });
          streamfile(file, stats, req, res);
        });
      } else {
        // not found
        waiting.forEach(function(o) {
          var res = o[1];
          res.statusCode = 500;
          res.end();
        });
      }

      delete inprogress[file];
    });
  }
  /**
   * Decide whether an `If-None-Match` request header matches `etag`.
   *
   * The header may hold `*` or a comma-separated list of entity tags.
   * Comparison is weak, so a `W/` prefix on a candidate is ignored.
   */
  function etagmatches(header, etag) {
    if (typeof header !== 'string')
      return false;

    return header.split(',').some(function(candidate) {
      candidate = candidate.trim();
      return candidate === '*' || candidate.replace(/^W\//, '') === etag;
    });
  }

  /**
   * Given a filename and its stats object (and req and res), stream the file
   * to the client.
   *
   * Sets Last-Modified, Content-Type and an ETag derived from the file size
   * and mtime.  Answers 304 without a body when the request's If-None-Match
   * matches that ETag, and answers HEAD requests with headers only.
   *
   * @param {string} file - path of the cached file
   * @param {fs.Stats} stats - stat result for `file`
   * @param {http.IncomingMessage} req
   * @param {http.ServerResponse} res
   */
  function streamfile(file, stats, req, res) {
    var etag = util.format('"%d-%d"', stats.size, stats.mtime.getTime());

    res.setHeader('Last-Modified', stats.mtime.toUTCString());
    res.setHeader('Content-Type', mime.lookup(file));
    res.setHeader('ETag', etag);

    if (etagmatches(req.headers['if-none-match'], etag)) {
      // etag matched, end the request
      res.statusCode = 304;
      res.end();
      return;
    }

    res.setHeader('Content-Length', stats.size);
    if (req.method === 'HEAD') {
      res.end();
      return;
    }

    var rs = fs.createReadStream(file);
    rs.on('error', function(e) {
      res.statusCode = e.code === 'ENOENT' ? 404 : 500;
      res.end();
    });
    rs.pipe(res);

    // stop reading the file when the client goes away
    res.on('close', rs.destroy.bind(rs));
  }
  299. }