Explorar el Código

initial commit

Dave Eddy hace 10 años
commit
3e138beb64
Se han modificado 5 ficheros con 464 adiciones y 0 borrados
  1. 2 0
      .gitignore
  2. 72 0
      README.md
  3. 327 0
      fs-caching-server.js
  4. 37 0
      package.json
  5. 26 0
      smf/manifest.xml

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+node_modules
+cache

+ 72 - 0
README.md

@@ -0,0 +1,72 @@
+fs-caching-server
+=================
+
+A caching HTTP server/proxy that stores data on the local filesystem
+
+Installation
+------------
+
+    [sudo] npm install -g fs-caching-server
+
+Description
+-----------
+
+The `fs-caching-server` program installed can be used to spin up an HTTP server
+that acts as a proxy to any other HTTP(s) server - with the added ability to
+cache GET and HEAD requests that match a given regex.
+
+Example
+-------
+
+This will create a caching proxy that fronts Joyent's pkgsrc servers
+
+    $ mkdir cache
+    $ fs-caching-server -c cache/ -d -U http://pkgsrc.joyent.com
+    listening on http://0.0.0.0:8080
+    proxying requests to http://pkgsrc.joyent.com
+    caching matches of /\.(png|jpg|jpeg|css|html|js|tar|tgz|tar\.gz)$/
+    caching to /home/dave/dev/fs-caching-server/cache
+
+`-d` enables debug output which can be used to determine if a file was a cache
+hit, cache miss, or skipped the cache completely.  For example, we can request
+a file twice to see that it will be proxied and downloaded the first time, and
+the second time it will just be streamed from the local cache.
+
+    [58b93965-b7de-4669-9cb1-aff39e16a4fb] INCOMING REQUEST - GET /packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz
+    [58b93965-b7de-4669-9cb1-aff39e16a4fb] proxying GET to http://pkgsrc.joyent.com/packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz
+    [58b93965-b7de-4669-9cb1-aff39e16a4fb] saving local file to ./packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz.in-progress
+    10.0.1.35 - - [16/May/2015:20:31:39 -0400] "GET /packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz HTTP/1.1" 200 12432 "-" "libfetch/2.0"
+    [58b93965-b7de-4669-9cb1-aff39e16a4fb] renamed ./packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz.in-progress to ./packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz
+    ...
+    [ff8a1519-597f-4f9a-a999-bd05677896c2] INCOMING REQUEST - GET /packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz
+    [ff8a1519-597f-4f9a-a999-bd05677896c2] ./packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz is a file (cached) - streaming to client
+    10.0.1.35 - - [16/May/2015:20:32:48 -0400] "GET /packages/SmartOS/2014Q4/x86_64/All/watch-3.2.6nb1.tgz HTTP/1.1" 200 12432 "-" "curl/7.39.0"
+
+The lines that begin with `[<uuid>]` are only printed when debug (`-d`) is
+enabled - each UUID represents a unique incoming request.  The first request
+shows the file was proxied to pkgsrc.joyent.com and streamed to both the client
+and the local filesystem.  The second request shows the file was already
+present so it was streamed to the client without ever reaching out to
+pkgsrc.joyent.com.
+
+Usage
+-----
+
+    $ fs-caching-server -h
+    usage: fs-caching-server [options]
+
+    options
+      -c, --cache-dir <dir>     directory to use for caching data, defaults to CWD
+      -d, --debug               enable debug logging to stderr
+      -H, --host <host>         [env FS_CACHE_HOST] the host on which to listen, defaults to 0.0.0.0
+      -h, --help                print this message and exit
+      -p, --port <port>         [env FS_CACHE_PORT] the port on which to listen, defaults to 8080
+      -r, --regex <regex>       [env FS_CACHE_REGEX] regex to match to cache files, defaults to \.(png|jpg|jpeg|css|html|js|tar|tgz|tar\.gz)$
+      -U, --url <url>           [env FS_CACHE_URL] URL to proxy to, defaults to undefined
+      -u, --updates             check npm for available updates
+      -v, --version             print the version number and exit
+
+License
+-------
+
+MIT License

+ 327 - 0
fs-caching-server.js

@@ -0,0 +1,327 @@
+#!/usr/bin/env node
+/**
+ * A caching HTTP server/proxy that stores data on the local filesystem
+ *
+ * Author: Dave Eddy <dave@daveeddy.com>
+ * Date: May 05, 2015
+ * License: MIT
+ */
+
+var fs = require('fs');
+var http = require('http');
+var url = require('url');
+var util = require('util');
+
+var accesslog = require('access-log');
+var getopt = require('posix-getopt');
+var mime = require('mime');
+var mkdirp = require('mkdirp');
+var path = require('path-platform');
+var uuid = require('node-uuid');
+
+var package = require('./package.json');
+
+function hap(o, p) {
+  return ({}).hasOwnProperty.call(o, p);
+}
+
+// headers that are never copied across the proxy, in either direction (request or response)
+var NO_PROXY_HEADERS = ['date', 'server', 'host'];
+
+// only these methods are eligible for the cache - every other method is proxied without caching
+var CACHE_METHODS = ['GET', 'HEAD'];
+
+// runtime options - defaults come from the environment, command line arguments override them
+var opts = {
+  host: process.env.FS_CACHE_HOST || '0.0.0.0',
+  port: process.env.FS_CACHE_PORT || 8080, // a string when it comes from the environment
+  url: process.env.FS_CACHE_URL, // required - checked once argument parsing is done
+  regex: process.env.FS_CACHE_REGEX || '\\.(png|jpg|jpeg|css|html|js|tar|tgz|tar\\.gz)$', // pattern source, compiled to a RegExp after parsing
+  debug: process.env.FS_CACHE_DEBUG, // any non-empty value turns debug logging on
+};
+
+var usage = [
+  'usage: fs-caching-server [options]',
+  '',
+  'options',
+  '  -c, --cache-dir <dir>     directory to use for caching data, defaults to CWD',
+  '  -d, --debug               enable debug logging to stderr',
+  '  -H, --host <host>         [env FS_CACHE_HOST] the host on which to listen, defaults to ' + opts.host,
+  '  -h, --help                print this message and exit',
+  '  -p, --port <port>         [env FS_CACHE_PORT] print this message and exit',
+  '  -r, --regex <regex>       [env FS_CACHE_REGEX] regex to match to cache files, defaults to ' + opts.regex,
+  '  -U, --url <url>           [env FS_CACHE_URL] URL to proxy to, defaults to ' + opts.url,
+  '  -u, --updates             check npm for available updates',
+  '  -v, --version             print the version number and exit',
+].join('\n');
+
+var options = [ // option spec: a trailing `:` marks options that read option.optarg below, `(name)` is the long form
+  'c:(cache-dir)',
+  'd(debug)',
+  'H:(host)',
+  'h(help)',
+  'p:(port)',
+  'r:(regex)',
+  'U:(url)',
+  'u(updates)',
+  'v(version)'
+].join('');
+var parser = new getopt.BasicParser(options, process.argv);
+var option;
+while ((option = parser.getopt()) !== undefined) {
+  switch (option.option) {
+    case 'c': process.chdir(option.optarg); break; // cache paths are relative to the CWD, so -c just changes directory
+    case 'd': opts.debug = true; break;
+    case 'H': opts.host = option.optarg; break;
+    case 'h': console.log(usage); process.exit(0); break;
+    case 'p': opts.port = parseInt(option.optarg, 10); break; // NOTE(review): a non-numeric value yields NaN and is not rejected here
+    case 'r': opts.regex = option.optarg; break;
+    case 'U': opts.url = option.optarg; break;
+    case 'u': // check for updates
+      require('latest').checkupdate(package, function(ret, msg) {
+        console.log(msg);
+        process.exit(ret);
+      });
+      return; // skip the rest of the script - the callback above exits the process
+    case 'v': console.log(package.version); process.exit(0); break;
+    default: console.error(usage); process.exit(1);
+  }
+}
+
+if (!opts.url) {
+  console.error('url must be specified with `-U <url>` or as FS_CACHE_URL');
+  process.exit(1);
+}
+
+
+// strip any trailing slashes so the request path can be appended to the URL directly
+opts.url = opts.url.replace(/\/*$/, '');
+
+// compile the regex option (a pattern string up to this point) - this may throw
+opts.regex = new RegExp(opts.regex);
+
+// start the server - `listening` runs once the socket is bound
+http.createServer(onrequest).listen(opts.port, opts.host, listening);
+
+function listening() {
+  console.log('listening on http://%s:%d', opts.host, opts.port);
+  console.log('proxying requests to %s', opts.url);
+  console.log('caching matches of %s', opts.regex);
+  console.log('caching to %s', process.cwd());
+}
+
+// store files that are currently in progress -
+// if multiple requests are made for the same file, this will ensure that
+// only 1 connection is made to the server, and all subsequent requests will
+// be queued and then handled after the initial transfer is finished
+var inprogress = {};
+function onrequest(req, res) {
+  accesslog(req, res);
+
+  var _id = uuid.v4();
+  function log() {
+    if (!opts.debug)
+      return;
+    var s = util.format.apply(util, arguments);
+    return console.error('[%s] %s', _id, s);
+  }
+  log('INCOMING REQUEST - %s %s', req.method, req.url);
+
+  // parse the URL and determine the filename
+  var parsed = url.parse(req.url);
+  var file;
+  try {
+    file = '.' + path.posix.normalize(decodeURIComponent(parsed.pathname));
+  } catch (e) {
+    log('failed to parse pathname - sending 400 to client -', e.message);
+    res.statusCode = 400;
+    res.end();
+    return;
+  }
+
+  // If the request is not a HEAD or GET request, or if it does not match the
+  // regex supplied, we simply proxy it without a cache.
+  if (CACHE_METHODS.indexOf(req.method) < 0 || ! opts.regex.test(file)) {
+    log('request will be proxied with no caching');
+    var uristring = opts.url + parsed.path;
+    var uri = url.parse(uristring);
+    uri.method = req.method;
+    uri.headers = {};
+    Object.keys(req.headers || {}).forEach(function(header) {
+      if (NO_PROXY_HEADERS.indexOf(header) === -1)
+        uri.headers[header] = req.headers[header];
+    });
+    uri.headers.host = uri.host;
+    var oreq = http.request(uri, function(ores) {
+      res.statusCode = ores.statusCode;
+      Object.keys(ores.headers || {}).forEach(function(header) {
+        if (NO_PROXY_HEADERS.indexOf(header) === -1)
+          res.setHeader(header, ores.headers[header]);
+      });
+      ores.pipe(res);
+    });
+    oreq.on('error', function(e) {
+      res.statusCode = 500;
+      res.end();
+    });
+    req.pipe(oreq);
+    return;
+  }
+
+  // check to see if the file exists
+  fs.stat(file, function(err, stats) {
+    // directory, give up
+    if (stats && stats.isDirectory()) {
+      log('%s is a directory - sending 400 to client', file);
+      res.statusCode = 400;
+      res.end();
+      return;
+    }
+
+    // file exists, stream it locally
+    if (stats) {
+      log('%s is a file (cached) - streaming to client', file);
+      streamfile(file, stats, req, res);
+      return;
+    }
+
+    // another request is already proxying for this file, we wait
+    if (hap(inprogress, file)) {
+      log('%s download in progress - response queued', file);
+      inprogress[file].push([req, res]);
+      return;
+    }
+
+    // error with stat, proxy it
+    inprogress[file] = [];
+    var uristring = opts.url + parsed.path;
+    var uri = url.parse(uristring);
+    uri.method = req.method;
+    uri.headers = {};
+    Object.keys(req.headers || {}).forEach(function(header) {
+      if (NO_PROXY_HEADERS.indexOf(header) === -1)
+        uri.headers[header] = req.headers[header];
+    });
+    uri.headers.host = uri.host;
+    log('proxying %s to %s', uri.method, uristring);
+
+    // proxy it
+    var oreq = http.request(uri, function(ores) {
+      res.statusCode = ores.statusCode;
+      Object.keys(ores.headers || {}).forEach(function(header) {
+        if (NO_PROXY_HEADERS.indexOf(header) === -1)
+          res.setHeader(header, ores.headers[header]);
+      });
+
+      if (res.statusCode !== 200) {
+        ores.pipe(res);
+        finish();
+        return;
+      }
+
+      mkdirp(path.dirname(file), function(err) {
+        var tmp = file + '.in-progress';
+        log('saving local file to %s', tmp);
+        var ws = fs.createWriteStream(tmp);
+        ws.on('finish', function() {
+          fs.rename(tmp, file, function(err) {
+            if (err) {
+              log('failed to rename %s to %s', tmp, file);
+              finish();
+            } else {
+              log('renamed %s to %s', tmp, file);
+              finish(file, ores);
+            }
+          });
+        });
+        ws.on('error', function(e) {
+          log('failed to save local file %s', e.message);
+          ores.unpipe(ws);
+          finish();
+        });
+        ores.pipe(ws);
+        ores.pipe(res);
+      });
+    });
+    oreq.on('error', function(e) {
+      log('error with proxy request %s', e.message);
+      finish();
+      res.statusCode = 500;
+      res.end();
+    });
+    oreq.end();
+  });
+}
+
+// finish queued up requests
+function finish(file, ores) {
+  if (!file || !ores) {
+    inprogress[file].forEach(function(o) {
+      var res = o[1];
+      res.statusCode = 400;
+      res.end();
+    });
+    delete inprogress[file];
+    return;
+  }
+  fs.stat(file, function(err, stats) {
+    if (stats && stats.isDirectory()) {
+      // directory, give up
+      inprogress[file].forEach(function(o) {
+        var res = o[1];
+        res.statusCode = 400;
+        res.end();
+      });
+    } else if (stats) {
+      // file exists, stream it locally
+      inprogress[file].forEach(function(o) {
+        var req = o[0];
+        var res = o[1];
+        res.statusCode = ores.statusCode;
+        Object.keys(ores.headers || {}).forEach(function(header) {
+          if (NO_PROXY_HEADERS.indexOf(header) === -1)
+            res.setHeader(header, ores.headers[header]);
+        });
+        streamfile(file, stats, req, res);
+      });
+      return;
+    } else {
+      // not found
+      inprogress[file].forEach(function(o) {
+        var res = o[1];
+        res.statusCode = 500;
+        res.end();
+      });
+    }
+    delete inprogress[file];
+  });
+}
+
+// given a filename and its stats object (and req and res)
+// stream it
+function streamfile(file, stats, req, res) {
+  var etag = util.format('"%d-%d"', stats.size, stats.mtime.getTime());
+  res.setHeader('Last-Modified', stats.mtime.toUTCString());
+  res.setHeader('Content-Type', mime.lookup(file));
+  res.setHeader('ETag', etag);
+  if (req.headers['if-none-match'] === etag) {
+    // etag matched, end the request
+    res.statusCode = 304;
+    res.end();
+    return;
+  }
+
+  res.setHeader('Content-Length', stats.size);
+  if (req.method === 'HEAD') {
+    res.end();
+    return;
+  }
+
+  var rs = fs.createReadStream(file);
+  rs.pipe(res);
+  rs.on('error', function(e) {
+    res.statusCode = e.code === 'ENOENT' ? 404 : 500;
+    res.end();
+  });
+  res.on('close', rs.destroy.bind(rs));
+}

+ 37 - 0
package.json

@@ -0,0 +1,37 @@
+{
+  "name": "fs-caching-server",
+  "version": "0.0.0",
+  "description": "A caching HTTP server/proxy that stores data on the local filesystem",
+  "main": "fs-caching-server.js",
+  "preferGlobal": true,
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "bin": {
+    "fs-caching-server": "./fs-caching-server.js"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/bahamas10/node-fs-caching-server.git"
+  },
+  "keywords": [
+    "fs",
+    "caching",
+    "proxy"
+  ],
+  "author": "Dave Eddy <dave@daveeddy.com> (http://www.daveeddy.com)",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/bahamas10/node-fs-caching-server/issues"
+  },
+  "homepage": "https://github.com/bahamas10/node-fs-caching-server",
+  "dependencies": {
+    "access-log": "^0.3.9",
+    "latest": "^0.2.0",
+    "mime": "^1.3.4",
+    "mkdirp": "^0.5.0",
+    "node-uuid": "^1.4.3",
+    "path-platform": "^0.11.15",
+    "posix-getopt": "^1.1.0"
+  }
+}

+ 26 - 0
smf/manifest.xml

@@ -0,0 +1,26 @@
+<?xml version='1.0'?>
+<!DOCTYPE service_bundle SYSTEM '/usr/share/lib/xml/dtd/service_bundle.dtd.1'>
+<service_bundle type='manifest' name='application-fs-caching-server'>
+	<service name='application/fs-caching-server' type='service' version='0'>
+		<create_default_instance enabled='true'/>
+		<dependency name='dep0' grouping='require_all' restart_on='error' type='service'>
+			<service_fmri value='svc:/milestone/multi-user:default'/>
+		</dependency>
+		<method_context working_directory='/tmp'>
+			<method_credential user='nobody' group='other'/>
+			<method_environment> <!-- NOTE(review): FS_CACHE_URL is not set here and the start method passes no -U; fs-caching-server exits with an error when no URL is configured - confirm where the URL is meant to come from -->
+				<envvar name='FS_CACHE_HOST' value='0.0.0.0'/>
+				<envvar name='FS_CACHE_PORT' value='8080'/>
+				<envvar name='FS_CACHE_REGEX' value='\.(gif|png|jpg|tar|tgz|tar\.gz)$'/> <!-- leave blank for default -->
+				<envvar name='FS_CACHE_DEBUG' value=''/> <!-- set to anything to enable -->
+			</method_environment>
+		</method_context>
+		<exec_method name='start' type='method' exec='fs-caching-server &amp;' timeout_seconds='10'/>
+		<exec_method name='stop' type='method' exec=':kill' timeout_seconds='30'/>
+		<template>
+			<common_name>
+				<loctext xml:lang='C'>FS Caching Server</loctext>
+			</common_name>
+		</template>
+	</service>
+</service_bundle>