git » pyweave » commit 197b8ed

Initial commit

author Alberto Bertogli
2009-09-28 20:47:15 UTC
committer Alberto Bertogli
2009-09-28 20:47:15 UTC

Initial commit

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>

LICENSE +30 -0
config.py.sample +14 -0
pyweave.cgi +514 -0

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..987d3b9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
+
+I don't like licenses, because I don't like having to worry about all this
+legal stuff just for a simple piece of software I don't really mind anyone
+using. But I also believe that it's important that people share and give back;
+so I'm placing this work under the following license.
+
+
+BOLA - Buena Onda License Agreement (v1.1)
+------------------------------------------
+
+This work is provided 'as-is', without any express or implied warranty. In no
+event will the authors be held liable for any damages arising from the use of
+this work.
+
+To all effects and purposes, this work is to be considered Public Domain.
+
+
+However, if you want to be "buena onda", you should:
+
+1. Not take credit for it, and give proper recognition to the authors.
+2. Share your modifications, so everybody benefits from them.
+3. Do something nice for the authors.
+4. Help someone who needs it: sign up for some volunteer work or help your
+   neighbour paint the house.
+5. Don't waste. Anything, but specially energy that comes from natural
+   non-renewable resources. Extra points if you discover or invent something
+   to replace them.
+6. Be tolerant. Everything that's good in nature comes from cooperation.
+
+
diff --git a/config.py.sample b/config.py.sample
new file mode 100644
index 0000000..e91bde4
--- /dev/null
+++ b/config.py.sample
@@ -0,0 +1,14 @@
+#coding: utf8
+
+# This is the sample configuration file for pyweave.
+# If you omit a variable, the default will be used.
+#
+# If you prefer, you can set the values directly inside pyweave.cgi and not
+# have a configuration file.
+
+# Directory where entries are stored. Do *NOT* put it in an http-accessible
+# path, as that will leak information. Put it in a separate path and make it
+# read and writeable by the httpd user.
+data_path = "/var/weave/data/"
+
+
diff --git a/pyweave.cgi b/pyweave.cgi
new file mode 100755
index 0000000..dc6d9cf
--- /dev/null
+++ b/pyweave.cgi
@@ -0,0 +1,514 @@
+#!/usr/bin/env python
+#coding: utf8
+
+# pyweave - A single-file mozilla weave server.
+# Alberto Bertogli (albertito@blitiri.com.ar)
+
+#
+# Configuration section
+#
+# You can edit these values, or create a file named "config.py" and put them
+# there to make updating easier. The ones in config.py take precedence.
+#
+
+# Directory where entries are stored. Do *NOT* put it in an http-accessible
+# path, as that will leak information. Put it in a separate path and make it
+# read and writeable by the httpd user.
+data_path = "/var/weave/data/"
+
+#
+# End of configuration
+# DO *NOT* EDIT ANYTHING PAST HERE
+#
+
+
+import sys
+import os
+import errno
+import time
+import urllib
+import cgi
+
+try:
+	import cPickle as pickle
+except ImportError:
+	import pickle
+
+try:
+	import json
+except ImportError:
+	try:
+		import simplejson as json
+	except ImportError:
+		print "Error: json (or simplejson) module is needed"
+		sys.exit(1)
+
+
+# Load the config file, if there is one
+try:
+	from config import *
+except:
+	pass
+
+
+#
+# Storage backend
+#
+# The storage is very simple, using Python's pickle for serialization of
+# metadata and the filesystem for general storage. We have an intermediate
+# collection class to make things friendlier to use, but it has intimate
+# knowledge of the backend.
+#
+# Note that it does not concern itself with users, as each user has his/her
+# own independent storage.
+
+# TODO: validate parentid, previd exist on store, and that no children exist
+# on delete (?)
+
+def fsencode(s):
+	"""Returns a copy of s that is safe to use as a single file name.
+
+	'%' is escaped as '%P' and '/' as '%S', so the result never contains
+	a '/' but stays easy to read (unlike, for example, base64)."""
+	# '%' has to be escaped first, otherwise the '%' introduced when
+	# escaping '/' would be escaped again.
+	return s.replace('%', '%P').replace('/', '%S')
+
+def fsdecode(s):
+	"""Reverts the escaping done by fsencode(): '%S' becomes '/' and '%P'
+	becomes '%'."""
+	# The replacements are applied in the reverse order of fsencode()
+	return s.replace('%S', '/').replace('%P', '%')
+
+
+COL_PREFIX = "collection-"
+PAYLOAD_PREFIX = "payload-"
+
+def col_path(col_id):
+	"""Returns the name of the directory used to store the collection
+	with the given id: COL_PREFIX followed by the encoded id."""
+	encoded_id = fsencode(col_id)
+	return COL_PREFIX + encoded_id
+
+def payload_path(wbo_id):
+	"""Returns the name of the file used to store the payload of the WBO
+	with the given id: PAYLOAD_PREFIX followed by the encoded id."""
+	encoded_id = fsencode(wbo_id)
+	return PAYLOAD_PREFIX + encoded_id
+
+class dummy (object):
+	"Empty placeholder class; not referenced by the code visible here."
+	pass
+
+class WBO (object):
+	"""A Weave Basic Object: the metadata of one stored item and,
+	optionally, its payload.
+
+	Only the metadata is pickled; the payload is kept in a separate file
+	(see payload_path()) and has to be loaded and saved explicitly with
+	load_payload() and save_payload()."""
+
+	def __init__(self, id, parent_id = None, pred_id = None, modified = 0,
+			sortidx = 0, payload = None):
+		# We must be careful with what we put here as it will be
+		# pickled. That means no absolute paths (so the root data path
+		# can be changed).
+		self.id = id
+		self.parent_id = parent_id
+		self.pred_id = pred_id
+		self.modified = modified
+		self.sortidx = sortidx
+		self.payload = payload
+
+	def __getstate__(self):
+		"""Called by pickle.dump() to ask us what we need pickled. We
+		use it to remove the payload, to prevent it from being
+		pickled by accident."""
+		d = dict(self.__dict__)
+		d['payload'] = None
+		return d
+
+	def load_payload(self, basepath):
+		"""Reads the payload from its file inside the basepath
+		directory into self.payload; sets it to None if there is no
+		such file."""
+		p = basepath + '/' + payload_path(self.id)
+		if not os.path.exists(p):
+			self.payload = None
+			return
+
+		fd = open(p)
+		try:
+			self.payload = fd.read()
+		finally:
+			fd.close()
+
+	def save_payload(self, basepath):
+		"""Writes self.payload to its file inside the basepath
+		directory. Does nothing if there is no payload in memory."""
+		# The payload is never pickled, so an object that only had its
+		# metadata updated has no payload here. In that case the
+		# payload file (if any) must be left alone.
+		if self.payload is None:
+			return
+
+		data = self.payload
+		if isinstance(data, unicode):
+			# json gives us unicode strings, which can't be
+			# written to a file as-is unless they're pure ASCII
+			data = data.encode('utf8')
+
+		p = basepath + '/' + payload_path(self.id)
+		fd = open(p + '.tmp', 'w')
+		try:
+			fd.write(data)
+		finally:
+			# Close before renaming, so the content is flushed
+			fd.close()
+		os.rename(p + '.tmp', p)
+
+	def to_dict(self, basepath):
+		"""Returns a dictionary representing the object (payload
+		included, it gets loaded from the basepath directory), using
+		the key names the clients expect."""
+		self.load_payload(basepath)
+		return {
+			'id': self.id,
+			'parentid': self.parent_id,
+			'predecessorid': self.pred_id,
+			'modified': self.modified,
+			'sortindex': self.sortidx,
+			'payload': self.payload
+		}
+
+	def from_dict(self, d, mtime = None):
+		"""Updates the object from the dictionary d, which uses the
+		same keys as to_dict() returns; missing keys leave the
+		corresponding attribute unchanged.
+
+		The modification time is taken from mtime if given, otherwise
+		from d['modified'], otherwise it's the current time."""
+		if 'parentid' in d:
+			self.parent_id = d['parentid']
+		if 'predecessorid' in d:
+			self.pred_id = d['predecessorid']
+		if mtime is not None:
+			self.modified = mtime
+		elif 'modified' in d:
+			self.modified = d['modified']
+		else:
+			self.modified = time.time()
+		if 'sortindex' in d:
+			self.sortidx = d['sortindex']
+		if 'payload' in d:
+			self.payload = d['payload']
+
+class Storage (object):
+	"""The storage of a single user: a directory (basepath) that holds
+	one subdirectory per collection."""
+
+	def __init__(self, basepath):
+		self.basepath = basepath
+
+	def get_collection(self, id, create = False):
+		"""Returns the Collection with the given id.
+
+		If its directory does not exist, it is created when create is
+		true; otherwise KeyError is raised."""
+		colp = self.basepath + '/' + col_path(id)
+		if not os.path.exists(colp):
+			if not create:
+				raise KeyError(id)
+			os.mkdir(colp)
+		return Collection(self.basepath, id)
+
+	def new_collection(self, id):
+		"""Creates the directory for the collection with the given id
+		and returns the new Collection."""
+		os.mkdir(self.basepath + '/' + col_path(id))
+		return Collection(self.basepath, id)
+
+	def list_collections(self):
+		"Returns a list with all the Collections in the storage."
+		cs = []
+		for d in os.listdir(self.basepath):
+			if not d.startswith(COL_PREFIX):
+				continue
+			# The directory name holds the *encoded* id, while
+			# Collection expects the real one (it does the encoding
+			# on its own), so we have to decode it here.
+			col_id = fsdecode(d[len(COL_PREFIX):])
+			cs.append(Collection(self.basepath, col_id))
+		return cs
+
+# TODO: per-collection locking
+class Collection (object):
+	"""A named group of WBOs, stored in its own directory.
+
+	The metadata of all the WBOs lives in a single pickled dictionary
+	(the 'wbo.db' file, indexed by WBO id) inside the collection
+	directory; the payloads are stored next to it, one file each. The
+	metadata is loaded lazily, the first time the 'wbos' property is
+	used."""
+
+	def __init__(self, basepath, id):
+		"""basepath is the storage directory that contains (or will
+		contain) the collection directory, id is the collection id."""
+		self.basepath = basepath + '/' + col_path(id)
+		# Modification time of the directory, 0 if it doesn't exist
+		self.mtime = 0
+		if os.path.exists(self.basepath):
+			self.mtime = os.stat(self.basepath).st_mtime
+		self.id = id
+		self._wbos = None
+
+	@property
+	def wbos(self):
+		"Dictionary of WBOs indexed by id, loaded on first use."
+		if self._wbos is None:
+			self.load_wbos()
+		return self._wbos
+
+	def load_wbos(self):
+		"""Loads the WBO dictionary from 'wbo.db'; if the file does
+		not exist we start with an empty one."""
+		dbpath = self.basepath + '/wbo.db'
+		if not os.path.exists(dbpath):
+			self._wbos = {}
+			return
+
+		# Binary mode, as save_wbos() uses a binary pickle protocol
+		fd = open(dbpath, 'rb')
+		try:
+			self._wbos = pickle.load(fd)
+		finally:
+			fd.close()
+
+	def save_wbos(self):
+		"""Saves the WBO dictionary to 'wbo.db', creating the
+		collection directory if needed. The file is written to a
+		temporary name and then renamed into place."""
+		if not os.path.exists(self.basepath):
+			os.mkdir(self.basepath)
+		tp = self.basepath + '/wbo.db' + '.tmp'
+		fd = open(tp, 'wb')
+		try:
+			pickle.dump(self.wbos, fd, pickle.HIGHEST_PROTOCOL)
+		finally:
+			# Close before renaming, so the content is flushed
+			fd.close()
+		os.rename(tp, self.basepath + '/wbo.db')
+
+	def put_wbo_json(self, wid, json_obj, ts = None):
+		"""Creates or updates the WBO with id wid from json_obj (a
+		dictionary, see WBO.from_dict()) and saves its payload. ts is
+		the modification time to record, the current time by default.
+
+		Returns the modification time of the WBO. Note the metadata is
+		not written to disk until save_wbos() is called."""
+		if ts is None:
+			ts = time.time()
+
+		if wid in self.wbos:
+			wbo = self.wbos[wid]
+		else:
+			wbo = WBO(wid)
+			self.wbos[wbo.id] = wbo
+		wbo.from_dict(json_obj, ts)
+		wbo.save_payload(self.basepath)
+		return wbo.modified
+
+	def list_wbos(self, ids = None, pred_id = None, parent_id = None,
+			older = None, newer = None, full = None,
+			idx_above = None, idx_below = None, limit = None,
+			offset = None, sort = None):
+		"""Returns the WBOs that match all the given filters (the ones
+		left as None are ignored).
+
+		ids is either a list of ids or a string with comma-separated
+		ids. sort can be 'oldest', 'newest' (both by modification
+		time) or 'index' (highest sort index first). offset and limit
+		select a window of the filtered and sorted results.
+
+		If full is true the result is a list of dictionaries (see
+		WBO.to_dict()), otherwise it's a list of ids."""
+		ws = list(self.wbos.values())
+
+		if ids is not None:
+			if hasattr(ids, 'split'):
+				# A string: without splitting it, "in" would
+				# do substring matching against the ids
+				ids = ids.split(',')
+			wanted = set(ids)
+			ws = [ w for w in ws if w.id in wanted ]
+		if pred_id is not None:
+			ws = [ w for w in ws if w.pred_id == pred_id ]
+		if parent_id is not None:
+			ws = [ w for w in ws if w.parent_id == parent_id ]
+		if older is not None:
+			ws = [ w for w in ws if w.modified < older ]
+		if newer is not None:
+			ws = [ w for w in ws if w.modified > newer]
+		if idx_above is not None:
+			ws = [ w for w in ws if w.sortidx > idx_above ]
+		if idx_below is not None:
+			ws = [ w for w in ws if w.sortidx < idx_below ]
+
+		# Sort before applying offset and limit, otherwise they would
+		# select an arbitrary window instead of the first results
+		if sort == 'oldest':
+			ws.sort(key = lambda w: w.modified)
+		elif sort == 'newest':
+			ws.sort(key = lambda w: w.modified, reverse = True)
+		elif sort == 'index':
+			ws.sort(key = lambda w: w.sortidx, reverse = True)
+
+		# The offset goes first, so the limit is the maximum number of
+		# results returned after skipping
+		if offset is not None:
+			ws = ws[offset:]
+		if limit is not None:
+			ws = ws[:limit]
+
+		if full:
+			ws = [ w.to_dict(self.basepath) for w in ws ]
+		else:
+			ws = [ w.id for w in ws ]
+
+		return ws
+
+	def delete_wbos(self, ids, parent_id, older, newer, limit, offset):
+		"""Deletes the WBOs that match the given filters (same meaning
+		as in list_wbos()), payload files included, and saves the
+		resulting metadata."""
+		# XXX: the spec says there's a sort parameter, but that makes
+		# no sense since we're going to delete them. Maybe there's
+		# something else going on.
+		wids = self.list_wbos(ids, parent_id = parent_id,
+				older = older, newer = newer, limit = limit,
+				offset = offset)
+		for wid in wids:
+			ppath = self.basepath + '/' + payload_path(wid)
+			if os.path.exists(ppath):
+				os.unlink(ppath)
+			del self.wbos[wid]
+		self.save_wbos()
+
+
+#
+# HTTP request handling
+#
+
+# TODO: handle  X-If-Unmodified-Since header
+
+class InvalidPathError (Exception):
+	"Raised by path_info() when the request path is not valid."
+	pass
+
+def path_info(path):
+	"""Validates the request path and splits it into its parts.
+
+	The path must look like "/0.5/<user>/<component>[/...]", and there
+	must be an entry for the user inside data_path. Returns the username
+	and the list of the remaining components; raises InvalidPathError if
+	the path is not valid."""
+	parts = path.strip('/').split('/')
+
+	# We need at least the version, the user and one more component, and
+	# the only version we accept is 0.5
+	if len(parts) < 3 or parts[0] != '0.5':
+		raise InvalidPathError
+
+	username = parts[1]
+	userdir = data_path + '/' + fsencode(username)
+	if not os.path.exists(userdir):
+		raise InvalidPathError
+
+	return username, parts[2:]
+
+def debug(msg):
+	"Writes msg to stderr as a single line with a 'DEBUG: ' prefix."
+	line = 'DEBUG: %s\n' % str(msg)
+	sys.stderr.write(line)
+
+def error(http, msg = None):
+	"""Sends an error response to the client and logs it using debug().
+
+	http is the numeric HTTP status code. msg, if given, is sent both in
+	the X-Weave-Alert header and as the (plain text) body."""
+	et = {
+		400: 'Bad Request',
+		401: 'Unauthorized',
+		404: 'Not Found',
+		500: 'Internal server error',
+		503: 'Service Unavailable',
+	}
+
+	# debug() already appends the newline
+	debug('pyweave err %d: %s' % (http, msg))
+
+	# CGI scripts set the response status using the "Status" header
+	# field; a raw "HTTP/1.1 ..." status line is only valid in NPH scripts
+	# and gets rejected by the web server as a malformed header.
+	print 'Status: ' + str(http) + ' ' + et.get(http, str(http))
+	if msg:
+		# The header value must be kept on a single line
+		alert = msg.replace('\r', ' ').replace('\n', ' ')
+		print 'X-Weave-Alert:', alert
+		print 'Content-type: text/plain'
+		print
+		print msg
+	print
+
+def bad_request(msg = None):
+	"Sends a 400 (Bad Request) error response, see error() for details."
+	error(http = 400, msg = msg)
+
+def output(obj, timestamp = None):
+	"""Sends obj to the client as a successful response.
+
+	The X-Weave-Timestamp header is set to timestamp (the current time
+	by default). The body format is selected by the Accept header:
+	"application/whoisi" (length-prefixed json records),
+	"application/newlines" (one json record per line), or a single json
+	document in any other case."""
+	if timestamp is None:
+		timestamp = time.time()
+	print 'X-Weave-Timestamp: %.2f' % timestamp
+
+	accept = os.environ.get('HTTP_ACCEPT', 'application/json')
+	if accept == 'application/whoisi':
+		import struct
+		print "Content-type: application/whoisi"
+		print
+		# Each record is a 32-bit integer with the length of the
+		# record, followed by the json record itself. We accept both a
+		# single object and a list of them.
+		records = obj
+		if not isinstance(obj, (list, tuple)):
+			records = [obj]
+		for o in records:
+			s = json.dumps(o)
+			# The spec doesn't specify the endianness of the
+			# integer, send it in host byte order.
+			# NOTE(review): other servers may use network byte
+			# order - confirm against a real client.
+			sys.stdout.write(struct.pack('I', len(s)) + s)
+	elif accept == 'application/newlines':
+		print "Content-type: application/newlines"
+		print
+		# We assume we have multiple objects in a list
+		for o in obj:
+			print json.dumps(o).replace('\n', r'\u000a')
+	else:
+		print "Content-type: application/json"
+		print
+		print json.dumps(obj)
+
+def fromform(form, name, convert = None):
+	"""Returns the first value of the field 'name' in the form, or None
+	if the field is not there. If a convert function is given, it is
+	applied to the value (but never to a missing one)."""
+	value = form.getfirst(name, None)
+	if value is None or not convert:
+		return value
+	return convert(value)
+
+def read_stdin():
+	"""Reads the request body from stdin (as many bytes as the
+	CONTENT_LENGTH environment variable says), logs it using debug() and
+	returns it."""
+	length = int(os.environ['CONTENT_LENGTH'])
+	body = sys.stdin.read(length)
+	debug('STDIN: ' + repr(body))
+	return body
+
+def _split_ids(v):
+	"Converts the comma-separated 'ids' request parameter into a list."
+	return v.split(',')
+
+def _get_info(storage, what):
+	"Handles GET info/<what>: general information about the storage."
+	if what == 'collections':
+		cs = storage.list_collections()
+		output(dict((c.id, c.mtime) for c in cs))
+	elif what == 'collection_counts':
+		cs = storage.list_collections()
+		output(dict((c.id, len(c.wbos)) for c in cs))
+	elif what == 'quota':
+		# TODO
+		output((1, 100 * 1024))
+	else:
+		bad_request("Unknown info request")
+
+def _get_storage(storage, path):
+	"""Handles GET storage/<collection>[/<id>]: returns either a single
+	WBO or the list of the WBOs that match the request parameters."""
+	try:
+		c = storage.get_collection(path[1])
+	except KeyError:
+		# dummy empty collection
+		c = Collection(storage.basepath, path[1])
+
+	if len(path) == 3:
+		if path[2] in c.wbos:
+			output(c.wbos[path[2]].to_dict(c.basepath))
+		else:
+			error(404, "WBO not found")
+		return
+
+	form = cgi.FieldStorage()
+	wl = c.list_wbos(
+		ids = fromform(form, "ids", _split_ids),
+		pred_id = fromform(form, "predecessorid"),
+		parent_id = fromform(form, "parentid"),
+		older = fromform(form, "older", float),
+		newer = fromform(form, "newer", float),
+		full = fromform(form, "full"),
+		idx_above = fromform(form, "index_above", int),
+		idx_below = fromform(form, "index_below", int),
+		limit = fromform(form, "limit", int),
+		offset = fromform(form, "offset", int),
+		sort = fromform(form, "sort"))
+	output(wl)
+
+def _handle_get(storage, path):
+	"Handles GET requests, dispatching on the first path component."
+	if len(path) < 2:
+		bad_request("Path too short")
+		# Nothing else to do: going on would use path[1]
+		return
+
+	if path[0] == 'info':
+		_get_info(storage, path[1])
+	elif path[0] == 'storage':
+		_get_storage(storage, path)
+	else:
+		bad_request("Unknown GET request")
+
+def _handle_put(storage, path):
+	"Handles PUT storage/<collection>/<id>: stores a single WBO."
+	if path[0] != 'storage' or len(path) != 3:
+		bad_request("Malformed PUT path")
+		return
+
+	c = storage.get_collection(path[1], create = True)
+	ts = c.put_wbo_json(path[2], json.loads(read_stdin()))
+	c.save_wbos()
+	output(ts, timestamp = ts)
+
+def _handle_post(storage, path):
+	"Handles POST storage/<collection>: stores a list of WBOs."
+	if path[0] != 'storage' or len(path) != 2:
+		bad_request("Malformed POST path")
+		return
+
+	ts = time.time()
+	c = storage.get_collection(path[1], create = True)
+	objs = json.loads(read_stdin())
+
+	res = {
+		'modified': ts,
+		'success': [],
+		'failed': {},
+	}
+	for o in objs:
+		c.put_wbo_json(o['id'], o, ts)
+		res['success'].append(o['id'])
+	c.save_wbos()
+	output(res, timestamp = ts)
+
+def _handle_delete(storage, path):
+	"""Handles DELETE storage/<collection>[/<id>]: removes either a
+	single WBO or the ones that match the request parameters."""
+	if path[0] != 'storage' or len(path) < 2:
+		bad_request("Malformed DELETE path")
+		return
+
+	try:
+		c = storage.get_collection(path[1])
+	except KeyError:
+		# Nothing to delete
+		output(time.time())
+		return
+
+	if len(path) == 3:
+		# Go through delete_wbos() so the payload file is removed too,
+		# and deleting an unknown id is not an error
+		c.delete_wbos([path[2]], None, None, None, None, None)
+	else:
+		form = cgi.FieldStorage()
+		c.delete_wbos(
+			fromform(form, "ids", _split_ids),
+			fromform(form, "parentid"),
+			fromform(form, "older", float),
+			fromform(form, "newer", float),
+			fromform(form, "limit", int),
+			fromform(form, "offset", int))
+	output(time.time())
+
+def handle_cgi():
+	"""Handles a CGI request: validates the path and the user, and then
+	dispatches to the handler for the request method.
+
+	The request comes in the usual CGI environment variables
+	(REQUEST_METHOD, PATH_INFO, REMOTE_USER) and stdin; the response is
+	written to stdout."""
+	user = os.environ.get('REMOTE_USER', None)
+	method = os.environ['REQUEST_METHOD']
+	try:
+		path_user, path = path_info(os.environ['PATH_INFO'])
+	except InvalidPathError:
+		error(404, "Path error")
+		return
+
+	# Users can only access their own storage
+	if user != path_user:
+		error(401, "User/path mismatch: %s - %s" % (user, path_user))
+		return
+
+	# Encode the user the same way path_info() did when it checked the
+	# directory exists
+	storage = Storage(data_path + '/' + fsencode(user))
+
+	handlers = {
+		'GET': _handle_get,
+		'PUT': _handle_put,
+		'POST': _handle_post,
+		'DELETE': _handle_delete,
+	}
+	if method not in handlers:
+		# Always send a response, an empty output is a CGI error
+		bad_request("Unknown method")
+		return
+	handlers[method](storage, path)
+
+
+def handle_cmd():
+	"""Handles being run from the command line: tells the user this is
+	a CGI application, and returns the exit status to use (1)."""
+	for line in ("This is a CGI application.",
+			"It only runs inside a web server."):
+		print line
+	return 1
+
+# Web servers set GATEWAY_INTERFACE when running CGI applications; without
+# it we are being run from the command line.
+if 'GATEWAY_INTERFACE' not in os.environ:
+	sys.exit(handle_cmd())
+
+try:
+	handle_cgi()
+except Exception:
+	# Tell the client, and re-raise so the traceback is not lost
+	error(500, "Unhandled exception")
+	raise
+