#!/usr/bin/env python
# urraka - An epub manager.
# Alberto Bertogli (albertito@blitiri.com.ar)
#
# Configuration section
#
# You can edit these values, or create a file named "config.py" and put them
# there to make updating easier. The ones in config.py take precedence.
#
# Directory where epub files are stored. It is passed through
# os.path.realpath() once the configuration has been loaded, so a relative
# path works here too. Each user's files live in a subdirectory named after
# that user's secret.
epub_dir = "/var/www/urraka/files/"
# Path to our user list. Each line has three ':'-separated fields: the user
# name, the hex SHA-256 digest of the password, and the user's secret.
passwd_file = '/var/www-data/urraka/passwd'
# Directory to store the sessions. It is created with mode 0700 at startup
# if it does not exist yet.
sessions_dir = '/tmp/urraka-sessions/'
import functools
import hashlib
import logging
import os
import pickle
import random
import string
import struct
import subprocess
import sys
import time
import urllib
import urllib2
from xml.sax.saxutils import escape, unescape

import bottle
from bottle import request, response
# The optional config.py lives in the current working directory, so that
# directory has to be importable before we try to load it.
sys.path.append(os.getcwd())

# Pull in the overrides, if any. Every name defined in config.py replaces the
# default with the same name set above; a missing config.py is not an error.
try:
    from config import *
except ImportError:
    pass

# Resolve epub_dir to a canonical absolute path, so a relative value in the
# configuration is supported.
epub_dir = os.path.realpath(epub_dir)

# URL of the service that converts an article into a downloadable .mobi
# file. The "%s" placeholder is filled in with the article URL.
CONVERT_URL = (
    "http://fivefilters.org/kindle-it/send.php"
    "?context=download&format=mobi&url=%s")
#
# Utilities.
#
def with_config(f):
    """Decorator for adding the config values.

    The decorated function must return a dict. That dict is updated in place
    with the config values and with 'urlroot' (the script name of the current
    request) and is then returned.
    """
    # NOTE: This used to contain useful config values to pass to the
    # templates, but we don't need them anymore. Consider removing it.
    config = dict()

    # functools.wraps carries over the name, docstring, module and attributes
    # of f, so the wrapper looks like the function it wraps.
    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        d = f(*args, **kwargs)
        d.update(config)
        d.update({'urlroot': request.script_name})
        return d

    return wrapped
def save_fd(dst_path, src):
    """Write the contents of the file-like object src to dst_path.

    dst_path: Path of the file to write. It is created, or truncated if it
        already exists.
    src: Object with a read(size) method; it is read until it returns an
        empty result.
    """
    # Copy in fixed-size chunks so a large file is never held in memory whole.
    BSIZE = 2 * 1024 * 1024

    # Binary mode, so the bytes are written unmodified; the "with" block
    # closes the destination even if reading or writing fails half way.
    with open(dst_path, 'wb') as dst:
        buf = src.read(BSIZE)
        while buf:
            dst.write(buf)
            buf = src.read(BSIZE)
def is_mobi(fname):
    """True if the file is a mobi file, False otherwise.

    fname: Path of the file to inspect. Only its first 68 bytes are read.
    """
    # Look in the Palm Database Format header, the type and creator fields.
    # The file is binary, so read it as such, and close it right away.
    with open(fname, 'rb') as fd:
        head = fd.read(68)
    # A file shorter than 68 bytes yields a short slice and compares unequal.
    return head[60:68] == b'BOOKMOBI'
def get_mobi_title(fname):
    """Gets the title of a .mobi file.

    fname: Path of the file to inspect.

    Returns the title as a unicode string; bytes that are not valid UTF-8 are
    replaced. If the title cannot be located, or comes out empty, the Palm
    database name (first 32 bytes of the file) is returned instead.
    """
    # The mobi format is a Palm Database Format file, with the first 32 bytes
    # being the title of the document. The first record of the database is the
    # mobi header, where the proper title is contained.
    # We read the first 10k of the file, that should be enough.
    with open(fname, 'rb') as fd:
        head = fd.read(10 * 1024)

    def get_int(offset):
        """Big-endian unsigned 32 bit integer found at offset in head."""
        # Raises struct.error if head is too short for that offset.
        return struct.unpack_from('>I', head, offset)[0]

    title = b''
    try:
        # Because we only care for the first record, we know it starts at 78.
        # From the record, the first 4 bytes are the data offset.
        mobi_hdr_offset = get_int(78)

        # Get name offset + len from the mobi header. The offset will be
        # relative to the header, so we add mobi_hdr_offset to get an
        # absolute offset.
        name_offset = mobi_hdr_offset + get_int(mobi_hdr_offset + 84)
        name_len = get_int(mobi_hdr_offset + 88)
        title = head[name_offset:name_offset + name_len]
    except (IndexError, struct.error) as e:
        logging.debug('Error getting title for %r: %s', fname, e)

    if not title:
        # As a fallback, palm databases have a title as the first 32 bytes;
        # it's usually a crippled book title. This also covers a title that
        # lies beyond the part of the file we have read.
        title = head[:32].strip(b'\0')

    return title.decode('utf8', errors = 'replace')
#
# Session.
#
class SessionManager (object):
    """A simple session manager with backing in temporary files.

    Every session is stored as a file named "<session id>.session" inside
    data_path, containing the pickled data object. When a timeout is set, the
    modification time of that file is the time of the last valid use.
    """

    # Characters allowed in a session id. Anything else is dropped before the
    # id is used to build a path, so an id cannot point outside data_path.
    VALID_SID_CHARS = string.ascii_letters + string.digits

    def __init__(self, data_path, timeout = None):
        """Constructor.

        data_path: Path we use to store the sessions. It is created, only
            accessible to the current user, if it does not exist.
        timeout: Session timeout in seconds. None == no timeout.

        Raises EnvironmentError if data_path exists but is not a directory.
        """
        self.data_path = data_path
        self.timeout = timeout

        if not os.path.exists(data_path):
            os.mkdir(data_path, 0o700)
        if not os.path.isdir(data_path):
            raise EnvironmentError(
                "Invalid session data path %r" % data_path)

    def _session_path(self, session_id):
        """Returns the potential path for that session id.

        Returns None if the id is empty, or has no valid character at all.
        """
        if not session_id:
            return None

        session_id = ''.join(c for c in session_id
                if c in self.VALID_SID_CHARS)
        if not session_id:
            return None

        return self.data_path + '/' + session_id + '.session'

    def new(self, data):
        """Create a new session, return session id.

        data: Any pickable object we want to associate with this session. Must
            not be None. Keep it small.
        """
        # The session id is the only thing protecting the session, so it has
        # to be unguessable: take it from the operating system's random
        # source. Hashing only turns it into 64 hex characters.
        session_id = hashlib.sha256(os.urandom(32)).hexdigest()

        with open(self._session_path(session_id), 'wb') as fd:
            pickle.dump(data, fd, pickle.HIGHEST_PROTOCOL)

        return session_id

    def validate(self, session_id):
        """Validate the given session id.

        Returns the associated data object if valid, None if invalid (unknown
        id, timed out session, or unreadable session file).
        """
        path = self._session_path(session_id)
        if not path or not os.path.exists(path):
            logging.debug('Unknown session')
            return None

        if self.timeout:
            if time.time() - os.stat(path).st_mtime > self.timeout:
                logging.debug('Session %s timed out', session_id)
                return None

        # NOTE: pickle must only ever be given files written by new();
        # unpickling data from anybody else can run arbitrary code.
        try:
            with open(path, 'rb') as fd:
                data = pickle.load(fd)
        except (IOError, EOFError, pickle.PickleError) as e:
            # EOFError is what an empty or truncated session file raises.
            logging.warning('Error unpickling %r: %s', path, str(e))
            return None

        # Refresh the timeout.
        os.utime(path, None)

        return data
# The instance we will use in the rest of the code. No timeout is given, so
# sessions do not expire.
sessions = SessionManager(sessions_dir)
def with_session(f):
    """Decorator to validate sessions and get their data.

    The session id is read from the "session_id" cookie. If it is valid, the
    decorated function is called with the session data as an extra first
    positional argument; otherwise the request is aborted with a 401 error.
    """
    # functools.wraps carries over the name, docstring, module and attributes
    # of f, so the wrapper looks like the function it wraps.
    @functools.wraps(f)
    def new_f(*args, **kwargs):
        sid = bottle.request.get_cookie("session_id")
        session_data = sessions.validate(sid)
        if session_data is None:
            bottle.abort(401, 'Invalid session id')

        return f(session_data, *args, **kwargs)

    return new_f
#
# Authentication.
#
def authenticate(username, password):
    """True if the password matches the username, False otherwise.

    username: Name to look up in the first field of the password file.
    password: Clear text password; text is encoded as UTF-8 before hashing.

    Only the first entry for the user is considered. Lines that do not have
    exactly three ':'-separated fields are ignored.
    """
    # TODO: This is not ideal (no salt, etc.) but will do for now. Considering
    # moving to the Persona API and getting rid of managing auth.

    # Hash bytes, not text, so a password with non-ascii characters gets
    # hashed instead of raising an encoding error.
    if not isinstance(password, bytes):
        password = password.encode('utf8')
    hashed_passwd = hashlib.sha256(password).hexdigest()

    with open(passwd_file) as passwdfd:
        for line in passwdfd:
            fields = line.strip().split(':')
            if len(fields) != 3:
                # Blank or malformed line.
                continue

            user, valid_passwd, _ = fields
            if user == username:
                return hashed_passwd == valid_passwd

    return False
def user_secret(username):
    """Returns the secret associated with that username.

    The secret is the third field of the user's line in the password file.
    Lines that do not have exactly three ':'-separated fields are ignored.

    Raises ValueError if the user is not in the password file.
    """
    with open(passwd_file) as passwdfd:
        for line in passwdfd:
            fields = line.strip().split(':')
            if len(fields) != 3:
                # Blank or malformed line.
                continue

            user, _, secret = fields
            if user == username:
                return secret

    # The username should always be valid at this point.
    raise ValueError("Unknown user %r" % username)
def secret_is_valid(secret_to_match):
    """True if the secret is valid, False otherwise.

    A secret is valid if it is the third field of some line in the password
    file. An empty secret is never valid. Lines that do not have exactly three
    ':'-separated fields are ignored.
    """
    # Without this, an empty secret would match any entry whose secret field
    # was left empty.
    if not secret_to_match:
        return False

    with open(passwd_file) as passwdfd:
        for line in passwdfd:
            fields = line.strip().split(':')
            if len(fields) != 3:
                # Blank or malformed line.
                continue

            if secret_to_match == fields[2]:
                return True

    return False
#
# URL handling.
#
@bottle.route('/', method = 'GET')
@bottle.view('index')
@with_config
def index():
    """Front page: the 'index' template, with no values of our own."""
    return dict()
@bottle.route('/', method = 'POST')
@bottle.view('index')
@with_config
def auth_post():
    """Handle the login form.

    On success a new session holding the user name is created, its id is
    stored in the "session_id" cookie, and the client is redirected to 'u/'.
    On failure the 'index' template is rendered again with a message.
    """
    username = request.forms.username
    passwd = request.forms.passwd

    if not (username and passwd):
        return {'message': 'Both fields, Simba...'}

    if not authenticate(username, passwd):
        return {'message': 'Invalid username or password'}

    response.set_cookie('session_id', sessions.new(username))
    bottle.redirect('u/')
@bottle.route('/u/')
@bottle.view('user')
@with_config
@with_session
def user(username):
    """Main page of a logged in user ('user' template).

    username: The session data, which is the name of the logged in user.
    """
    return {'username': username}
@bottle.route('/u/list')
@with_config
@with_session
def list(username):
    """List the files of the logged in user.

    username: The session data, which is the name of the logged in user.

    Returns a dict whose 'file_info' entry is a list of (file name, title)
    pairs, both XML-escaped, sorted by title ignoring case. The title is
    taken from the file itself for mobi files, and is the file name for
    everything else.
    """
    # NOTE: the name of this function shadows the "list" builtin for the rest
    # of the module.
    rootpath = epub_dir + '/' + user_secret(username) + '/'

    # The directory is only created on the first upload, so a user that has
    # not uploaded anything yet simply has no files.
    if not os.path.isdir(rootpath):
        return dict(file_info = [])

    file_info = []
    for fname in os.listdir(rootpath):
        full_path = os.path.join(rootpath, fname)
        if fname.endswith('.mobi') and is_mobi(full_path):
            title = get_mobi_title(full_path)
        else:
            title = fname
        file_info.append((escape(fname), escape(title)))

    file_info.sort(key = lambda val: val[1].lower())

    return dict(file_info = file_info)
@bottle.route('/u/remove', method = 'POST')
@with_config
@with_session
def remove(username):
    """Remove files of the logged in user.

    username: The session data, which is the name of the logged in user.

    The file names come in the 'fnames[]' form field. Nothing is removed
    unless every one of them is an existing file of the user.

    Returns a dict with a 'success' flag and a 'message'.
    """
    fnames = request.forms.getall('fnames[]')
    if not fnames:
        return dict(success = False,
                message = 'No files selected')

    rootpath = epub_dir + '/' + user_secret(username) + '/'

    # Drop the slashes, so a name can not point outside the user's directory,
    # and drop repeated names: removing the same file twice would fail on the
    # second attempt, after part of the work was already done.
    unique_fnames = []
    for f in fnames:
        f = f.replace('/', '')
        if f not in unique_fnames:
            unique_fnames.append(f)

    # Do one sanity check pass.
    for fname in unique_fnames:
        if not os.path.isfile(rootpath + '/' + fname):
            return dict(success = False,
                    message = 'Unknown file %r' % escape(fname))

    # And now remove the files.
    for fname in unique_fnames:
        full_path = rootpath + '/' + fname
        logging.debug('Removing %r', full_path)
        os.unlink(full_path)

    return dict(success = True,
            message = 'Removed %d files' % len(unique_fnames))
@bottle.route('/u/article', method = 'POST')
@with_config
@with_session
def article(username):
    """Convert the article at the given URL and store it for the user.

    username: The session data, which is the name of the logged in user.

    The article URL comes in the 'url' form field. It is handed to the
    conversion service at CONVERT_URL, and the result is saved in the user's
    directory under a name derived from the URL.

    Returns a dict with a 'success' flag and a 'message'.
    """
    rootpath = epub_dir + '/' + user_secret(username) + '/'
    if not os.path.isdir(rootpath):
        os.makedirs(rootpath)

    url = request.forms.url
    logging.debug('Got article: %r', url)
    if not url:
        return dict(success = False,
                message = 'Please provide an URL')

    # Work on bytes from here on: both hashing and quoting fail on text with
    # non-ascii characters.
    if not isinstance(url, bytes):
        url = url.encode('utf8')

    # The article URL travels as a query parameter, so it must be fully
    # percent-encoded; otherwise its own '&', '#' or '?' would cut it short.
    convert_url = CONVERT_URL % urllib.quote(url, safe = '')

    # Compute a filename based on the hash of the url, so we can avoid
    # duplicates. Collisions are not a big deal.
    # TODO: When the server gives us one (it doesn't currently), get this from
    # fd.info()['Content-Disposition'] after sanitizing it.
    filename = hashlib.sha256(url).hexdigest()[:10] + '.mobi'
    dstpath = rootpath + '/' + filename

    success = False
    if os.path.exists(dstpath):
        message = "Article already exists (%s)." % (filename)
    else:
        try:
            fd = urllib2.urlopen(convert_url, timeout = 30)
            try:
                save_fd(dstpath, fd)
            finally:
                fd.close()
            message = "Got article"
            success = True
        except (urllib2.URLError, IOError) as e:
            # IOError also covers a connection that breaks while we read.
            # Do not leave a partial file behind: it would be listed, and
            # would make the next attempt report the article as existing.
            if os.path.exists(dstpath):
                os.unlink(dstpath)
            message = "Error converting article: %s" % e

    return dict(success = success, message = message)
@bottle.route('/u/ebook', method = 'POST')
@bottle.view('user')
@with_config
@with_session
def ebook(username):
    """Store an uploaded ebook for the logged in user.

    username: The session data, which is the name of the logged in user.

    The file comes in the 'ebook' upload field. An existing file with the
    same (sanitized) name is never overwritten.

    Returns the values for the 'user' template: the user name, and the
    outcome of the upload as 'ebook_result'.
    """
    rootpath = epub_dir + '/' + user_secret(username) + '/'
    if not os.path.isdir(rootpath):
        os.makedirs(rootpath)

    # The form may have been submitted without any file.
    upload = request.files.get('ebook')
    raw_filename = getattr(upload, 'filename', None) or ''

    # Build the destination path, taking some precautions with the file name
    # just in case. We leave only "a-zA-Z0-9." so dumb clients don't get
    # confused with non-ascii characters or spaces
    allowed = string.ascii_letters + string.digits + '.'
    filename = ''.join(c for c in raw_filename if c in allowed)
    dstpath = rootpath + '/' + filename

    if not filename.strip('.'):
        # Nothing usable is left (no file, or a name like "" or ".."), and
        # the resulting path would be a directory, not a file.
        message = 'Please provide a file with a valid name.'
    elif os.path.exists(dstpath):
        message = 'File %r already exists.' % filename
    else:
        save_fd(dstpath, upload.file)
        message = "Got file %r." % filename

    return dict(username = username, ebook_result = message)
@bottle.route('/b/<secret>/')
def secret_index(secret):
    """Plain text index of the files stored under the given secret.

    The response has one line per file, sorted by file name, each line being
    the URL of this request followed by the file name. It is empty if there
    is no directory for the secret. Unknown secrets get a 404 error.
    """
    if not secret_is_valid(secret):
        bottle.abort(404, "Unknown secret")

    bottle.response.content_type = 'text/plain; charset=utf8'

    # Only sane secrets should be allowed to be created anyway, but just in
    # case we filter.
    allowed = string.ascii_letters + string.digits + '.-_'
    safe_secret = ''.join(c for c in secret if c in allowed)
    rootpath = epub_dir + '/' + safe_secret + '/'

    if os.path.isdir(rootpath):
        return ''.join(request.url + f + '\n'
                for f in sorted(os.listdir(rootpath)))

    return ''
@bottle.route('/b/<secret>/<fname>')
def serve_file(secret, fname):
    """Serve one file out of the directory of the given secret.

    Unknown secrets get a 404 error.
    """
    if secret_is_valid(secret):
        return bottle.static_file(fname, root = epub_dir + '/' + secret + '/')

    bottle.abort(404, "Unknown secret")
@bottle.route('/static/<path:path>')
def static(path):
    """Serve a static asset from the './static/' directory."""
    static_root = './static/'
    return bottle.static_file(path, root = static_root)
def main():
    """Run the application, with debugging and debug logging turned on.

    If the GATEWAY_INTERFACE environment variable is set, the application is
    run quietly with bottle's CGI server. Otherwise it is run on
    localhost:8008 with the reloader enabled.
    """
    bottle.debug(True)
    logging.basicConfig(level = logging.DEBUG)

    running_as_cgi = 'GATEWAY_INTERFACE' in os.environ
    if not running_as_cgi:
        bottle.run(host = 'localhost', port = 8008, reloader = True)
    else:
        bottle.run(server = bottle.CGIServer, quiet = True)


if __name__ == '__main__':
    main()