#!/usr/bin/env python3
"""
git-arr: A git web html generator.
"""

import configparser
import math
import optparse
import os
import re
import sys
from typing import Union

import bottle  # type: ignore

import git
import utils


# Tell bottle where to find the views.
# Note this assumes they live next to the executable, and that is not a good
# assumption; but it's good enough for now.
bottle.TEMPLATE_PATH.insert(
    0, os.path.abspath(os.path.dirname(sys.argv[0])) + "/views/"
)

# The path to our static files.
# Note this assumes they live next to the executable, and that is not a good
# assumption; but it's good enough for now.
static_path = os.path.abspath(os.path.dirname(sys.argv[0])) + "/static/"


# The repositories, keyed by name, are a global variable for convenience.
# The dictionary is populated by load_config().
repos = {}


def _read_text(path):
    """Return the whole content of the text file at path, closing it after."""
    with open(path) as f:
        return f.read()


def load_config(path):
    """Load the configuration from the given file.

    The "repos" global variable will be filled with the repositories
    as configured.

    Raises ValueError if the file cannot be read, or if a configured path is
    not a git repository. configparser errors (e.g. NoOptionError for a
    section without "path") propagate unchanged.
    """
    defaults = {
        "tree": "yes",
        "rootdiff": "yes",
        "desc": "",
        "recursive": "no",
        "prefix": "",
        "commits_in_summary": "10",
        "commits_per_page": "50",
        "max_pages": "250",
        "web_url": "",
        "web_url_file": "web_url",
        "git_url": "",
        "git_url_file": "cloneurl",
        "embed_markdown": "yes",
        "embed_images": "no",
        "ignore": "",
        "generate_patch": "yes",
    }

    config = configparser.ConfigParser(defaults)

    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of the ones it did parse; an empty list means nothing was loaded.
    if not config.read(path):
        raise ValueError("could not read configuration file %s" % path)

    # Do a first pass for general sanity checking and recursive expansion.
    # Note config.sections() returns a fresh list, so adding and removing
    # sections inside the loop is safe.
    for s in config.sections():
        if config.getboolean(s, "recursive"):
            root = config.get(s, "path")
            prefix = config.get(s, "prefix")

            for entry in os.listdir(root):
                fullpath = find_git_dir(root + "/" + entry)
                if not fullpath:
                    continue

                if os.path.exists(fullpath + "/disable_gitweb"):
                    continue

                # Explicitly configured sections win over expanded ones.
                section = prefix + entry
                if config.has_section(section):
                    continue

                # Copy the options raw, so interpolation happens when the new
                # section is read and not now.
                config.add_section(section)
                for opt, value in config.items(s, raw=True):
                    config.set(section, opt, value)

                config.set(section, "path", fullpath)
                config.set(section, "recursive", "no")

            # This recursive section is no longer useful.
            config.remove_section(s)

    for s in config.sections():
        ignore = config.get(s, "ignore")
        if ignore and re.search(ignore, s):
            continue

        fullpath = find_git_dir(config.get(s, "path"))
        if not fullpath:
            raise ValueError(
                "%s: path %s is not a valid git repository"
                % (s, config.get(s, "path"))
            )

        config.set(s, "path", fullpath)
        config.set(s, "name", s)

        # Fall back to the repository's own description file.
        desc = config.get(s, "desc")
        if not desc and os.path.exists(fullpath + "/description"):
            desc = _read_text(fullpath + "/description").strip()

        r = git.Repo(fullpath, name=s)
        r.info.desc = desc
        r.info.commits_in_summary = config.getint(s, "commits_in_summary")
        r.info.commits_per_page = config.getint(s, "commits_per_page")
        r.info.max_pages = config.getint(s, "max_pages")
        if r.info.max_pages <= 0:
            # Zero or negative means "no limit".
            r.info.max_pages = sys.maxsize
        r.info.generate_tree = config.getboolean(s, "tree")
        r.info.root_diff = config.getboolean(s, "rootdiff")
        r.info.generate_patch = config.getboolean(s, "generate_patch")

        # For both URLs, the configured value takes precedence over the
        # content of the corresponding file inside the git directory.
        r.info.web_url = config.get(s, "web_url")
        web_url_file = fullpath + "/" + config.get(s, "web_url_file")
        if not r.info.web_url and os.path.isfile(web_url_file):
            r.info.web_url = _read_text(web_url_file)

        r.info.git_url = config.get(s, "git_url")
        git_url_file = fullpath + "/" + config.get(s, "git_url_file")
        if not r.info.git_url and os.path.isfile(git_url_file):
            r.info.git_url = _read_text(git_url_file)

        r.info.embed_markdown = config.getboolean(s, "embed_markdown")
        r.info.embed_images = config.getboolean(s, "embed_images")

        repos[r.name] = r


def find_git_dir(path):
    """Returns the path to the git directory for the given repository.

    This function takes a path to a git repository, and returns the path to
    its git directory. If the given path is itself accepted by git, it is
    returned as-is; otherwise path + '/.git' is tried.

    An empty string is returned if the given path is not a valid repository.
    """

    def check(p):
        """A dirty check for whether this is a git dir or not."""
        # Note silent stderr because we expect this to fail and don't want the
        # noise; and also we strip the final \n from the output.
        return git.run_git(
            p, ["rev-parse", "--git-dir"], silent_stderr=True
        ).read()[:-1]

    for p in [path, path + "/.git"]:
        if check(p):
            return p

    return ""


def repo_filter(unused_conf):
    """Bottle route filter for repos.

    Returns the (regexp, to_python, to_url) triple bottle expects from a
    filter; it is registered below under the name "repo".
    """
    # TODO: consider allowing /, which is tricky.
    regexp = r"[\w\.~-]+"

    def to_python(s):
        """Return the corresponding Python object."""
        if s in repos:
            return repos[s]
        bottle.abort(404, "Unknown repository")

    def to_url(r):
        """Return the corresponding URL string."""
        return r.name

    return regexp, to_python, to_url


app = bottle.Bottle()
app.router.add_filter("repo", repo_filter)
# Make it the default app, so the module-level bottle.route() decorators
# below register their routes on it.
bottle.app.push(app)


def with_utils(f):
    """Decorator to add the utilities to the return value.

    Used to wrap functions that return dictionaries which are then passed to
    templates.
    """
    utilities = {
        "shorten": utils.shorten,
        "can_colorize": utils.can_colorize,
        "colorize_diff": utils.colorize_diff,
        "colorize_blob": utils.colorize_blob,
        "can_markdown": utils.can_markdown,
        "markdown_blob": utils.markdown_blob,
        "can_embed_image": utils.can_embed_image,
        "embed_image_blob": utils.embed_image_blob,
        "is_binary": utils.is_binary,
        "hexdump": utils.hexdump,
        "abort": bottle.abort,
        "smstr": git.smstr,
    }

    def wrapped(*args, **kwargs):
        """Wrapped function we will return."""
        d = f(*args, **kwargs)
        d.update(utilities)
        return d

    wrapped.__name__ = f.__name__
    wrapped.__doc__ = f.__doc__

    return wrapped


#
# Request handlers
#
# Each route wildcard is named after the handler parameter it feeds. The
# "repo" filter is the one registered above, so handlers get a repository
# object (or bottle aborts with 404). Commit ids are restricted to hex digits.
# Routes are matched in definition order, so the more specific blob and tree
# routes must stay ahead of the branch ones.
#


@bottle.route("/")
@bottle.view("index")
@with_utils
def index():
    """Index page, listing all the repositories."""
    return dict(repos=repos)


@bottle.route("/r/<repo:repo>/")
@bottle.view("summary")
@with_utils
def summary(repo):
    """Summary page for a repository."""
    return dict(repo=repo)


@bottle.route("/r/<repo:repo>/c/<cid:re:[0-9a-f]+>/")
@bottle.view("commit")
@with_utils
def commit(repo, cid):
    """Page for a single commit; 404 if the repository has no such commit."""
    c = repo.commit(cid)
    if not c:
        bottle.abort(404, "Commit not found")

    return dict(repo=repo, c=c)


@bottle.route("/r/<repo:repo>/c/<cid:re:[0-9a-f]+>.patch")
@bottle.view(
    "patch",
    # Output is text/plain, don't do HTML escaping.
    template_settings={"noescape": True},
)
def patch(repo, cid):
    """Plain-text patch for a single commit; 404 if there is no such commit."""
    c = repo.commit(cid)
    if not c:
        bottle.abort(404, "Commit not found")

    bottle.response.content_type = "text/plain; charset=utf8"

    return dict(repo=repo, c=c)


@bottle.route("/r/<repo:repo>/b/<bname:path>/t/f=<fname:path>.html")
@bottle.route(
    "/r/<repo:repo>/b/<bname:path>/t/<dirname:path>/f=<fname:path>.html"
)
@bottle.view("blob")
@with_utils
def blob(repo, bname, fname, dirname=""):
    """Page for a file in a branch; 404 if the file is not there.

    fname and dirname are given in their URL form.
    """
    if dirname and not dirname.endswith("/"):
        dirname = dirname + "/"

    dirname = git.smstr.from_url(dirname)
    fname = git.smstr.from_url(fname)
    path = dirname.raw + fname.raw

    # Handle backslash-escaped characters, which are not utf8.
    # This matches the generated links from git.unquote().
    path = path.encode("utf8").decode("unicode-escape").encode("latin1")

    content = repo.blob(path, bname)
    if content is None:
        bottle.abort(404, "File %r not found in branch %s" % (path, bname))

    return dict(
        repo=repo, branch=bname, dirname=dirname, fname=fname, blob=content
    )


@bottle.route("/r/<repo:repo>/b/<bname:path>/t/")
@bottle.route("/r/<repo:repo>/b/<bname:path>/t/<dirname:path>/")
@bottle.view("tree")
@with_utils
def tree(repo, bname, dirname=""):
    """Page for a directory in a branch; dirname is given in its URL form."""
    if dirname and not dirname.endswith("/"):
        dirname = dirname + "/"

    dirname = git.smstr.from_url(dirname)

    return dict(
        repo=repo, branch=bname, tree=repo.tree(bname), dirname=dirname
    )


@bottle.route("/r/<repo:repo>/b/<bname:path>/")
@bottle.route("/r/<repo:repo>/b/<bname:path>/<offset:int>.html")
@bottle.view("branch")
@with_utils
def branch(repo, bname, offset=0):
    """Page number "offset" of the commit list of a branch."""
    return dict(repo=repo, branch=bname, offset=offset)


@bottle.route("/static/<path:path>")
def static(path):
    """Serve a file from the static directory."""
    return bottle.static_file(path, root=static_path)


#
# Static HTML generation
#


def is_404(e):
    """True if e is an HTTPError with status 404, False otherwise."""
    # We need this because older bottle.py versions put the status code in
    # e.status as an integer, and newer versions make that a string, and using
    # e.status_code for the code.
    if isinstance(e.status, int):
        return e.status == 404
    else:
        return e.status_code == 404


def generate(output: str, only=None):
    """Generate static html to the output directory.

    If "only" is non-empty, only the repositories named in it are generated
    (the main index and the static files are always written).
    """

    def write_to(path: str, func_or_str, args=(), mtime=None):
        """Write a file under the output directory, if needed.

        func_or_str is either the content itself, or a function that returns
        it when called with args. If mtime is given, the file is only written
        when its current mtime differs, and it is stamped with mtime after.
        """
        path = output + "/" + path
        os.makedirs(os.path.dirname(path), exist_ok=True)

        if mtime:
            path_mtime: Union[float, int] = 0
            if os.path.exists(path):
                path_mtime = os.stat(path).st_mtime

            # Make sure they're both float or int, to avoid failing
            # comparisons later on because of this.
            if isinstance(path_mtime, int):
                mtime = int(mtime)

            # If we were given mtime, we compare against it to see if we
            # should write the file or not. Compare with almost-equality
            # because otherwise floating point equality gets in the way, and
            # we rather write a bit more, than generate the wrong output.
            if abs(path_mtime - mtime) < 0.000001:
                return

            print(path)
            s = func_or_str(*args)
        else:
            # Otherwise, be lazy if we were given a function to run, or write
            # always if they gave us a string.
            if isinstance(func_or_str, str):
                print(path)
                s = func_or_str
            else:
                if os.path.exists(path):
                    return
                print(path)
                s = func_or_str(*args)

        with open(path, "w") as f:
            f.write(s)

        if mtime:
            os.utime(path, (mtime, mtime))

    def link(from_path, to_path):
        """Create the symlink from_path -> to_path, unless it exists."""
        from_path = output + "/" + from_path
        if os.path.lexists(from_path):
            return
        print(from_path, "->", to_path)
        os.makedirs(os.path.dirname(from_path), exist_ok=True)
        os.symlink(to_path, from_path)

    def write_tree(r: git.Repo, bn: str, mtime):
        """Write the tree and blob pages for branch bn of repository r."""
        t: git.Tree = r.tree(bn)

        write_to("r/%s/b/%s/t/index.html" % (r.name, bn), tree, (r, bn), mtime)

        for otype, oname, _ in t.ls("", recursive=True):
            # FIXME: bottle cannot route paths with '\n' so those are sadly
            # expected to fail for now; we skip them.
            if "\n" in oname.raw:
                print("skipping file with \\n: %r" % (oname.raw))
                continue

            if otype == "blob":
                dirname = git.smstr(os.path.dirname(oname.raw))
                fname = git.smstr(os.path.basename(oname.raw))
                write_to(
                    "r/%s/b/%s/t/%s%sf=%s.html"
                    % (
                        str(r.name),
                        str(bn),
                        dirname.raw,
                        "/" if dirname.raw else "",
                        fname.raw,
                    ),
                    blob,
                    (r, bn, fname.url, dirname.url),
                    mtime,
                )
            else:
                write_to(
                    "r/%s/b/%s/t/%s/index.html"
                    % (str(r.name), str(bn), oname.raw),
                    tree,
                    (r, bn, oname.url),
                    mtime,
                )

    # Always generate the index, to keep the "last updated" time fresh.
    write_to("index.html", index())

    # We can't call static() because it relies on HTTP headers.
    # The copies carry the mtime of the source files, so they are only
    # rewritten when the sources change.
    for name in ("git-arr.css", "git-arr.js", "syntax.css"):
        src = os.path.join(static_path, name)
        write_to("static/" + name, _read_text, [src], os.stat(src).st_mtime)

    rs = sorted(repos.values(), key=lambda r: r.name)
    if only:
        rs = [r for r in rs if r.name in only]

    for r in rs:
        write_to("r/%s/index.html" % r.name, summary(r))
        for bn in r.branch_names():
            commit_count = 0
            commit_ids = r.commit_ids(
                "refs/heads/" + bn,
                limit=r.info.commits_per_page * r.info.max_pages,
            )
            for cid in commit_ids:
                write_to(
                    "r/%s/c/%s/index.html" % (r.name, cid), commit, (r, cid)
                )
                if r.info.generate_patch:
                    write_to(
                        "r/%s/c/%s.patch" % (r.name, cid), patch, (r, cid)
                    )
                commit_count += 1

            # To avoid regenerating files that have not changed, we will
            # instruct write_to() to set their mtime to the branch's committer
            # date, and then compare against it to decide whether or not to
            # write.
            branch_mtime = r.commit(bn).committer_date.epoch

            nr_pages = int(
                math.ceil(float(commit_count) / r.info.commits_per_page)
            )
            nr_pages = min(nr_pages, r.info.max_pages)

            for page in range(nr_pages):
                write_to(
                    "r/%s/b/%s/%d.html" % (r.name, bn, page),
                    branch,
                    (r, bn, page),
                    branch_mtime,
                )

            link(
                from_path="r/%s/b/%s/index.html" % (r.name, bn),
                to_path="0.html",
            )

            if r.info.generate_tree:
                write_tree(r, bn, branch_mtime)

        for tag_name, obj_id in r.tags():
            try:
                write_to(
                    "r/%s/c/%s/index.html" % (r.name, obj_id),
                    commit,
                    (r, obj_id),
                )
            except bottle.HTTPError as e:
                # Some repos can have tags pointing to non-commits. This
                # happens in the Linux Kernel's v2.6.11, which points directly
                # to a tree. Ignore them.
                if is_404(e):
                    print("404 in tag %s (%s)" % (tag_name, obj_id))
                else:
                    raise


def main():
    """Parse the command line, load the config, and serve or generate."""
    parser = optparse.OptionParser("usage: %prog [options] serve|generate")
    parser.add_option(
        "-c", "--config", metavar="FILE", help="configuration file"
    )
    parser.add_option(
        "-o", "--output", metavar="DIR", help="output directory (for generate)"
    )
    parser.add_option(
        "",
        "--only",
        metavar="REPO",
        action="append",
        default=[],
        help="generate/serve only this repository",
    )
    opts, args = parser.parse_args()

    if not opts.config:
        parser.error("--config is mandatory")

    try:
        load_config(opts.config)
    except (configparser.NoOptionError, ValueError) as e:
        print("Error parsing config:", e)
        return

    if not args:
        parser.error("Must specify an action (serve|generate)")

    if args[0] == "serve":
        bottle.run(host="localhost", port=8008, reloader=True)
    elif args[0] == "generate":
        if not opts.output:
            parser.error("Must specify --output")
        generate(output=opts.output, only=opts.only)
    else:
        parser.error("Unknown action %s" % args[0])


if __name__ == "__main__":
    main()