#!/usr/bin/env python3
"""
git-arr: A git web html generator.
"""
import configparser
import functools
import math
import optparse
import os
import re
import sys
from typing import Union

import bottle # type: ignore

import git
import utils
# Directory that holds the executable. Both the views and the static files
# are looked up relative to it.
# Note this assumes they live next to the executable, and that is not a good
# assumption; but it's good enough for now.
_base_dir = os.path.abspath(os.path.dirname(sys.argv[0]))

# Tell bottle where to find the views.
bottle.TEMPLATE_PATH.insert(0, _base_dir + "/views/")

# The path to our static files.
static_path = _base_dir + "/static/"

# The list of repositories is a global variable for convenience. It will be
# populated by load_config().
repos = {}
def load_config(path):
    """Load the configuration from the given file.

    The "repos" global variable will be filled with the repositories
    as configured.

    The file is processed in two passes:

    1. Every section with "recursive" enabled is replaced by one section per
       git repository found directly under its "path" (named "prefix" plus
       the directory name), skipping directories that contain a
       "disable_gitweb" file and names that are already configured.
    2. Every remaining section whose name does not match its "ignore"
       regular expression is turned into a git.Repo and stored in "repos".

    Raises ValueError if a section's path is not a valid git repository.
    Errors raised by configparser (e.g. a section without "path") are left to
    propagate to the caller.
    """
    defaults = {
        "tree": "yes",
        "rootdiff": "yes",
        "desc": "",
        "recursive": "no",
        "prefix": "",
        "commits_in_summary": "10",
        "commits_per_page": "50",
        "max_pages": "250",
        "web_url": "",
        "web_url_file": "web_url",
        "git_url": "",
        "git_url_file": "cloneurl",
        "embed_markdown": "yes",
        "embed_images": "no",
        "ignore": "",
        "generate_patch": "yes",
    }

    def read_file(fname):
        """Return the whole content of fname, closing the file afterwards."""
        with open(fname) as fd:
            return fd.read()

    config = configparser.ConfigParser(defaults)
    config.read(path)

    # Do a first pass for general sanity checking and recursive expansion.
    # Note config.sections() returns a list, so adding and removing sections
    # while we go over it is fine.
    for s in config.sections():
        if not config.getboolean(s, "recursive"):
            continue

        root = config.get(s, "path")
        prefix = config.get(s, "prefix")

        for entry in os.listdir(root):
            fullpath = find_git_dir(root + "/" + entry)
            if not fullpath:
                continue

            if os.path.exists(fullpath + "/disable_gitweb"):
                continue

            # Explicitly configured sections take precedence over the ones
            # we would generate.
            section = prefix + entry
            if config.has_section(section):
                continue

            # The new section inherits all the options from the recursive
            # one, except for the path and the recursion itself.
            config.add_section(section)
            for opt, value in config.items(s, raw=True):
                config.set(section, opt, value)

            config.set(section, "path", fullpath)
            config.set(section, "recursive", "no")

        # This recursive section is no longer useful.
        config.remove_section(s)

    for s in config.sections():
        ignore = config.get(s, "ignore")
        if ignore and re.search(ignore, s):
            continue

        fullpath = find_git_dir(config.get(s, "path"))
        if not fullpath:
            raise ValueError(
                "%s: path %s is not a valid git repository"
                % (s, config.get(s, "path"))
            )

        config.set(s, "path", fullpath)
        config.set(s, "name", s)

        # Fall back to the repository's own description file if the
        # configuration does not give us one.
        desc = config.get(s, "desc")
        if not desc and os.path.exists(fullpath + "/description"):
            desc = read_file(fullpath + "/description").strip()

        r = git.Repo(fullpath, name=s)
        r.info.desc = desc
        r.info.commits_in_summary = config.getint(s, "commits_in_summary")
        r.info.commits_per_page = config.getint(s, "commits_per_page")

        # A non-positive max_pages means "no limit".
        r.info.max_pages = config.getint(s, "max_pages")
        if r.info.max_pages <= 0:
            r.info.max_pages = sys.maxsize

        r.info.generate_tree = config.getboolean(s, "tree")
        r.info.root_diff = config.getboolean(s, "rootdiff")
        r.info.generate_patch = config.getboolean(s, "generate_patch")

        # The URLs can come from the configuration or, if not set there,
        # from a file inside the git directory.
        r.info.web_url = config.get(s, "web_url")
        web_url_file = fullpath + "/" + config.get(s, "web_url_file")
        if not r.info.web_url and os.path.isfile(web_url_file):
            r.info.web_url = read_file(web_url_file)

        r.info.git_url = config.get(s, "git_url")
        git_url_file = fullpath + "/" + config.get(s, "git_url_file")
        if not r.info.git_url and os.path.isfile(git_url_file):
            r.info.git_url = read_file(git_url_file)

        r.info.embed_markdown = config.getboolean(s, "embed_markdown")
        r.info.embed_images = config.getboolean(s, "embed_images")

        repos[r.name] = r
def find_git_dir(path):
    """Returns the path to the git directory for the given repository.

    Two candidates are tried, in order: the given path itself, and then
    path + '/.git'. The first one that git recognizes as a git directory is
    returned.

    An empty string is returned if neither candidate is a valid repository.
    """

    def probe(candidate):
        """A dirty check for whether this is a git dir or not."""
        # Silence stderr because we expect this to fail and don't want the
        # noise.
        output = git.run_git(
            candidate, ["rev-parse", "--git-dir"], silent_stderr=True
        ).read()
        # Drop the final \n from the output.
        return output[:-1]

    candidates = (path, path + "/.git")
    return next((c for c in candidates if probe(c)), "")
def repo_filter(unused_conf):
    """Bottle route filter for repos.

    Returns the (regexp, to_python, to_url) tuple: the regular expression
    the URL fragment has to match, and the two converters between the URL
    fragment and the repository object.
    """

    def to_python(s):
        """Return the repository named s, or abort with a 404."""
        if s not in repos:
            return bottle.abort(404, "Unknown repository")
        return repos[s]

    def to_url(r):
        """Return the URL fragment for repository r, which is its name."""
        return r.name

    # TODO: consider allowing /, which is tricky.
    return r"[\w\.~-]+", to_python, to_url
# Create our own application object, and register the "repo" route filter on
# its router, so the routes below can use "<repo:repo>" wildcards (see
# repo_filter() for the conversion).
app = bottle.Bottle()
app.router.add_filter("repo", repo_filter)
# Push the application onto bottle's application stack.
# NOTE(review): presumably this makes it the default application used by the
# module-level @bottle.route decorators and by bottle.run() -- confirm
# against the bottle documentation.
bottle.app.push(app)
def with_utils(f):
    """Decorator to add the utilities to the return value.

    Used to wrap functions that return dictionaries which are then passed to
    templates.

    The dictionary returned by f is updated in place with the utilities, so
    if f returns a key with the same name as a utility, the utility wins.
    """
    utilities = {
        "shorten": utils.shorten,
        "can_colorize": utils.can_colorize,
        "colorize_diff": utils.colorize_diff,
        "colorize_blob": utils.colorize_blob,
        "can_markdown": utils.can_markdown,
        "markdown_blob": utils.markdown_blob,
        "can_embed_image": utils.can_embed_image,
        "embed_image_blob": utils.embed_image_blob,
        "is_binary": utils.is_binary,
        "hexdump": utils.hexdump,
        "abort": bottle.abort,
        "smstr": git.smstr,
    }

    # functools.wraps copies the name and docstring (and the rest of the
    # function metadata) from f, so the wrapper is indistinguishable from it
    # for introspection purposes.
    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        d = f(*args, **kwargs)
        d.update(utilities)
        return d

    return wrapped
@bottle.route("/")
@bottle.view("index")
@with_utils
def index():
    """Handler for "/": passes all the known repositories to the view."""
    return {"repos": repos}
@bottle.route("/r/<repo:repo>/")
@bottle.view("summary")
@with_utils
def summary(repo):
    """Handler for a repository's summary: passes the repo to the view."""
    return {"repo": repo}
@bottle.route("/r/<repo:repo>/c/<cid:re:[0-9a-f]{5,40}>/")
@bottle.view("commit")
@with_utils
def commit(repo, cid):
    """Handler for a single commit, identified by its (abbreviated) id.

    Aborts with a 404 if the repository does not give us a commit for cid.
    """
    found = repo.commit(cid)
    if not found:
        bottle.abort(404, "Commit not found")

    return {"repo": repo, "c": found}
@bottle.route("/r/<repo:repo>/c/<cid:re:[0-9a-f]{5,40}>.patch")
@bottle.view(
    "patch",
    # Output is text/plain, don't do HTML escaping.
    template_settings={"noescape": True},
)
def patch(repo, cid):
    """Handler for a commit in patch form, served as plain text.

    Aborts with a 404 if the repository does not give us a commit for cid.
    """
    found = repo.commit(cid)
    if not found:
        bottle.abort(404, "Commit not found")

    bottle.response.content_type = "text/plain; charset=utf8"
    return {"repo": repo, "c": found}
@bottle.route("/r/<repo:repo>/b/<bname:path>/t/f=<fname:path>.html")
@bottle.route(
    "/r/<repo:repo>/b/<bname:path>/t/<dirname:path>/f=<fname:path>.html"
)
@bottle.view("blob")
@with_utils
def blob(repo, bname, fname, dirname=""):
    """Handler for a file (blob) in the given branch.

    The file is identified by its directory (empty for the top level) and
    its name, both as they appear in the URL. Aborts with a 404 if the
    repository has no such file in the branch.
    """
    # Directory names are always handled with a trailing slash.
    if dirname and not dirname.endswith("/"):
        dirname += "/"

    dirname = git.smstr.from_url(dirname)
    fname = git.smstr.from_url(fname)

    # Handle backslash-escaped characters, which are not utf8.
    # This matches the generated links from git.unquote().
    escaped = (dirname.raw + fname.raw).encode("utf8")
    path = escaped.decode("unicode-escape").encode("latin1")

    content = repo.blob(path, bname)
    if content is None:
        bottle.abort(404, "File %r not found in branch %s" % (path, bname))

    return {
        "repo": repo,
        "branch": bname,
        "dirname": dirname,
        "fname": fname,
        "blob": content,
    }
@bottle.route("/r/<repo:repo>/b/<bname:path>/t/")
@bottle.route("/r/<repo:repo>/b/<bname:path>/t/<dirname:path>/")
@bottle.view("tree")
@with_utils
def tree(repo, bname, dirname=""):
    """Handler for a directory listing in the given branch.

    An empty dirname refers to the top level directory.
    """
    # Directory names are always handled with a trailing slash.
    if dirname and not dirname.endswith("/"):
        dirname += "/"

    dirname = git.smstr.from_url(dirname)

    return {
        "repo": repo,
        "branch": bname,
        "tree": repo.tree(bname),
        "dirname": dirname,
    }
@bottle.route("/r/<repo:repo>/b/<bname:path>/")
@bottle.route("/r/<repo:repo>/b/<bname:path>/<offset:int>.html")
@bottle.view("branch")
@with_utils
def branch(repo, bname, offset=0):
    """Handler for a page of a branch; offset is the page number in the URL."""
    return {"repo": repo, "branch": bname, "offset": offset}
@bottle.route("/static/<path:path>")
def static(path):
    """Handler for the static files, which are served out of static_path."""
    response = bottle.static_file(path, root=static_path)
    return response
#
# Static HTML generation
#
def is_404(e):
    """True if e is an HTTPError with status 404, False otherwise."""
    # Older bottle.py versions put the status code in e.status as an integer;
    # newer versions make that a string, and use e.status_code for the code.
    # Pick whichever one holds the numeric code.
    code = e.status if isinstance(e.status, int) else e.status_code
    return code == 404
def generate(output: str, only=None):
    """Generate static html to the output directory.

    output is the directory under which all the files are written; missing
    directories are created as needed.

    only is an optional collection of repository names; if given and not
    empty, the per-repository pages are generated only for those
    repositories. The index and the static files are always written.
    """

    def read_file(fname):
        """Return the whole content of fname, closing the file afterwards."""
        with open(fname) as fd:
            return fd.read()

    def write_to(path: str, func_or_str, args=(), mtime=None):
        """Write output/path, if needed.

        func_or_str is either the content itself (a string), or a function
        that returns the content when called with args.

        If mtime is given, the file is written only if its current mtime
        differs from it, and afterwards its mtime is set to it. Otherwise,
        strings are always written, and functions are called only if the
        file does not exist yet.
        """
        path = output + "/" + path
        os.makedirs(os.path.dirname(path), exist_ok=True)

        if mtime:
            path_mtime: Union[float, int] = 0
            if os.path.exists(path):
                path_mtime = os.stat(path).st_mtime

            # Make sure they're both float or int, to avoid failing
            # comparisons later on because of this.
            if isinstance(path_mtime, int):
                mtime = int(mtime)

            # If we were given mtime, we compare against it to see if we
            # should write the file or not. Compare with almost-equality
            # because otherwise floating point equality gets in the way, and
            # we rather write a bit more, than generate the wrong output.
            if abs(path_mtime - mtime) < 0.000001:
                return

            print(path)
            s = func_or_str(*args)
        elif isinstance(func_or_str, str):
            # Without mtime, write always if they gave us a string.
            print(path)
            s = func_or_str
        else:
            # Without mtime, be lazy if we were given a function to run.
            if os.path.exists(path):
                return
            print(path)
            s = func_or_str(*args)

        # Make sure the file is flushed and closed before we touch its
        # mtime, otherwise a late write could update it again.
        with open(path, "w") as fd:
            fd.write(s)
        if mtime:
            os.utime(path, (mtime, mtime))

    def link(from_path, to_path):
        """Create the symlink output/from_path -> to_path, if not there."""
        from_path = output + "/" + from_path

        if os.path.lexists(from_path):
            return
        print(from_path, "->", to_path)
        os.symlink(to_path, from_path)

    def write_tree(r: git.Repo, bn: str, mtime):
        """Write the tree and blob pages for branch bn of repository r."""
        t: git.Tree = r.tree(bn)

        write_to("r/%s/b/%s/t/index.html" % (r.name, bn), tree, (r, bn), mtime)

        for otype, oname, _ in t.ls("", recursive=True):
            # FIXME: bottle cannot route paths with '\n' so those are sadly
            # expected to fail for now; we skip them.
            if "\n" in oname.raw:
                print("skipping file with \\n: %r" % (oname.raw))
                continue

            if otype == "blob":
                dirname = git.smstr(os.path.dirname(oname.raw))
                fname = git.smstr(os.path.basename(oname.raw))
                write_to(
                    "r/%s/b/%s/t/%s%sf=%s.html"
                    % (
                        str(r.name),
                        str(bn),
                        dirname.raw,
                        "/" if dirname.raw else "",
                        fname.raw,
                    ),
                    blob,
                    (r, bn, fname.url, dirname.url),
                    mtime,
                )
            else:
                write_to(
                    "r/%s/b/%s/t/%s/index.html"
                    % (str(r.name), str(bn), oname.raw),
                    tree,
                    (r, bn, oname.url),
                    mtime,
                )

    # Always generate the index, to keep the "last updated" time fresh.
    write_to("index.html", index())

    # We can't call static() because it relies on HTTP headers, so we copy
    # the files ourselves, using their mtime to avoid unnecessary rewrites.
    for name in ("git-arr.css", "git-arr.js", "syntax.css"):
        src = os.path.join(static_path, name)
        write_to("static/" + name, read_file, [src], os.stat(src).st_mtime)

    rs = sorted(repos.values(), key=lambda r: r.name)
    if only:
        rs = [r for r in rs if r.name in only]

    for r in rs:
        write_to("r/%s/index.html" % r.name, summary(r))
        for bn in r.branch_names():
            commit_count = 0
            commit_ids = r.commit_ids(
                "refs/heads/" + bn,
                limit=r.info.commits_per_page * r.info.max_pages,
            )
            for cid in commit_ids:
                write_to(
                    "r/%s/c/%s/index.html" % (r.name, cid), commit, (r, cid)
                )
                if r.info.generate_patch:
                    write_to(
                        "r/%s/c/%s.patch" % (r.name, cid), patch, (r, cid)
                    )
                commit_count += 1

            # To avoid regenerating files that have not changed, we will
            # instruct write_to() to set their mtime to the branch's committer
            # date, and then compare against it to decide whether or not to
            # write.
            branch_mtime = r.commit(bn).committer_date.epoch

            nr_pages = math.ceil(commit_count / r.info.commits_per_page)
            nr_pages = min(nr_pages, r.info.max_pages)

            for page in range(nr_pages):
                write_to(
                    "r/%s/b/%s/%d.html" % (r.name, bn, page),
                    branch,
                    (r, bn, page),
                    branch_mtime,
                )

            # The branch's index is just its first page.
            link(
                from_path="r/%s/b/%s/index.html" % (r.name, bn),
                to_path="0.html",
            )

            if r.info.generate_tree:
                write_tree(r, bn, branch_mtime)

        for tag_name, obj_id in r.tags():
            try:
                write_to(
                    "r/%s/c/%s/index.html" % (r.name, obj_id),
                    commit,
                    (r, obj_id),
                )
            except bottle.HTTPError as e:
                # Some repos can have tags pointing to non-commits. This
                # happens in the Linux Kernel's v2.6.11, which points directly
                # to a tree. Ignore them.
                if is_404(e):
                    print("404 in tag %s (%s)" % (tag_name, obj_id))
                else:
                    raise
def main():
    """Command-line entry point: parse the options and run the action.

    The action is the first positional argument, and it is either "serve"
    (run the web server on localhost:8008) or "generate" (write static html
    to the --output directory).

    Usage errors are reported via the option parser, which exits the
    program. If the configuration cannot be loaded, the error is printed to
    stderr and the program exits with status 1.
    """
    parser = optparse.OptionParser("usage: %prog [options] serve|generate")
    parser.add_option(
        "-c", "--config", metavar="FILE", help="configuration file"
    )
    parser.add_option(
        "-o", "--output", metavar="DIR", help="output directory (for generate)"
    )
    parser.add_option(
        "",
        "--only",
        metavar="REPO",
        action="append",
        default=[],
        help="generate/serve only this repository",
    )
    opts, args = parser.parse_args()

    if not opts.config:
        parser.error("--config is mandatory")

    try:
        load_config(opts.config)
    except (configparser.NoOptionError, ValueError) as e:
        # Report the failure on stderr and with a non-zero status, so
        # scripts calling us can tell that nothing was served or generated.
        print("Error parsing config:", e, file=sys.stderr)
        sys.exit(1)

    if not args:
        parser.error("Must specify an action (serve|generate)")

    if args[0] == "serve":
        bottle.run(host="localhost", port=8008, reloader=True)
    elif args[0] == "generate":
        if not opts.output:
            parser.error("Must specify --output")
        generate(output=opts.output, only=opts.only)
    else:
        parser.error("Unknown action %s" % args[0])
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()