author | Alberto Bertogli
<albertito@blitiri.com.ar> 2020-05-24 14:01:04 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2020-05-24 15:04:04 UTC |
parent | 1183d6f817046a9f2b82a8d61b56046f046afb3f |
git-arr | +236 | -168 |
git.py | +146 | -112 |
pyproject.toml | +3 | -0 |
utils.py | +41 | -27 |
diff --git a/git-arr b/git-arr index 98a6bc7..05ac171 100755 --- a/git-arr +++ b/git-arr @@ -20,12 +20,13 @@ import utils # Note this assumes they live next to the executable, and that is not a good # assumption; but it's good enough for now. bottle.TEMPLATE_PATH.insert( - 0, os.path.abspath(os.path.dirname(sys.argv[0])) + '/views/') + 0, os.path.abspath(os.path.dirname(sys.argv[0])) + "/views/" +) # The path to our static files. # Note this assumes they live next to the executable, and that is not a good # assumption; but it's good enough for now. -static_path = os.path.abspath(os.path.dirname(sys.argv[0])) + '/static/' +static_path = os.path.abspath(os.path.dirname(sys.argv[0])) + "/static/" # The list of repositories is a global variable for convenience. It will be @@ -40,22 +41,22 @@ def load_config(path): as configured. """ defaults = { - 'tree': 'yes', - 'rootdiff': 'yes', - 'desc': '', - 'recursive': 'no', - 'prefix': '', - 'commits_in_summary': '10', - 'commits_per_page': '50', - 'max_pages': '250', - 'web_url': '', - 'web_url_file': 'web_url', - 'git_url': '', - 'git_url_file': 'cloneurl', - 'embed_markdown': 'yes', - 'embed_images': 'no', - 'ignore': '', - 'generate_patch': 'yes', + "tree": "yes", + "rootdiff": "yes", + "desc": "", + "recursive": "no", + "prefix": "", + "commits_in_summary": "10", + "commits_per_page": "50", + "max_pages": "250", + "web_url": "", + "web_url_file": "web_url", + "git_url": "", + "git_url_file": "cloneurl", + "embed_markdown": "yes", + "embed_images": "no", + "ignore": "", + "generate_patch": "yes", } config = configparser.ConfigParser(defaults) @@ -63,16 +64,16 @@ def load_config(path): # Do a first pass for general sanity checking and recursive expansion. for s in config.sections(): - if config.getboolean(s, 'recursive'): - root = config.get(s, 'path') - prefix = config.get(s, 'prefix') + if config.getboolean(s, "recursive"): + root = config.get(s, "path") + prefix = config.get(s, "prefix") for path in os.listdir(root): - fullpath = find_git_dir(root + '/' + path) + fullpath = find_git_dir(root + "/" + path) if not fullpath: continue - if os.path.exists(fullpath + '/disable_gitweb'): + if os.path.exists(fullpath + "/disable_gitweb"): continue section = prefix + path @@ -80,58 +81,60 @@ def load_config(path): continue config.add_section(section) - for opt, value in config.items(s, raw = True): + for opt, value in config.items(s, raw=True): config.set(section, opt, value) - config.set(section, 'path', fullpath) - config.set(section, 'recursive', 'no') + config.set(section, "path", fullpath) + config.set(section, "recursive", "no") # This recursive section is no longer useful. config.remove_section(s) for s in config.sections(): - if config.get(s, 'ignore') and re.search(config.get(s, 'ignore'), s): + if config.get(s, "ignore") and re.search(config.get(s, "ignore"), s): continue - fullpath = find_git_dir(config.get(s, 'path')) + fullpath = find_git_dir(config.get(s, "path")) if not fullpath: raise ValueError( - '%s: path %s is not a valid git repository' % ( - s, config.get(s, 'path'))) + "%s: path %s is not a valid git repository" + % (s, config.get(s, "path")) + ) - config.set(s, 'path', fullpath) - config.set(s, 'name', s) + config.set(s, "path", fullpath) + config.set(s, "name", s) - desc = config.get(s, 'desc') - if not desc and os.path.exists(fullpath + '/description'): - desc = open(fullpath + '/description').read().strip() + desc = config.get(s, "desc") + if not desc and os.path.exists(fullpath + "/description"): + desc = open(fullpath + "/description").read().strip() - r = git.Repo(fullpath, name = s) + r = git.Repo(fullpath, name=s) r.info.desc = desc - r.info.commits_in_summary = config.getint(s, 'commits_in_summary') - r.info.commits_per_page = config.getint(s, 'commits_per_page') - r.info.max_pages = config.getint(s, 'max_pages') + r.info.commits_in_summary = config.getint(s, "commits_in_summary") + r.info.commits_per_page = config.getint(s, "commits_per_page") + r.info.max_pages = config.getint(s, "max_pages") if r.info.max_pages <= 0: r.info.max_pages = sys.maxsize - r.info.generate_tree = config.getboolean(s, 'tree') - r.info.root_diff = config.getboolean(s, 'rootdiff') - r.info.generate_patch = config.getboolean(s, 'generate_patch') + r.info.generate_tree = config.getboolean(s, "tree") + r.info.root_diff = config.getboolean(s, "rootdiff") + r.info.generate_patch = config.getboolean(s, "generate_patch") - r.info.web_url = config.get(s, 'web_url') - web_url_file = fullpath + '/' + config.get(s, 'web_url_file') + r.info.web_url = config.get(s, "web_url") + web_url_file = fullpath + "/" + config.get(s, "web_url_file") if not r.info.web_url and os.path.isfile(web_url_file): r.info.web_url = open(web_url_file).read() - r.info.git_url = config.get(s, 'git_url') - git_url_file = fullpath + '/' + config.get(s, 'git_url_file') + r.info.git_url = config.get(s, "git_url") + git_url_file = fullpath + "/" + config.get(s, "git_url_file") if not r.info.git_url and os.path.isfile(git_url_file): r.info.git_url = open(git_url_file).read() - r.info.embed_markdown = config.getboolean(s, 'embed_markdown') - r.info.embed_images = config.getboolean(s, 'embed_images') + r.info.embed_markdown = config.getboolean(s, "embed_markdown") + r.info.embed_images = config.getboolean(s, "embed_images") repos[r.name] = r + def find_git_dir(path): """Returns the path to the git directory for the given repository. @@ -141,25 +144,26 @@ def find_git_dir(path): An empty string is returned if the given path is not a valid repository. """ + def check(p): """A dirty check for whether this is a git dir or not.""" # Note silent stderr because we expect this to fail and don't want the # noise; and also we strip the final \n from the output. - return git.run_git(p, - ['rev-parse', '--git-dir'], - silent_stderr = True).read()[:-1] + return git.run_git( + p, ["rev-parse", "--git-dir"], silent_stderr=True + ).read()[:-1] - for p in [ path, path + '/.git' ]: + for p in [path, path + "/.git"]: if check(p): return p - return '' + return "" def repo_filter(unused_conf): """Bottle route filter for repos.""" # TODO: consider allowing /, which is tricky. - regexp = r'[\w\.~-]+' + regexp = r"[\w\.~-]+" def to_python(s): """Return the corresponding Python object.""" @@ -173,8 +177,9 @@ def repo_filter(unused_conf): return regexp, to_python, to_url + app = bottle.Bottle() -app.router.add_filter('repo', repo_filter) +app.router.add_filter("repo", repo_filter) bottle.app.push(app) @@ -185,18 +190,18 @@ def with_utils(f): templates. """ utilities = { - 'shorten': utils.shorten, - 'can_colorize': utils.can_colorize, - 'colorize_diff': utils.colorize_diff, - 'colorize_blob': utils.colorize_blob, - 'can_markdown': utils.can_markdown, - 'markdown_blob': utils.markdown_blob, - 'can_embed_image': utils.can_embed_image, - 'embed_image_blob': utils.embed_image_blob, - 'is_binary': utils.is_binary, - 'hexdump': utils.hexdump, - 'abort': bottle.abort, - 'smstr': git.smstr, + "shorten": utils.shorten, + "can_colorize": utils.can_colorize, + "colorize_diff": utils.colorize_diff, + "colorize_blob": utils.colorize_blob, + "can_markdown": utils.can_markdown, + "markdown_blob": utils.markdown_blob, + "can_embed_image": utils.can_embed_image, + "embed_image_blob": utils.embed_image_blob, + "is_binary": utils.is_binary, + "hexdump": utils.hexdump, + "abort": bottle.abort, + "smstr": git.smstr, } def wrapped(*args, **kwargs): @@ -210,48 +215,57 @@ def with_utils(f): return wrapped -@bottle.route('/') -@bottle.view('index') + +@bottle.route("/") +@bottle.view("index") @with_utils def index(): - return dict(repos = repos) + return dict(repos=repos) + -@bottle.route('/r/<repo:repo>/') -@bottle.view('summary') +@bottle.route("/r/<repo:repo>/") +@bottle.view("summary") @with_utils def summary(repo): - return dict(repo = repo) + return dict(repo=repo) -@bottle.route('/r/<repo:repo>/c/<cid:re:[0-9a-f]{5,40}>/') -@bottle.view('commit') + +@bottle.route("/r/<repo:repo>/c/<cid:re:[0-9a-f]{5,40}>/") +@bottle.view("commit") @with_utils def commit(repo, cid): c = repo.commit(cid) if not c: - bottle.abort(404, 'Commit not found') + bottle.abort(404, "Commit not found") + + return dict(repo=repo, c=c) - return dict(repo = repo, c=c) -@bottle.route('/r/<repo:repo>/c/<cid:re:[0-9a-f]{5,40}>.patch') -@bottle.view('patch', - # Output is text/plain, don't do HTML escaping. - template_settings={"noescape": True}) +@bottle.route("/r/<repo:repo>/c/<cid:re:[0-9a-f]{5,40}>.patch") +@bottle.view( + "patch", + # Output is text/plain, don't do HTML escaping. + template_settings={"noescape": True}, +) def patch(repo, cid): c = repo.commit(cid) if not c: - bottle.abort(404, 'Commit not found') + bottle.abort(404, "Commit not found") + + bottle.response.content_type = "text/plain; charset=utf8" - bottle.response.content_type = 'text/plain; charset=utf8' + return dict(repo=repo, c=c) - return dict(repo = repo, c=c) -@bottle.route('/r/<repo:repo>/b/<bname:path>/t/f=<fname:path>.html') -@bottle.route('/r/<repo:repo>/b/<bname:path>/t/<dirname:path>/f=<fname:path>.html') -@bottle.view('blob') +@bottle.route("/r/<repo:repo>/b/<bname:path>/t/f=<fname:path>.html") +@bottle.route( + "/r/<repo:repo>/b/<bname:path>/t/<dirname:path>/f=<fname:path>.html" +) +@bottle.view("blob") @with_utils -def blob(repo, bname, fname, dirname = ''): - if dirname and not dirname.endswith('/'): - dirname = dirname + '/' +def blob(repo, bname, fname, dirname=""): + if dirname and not dirname.endswith("/"): + dirname = dirname + "/" dirname = git.smstr.from_url(dirname) fname = git.smstr.from_url(fname) @@ -265,38 +279,44 @@ def blob(repo, bname, fname, dirname = ''): if content is None: bottle.abort(404, "File %r not found in branch %s" % (path, bname)) - return dict(repo = repo, branch = bname, dirname = dirname, fname = fname, - blob = content) + return dict( + repo=repo, branch=bname, dirname=dirname, fname=fname, blob=content + ) -@bottle.route('/r/<repo:repo>/b/<bname:path>/t/') -@bottle.route('/r/<repo:repo>/b/<bname:path>/t/<dirname:path>/') -@bottle.view('tree') + +@bottle.route("/r/<repo:repo>/b/<bname:path>/t/") +@bottle.route("/r/<repo:repo>/b/<bname:path>/t/<dirname:path>/") +@bottle.view("tree") @with_utils -def tree(repo, bname, dirname = ''): - if dirname and not dirname.endswith('/'): - dirname = dirname + '/' +def tree(repo, bname, dirname=""): + if dirname and not dirname.endswith("/"): + dirname = dirname + "/" dirname = git.smstr.from_url(dirname) - return dict(repo = repo, branch = bname, tree = repo.tree(bname), - dirname = dirname) + return dict( + repo=repo, branch=bname, tree=repo.tree(bname), dirname=dirname + ) + -@bottle.route('/r/<repo:repo>/b/<bname:path>/') -@bottle.route('/r/<repo:repo>/b/<bname:path>/<offset:int>.html') -@bottle.view('branch') +@bottle.route("/r/<repo:repo>/b/<bname:path>/") +@bottle.route("/r/<repo:repo>/b/<bname:path>/<offset:int>.html") +@bottle.view("branch") @with_utils -def branch(repo, bname, offset = 0): - return dict(repo = repo, branch = bname, offset = offset) +def branch(repo, bname, offset=0): + return dict(repo=repo, branch=bname, offset=offset) -@bottle.route('/static/<path:path>') + +@bottle.route("/static/<path:path>") def static(path): - return bottle.static_file(path, root = static_path) + return bottle.static_file(path, root=static_path) # # Static HTML generation # + def is_404(e): """True if e is an HTTPError with status 404, False otherwise.""" # We need this because older bottle.py versions put the status code in @@ -307,10 +327,12 @@ def is_404(e): else: return e.status_code == 404 -def generate(output, only = None): + +def generate(output, only=None): """Generate static html to the output directory.""" - def write_to(path, func_or_str, args = (), mtime = None): - path = output + '/' + path + + def write_to(path, func_or_str, args=(), mtime=None): + path = output + "/" + path dirname = os.path.dirname(path) if not os.path.exists(dirname): @@ -346,71 +368,99 @@ def generate(output, only = None): print(path) s = func_or_str(*args) - open(path, 'w').write(s) + open(path, "w").write(s) if mtime: os.utime(path, (mtime, mtime)) def link(from_path, to_path): - from_path = output + '/' + from_path + from_path = output + "/" + from_path if os.path.lexists(from_path): return - print(from_path, '->', to_path) + print(from_path, "->", to_path) os.symlink(to_path, from_path) def write_tree(r, bn, mtime): t = r.tree(bn) - write_to('r/%s/b/%s/t/index.html' % (r.name, bn), - tree, (r, bn), mtime) + write_to("r/%s/b/%s/t/index.html" % (r.name, bn), tree, (r, bn), mtime) - for otype, oname, _ in t.ls('', recursive = True): + for otype, oname, _ in t.ls("", recursive=True): # FIXME: bottle cannot route paths with '\n' so those are sadly # expected to fail for now; we skip them. - if '\n' in oname.raw: - print('skipping file with \\n: %r' % (oname.raw)) + if "\n" in oname.raw: + print("skipping file with \\n: %r" % (oname.raw)) continue - if otype == 'blob': + if otype == "blob": dirname = git.smstr(os.path.dirname(oname.raw)) fname = git.smstr(os.path.basename(oname.raw)) write_to( - 'r/%s/b/%s/t/%s%sf=%s.html' % - (str(r.name), str(bn), - dirname.raw, '/' if dirname.raw else '', fname.raw), - blob, (r, bn, fname.url, dirname.url), mtime) + "r/%s/b/%s/t/%s%sf=%s.html" + % ( + str(r.name), + str(bn), + dirname.raw, + "/" if dirname.raw else "", + fname.raw, + ), + blob, + (r, bn, fname.url, dirname.url), + mtime, + ) else: - write_to('r/%s/b/%s/t/%s/index.html' % - (str(r.name), str(bn), oname.raw), - tree, (r, bn, oname.url), mtime) + write_to( + "r/%s/b/%s/t/%s/index.html" + % (str(r.name), str(bn), oname.raw), + tree, + (r, bn, oname.url), + mtime, + ) # Always generate the index, to keep the "last updated" time fresh. - write_to('index.html', index()) + write_to("index.html", index()) # We can't call static() because it relies on HTTP headers. read_f = lambda f: open(f).read() - write_to('static/git-arr.css', read_f, [static_path + '/git-arr.css'], - os.stat(static_path + '/git-arr.css').st_mtime) - write_to('static/git-arr.js', read_f, [static_path + '/git-arr.js'], - os.stat(static_path + '/git-arr.js').st_mtime) - write_to('static/syntax.css', read_f, [static_path + '/syntax.css'], - os.stat(static_path + '/syntax.css').st_mtime) - - rs = sorted(list(repos.values()), key = lambda r: r.name) + write_to( + "static/git-arr.css", + read_f, + [static_path + "/git-arr.css"], + os.stat(static_path + "/git-arr.css").st_mtime, + ) + write_to( + "static/git-arr.js", + read_f, + [static_path + "/git-arr.js"], + os.stat(static_path + "/git-arr.js").st_mtime, + ) + write_to( + "static/syntax.css", + read_f, + [static_path + "/syntax.css"], + os.stat(static_path + "/syntax.css").st_mtime, + ) + + rs = sorted(list(repos.values()), key=lambda r: r.name) if only: rs = [r for r in rs if r.name in only] for r in rs: - write_to('r/%s/index.html' % r.name, summary(r)) + write_to("r/%s/index.html" % r.name, summary(r)) for bn in r.branch_names(): commit_count = 0 - commit_ids = r.commit_ids('refs/heads/' + bn, - limit = r.info.commits_per_page * r.info.max_pages) + commit_ids = r.commit_ids( + "refs/heads/" + bn, + limit=r.info.commits_per_page * r.info.max_pages, + ) for cid in commit_ids: - write_to('r/%s/c/%s/index.html' % (r.name, cid), - commit, (r, cid)) + write_to( + "r/%s/c/%s/index.html" % (r.name, cid), commit, (r, cid) + ) if r.info.generate_patch: - write_to('r/%s/c/%s.patch' % (r.name, cid), patch, (r, cid)) + write_to( + "r/%s/c/%s.patch" % (r.name, cid), patch, (r, cid) + ) commit_count += 1 # To avoid regenerating files that have not changed, we will @@ -419,65 +469,83 @@ def generate(output, only = None): # write. branch_mtime = r.commit(bn).committer_date.epoch - nr_pages = int(math.ceil( - float(commit_count) / r.info.commits_per_page)) + nr_pages = int( + math.ceil(float(commit_count) / r.info.commits_per_page) + ) nr_pages = min(nr_pages, r.info.max_pages) for page in range(nr_pages): - write_to('r/%s/b/%s/%d.html' % (r.name, bn, page), - branch, (r, bn, page), branch_mtime) + write_to( + "r/%s/b/%s/%d.html" % (r.name, bn, page), + branch, + (r, bn, page), + branch_mtime, + ) - link(from_path = 'r/%s/b/%s/index.html' % (r.name, bn), - to_path = '0.html') + link( + from_path="r/%s/b/%s/index.html" % (r.name, bn), + to_path="0.html", + ) if r.info.generate_tree: write_tree(r, bn, branch_mtime) for tag_name, obj_id in r.tags(): try: - write_to('r/%s/c/%s/index.html' % (r.name, obj_id), - commit, (r, obj_id)) + write_to( + "r/%s/c/%s/index.html" % (r.name, obj_id), + commit, + (r, obj_id), + ) except bottle.HTTPError as e: # Some repos can have tags pointing to non-commits. This # happens in the Linux Kernel's v2.6.11, which points directly # to a tree. Ignore them. if is_404(e): - print('404 in tag %s (%s)' % (tag_name, obj_id)) + print("404 in tag %s (%s)" % (tag_name, obj_id)) else: raise def main(): - parser = optparse.OptionParser('usage: %prog [options] serve|generate') - parser.add_option('-c', '--config', metavar = 'FILE', - help = 'configuration file') - parser.add_option('-o', '--output', metavar = 'DIR', - help = 'output directory (for generate)') - parser.add_option('', '--only', metavar = 'REPO', action = 'append', - default = [], - help = 'generate/serve only this repository') + parser = optparse.OptionParser("usage: %prog [options] serve|generate") + parser.add_option( + "-c", "--config", metavar="FILE", help="configuration file" + ) + parser.add_option( + "-o", "--output", metavar="DIR", help="output directory (for generate)" + ) + parser.add_option( + "", + "--only", + metavar="REPO", + action="append", + default=[], + help="generate/serve only this repository", + ) opts, args = parser.parse_args() if not opts.config: - parser.error('--config is mandatory') + parser.error("--config is mandatory") try: load_config(opts.config) except (configparser.NoOptionError, ValueError) as e: - print('Error parsing config:', e) + print("Error parsing config:", e) return if not args: - parser.error('Must specify an action (serve|generate)') + parser.error("Must specify an action (serve|generate)") - if args[0] == 'serve': - bottle.run(host = 'localhost', port = 8008, reloader = True) - elif args[0] == 'generate': + if args[0] == "serve": + bottle.run(host="localhost", port=8008, reloader=True) + elif args[0] == "generate": if not opts.output: - parser.error('Must specify --output') - generate(output = opts.output, only = opts.only) + parser.error("Must specify --output") + generate(output=opts.output, only=opts.only) else: - parser.error('Unknown action %s' % args[0]) + parser.error("Unknown action %s" % args[0]) + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/git.py b/git.py index 09ccd37..3b2b5b7 100644 --- a/git.py +++ b/git.py @@ -19,37 +19,45 @@ from html import escape # Path to the git binary. GIT_BIN = "git" -def run_git(repo_path, params, stdin = None, silent_stderr = False, raw = False): + +def run_git(repo_path, params, stdin=None, silent_stderr=False, raw=False): """Invokes git with the given parameters. This function invokes git with the given parameters, and returns a file-like object with the output (from a pipe). """ - params = [GIT_BIN, '--git-dir=%s' % repo_path] + list(params) + params = [GIT_BIN, "--git-dir=%s" % repo_path] + list(params) stderr = None if silent_stderr: stderr = subprocess.PIPE if not stdin: - p = subprocess.Popen(params, - stdin = None, stdout = subprocess.PIPE, stderr = stderr) + p = subprocess.Popen( + params, stdin=None, stdout=subprocess.PIPE, stderr=stderr + ) else: - p = subprocess.Popen(params, - stdin = subprocess.PIPE, stdout = subprocess.PIPE, - stderr = stderr) + p = subprocess.Popen( + params, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=stderr, + ) + p.stdin.write(stdin) p.stdin.close() if raw: return p.stdout - return io.TextIOWrapper(p.stdout, encoding = 'utf8', - errors = 'backslashreplace') + return io.TextIOWrapper( + p.stdout, encoding="utf8", errors="backslashreplace" + ) -class GitCommand (object): +class GitCommand(object): """Convenient way of invoking git.""" + def __init__(self, path, cmd, *args, **kwargs): self._override = True self._path = path @@ -63,10 +71,10 @@ class GitCommand (object): self.__setattr__(k, v) def __setattr__(self, k, v): - if k == '_override' or self._override: + if k == "_override" or self._override: self.__dict__[k] = v return - k = k.replace('_', '-') + k = k.replace("_", "-") self._kwargs[k] = v def arg(self, a): @@ -92,19 +100,20 @@ class GitCommand (object): params = [self._cmd] for k, v in list(self._kwargs.items()): - dash = '--' if len(k) > 1 else '-' + dash = "--" if len(k) > 1 else "-" if v is None: - params.append('%s%s' % (dash, k)) + params.append("%s%s" % (dash, k)) else: - params.append('%s%s=%s' % (dash, k, str(v))) + params.append("%s%s=%s" % (dash, k, str(v))) params.extend(self._args) - return run_git(self._path, params, self._stdin_buf, raw = self._raw) + return run_git(self._path, params, self._stdin_buf, raw=self._raw) -class SimpleNamespace (object): +class SimpleNamespace(object): """An entirely flexible object, which provides a convenient namespace.""" + def __init__(self, **kwargs): self.__dict__.update(kwargs) @@ -120,14 +129,15 @@ class smstr: readable. .html -> an HTML-embeddable representation. """ + def __init__(self, raw): if not isinstance(raw, (str, bytes)): raise TypeError( - "The raw string must be instance of 'str', not %s" % - type(raw)) + "The raw string must be instance of 'str', not %s" % type(raw) + ) self.raw = raw if isinstance(raw, bytes): - self.unicode = raw.decode('utf8', errors = 'backslashreplace') + self.unicode = raw.decode("utf8", errors="backslashreplace") else: self.unicode = raw self.url = urllib.request.pathname2url(raw) @@ -147,7 +157,7 @@ class smstr: def split(self, sep): """Like str.split().""" - return [ smstr(s) for s in self.raw.split(sep) ] + return [smstr(s) for s in self.raw.split(sep)] def __add__(self, other): if isinstance(other, smstr): @@ -156,9 +166,9 @@ class smstr: def _to_html(self): """Returns an html representation of the unicode string.""" - html = '' + html = "" for c in escape(self.unicode): - if c in '\t\r\n\r\f\a\b\v\0': + if c in "\t\r\n\r\f\a\b\v\0": esc_c = c.encode("unicode-escape").decode("utf8") html += '<span class="ctrlchr">%s</span>' % esc_c else: @@ -186,7 +196,7 @@ def unquote(s): s = s.encode("latin1").decode("unicode-escape") # Convert to utf8. - s = s.encode("latin1").decode("utf8", errors='backslashreplace') + s = s.encode("latin1").decode("utf8", errors="backslashreplace") return s @@ -194,7 +204,7 @@ def unquote(s): class Repo: """A git repository.""" - def __init__(self, path, name = None, info = None): + def __init__(self, path, name=None, info=None): self.path = path self.name = name self.info = info or SimpleNamespace() @@ -203,9 +213,9 @@ class Repo: """Returns a GitCommand() on our path.""" return GitCommand(self.path, cmd) - def for_each_ref(self, pattern = None, sort = None, count = None): + def for_each_ref(self, pattern=None, sort=None, count=None): """Returns a list of references.""" - cmd = self.cmd('for-each-ref') + cmd = self.cmd("for-each-ref") if sort: cmd.sort = sort if count: @@ -217,61 +227,61 @@ class Repo: obj_id, obj_type, ref = l.split() yield obj_id, obj_type, ref - def branches(self, sort = '-authordate'): + def branches(self, sort="-authordate"): """Get the (name, obj_id) of the branches.""" - refs = self.for_each_ref(pattern = 'refs/heads/', sort = sort) + refs = self.for_each_ref(pattern="refs/heads/", sort=sort) for obj_id, _, ref in refs: - yield ref[len('refs/heads/'):], obj_id + yield ref[len("refs/heads/") :], obj_id def branch_names(self): """Get the names of the branches.""" - return ( name for name, _ in self.branches() ) + return (name for name, _ in self.branches()) - def tags(self, sort = '-taggerdate'): + def tags(self, sort="-taggerdate"): """Get the (name, obj_id) of the tags.""" - refs = self.for_each_ref(pattern = 'refs/tags/', sort = sort) + refs = self.for_each_ref(pattern="refs/tags/", sort=sort) for obj_id, _, ref in refs: - yield ref[len('refs/tags/'):], obj_id + yield ref[len("refs/tags/") :], obj_id def tag_names(self): """Get the names of the tags.""" - return ( name for name, _ in self.tags() ) + return (name for name, _ in self.tags()) - def commit_ids(self, ref, limit = None): + def commit_ids(self, ref, limit=None): """Generate commit ids.""" - cmd = self.cmd('rev-list') + cmd = self.cmd("rev-list") if limit: cmd.max_count = limit cmd.arg(ref) - cmd.arg('--') + cmd.arg("--") for l in cmd.run(): - yield l.rstrip('\n') + yield l.rstrip("\n") def commit(self, commit_id): """Return a single commit.""" - cs = list(self.commits(commit_id, limit = 1)) + cs = list(self.commits(commit_id, limit=1)) if len(cs) != 1: return None return cs[0] - def commits(self, ref, limit = None, offset = 0): + def commits(self, ref, limit=None, offset=0): """Generate commit objects for the ref.""" - cmd = self.cmd('rev-list') + cmd = self.cmd("rev-list") if limit: cmd.max_count = limit + offset cmd.header = None cmd.arg(ref) - cmd.arg('--') + cmd.arg("--") - info_buffer = '' + info_buffer = "" count = 0 for l in cmd.run(): - if '\0' in l: - pre, post = l.split('\0', 1) + if "\0" in l: + pre, post = l.split("\0", 1) info_buffer += pre count += 1 @@ -290,11 +300,11 @@ class Repo: def diff(self, ref): """Return a Diff object for the ref.""" - cmd = self.cmd('diff-tree') + cmd = self.cmd("diff-tree") cmd.patch = None cmd.numstat = None cmd.find_renames = None - if (self.info.root_diff): + if self.info.root_diff: cmd.root = None # Note we intentionally do not use -z, as the filename is just for # reference, and it is safer to let git do the escaping. @@ -305,13 +315,13 @@ class Repo: def refs(self): """Return a dict of obj_id -> ref.""" - cmd = self.cmd('show-ref') + cmd = self.cmd("show-ref") cmd.dereference = None r = defaultdict(list) for l in cmd.run(): l = l.strip() - obj_id, ref = l.split(' ', 1) + obj_id, ref = l.split(" ", 1) r[obj_id].append(ref) return r @@ -322,9 +332,9 @@ class Repo: def blob(self, path, ref): """Returns a Blob instance for the given path.""" - cmd = self.cmd('cat-file') + cmd = self.cmd("cat-file") cmd.raw(True) - cmd.batch = '%(objectsize)' + cmd.batch = "%(objectsize)" # Format: <ref>:<path> # Construct it in binary since the path might not be utf8. @@ -332,29 +342,39 @@ class Repo: out = cmd.run() head = out.readline() - if not head or head.strip().endswith(b'missing'): + if not head or head.strip().endswith(b"missing"): return None - return Blob(out.read()[:int(head)]) + return Blob(out.read()[: int(head)]) def last_commit_timestamp(self): """Return the timestamp of the last commit.""" - refs = self.for_each_ref(pattern = 'refs/heads/', - sort = '-committerdate', count = 1) + refs = self.for_each_ref( + pattern="refs/heads/", sort="-committerdate", count=1 + ) for obj_id, _, _ in refs: commit = self.commit(obj_id) return commit.committer_epoch return -1 -class Commit (object): +class Commit(object): """A git commit.""" - def __init__(self, repo, - commit_id, parents, tree, - author, author_epoch, author_tz, - committer, committer_epoch, committer_tz, - message): + def __init__( + self, + repo, + commit_id, + parents, + tree, + author, + author_epoch, + author_tz, + committer, + committer_epoch, + committer_tz, + message, + ): self._repo = repo self.id = commit_id self.parents = parents @@ -367,28 +387,30 @@ class Commit (object): self.committer_tz = committer_tz self.message = message - self.author_name, self.author_email = \ - email.utils.parseaddr(self.author) + self.author_name, self.author_email = email.utils.parseaddr( + self.author + ) - self.committer_name, self.committer_email = \ - email.utils.parseaddr(self.committer) + self.committer_name, self.committer_email = email.utils.parseaddr( + self.committer + ) - self.subject, self.body = self.message.split('\n', 1) + self.subject, self.body = self.message.split("\n", 1) self.author_date = Date(self.author_epoch, self.author_tz) self.committer_date = Date(self.committer_epoch, self.committer_tz) - # Only get this lazily when we need it; most of the time it's not # required by the caller. self._diff = None def __repr__(self): - return '<C %s p:%s a:%s s:%r>' % ( - self.id[:7], - ','.join(p[:7] for p in self.parents), - self.author_email, - self.subject[:20]) + return "<C %s p:%s a:%s s:%r>" % ( + self.id[:7], + ",".join(p[:7] for p in self.parents), + self.author_email, + self.subject[:20], + ) @property def diff(self): @@ -400,57 +422,68 @@ class Commit (object): @staticmethod def from_str(repo, buf): """Parses git rev-list output, returns a commit object.""" - if '\n\n' in buf: + if "\n\n" in buf: # Header, commit message - header, raw_message = buf.split('\n\n', 1) + header, raw_message = buf.split("\n\n", 1) else: # Header only, no commit message - header, raw_message = buf.rstrip(), ' ' + header, raw_message = buf.rstrip(), " " - header_lines = header.split('\n') + header_lines = header.split("\n") commit_id = header_lines.pop(0) header_dict = defaultdict(list) for line in header_lines: - k, v = line.split(' ', 1) + k, v = line.split(" ", 1) header_dict[k].append(v) - tree = header_dict['tree'][0] - parents = set(header_dict['parent']) - author, author_epoch, author_tz = \ - header_dict['author'][0].rsplit(' ', 2) - committer, committer_epoch, committer_tz = \ - header_dict['committer'][0].rsplit(' ', 2) + tree = header_dict["tree"][0] + parents = set(header_dict["parent"]) + + authorhdr = header_dict["author"][0] + author, author_epoch, author_tz = authorhdr.rsplit(" ", 2) + + committerhdr = header_dict["committer"][0] + committer, committer_epoch, committer_tz = committerhdr.rsplit(" ", 2) # Remove the first four spaces from the message's lines. - message = '' - for line in raw_message.split('\n'): - message += line[4:] + '\n' - - return Commit(repo, - commit_id = commit_id, tree = tree, parents = parents, - author = author, - author_epoch = author_epoch, author_tz = author_tz, - committer = committer, - committer_epoch = committer_epoch, committer_tz = committer_tz, - message = message) + message = "" + for line in raw_message.split("\n"): + message += line[4:] + "\n" + + return Commit( + repo, + commit_id=commit_id, + tree=tree, + parents=parents, + author=author, + author_epoch=author_epoch, + author_tz=author_tz, + committer=committer, + committer_epoch=committer_epoch, + committer_tz=committer_tz, + message=message, + ) + class Date: """Handy representation for a datetime from git.""" + def __init__(self, epoch, tz): self.epoch = int(epoch) self.tz = tz self.utc = datetime.datetime.utcfromtimestamp(self.epoch) self.tz_sec_offset_min = int(tz[1:3]) * 60 + int(tz[4:]) - if tz[0] == '-': + if tz[0] == "-": self.tz_sec_offset_min = -self.tz_sec_offset_min self.local = self.utc + datetime.timedelta( - minutes = self.tz_sec_offset_min) + minutes=self.tz_sec_offset_min + ) - self.str = self.utc.strftime('%a, %d %b %Y %H:%M:%S +0000 ') - self.str += '(%s %s)' % (self.local.strftime('%H:%M'), self.tz) + self.str = self.utc.strftime("%a, %d %b %Y %H:%M:%S +0000 ") + self.str += "(%s %s)" % (self.local.strftime("%H:%M"), self.tz) def __str__(self): return self.str @@ -458,6 +491,7 @@ class Date: class Diff: """A diff between two trees.""" + def __init__(self, ref, changes, body): """Constructor. @@ -477,23 +511,23 @@ class Diff: ref_id = next(lines) except StopIteration: # No diff; this can happen in merges without conflicts. - return Diff(None, [], '') + return Diff(None, [], "") # First, --numstat information. changes = [] l = next(lines) - while l != '\n': - l = l.rstrip('\n') - added, deleted, fname = l.split('\t', 2) - added = added.replace('-', '0') - deleted = deleted.replace('-', '0') + while l != "\n": + l = l.rstrip("\n") + added, deleted, fname = l.split("\t", 2) + added = added.replace("-", "0") + deleted = deleted.replace("-", "0") fname = smstr(unquote(fname)) changes.append((int(added), int(deleted), fname)) l = next(lines) # And now the diff body. We just store as-is, we don't really care for # the contents. - body = ''.join(lines) + body = "".join(lines) return Diff(ref_id, changes, body) @@ -505,9 +539,9 @@ class Tree: self.repo = repo self.ref = ref - def ls(self, path, recursive = False): + def ls(self, path, recursive=False): """Generates (type, name, size) for each file in path.""" - cmd = self.repo.cmd('ls-tree') + cmd = self.repo.cmd("ls-tree") cmd.long = None if recursive: cmd.r = None @@ -521,17 +555,17 @@ class Tree: for l in cmd.run(): _mode, otype, _oid, size, name = l.split(None, 4) - if size == '-': + if size == "-": size = None else: size = int(size) # Remove the quoting (if any); will always give us a str. - name = unquote(name.strip('\n')) + name = unquote(name.strip("\n")) # Strip the leading path, the caller knows it and it's often # easier to work with this way. - name = name[len(path):] + name = name[len(path) :] # We use a smart string for the name, as it's often tricky to # manipulate otherwise. @@ -548,5 +582,5 @@ class Blob: @property def utf8_content(self): if not self._utf8_content: - self._utf8_content = self.raw_content.decode('utf8', 'replace') + self._utf8_content = self.raw_content.decode("utf8", "replace") return self._utf8_content diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8573a6d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.black] +line-length = 79 +include = "(git-arr|git.py|utils.py)$" diff --git a/utils.py b/utils.py index 4e12b0d..0f25be1 100644 --- a/utils.py +++ b/utils.py @@ -23,11 +23,13 @@ import mimetypes import string import os.path -def shorten(s, width = 60): + +def shorten(s, width=60): if len(s) < 60: return s return s[:57] + "..." + def can_colorize(s): """True if we can colorize the string, False otherwise.""" if pygments is None: @@ -41,7 +43,7 @@ def can_colorize(s): # If any of the first 5 lines is over 300 characters long, don't colorize. start = 0 for i in range(5): - pos = s.find('\n', start) + pos = s.find("\n", start) if pos == -1: break @@ -51,6 +53,7 @@ def can_colorize(s): return True + def can_markdown(repo, fname): """True if we can process file through markdown, False otherwise.""" if markdown is None: @@ -61,75 +64,86 @@ def can_markdown(repo, fname): return fname.endswith(".md") + def can_embed_image(repo, fname): """True if we can embed image file in HTML, False otherwise.""" if not repo.info.embed_images: return False - return (('.' in fname) and - (fname.split('.')[-1].lower() in [ 'jpg', 'jpeg', 'png', 'gif' ])) + return ("." in fname) and ( + fname.split(".")[-1].lower() in ["jpg", "jpeg", "png", "gif"] + ) + def colorize_diff(s): - lexer = lexers.DiffLexer(encoding = 'utf-8') - formatter = HtmlFormatter(encoding = 'utf-8', - cssclass = 'source_code') + lexer = lexers.DiffLexer(encoding="utf-8") + formatter = HtmlFormatter(encoding="utf-8", cssclass="source_code") return highlight(s, lexer, formatter) + def colorize_blob(fname, s): try: - lexer = lexers.guess_lexer_for_filename(fname, s, encoding = 'utf-8') + lexer = lexers.guess_lexer_for_filename(fname, s, encoding="utf-8") except lexers.ClassNotFound: # Only try to guess lexers if the file starts with a shebang, # otherwise it's likely a text file and guess_lexer() is prone to # make mistakes with those. - lexer = lexers.TextLexer(encoding = 'utf-8') - if s.startswith('#!'): + lexer = lexers.TextLexer(encoding="utf-8") + if s.startswith("#!"): try: - lexer = lexers.guess_lexer(s[:80], encoding = 'utf-8') + lexer = lexers.guess_lexer(s[:80], encoding="utf-8") except lexers.ClassNotFound: pass - formatter = HtmlFormatter(encoding = 'utf-8', - cssclass = 'source_code', - linenos = 'table', - anchorlinenos = True, - lineanchors = 'line') + formatter = HtmlFormatter( + encoding="utf-8", + cssclass="source_code", + linenos="table", + anchorlinenos=True, + lineanchors="line", + ) return highlight(s, lexer, formatter) + def markdown_blob(s): extensions = [ "markdown.extensions.fenced_code", "markdown.extensions.tables", RewriteLocalLinksExtension(), ] - return markdown.markdown(s, extensions = extensions) + return markdown.markdown(s, extensions=extensions) + def embed_image_blob(fname, image_data): mimetype = mimetypes.guess_type(fname)[0] b64img = base64.b64encode(image_data).decode("ascii") - return '<img style="max-width:100%;" src="data:{0};base64,{1}" />'.format( \ - mimetype, b64img) + return '<img style="max-width:100%;" src="data:{0};base64,{1}" />'.format( + mimetype, b64img + ) + def is_binary(s): # Git considers a blob binary if NUL in first ~8KB, so do the same. - return b'\0' in s[:8192] + return b"\0" in s[:8192] + def hexdump(s): - graph = string.ascii_letters + string.digits + string.punctuation + ' ' + graph = string.ascii_letters + string.digits + string.punctuation + " " s = s.decode("latin1") offset = 0 while s: t = s[:16] - hexvals = ['%.2x' % ord(c) for c in t] - text = ''.join(c if c in graph else '.' for c in t) - yield offset, ' '.join(hexvals[:8]), ' '.join(hexvals[8:]), text + hexvals = ["%.2x" % ord(c) for c in t] + text = "".join(c if c in graph else "." for c in t) + yield offset, " ".join(hexvals[:8]), " ".join(hexvals[8:]), text offset += 16 s = s[16:] if markdown: + class RewriteLocalLinks(markdown.treeprocessors.Treeprocessor): """Rewrites relative links to files, to match git-arr's links. @@ -139,6 +153,7 @@ if markdown: Note that we're already assuming a degree of sanity in the HTML, so we don't re-check that the path is reasonable. """ + def run(self, root): for child in root: if child.tag == "a": @@ -159,9 +174,8 @@ if markdown: new_target = os.path.join(head, "f=" + tail + ".html") tag.set("href", new_target) - class RewriteLocalLinksExtension(markdown.Extension): def extendMarkdown(self, md, md_globals): md.treeprocessors.add( - "RewriteLocalLinks", RewriteLocalLinks(), "_end") - + "RewriteLocalLinks", RewriteLocalLinks(), "_end" + )