author | Alberto Bertogli
<albertito@blitiri.com.ar> 2020-05-24 01:36:43 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2020-05-24 03:50:39 UTC |
parent | cbb36e087c1bcf1c81de53e920baf0c681abfd87 |
.gitignore | +2 | -1 |
git-arr | +11 | -13 |
git.py | +38 | -49 |
utils.py | +4 | -2 |
views/tree-list.html | +1 | -1 |
diff --git a/.gitignore b/.gitignore index faf410c..94fab83 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.pyc __pycache__ -.*.swp +.* +!.gitignore diff --git a/git-arr b/git-arr index 5c4e7db..98a6bc7 100755 --- a/git-arr +++ b/git-arr @@ -1,21 +1,15 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ git-arr: A git web html generator. """ -from __future__ import print_function - +import configparser import math import optparse import os import re import sys -try: - import configparser -except ImportError: - import ConfigParser as configparser - import bottle import git @@ -64,7 +58,7 @@ def load_config(path): 'generate_patch': 'yes', } - config = configparser.SafeConfigParser(defaults) + config = configparser.ConfigParser(defaults) config.read(path) # Do a first pass for general sanity checking and recursive expansion. @@ -118,7 +112,7 @@ def load_config(path): r.info.commits_per_page = config.getint(s, 'commits_per_page') r.info.max_pages = config.getint(s, 'max_pages') if r.info.max_pages <= 0: - r.info.max_pages = sys.maxint + r.info.max_pages = sys.maxsize r.info.generate_tree = config.getboolean(s, 'tree') r.info.root_diff = config.getboolean(s, 'rootdiff') r.info.generate_patch = config.getboolean(s, 'generate_patch') @@ -263,6 +257,10 @@ def blob(repo, bname, fname, dirname = ''): fname = git.smstr.from_url(fname) path = dirname.raw + fname.raw + # Handle backslash-escaped characters, which are not utf8. + # This matches the generated links from git.unquote(). + path = path.encode("utf8").decode("unicode-escape").encode("latin1") + content = repo.blob(path, bname) if content is None: bottle.abort(404, "File %r not found in branch %s" % (path, bname)) @@ -339,7 +337,7 @@ def generate(output, only = None): else: # Otherwise, be lazy if we were given a function to run, or write # always if they gave us a string. - if isinstance(func_or_str, (str, unicode)): + if isinstance(func_or_str, str): print(path) s = func_or_str else: @@ -348,7 +346,7 @@ def generate(output, only = None): print(path) s = func_or_str(*args) - open(path, 'w').write(s.encode('utf8', errors = 'xmlcharrefreplace')) + open(path, 'w').write(s) if mtime: os.utime(path, (mtime, mtime)) @@ -398,7 +396,7 @@ def generate(output, only = None): write_to('static/syntax.css', read_f, [static_path + '/syntax.css'], os.stat(static_path + '/syntax.css').st_mtime) - rs = sorted(repos.values(), key = lambda r: r.name) + rs = sorted(list(repos.values()), key = lambda r: r.name) if only: rs = [r for r in rs if r.name in only] diff --git a/git.py b/git.py index 2240175..09ccd37 100644 --- a/git.py +++ b/git.py @@ -12,35 +12,13 @@ import subprocess from collections import defaultdict import email.utils import datetime -import urllib -from cgi import escape +import urllib.request, urllib.parse, urllib.error +from html import escape # Path to the git binary. GIT_BIN = "git" -class EncodeWrapper: - """File-like wrapper that returns data utf8 encoded.""" - def __init__(self, fd, encoding = 'utf8', errors = 'replace'): - self.fd = fd - self.encoding = encoding - self.errors = errors - - def __iter__(self): - for line in self.fd: - yield line.decode(self.encoding, errors = self.errors) - - def read(self): - """Returns the whole content.""" - s = self.fd.read() - return s.decode(self.encoding, errors = self.errors) - - def readline(self): - """Returns a single line.""" - s = self.fd.readline() - return s.decode(self.encoding, errors = self.errors) - - def run_git(repo_path, params, stdin = None, silent_stderr = False, raw = False): """Invokes git with the given parameters. @@ -66,13 +44,8 @@ def run_git(repo_path, params, stdin = None, silent_stderr = False, raw = False) if raw: return p.stdout - # We need to wrap stdout if we want to decode it as utf8, subprocess - # doesn't support us telling it the encoding. - if sys.version_info.major == 3: - return io.TextIOWrapper(p.stdout, encoding = 'utf8', - errors = 'replace') - else: - return EncodeWrapper(p.stdout) + return io.TextIOWrapper(p.stdout, encoding = 'utf8', + errors = 'backslashreplace') class GitCommand (object): @@ -109,6 +82,8 @@ class GitCommand (object): def stdin(self, s): """Sets the contents we will send in stdin.""" self._override = True + if isinstance(s, str): + s = s.encode("utf8") self._stdin_buf = s self._override = False @@ -116,7 +91,7 @@ class GitCommand (object): """Runs the git command.""" params = [self._cmd] - for k, v in self._kwargs.items(): + for k, v in list(self._kwargs.items()): dash = '--' if len(k) > 1 else '-' if v is None: params.append('%s%s' % (dash, k)) @@ -146,11 +121,16 @@ class smstr: .html -> an HTML-embeddable representation. """ def __init__(self, raw): - if not isinstance(raw, str): - raise TypeError("The raw string must be instance of 'str'") + if not isinstance(raw, (str, bytes)): + raise TypeError( + "The raw string must be instance of 'str', not %s" % + type(raw)) self.raw = raw - self.unicode = raw.decode('utf8', errors = 'replace') - self.url = urllib.pathname2url(raw) + if isinstance(raw, bytes): + self.unicode = raw.decode('utf8', errors = 'backslashreplace') + else: + self.unicode = raw + self.url = urllib.request.pathname2url(raw) self.html = self._to_html() def __cmp__(self, other): @@ -163,7 +143,7 @@ class smstr: @staticmethod def from_url(url): """Returns an smstr() instance from an url-encoded string.""" - return smstr(urllib.url2pathname(url)) + return smstr(urllib.request.url2pathname(url)) def split(self, sep): """Like str.split().""" @@ -176,10 +156,10 @@ class smstr: def _to_html(self): """Returns an html representation of the unicode string.""" - html = u'' + html = '' for c in escape(self.unicode): if c in '\t\r\n\r\f\a\b\v\0': - esc_c = c.encode('ascii').encode('string_escape') + esc_c = c.encode("unicode-escape").decode("utf8") html += '<span class="ctrlchr">%s</span>' % esc_c else: html += c @@ -190,14 +170,23 @@ class smstr: def unquote(s): """Git can return quoted file names, unquote them. Always return a str.""" if not (s[0] == '"' and s[-1] == '"'): - # Unquoted strings are always safe, no need to mess with them; just - # make sure we return str. - s = s.encode('ascii') + # Unquoted strings are always safe, no need to mess with them return s - # Get rid of the quotes, we never want them in the output, and convert to - # a raw string, un-escaping the backslashes. - s = s[1:-1].decode('string-escape') + # The string will be of the form `"<escaped>"`, where <escaped> is a + # backslash-escaped representation of the name of the file. + # Examples: "with\ttwo\ttabs" , "\303\261aca-utf8", "\361aca-latin1" + + # Get rid of the quotes, we never want them in the output. + s = s[1:-1] + + # Un-escape the backslashes. + # latin1 is ok to use here because in Python it just maps the code points + # 0-255 to the bytes 0x-0xff, which is what we expect. + s = s.encode("latin1").decode("unicode-escape") + + # Convert to utf8. + s = s.encode("latin1").decode("utf8", errors='backslashreplace') return s @@ -337,13 +326,13 @@ class Repo: cmd.raw(True) cmd.batch = '%(objectsize)' - if isinstance(ref, unicode): - ref = ref.encode('utf8') - cmd.stdin('%s:%s' % (ref, path)) + # Format: <ref>:<path> + # Construct it in binary since the path might not be utf8. + cmd.stdin(ref.encode("utf8") + b":" + path) out = cmd.run() head = out.readline() - if not head or head.strip().endswith('missing'): + if not head or head.strip().endswith(b'missing'): return None return Blob(out.read()[:int(head)]) diff --git a/utils.py b/utils.py index ada9c7e..4e12b0d 100644 --- a/utils.py +++ b/utils.py @@ -108,15 +108,17 @@ def markdown_blob(s): def embed_image_blob(fname, image_data): mimetype = mimetypes.guess_type(fname)[0] + b64img = base64.b64encode(image_data).decode("ascii") return '<img style="max-width:100%;" src="data:{0};base64,{1}" />'.format( \ - mimetype, base64.b64encode(image_data)) + mimetype, b64img) def is_binary(s): # Git considers a blob binary if NUL in first ~8KB, so do the same. - return '\0' in s[:8192] + return b'\0' in s[:8192] def hexdump(s): graph = string.ascii_letters + string.digits + string.punctuation + ' ' + s = s.decode("latin1") offset = 0 while s: t = s[:16] diff --git a/views/tree-list.html b/views/tree-list.html index 70f032a..ce5b0d6 100644 --- a/views/tree-list.html +++ b/views/tree-list.html @@ -1,5 +1,5 @@ <table class="nice toggable ls" id="ls"> -% key_func = lambda (t, n, s): (t != 'tree', n.raw) +% key_func = lambda x: (x[0] != 'tree', x[1].raw) % for type, name, size in sorted(tree.ls(dirname.raw), key = key_func): <tr class="{{type}}"> % if type == "blob":