git » git-arr » commit 62da3eb

Use heuristics to decide what to colorize

author Alberto Bertogli
2012-11-14 00:48:46 UTC
committer Alberto Bertogli
2012-11-18 14:55:22 UTC
parent ba3b2132f5ffa852579734a350104a719f654b64

Use heuristics to decide what to colorize

In practice, pygments seems to have a very hard time processing large files
and files with long lines, so try to avoid using it in those cases.

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>

git-arr +1 -1
utils.py +22 -2
views/blob.html +1 -1
views/commit.html +1 -1

diff --git a/git-arr b/git-arr
index 8688c12..3a44427 100755
--- a/git-arr
+++ b/git-arr
@@ -158,7 +158,7 @@ def with_utils(f):
     """
     utilities = {
         'shorten': utils.shorten,
-        'has_colorizer': utils.has_colorizer,
+        'can_colorize': utils.can_colorize,
         'colorize_diff': utils.colorize_diff,
         'colorize_blob': utils.colorize_blob,
         'abort': bottle.abort,
diff --git a/utils.py b/utils.py
index 3bd281f..cee4bb7 100644
--- a/utils.py
+++ b/utils.py
@@ -18,8 +18,28 @@ def shorten(s, width = 60):
         return s
     return s[:57] + "..."
 
-def has_colorizer():
-    return pygments is not None
+def can_colorize(s):
+    """True if we can colorize the string, False otherwise."""
+    if pygments is None:
+        return False
+
+    # Pygments can take a huge amount of time with long files, or with very
+    # long lines; these are heuristics to try to avoid those situations.
+    if len(s) > (512 * 1024):
+        return False
+
+    # If any of the first 5 lines is over 300 characters long, don't colorize.
+    start = 0
+    for i in range(5):
+        pos = s.find('\n', start)
+        if pos == -1:
+            break
+
+        if pos - start > 300:
+            return False
+        start = pos + 1
+
+    return True
 
 def colorize_diff(s):
     lexer = lexers.DiffLexer(encoding = 'utf-8')
diff --git a/views/blob.html b/views/blob.html
index 4d5f7d0..283bbcd 100644
--- a/views/blob.html
+++ b/views/blob.html
@@ -36,7 +36,7 @@
     <a href="">{{!fname.html}}</a>
 </h3>
 
-% if has_colorizer():
+% if can_colorize(blob):
 {{!colorize_blob(fname.unicode, blob)}}
 % else:
 <pre class="blob-body">
diff --git a/views/commit.html b/views/commit.html
index 9a9e99d..bc3119c 100644
--- a/views/commit.html
+++ b/views/commit.html
@@ -55,7 +55,7 @@
 
 <hr/>
 
-% if has_colorizer():
+% if can_colorize(c.diff.body):
 {{!colorize_diff(c.diff.body)}}
 % else:
 <pre class="diff-body">