git » git-arr » commit 15d909a

Use a separate file for repositories' "last modified time"

author Alberto Bertogli
2025-07-18 18:26:07 UTC
committer Alberto Bertogli
2025-07-18 18:38:29 UTC
parent c27ba7ce3a18b7210e313dc7d6c0c7b970074f10

Use a separate file for repositories' "last modified time"

Today, when regenerating a single repository (with --only), we spend
most of the time going through the other repositories and finding their
last modified time, just to put it in the top-level index.

To speed that up, we stop putting the timestamp inline in the top-level
index, and instead have a separate modified_ts.json file to keep the
timestamp for each repo.

Then, when using --only, we update that file with the data from the
repositories being updated only, instead of all of them.

The resulting speedup depends on the number of repositories, but on a 40
repo config, when using --only this results in a 1s -> 0.5s improvement.

git-arr +43 -2
static/git-arr.js +18 -8
views/index.html +2 -2

diff --git a/git-arr b/git-arr
index f8ea145..adb784f 100755
--- a/git-arr
+++ b/git-arr
@@ -7,6 +7,7 @@ import configparser
 import math
 import optparse
 import functools
+import json
 import os
 import time
 import re
@@ -231,6 +232,17 @@ def index():
     return dict(repos=repos)
 
 
+@utils.log_timing()
+@bottle.route("/modified_ts.json")
+def modified_ts(only=None):
+    ts = {}
+    for r in repos.values():
+        if only and r.name not in only:
+            continue
+        ts[r.name] = r.last_commit_timestamp()
+    return dict(ts)
+
+
 @utils.log_timing()
 @bottle.route("/r/<repo:repo>/")
 @bottle.view("summary")
@@ -439,8 +451,26 @@ def generate(output: str, only=None):
                     oid,
                 )
 
-    # Always generate the index, to keep the "last updated" time fresh.
-    write_to("index.html", index())
+    @utils.log_timing()
+    def update_modified_ts_json(last_commit_timestamp):
+        """Update /modified_ts.json with the given last timestamps."""
+        # Note that the original file may have more repositories than our new
+        # dict, if --only was used. The point of updating it instead of doing
+        # a full regeneration is that it is much faster when --only is used.
+        path = output + "/modified_ts.json"
+        print(path)
+        if only and os.path.exists(path):
+            ts = json.load(open(path))
+        else:
+            ts = {}
+        ts.update(last_commit_timestamp)
+        s = json.dumps(ts, indent=4, sort_keys=True)
+        open(path, "w").write(s)
+
+    # Don't generate the top level index if we are generating a single
+    # repository.
+    if not only:
+        write_to("index.html", index())
 
     # We can't call static() because it relies on HTTP headers.
     read_f = lambda f: open(f).read()
@@ -467,6 +497,12 @@ def generate(output: str, only=None):
     if only:
         rs = [r for r in rs if r.name in only]
 
+    # We will keep track of the last commit timestamp for each repository,
+    # so we can write it to the top level index.
+    # This is an optimization, because computing the last commit timestamp
+    # for a repository when we are not generating it is expensive.
+    last_commit_timestamp = {}
+
     for r in rs:
         write_to("r/%s/index.html" % r.name, summary(r))
 
@@ -478,6 +514,7 @@ def generate(output: str, only=None):
         # written, and skip writing them again.
         commits_written = set()
 
+        last_commit_timestamp[r.name] = -1
         for bn in r.branch_names():
             commit_count = 0
             commit_ids = r.commit_ids(
@@ -503,6 +540,8 @@ def generate(output: str, only=None):
             # date, and then compare against it to decide whether or not to
             # write.
             branch_mtime = r.commit(bn).committer_date.epoch
+            if branch_mtime > last_commit_timestamp[r.name]:
+                last_commit_timestamp[r.name] = branch_mtime
 
             nr_pages = int(
                 math.ceil(float(commit_count) / r.info.commits_per_page)
@@ -541,6 +580,8 @@ def generate(output: str, only=None):
                 else:
                     raise
 
+    update_modified_ts_json(last_commit_timestamp)
+
 
 def main():
     parser = optparse.OptionParser("usage: %prog [options] serve|generate")
diff --git a/static/git-arr.js b/static/git-arr.js
index d39ca72..0533dc0 100644
--- a/static/git-arr.js
+++ b/static/git-arr.js
@@ -39,14 +39,24 @@ function how_long_ago(timestamp) {
     return "about now";
 }
 
-/* Go through the document and replace the contents of the span.age elements
- * with a human-friendly variant, and then show them. */
-function replace_timestamps() {
-    var elements = document.getElementsByClassName("age");
-    for (var i = 0; i < elements.length; i++) {
-        var e = elements[i];
-
-        var timestamp = e.innerHTML;
+/* Load the timestamps from the modified_ts.json file, and then
+ * insert the human-friendly representation into the corresponding span.age
+ * elements. */
+async function load_timestamps() {
+    const response = await fetch("modified_ts.json");
+    if (!response.ok) {
+        throw new Error(`fetch error, status: ${response.status}`);
+    }
+
+    const json = await response.json();
+    console.log("Loaded timestamps:", json);
+
+    for (const [repo_name, timestamp] of Object.entries(json)) {
+        const e = document.getElementById("age:"+repo_name);
+        if (!e) {
+            console.warn(`No element found for repo: ${repo_name}`);
+            continue;
+        }
         e.innerHTML = how_long_ago(timestamp);
         e.style.display = "inline";
 
diff --git a/views/index.html b/views/index.html
index 91517db..79bb486 100644
--- a/views/index.html
+++ b/views/index.html
@@ -8,7 +8,7 @@
 <script async src="static/git-arr.js"></script>
 </head>
 
-<body class="index" onload="replace_timestamps()">
+<body class="index" onload="load_timestamps()">
 <h1>git</h1>
 
 <table class="nice projects">
@@ -22,7 +22,7 @@
         <td class="name"><a href="r/{{repo.name}}/">{{repo.name}}</a></td>
         <td class="desc"><a href="r/{{repo.name}}/" title="{{repo.info.desc}}">
             {{repo.info.desc}}</a></td>
-        <td><span class="age">{{repo.last_commit_timestamp()}}</span></td>
+        <td><span id="age:{{repo.name}}" class="age"></span></td>
     </tr>
     %end
 </table>