author | Alberto Bertogli
<albertito@blitiri.com.ar> 2023-04-01 17:26:36 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2023-04-02 08:57:45 UTC |
.gitignore | +3 | -0 |
algo_test.go | +96 | -0 |
db.go | +127 | -0 |
go.mod | +10 | -0 |
go.sum | +6 | -0 |
summer.go | +281 | -0 |
test/access.t | +20 | -0 |
test/basic.t | +85 | -0 |
test/cover.sh | +24 | -0 |
test/help.t | +96 | -0 |
test/sqlite.t | +40 | -0 |
test/test.sh | +9 | -0 |
ui.go | +55 | -0 |
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69efd0e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.* +!.gitignore +summer diff --git a/algo_test.go b/algo_test.go new file mode 100644 index 0000000..082abee --- /dev/null +++ b/algo_test.go @@ -0,0 +1,96 @@ +package main + +import ( + "crypto/rand" + "hash/adler32" + "hash/crc32" + "hash/crc64" + "hash/fnv" + "testing" +) + +const size = 16 * 1024 + +var ( + crc32k = crc32.MakeTable(crc32.Koopman) + + crc64iso = crc64.MakeTable(crc64.ISO) + crc64ecma = crc64.MakeTable(crc64.ECMA) +) + +func randomBuf(b *testing.B) []byte { + buf := make([]byte, size) + n, err := rand.Read(buf) + if n != size || err != nil { + b.Fatalf("failed to generate random data: %v", err) + } + return buf +} + +func BenchmarkCRC32C(b *testing.B) { + buf := randomBuf(b) + b.ResetTimer() + for i := 0; i < b.N; i++ { + crc32.Checksum(buf, crc32c) + } +} + +func BenchmarkCRC32K(b *testing.B) { + buf := randomBuf(b) + b.ResetTimer() + for i := 0; i < b.N; i++ { + crc32.Checksum(buf, crc32k) + } +} + +func BenchmarkCRC32IEEE(b *testing.B) { + buf := randomBuf(b) + b.ResetTimer() + for i := 0; i < b.N; i++ { + crc32.ChecksumIEEE(buf) + } +} + +func BenchmarkAdler32(b *testing.B) { + buf := randomBuf(b) + b.ResetTimer() + for i := 0; i < b.N; i++ { + adler32.Checksum(buf) + } +} + +func BenchmarkCRC64ISO(b *testing.B) { + buf := randomBuf(b) + b.ResetTimer() + for i := 0; i < b.N; i++ { + crc64.Checksum(buf, crc64iso) + } +} + +func BenchmarkCRC64ECMA(b *testing.B) { + buf := randomBuf(b) + b.ResetTimer() + for i := 0; i < b.N; i++ { + crc64.Checksum(buf, crc64ecma) + } +} + +func BenchmarkFNV32(b *testing.B) { + buf := randomBuf(b) + b.ResetTimer() + for i := 0; i < b.N; i++ { + h := fnv.New32() + h.Write(buf) + h.Sum(nil) + } +} + +func BenchmarkFNV64(b *testing.B) { + buf := randomBuf(b) + b.ResetTimer() + for i := 0; i < b.N; i++ { + h := fnv.New64a() + h.Write(buf) + h.Sum(nil) + } +} diff --git a/db.go b/db.go new file mode 100644 index 0000000..c26ea07 --- /dev/null +++ b/db.go @@ -0,0 +1,127 @@ +package main + +import ( + "bytes" + "database/sql" + "encoding/binary" + "os" + "path/filepath" + + "github.com/pkg/xattr" + + _ "github.com/mattn/go-sqlite3" +) + +type DB interface { + Has(f *os.File) (bool, error) + Read(f *os.File) (ChecksumV1, error) + Write(f *os.File, cs ChecksumV1) error + Close() error +} + +type XattrDB struct{} + +func (_ XattrDB) Has(f *os.File) (bool, error) { + attrs, err := xattr.FList(f) + for _, a := range attrs { + if a == "user.summer-v1" { + return true, err + } + } + return false, err +} + +func (_ XattrDB) Read(f *os.File) (ChecksumV1, error) { + val, err := xattr.FGet(f, "user.summer-v1") + if err != nil { + return ChecksumV1{}, err + } + + buf := bytes.NewReader(val) + c := ChecksumV1{} + err = binary.Read(buf, binary.LittleEndian, &c) + return c, err +} + +func (_ XattrDB) Write(f *os.File, cs ChecksumV1) error { + buf := new(bytes.Buffer) + err := binary.Write(buf, binary.LittleEndian, cs) + if err != nil { + // We control the struct, it should never panic. + panic(err) + } + return xattr.FSet(f, "user.summer-v1", buf.Bytes()) +} + +func (_ XattrDB) Close() error { + return nil +} + +type SqliteDB struct { + root string + db *sql.DB +} + +const createTableV1 = ` + create table if not exists checksums ( + path string primary key, + crc32c integer, + modtimeusec integer + ); +` + +func OpenSqliteDB(dbPath, root string) (*SqliteDB, error) { + db, err := sql.Open("sqlite3", dbPath) + if err != nil { + return nil, err + } + + if _, err := db.Exec(createTableV1); err != nil { + return nil, err + } + + return &SqliteDB{root, db}, nil +} + +func (s *SqliteDB) Has(f *os.File) (bool, error) { + path, err := filepath.Rel(s.root, f.Name()) + if err != nil { + return false, err + } + + q := 0 + err = s.db.QueryRow( + "select count(1) from checksums where path = ?", + path).Scan(&q) + return q == 1, err +} + +func (s *SqliteDB) Read(f *os.File) (ChecksumV1, error) { + cs := ChecksumV1{} + path, err := filepath.Rel(s.root, f.Name()) + if err != nil { + return cs, err + } + + err = s.db.QueryRow( + "select crc32c, modtimeusec from checksums where path = ?", + path).Scan(&cs.CRC32C, &cs.ModTimeUsec) + return cs, err +} + +func (s *SqliteDB) Write(f *os.File, cs ChecksumV1) error { + path, err := filepath.Rel(s.root, f.Name()) + if err != nil { + return err + } + + _, err = s.db.Exec( + "insert or replace into checksums "+ + "(path, crc32c, modtimeusec) values(?, ?, ?)", + path, cs.CRC32C, cs.ModTimeUsec) + return err +} + +func (s *SqliteDB) Close() error { + return s.db.Close() +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b097087 --- /dev/null +++ b/go.mod @@ -0,0 +1,10 @@ +module blitiri.com.ar/go/summer + +go 1.19 + +require ( + github.com/mattn/go-sqlite3 v1.14.16 + github.com/pkg/xattr v0.4.9 +) + +require golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..2e4ed8c --- /dev/null +++ b/go.sum @@ -0,0 +1,6 @@ +github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwpU1Y= +github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= +github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= +golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f h1:8w7RhxzTVgUzw/AH/9mUV5q0vMgy40SQRursCcfmkCw= +golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/summer.go b/summer.go new file mode 100644 index 0000000..bcbaf39 --- /dev/null +++ b/summer.go @@ -0,0 +1,281 @@ +package main + +import ( + "flag" + "fmt" + "hash/crc32" + "io" + "io/fs" + "os" + "path/filepath" +) + +const usage = `# summer 🌞 🏖 + +Utility to detect accidental data corruption (e.g. bitrot, storage media +problems). Not intended to detect malicious modification. + +Checksums are written to/read from each files' extended attributes by default, +or to a separate database file (with the -db flag). + +Usage: + summer update <dir> + Verify checksums in the given directory, and update them for new or + changed files. + summer verify <dir> + Verify checksums in the given directory. + summer generate <dir> + Write checksums for the given directory. Pre-existing checksums are + overwritten without verification. + +Flags: +` + +var ( + dbPath = flag.String("db", "", "database to read from/write to") +) + +func Usage() { + fmt.Fprintf(flag.CommandLine.Output(), usage) + flag.PrintDefaults() +} + +func main() { + var err error + + flag.Usage = Usage + flag.Parse() + + op := flag.Arg(0) + root := flag.Arg(1) + + if root == "" { + Usage() + os.Exit(1) + } + + var db DB = XattrDB{} + if *dbPath != "" { + db, err = OpenSqliteDB(*dbPath, root) + if err != nil { + Fatalf("%q: %v", *dbPath, err) + } + } + defer db.Close() + + switch op { + case "generate": + err = generate(db, root) + case "verify": + err = verify(db, root) + case "update": + err = update(db, root) + default: + Fatalf("unknown command %q", op) + } + + if err != nil { + Fatalf("%v", err) + } +} + +var crc32c = crc32.MakeTable(crc32.Castagnoli) + +type ChecksumV1 struct { + // CRC32C of the file contents. + CRC32C uint32 + + // Modification time of the file when the checksum was computed. + // In Unix microseconds. + ModTimeUsec int64 +} + +func isFileRelevant(path string, d fs.DirEntry, err error) bool { + if err != nil { + return false + } + if d.IsDir() { + return false + } + return d.Type().IsRegular() +} + +func openAndInfo(path string, d fs.DirEntry) (*os.File, fs.FileInfo, error) { + info, err := d.Info() + if err != nil { + return nil, nil, err + } + fd, err := os.Open(path) + if err != nil { + return nil, nil, err + } + + return fd, info, nil +} + +func generate(db DB, root string) error { + var total int64 + fn := func(path string, d fs.DirEntry, err error) error { + if !isFileRelevant(path, d, err) { + return err + } + + fd, info, err := openAndInfo(path, d) + if err != nil { + return err + } + defer fd.Close() + + h := crc32.New(crc32c) + _, err = io.Copy(h, fd) + if err != nil { + return err + } + + csum := ChecksumV1{ + CRC32C: h.Sum32(), + ModTimeUsec: info.ModTime().UnixMicro(), + } + + err = db.Write(fd, csum) + if err != nil { + return err + } + + total++ + return nil + } + + err := filepath.WalkDir(root, fn) + PrintWritten(total) + return err +} + +func verify(db DB, root string) error { + var missing, modified, corrupted, matched int64 + fn := func(path string, d fs.DirEntry, err error) error { + if !isFileRelevant(path, d, err) { + return err + } + + fd, info, err := openAndInfo(path, d) + if err != nil { + return err + } + defer fd.Close() + + hasAttr, err := db.Has(fd) + if err != nil { + return err + } + if !hasAttr { + PrintMissing(path) + missing++ + return nil + } + + csumFromFile, err := db.Read(fd) + if err != nil { + return err + } + + h := crc32.New(crc32c) + _, err = io.Copy(h, fd) + if err != nil { + return err + } + + csumComputed := ChecksumV1{ + CRC32C: h.Sum32(), + ModTimeUsec: info.ModTime().UnixMicro(), + } + + if csumFromFile.ModTimeUsec != csumComputed.ModTimeUsec { + PrintModified(path) + modified++ + } else if csumFromFile.CRC32C != csumComputed.CRC32C { + PrintCorrupted(path, csumFromFile, csumComputed) + corrupted++ + } else { + PrintMatched(path) + matched++ + } + + return nil + } + + err := filepath.WalkDir(root, fn) + PrintSummary(matched, modified, missing, corrupted) + + if corrupted > 0 && err == nil { + err = fmt.Errorf("detected %d corrupted files", corrupted) + } + return err +} + +func update(db DB, root string) error { + var missing, modified, corrupted, matched int64 + fn := func(path string, d fs.DirEntry, err error) error { + if !isFileRelevant(path, d, err) { + return err + } + + fd, info, err := openAndInfo(path, d) + if err != nil { + return err + } + defer fd.Close() + + // Compute checksum from the current state. + h := crc32.New(crc32c) + _, err = io.Copy(h, fd) + if err != nil { + return err + } + + csumComputed := ChecksumV1{ + CRC32C: h.Sum32(), + ModTimeUsec: info.ModTime().UnixMicro(), + } + + // Read the saved checksum (if any). + hasAttr, err := db.Has(fd) + if err != nil { + return err + } + if !hasAttr { + // Attribute is missing. Expected for newly created files. + PrintMissing(path) + missing++ + return db.Write(fd, csumComputed) + } + + csumFromFile, err := db.Read(fd) + if err != nil { + return err + } + + if csumFromFile.ModTimeUsec != csumComputed.ModTimeUsec { + // File modified. Expected for updated files. + PrintModified(path) + modified++ + return db.Write(fd, csumComputed) + } else if csumFromFile.CRC32C != csumComputed.CRC32C { + PrintCorrupted(path, csumFromFile, csumComputed) + corrupted++ + } else { + PrintMatched(path) + matched++ + } + + return nil + } + + err := filepath.WalkDir(root, fn) + PrintSummary(matched, modified, missing, corrupted) + + if corrupted > 0 && err == nil { + err = fmt.Errorf("detected %d corrupted files", corrupted) + } + return err +} diff --git a/test/access.t b/test/access.t new file mode 100644 index 0000000..c7d28ef --- /dev/null +++ b/test/access.t @@ -0,0 +1,20 @@ + +Tests for how to handle file access issues. +Note we put our test paths in "root/" so the database doesn't +interfere. + + $ alias summer="$TESTDIR/../summer" + $ mkdir root + $ touch root/empty + $ echo marola > root/hola + + $ summer -db=db.sqlite3 generate root/ + 2 checksums written + + $ summer -db=db.sqlite3 verify root/ + 2 matched, 0 modified, 0 new, 0 corrupted + $ chmod 0000 root/empty + $ summer -db=db.sqlite3 verify root/ + 0 matched, 0 modified, 0 new, 0 corrupted + open root/empty: permission denied + [1] diff --git a/test/basic.t b/test/basic.t new file mode 100644 index 0000000..5e3d48c --- /dev/null +++ b/test/basic.t @@ -0,0 +1,85 @@ +Use the prebuilt summer binary. + + $ alias summer="$TESTDIR/../summer" + +Generate test data. + + $ touch empty + $ echo marola > hola + +Generate and verify. + + $ summer generate . + 2 checksums written + $ summer verify . + 2 matched, 0 modified, 0 new, 0 corrupted + +Check handling of new and updated files. + + $ echo trova > nueva + $ touch empty + $ summer verify . + 1 matched, 1 modified, 1 new, 0 corrupted + $ summer update . + 1 matched, 1 modified, 1 new, 0 corrupted + $ summer verify . + 3 matched, 0 modified, 0 new, 0 corrupted + +Corrupt a file by changing its contents without changing the mtime. + + $ OLD_MTIME=`stat -c "%y" hola` + $ echo sospechoso >> hola + $ summer verify . + 2 matched, 1 modified, 0 new, 0 corrupted + $ touch --date="$OLD_MTIME" hola + + $ summer verify . + "hola": FILE CORRUPTED - expected:239059f6, got:916db13f + 2 matched, 0 modified, 0 new, 1 corrupted + detected 1 corrupted files + [1] + +Check that "update" also detects the corruption, and doesn't just step over +it. + + $ summer update . + "hola": FILE CORRUPTED - expected:239059f6, got:916db13f + 2 matched, 0 modified, 0 new, 1 corrupted + detected 1 corrupted files + [1] + +But "generate" does override it. + + $ summer generate . + 3 checksums written + $ summer verify . + 3 matched, 0 modified, 0 new, 0 corrupted + +Check verbose and quiet. + + $ summer -v verify . + "empty": match + "hola": match + "nueva": match + 3 matched, 0 modified, 0 new, 0 corrupted + $ summer -q verify . + $ summer -q generate . + $ summer -q update . + $ summer -q verify . + +Check that symlinks are ignored. + + $ ln -s hola thisisasymlink + $ summer -v verify . + "empty": match + "hola": match + "nueva": match + 3 matched, 0 modified, 0 new, 0 corrupted + +Check that the root path doesn't confuse us. + + $ summer -v verify $PWD + "/.*/empty": match (re) + "/.*/hola": match (re) + "/.*/nueva": match (re) + 3 matched, 0 modified, 0 new, 0 corrupted diff --git a/test/cover.sh b/test/cover.sh new file mode 100755 index 0000000..8f7154d --- /dev/null +++ b/test/cover.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e +cd $(realpath "$(dirname "$0")" ) + +export GOCOVERDIR="$PWD/.cover/" +rm -rf "${GOCOVERDIR?}" +mkdir -p "${GOCOVERDIR?}" + +export BUILDARGS="-cover" + +# Coverage tests require Go >= 1.20. +go version + +./test.sh + +go tool covdata percent -i="${GOCOVERDIR?}" +go tool covdata textfmt \ + -i="${GOCOVERDIR?}" -o="${GOCOVERDIR?}/cover.txt" +go tool cover \ + -html="${GOCOVERDIR?}/cover.txt" -o="${GOCOVERDIR?}/summer.html" + +echo "file://${GOCOVERDIR?}/summer.html" + diff --git a/test/help.t b/test/help.t new file mode 100644 index 0000000..fbbb18f --- /dev/null +++ b/test/help.t @@ -0,0 +1,96 @@ + + $ alias summer="$TESTDIR/../summer" + + +No arguments. + + $ summer + # summer 🌞 🏖 + + Utility to detect accidental data corruption (e.g. bitrot, storage media + problems). Not intended to detect malicious modification. + + Checksums are written to/read from each files' extended attributes by default, + or to a separate database file (with the -db flag). + + Usage: + summer update <dir> + Verify checksums in the given directory, and update them for new or + changed files. + summer verify <dir> + Verify checksums in the given directory. + summer generate <dir> + Write checksums for the given directory. Pre-existing checksums are + overwritten without verification. + + Flags: + -db string + \tdatabase to read from/write to (esc) + -q\tquiet mode (esc) + -v\tverbose mode (list each file) (esc) + [1] + + +Too few arguments. + + $ summer lskfmsl + # summer 🌞 🏖 + + Utility to detect accidental data corruption (e.g. bitrot, storage media + problems). Not intended to detect malicious modification. + + Checksums are written to/read from each files' extended attributes by default, + or to a separate database file (with the -db flag). + + Usage: + summer update <dir> + Verify checksums in the given directory, and update them for new or + changed files. + summer verify <dir> + Verify checksums in the given directory. + summer generate <dir> + Write checksums for the given directory. Pre-existing checksums are + overwritten without verification. + + Flags: + -db string + \tdatabase to read from/write to (esc) + -q\tquiet mode (esc) + -v\tverbose mode (list each file) (esc) + [1] + + +No valid path (the argument is given, but it is empty). + + $ summer weifmws "" + # summer 🌞 🏖 + + Utility to detect accidental data corruption (e.g. bitrot, storage media + problems). Not intended to detect malicious modification. + + Checksums are written to/read from each files' extended attributes by default, + or to a separate database file (with the -db flag). + + Usage: + summer update <dir> + Verify checksums in the given directory, and update them for new or + changed files. + summer verify <dir> + Verify checksums in the given directory. + summer generate <dir> + Write checksums for the given directory. Pre-existing checksums are + overwritten without verification. + + Flags: + -db string + \tdatabase to read from/write to (esc) + -q\tquiet mode (esc) + -v\tverbose mode (list each file) (esc) + [1] + + +Unknown command. + + $ summer badcommand . + unknown command "badcommand" + [1] diff --git a/test/sqlite.t b/test/sqlite.t new file mode 100644 index 0000000..66fae16 --- /dev/null +++ b/test/sqlite.t @@ -0,0 +1,40 @@ + +Basic tests but storing in a sqlite3 database. +This is enough to exercise the backend. + + $ alias summer="$TESTDIR/../summer" + $ touch empty + $ echo marola > hola + + $ summer -db=db.sqlite3 generate . + 3 checksums written + $ summer -db=db.sqlite3 verify . + 2 matched, 1 modified, 0 new, 0 corrupted + $ summer -db=db.sqlite3 update . + 2 matched, 1 modified, 0 new, 0 corrupted + +Check that the root path doesn't confuse us. + + $ summer -db=db.sqlite3 -v verify $PWD + ".*/db.sqlite3": file modified \(not corrupted\), updating (re) + ".*/empty": match (re) + ".*/hola": match (re) + 2 matched, 1 modified, 0 new, 0 corrupted + +Force a write error to check it is appropriately handled. + + $ summer "-db=file:db.sqlite3?mode=ro" generate . + . checksums written (re) + attempt to write a readonly database + [1] + +Check errors when we cannot open the database file. + + $ summer -db=/proc/doesnotexist verify . + "/proc/doesnotexist": unable to open database file: no such file or directory + [1] + + $ summer -db=/dev/null verify . + "/dev/null": attempt to write a readonly database + [1] + diff --git a/test/test.sh b/test/test.sh new file mode 100755 index 0000000..c52bf39 --- /dev/null +++ b/test/test.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -e +cd $(realpath "$(dirname "$0")" ) + +# shellcheck disable=SC2086 +( cd ..; go build $BUILDARGS -o summer . ) + +cram3 ./*.t diff --git a/ui.go b/ui.go new file mode 100644 index 0000000..32b0b56 --- /dev/null +++ b/ui.go @@ -0,0 +1,55 @@ +package main + +import ( + "flag" + "fmt" + "os" +) + +var ( + verbose = flag.Bool("v", false, "verbose mode (list each file)") + quiet = flag.Bool("q", false, "quiet mode") +) + +func Verbosef(format string, args ...interface{}) { + if *verbose { + fmt.Printf(format+"\n", args...) + } +} + +func Printf(format string, args ...interface{}) { + if !*quiet { + fmt.Printf(format+"\n", args...) + } +} + +func Fatalf(format string, args ...interface{}) { + fmt.Printf(format+"\n", args...) + os.Exit(1) +} + +func PrintWritten(written int64) { + Printf("%d checksums written", written) +} + +func PrintSummary(matched, modified, missing, corrupted int64) { + Printf("%d matched, %d modified, %d new, %d corrupted", + matched, modified, missing, corrupted) +} + +func PrintCorrupted(path string, expected, got ChecksumV1) { + Printf("%q: FILE CORRUPTED - expected:%x, got:%x", + path, expected.CRC32C, got.CRC32C) +} + +func PrintMissing(path string) { + Verbosef("%q: missing checksum attribute, adding it", path) +} + +func PrintModified(path string) { + Verbosef("%q: file modified (not corrupted), updating", path) +} + +func PrintMatched(path string) { + Verbosef("%q: match", path) +}