git » gofer » commit 6d43dff

Implement regexp-based redirects (redirect_re)

author Alberto Bertogli
2023-09-24 10:56:44 UTC
committer Alberto Bertogli
2023-09-24 11:26:36 UTC
parent a3b457b68f8fa9391d4b9439f17645ae0ac6dc2e

Implement regexp-based redirects (redirect_re)

This patch implements regexp-based redirects, using the redirect_re
option.

It enables users to provide a list of regexps, which will be processed
in order, and if they match, gofer returns a redirect to the given
replacement path or URL.

config/config.go +41 -7
config/config_test.go +35 -0
config/gofer.schema.cue +7 -0
config/gofer.yaml +14 -0
server/http.go +31 -0
test/01-fe.yaml +7 -1
test/test.sh +7 -0

diff --git a/config/config.go b/config/config.go
index 982a743..5447c9d 100644
--- a/config/config.go
+++ b/config/config.go
@@ -52,13 +52,14 @@ type AutoCerts struct {
 }
 
 type Route struct {
-	Dir      string   `yaml:",omitempty"`
-	File     string   `yaml:",omitempty"`
-	Proxy    *URL     `yaml:",omitempty"`
-	Redirect *URL     `yaml:",omitempty"`
-	CGI      []string `yaml:",omitempty"`
-	Status   int      `yaml:",omitempty"`
-	DirOpts  DirOpts  `yaml:",omitempty"`
+	Dir        string   `yaml:",omitempty"`
+	File       string   `yaml:",omitempty"`
+	Proxy      *URL     `yaml:",omitempty"`
+	Redirect   *URL     `yaml:",omitempty"`
+	RedirectRe []RePair `yaml:"redirect_re,omitempty"`
+	CGI        []string `yaml:",omitempty"`
+	Status     int      `yaml:",omitempty"`
+	DirOpts    DirOpts  `yaml:",omitempty"`
 }
 
 type DirOpts struct {
@@ -89,6 +90,12 @@ type RateLimit struct {
 	Rate48 Rate `yaml:",omitempty"`
 }
 
+type RePair struct {
+	From   Regexp // Pattern matched against the request path; compiled from its YAML string on load.
+	To     string // Replacement template for the redirect target; may refer to submatches ($1, $2, ...).
+	Status int    // HTTP status used for the redirect; 0 selects the default (307).
+}
+
 func (c Config) String() string {
 	d, err := yaml.Marshal(&c)
 	if err != nil {
@@ -147,6 +154,7 @@ func (h HTTP) Check(c Config, addr string) []error {
 			r.File != "",
 			r.Proxy != nil,
 			r.Redirect != nil,
+			len(r.RedirectRe) > 0,
 			len(r.CGI) > 0,
 			r.Status > 0)
 		if nSet > 1 {
@@ -200,6 +208,32 @@ func LoadString(contents string) (*Config, error) {
 }
 
 // Wrapper to simplify regexp in configuration.
+type Regexp struct {
+	*regexp.Regexp
+}
+
+func (re *Regexp) UnmarshalYAML(unmarshal func(interface{}) error) error {
+	var s string
+	if err := unmarshal(&s); err != nil {
+		return err
+	}
+
+	rx, err := regexp.Compile(s)
+	if err != nil {
+		return err
+	}
+
+	re.Regexp = rx
+	return nil
+}
+
+func (re Regexp) MarshalYAML() (interface{}, error) {
+	return re.String(), nil
+}
+
+// Wrapper to simplify regexp in configuration. This is specifically for use
+// on regexp paths, which are always anchored to the beginning and end of the
+// string for ease of use.
 type PathRegexp struct {
 	orig string
 	*regexp.Regexp
diff --git a/config/config_test.go b/config/config_test.go
index d2d6d50..8b564e1 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -241,6 +241,41 @@ func expectErrs(t *testing.T, want string, got []error) {
 	}
 }
 
+func TestRegexp(t *testing.T) {
+	re := Regexp{}
+	err := yaml.Unmarshal([]byte(`"ab.d"`), &re)
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+	expected := Regexp{
+		regexp.MustCompile("ab.d"),
+	}
+	opts := cmp.Comparer(func(x, y Regexp) bool {
+		return x.String() == y.String()
+	})
+	if diff := cmp.Diff(expected, re, opts); diff != "" {
+		t.Errorf("unexpected regexp result (-want +got):\n%s", diff)
+	}
+
+	// Error: invalid regexp. Guard against a nil error so a regression fails cleanly instead of panicking.
+	err = yaml.Unmarshal([]byte(`"*"`), &re)
+	if err == nil || !strings.Contains(err.Error(), "error parsing regexp:") {
+		t.Errorf("expected error parsing regexp, got %v", err)
+	}
+
+	// Test handling unmarshal error.
+	err = re.UnmarshalYAML(func(interface{}) error { return unmarshalErr })
+	if err != unmarshalErr {
+		t.Errorf("expected unmarshalErr, got %v", err)
+	}
+
+	// Test marshalling.
+	s, err := expected.MarshalYAML()
+	if !(s == "ab.d" && err == nil) {
+		t.Errorf(`expected "ab.d" / nil, got %q / %v`, s, err)
+	}
+}
+
 func TestPathRegexp(t *testing.T) {
 	re := PathRegexp{}
 	err := yaml.Unmarshal([]byte(`"ab.d"`), &re)
diff --git a/config/gofer.schema.cue b/config/gofer.schema.cue
index f4aed6a..30932f5 100644
--- a/config/gofer.schema.cue
+++ b/config/gofer.schema.cue
@@ -47,6 +47,7 @@ https?:
 		redirect?: string
 		cgi?: [string, ...string]
 		status?: int
+		redirect_re?: [#redirect_re, ...#redirect_re]
 
 		// TODO: Check that only one of the above is set.
 
@@ -72,6 +73,12 @@ https?:
 	...
 }
 
+#redirect_re: {
+	from: string
+	to: string
+	status?: int
+}
+
 raw?:
 	[string]: close({
 		certs?:  string
diff --git a/config/gofer.yaml b/config/gofer.yaml
index 0e5a10a..007266f 100644
--- a/config/gofer.yaml
+++ b/config/gofer.yaml
@@ -76,6 +76,20 @@ http:
         # Redirect to a different URL.
         #redirect: "https://wikipedia.org"
 
+        # Redirect with a regexp-based rewrite.
+        # Regexps are matched in order, and the first match is used.
+        # They are matched (unanchored, unless you use ^ and $) against the request
+        # path, and the replacement is a template that can use submatches as $1, $2, etc.
+        # By default, a 307 (temporary redirect) is used. You can change it by
+        # setting the "status" field.
+        # Anything that does not match will return a 404.
+        #redirect_re:
+        #  - from: "^/foo/(.*)"
+        #    to: "https://bar.com/$1"
+        #  - from: "^/baz/(.*)"
+        #    to: "https://qux.com/$1"
+        #    status: 301
+
         # Execute a CGI.
         #cgi: ["/usr/share/gitweb/gitweb.cgi"]
 
diff --git a/server/http.go b/server/http.go
index 2401984..35c0617 100644
--- a/server/http.go
+++ b/server/http.go
@@ -55,6 +55,10 @@ func httpServer(addr string, conf config.HTTP) (*http.Server, error) {
 		} else if r.Redirect != nil {
 			log.Infof("%s route %q -> redirect %s", srv.Addr, path, r.Redirect)
 			mux.Handle(path, makeRedirect(path, r.Redirect.URL()))
+		} else if len(r.RedirectRe) > 0 {
+			log.Infof("%s route %q -> redirect_re %q",
+				srv.Addr, path, r.RedirectRe)
+			mux.Handle(path, makeRedirectRe(r.RedirectRe))
 		} else if len(r.CGI) > 0 {
 			log.Infof("%s route %q -> cgi %q", srv.Addr, path, r.CGI)
 			mux.Handle(path, makeCGI(path, r.CGI))
@@ -294,6 +298,33 @@ func makeRedirect(path string, to url.URL) http.Handler {
 	})
 }
 
+func makeRedirectRe(rxs []config.RePair) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		tr, _ := trace.FromContext(r.Context())
+
+		for _, rx := range rxs {
+			if !rx.From.MatchString(r.URL.Path) {
+				continue
+			}
+
+			target := rx.From.ReplaceAllString(r.URL.Path, rx.To)
+			status := rx.Status
+			if status == 0 {
+				status = http.StatusTemporaryRedirect
+			}
+
+			tr.Printf("matched %q, %d redirect to %q",
+				rx.From, status, target)
+			http.Redirect(w, r, target, status)
+			return
+		}
+
+		// No regexp matched, return 404.
+		tr.Printf("no regexp matched")
+		http.NotFound(w, r)
+	})
+}
+
 func makeStatus(from string, status int) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		tr, _ := trace.FromContext(r.Context())
diff --git a/test/01-fe.yaml b/test/01-fe.yaml
index 399898b..e0cf146 100644
--- a/test/01-fe.yaml
+++ b/test/01-fe.yaml
@@ -24,7 +24,13 @@ _routes: &routes
     redirect: "https://google.com"
   "/rlme/":
     proxy: "http://localhost:8450/cgi/"
-
+  "/rere/":
+    redirect_re:
+      - from: "/rere/a/(..)/x"
+        to: "/dst/a/$1/z"
+      - from: "/rere/(.*)/zzz/(.*)"
+        to: "http://example.com/dst/z/$2/z/$1"
+        status: 308
 
 reqlog:
   "requests":
diff --git a/test/test.sh b/test/test.sh
index 92f9c75..c8309c0 100755
--- a/test/test.sh
+++ b/test/test.sh
@@ -141,6 +141,13 @@ do
 
 	exp $base/status/543 -status 543
 
+	# Regexp-based redirects.
+	exp $base/rere/x -status 404
+	exp $base/rere/a/bc/x -status 307 -redir /dst/a/bc/z
+	exp $base/rere/a/b/x -status 404
+	exp $base/rere/1/2/zzz/3/4 -status 308 \
+			-redir http://example.com/dst/z/3/4/z/1/2
+
 	# Test that the FE doesn't forward this - it exists on the BE, but the
 	# route doesn't end in a / so it shouldn't be forwarded.
 	exp $base/file/second -status 404