git » gofer » commit eb501ed

http proxy: Use Rewrite-based logic, improve forwarding headers

author Alberto Bertogli
2023-08-20 12:07:24 UTC
committer Alberto Bertogli
2023-09-03 13:10:13 UTC
parent 68f5b115c64f4735ea3cdd89f3fd3d2f29efec9d

http proxy: Use Rewrite-based logic, improve forwarding headers

This patch updates the HTTP Proxy to use Rewrite instead of Director,
which is safer and more standard compliant, and has some nice
side-effects like improving the forwarding headers and logic for CGI
targets.

It also allow us to remove a fair amount of wrapping code that is no
longer needed.

It has one external behaviour change: the outgoing Host header used to
be set to the incoming Host header, but is now set to the proxy url, and
the original one is now available in the X-Forwarded-Host and Forwarded
headers.

server/http.go +34 -58
test/01-fe.yaml +2 -0
test/test.sh +20 -2

diff --git a/server/http.go b/server/http.go
index 2a6a853..2401984 100644
--- a/server/http.go
+++ b/server/http.go
@@ -304,9 +304,9 @@ func makeStatus(from string, status int) http.Handler {
 
 func makeProxy(path string, to url.URL) http.Handler {
 	proxy := &httputil.ReverseProxy{}
-	proxy.Transport = &proxyTransport{}
+	proxy.ErrorHandler = proxyErrorHandler
 
-	// Director that strips "path" from the request path, so that if we have
+	// Rewrite that strips "path" from the request path, so that if we have
 	// this config:
 	//
 	//   /a/ -> http://dst/b
@@ -320,68 +320,44 @@ func makeProxy(path string, to url.URL) http.Handler {
 	// router, but to us is irrelevant.
 	path = stripDomain(path)
 
-	proxy.Director = func(req *http.Request) {
-		req.URL.Scheme = to.Scheme
-		req.URL.Host = to.Host
-		req.URL.Path = adjustPath(req.URL.Path, path, to.Path)
-
-		// If the user agent is not set, prevent a fall back to the default value.
-		if _, ok := req.Header["User-Agent"]; !ok {
-			req.Header.Set("User-Agent", "")
+	proxy.Rewrite = func(r *httputil.ProxyRequest) {
+		// This sets the Forwarded-For, X-Forwarded-Host, and
+		// X-Forwarded-Proto headers of the outbound request.
+		// The inbound request's X-Forwarded-For header is ignored.
+		r.SetXForwarded()
+
+		// Set the Forwarded header, which is a standardized version of the
+		// above, and not yet set by SetXForwarded.
+		r.Out.Header.Set("Forwarded",
+			fmt.Sprintf("for=%q;host=%q;proto=%s",
+				r.Out.Header.Get("X-Forwarded-For"),
+				r.Out.Header.Get("X-Forwarded-Host"),
+				r.Out.Header.Get("X-Forwarded-Proto")))
+
+		// Set the outbound URL based on the target.
+		// We use our own path adjustment since the default behaviour doesn't
+		// do what we want (see above).
+		// Note r.SetURL will merge the two query strings appropriately. It
+		// also may strip parts of the query if ';' is used as a separator, as
+		// per golang.org/issue/25192. This is fine for us.
+		r.SetURL(&to)
+		r.Out.URL.Path = adjustPath(r.In.URL.Path, path, to.Path)
+
+		// If the user agent is not set, prevent a fall back to the default
+		// value.
+		if _, ok := r.Out.Header["User-Agent"]; !ok {
+			r.Out.Header.Set("User-Agent", "")
 		}
 
-		// Strip X-Forwarded-For header, since we don't trust what the client
-		// sent, and the reverse proxy will append to.
-		req.Header.Del("X-Forwarded-For")
-
-		// Note we don't do this so we can have routes independent of virtual
-		// hosts. The downside is that if the destination scheme is HTTPS,
-		// this causes issues with the TLS SNI negotiation.
-		//req.Host = to.Host
+		tr, _ := trace.FromContext(r.In.Context())
+		tr.Printf("proxy to: %s %s %s",
+			r.Out.Proto, r.Out.Method, r.Out.URL.String())
 	}
 
-	return newReverseProxy(proxy)
-}
-
-type proxyTransport struct{}
-
-func (t *proxyTransport) RoundTrip(req *http.Request) (*http.Response, error) {
-	tr, _ := trace.FromContext(req.Context())
-	tr.Printf("proxy to: %s %s %s",
-		req.Proto, req.Method, req.URL.String())
-
-	response, err := http.DefaultTransport.RoundTrip(req)
-	if err == nil {
-		tr.Printf("backend response: %s, %d bytes",
-			response.Status, response.ContentLength)
-		if response.StatusCode >= 400 && response.StatusCode != 404 {
-			tr.SetError()
-		}
-	} else {
-		// errorHandler will be invoked when err != nil, avoid double error
-		// logging.
-	}
-
-	return response, err
-}
-
-type reverseProxy struct {
-	rp *httputil.ReverseProxy
-}
-
-func newReverseProxy(rp *httputil.ReverseProxy) http.Handler {
-	p := &reverseProxy{
-		rp: rp,
-	}
-	rp.ErrorHandler = p.errorHandler
-	return p
-}
-
-func (p *reverseProxy) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
-	p.rp.ServeHTTP(rw, req)
+	return proxy
 }
 
-func (p *reverseProxy) errorHandler(w http.ResponseWriter, r *http.Request, err error) {
+func proxyErrorHandler(w http.ResponseWriter, r *http.Request, err error) {
 	tr, _ := trace.FromContext(r.Context())
 	tr.Printf("backend error: %v", err)
 
diff --git a/test/01-fe.yaml b/test/01-fe.yaml
index 992688e..399898b 100644
--- a/test/01-fe.yaml
+++ b/test/01-fe.yaml
@@ -10,6 +10,8 @@ _routes: &routes
     proxy: "http://localhost:8450/file"
   "/cgi/":
     proxy: "http://localhost:8450/cgi/"
+  "/cgiwithq/":
+    proxy: "http://localhost:8450/cgi/?x=1&y=2"
   "/status/":
     proxy: "http://localhost:8450/status/"
   "/bad/unreacheable":
diff --git a/test/test.sh b/test/test.sh
index 8362ee2..92f9c75 100755
--- a/test/test.sh
+++ b/test/test.sh
@@ -124,7 +124,13 @@ do
 
 	exp $base/cgi/ -bodyre '"param 1" "param 2"'
 	exp $base/cgi/lala -bodyre '"param 1" "param 2"'
-	exp "$base/cgi/?cucu=melo&a;b" -bodyre 'QUERY_STRING=cucu=melo&a;b\n'
+	exp "$base/cgi/?cucu=melo&a=b" -bodyre 'QUERY_STRING=cucu=melo&a=b\n'
+	exp "$base/cgiwithq/?cucu=melo&a=b" \
+			-bodyre 'QUERY_STRING=x=1&y=2&cucu=melo&a=b\n'
+
+	# The proxy will strip parts of the query when using ";" by default, for
+	# safety reasons.
+	exp "$base/cgi/?cucu=melo&a;b" -bodyre 'QUERY_STRING=cucu=melo\n'
 
 	exp $base/gogo/ -status 307 -redir https://google.com/
 	exp $base/gogo/gaga -status 307 -redir https://google.com/gaga
@@ -184,6 +190,16 @@ do
 done
 
 
+echo "### Forwarding headers"
+exp http://localhost:8441/cgi/ -bodyre 'HTTP_X_FORWARDED_HOST=localhost:8441\n'
+exp http://localhost:8441/cgi/ -bodyre 'HTTP_X_FORWARDED_FOR='
+exp http://localhost:8441/cgi/ -bodyre 'HTTP_HOST=localhost:8450\n'
+exp http://localhost:8441/cgi/ -bodyre 'HTTP_X_FORWARDED_PROTO=http\n'
+exp http://localhost:8441/cgi/ \
+		-bodyre 'HTTP_FORWARDED=for=".+";host="localhost:8441";proto=http\n'
+exp https://localhost:8442/cgi/ -bodyre 'HTTP_X_FORWARDED_PROTO=https\n'
+
+
 echo "### Autocert"
 # Launch the test ACME server.
 acmesrv &
@@ -218,7 +234,7 @@ echo "### Request log"
 function logtest() {
 	exp http://localhost:8441/cgi/logtest
 	for f in .01-be.requests.log .01-fe.requests.log; do
-		EXPECT='localhost:8441 GET /cgi/logtest "" "Go-http-client/1.1" = 200'
+		EXPECT='localhost:84.. GET /cgi/logtest "" "Go-http-client/1.1" = 200'
 		if ! waitgrep -q "$EXPECT" $f; then
 			echo "$f: log entry not found"
 			exit 1
@@ -310,6 +326,8 @@ if [ $NSUCCESS -lt 1 ] || [ $NERR -lt 3 ]; then
 	exit 1
 fi
 
+# Snoop here because the next script will kill the test servers.
+snoop
 
 echo "### Checking examples from doc/examples.md"
 ./util/check-examples.sh