git » chasquid » commit 112e492

Normalize domains

author Alberto Bertogli
2016-10-09 15:05:25 UTC
committer Alberto Bertogli
2016-10-09 23:51:05 UTC
parent ad25706d72bb04432c04da235161ac30a21d202c

Normalize domains

We should ignore the case of domains and treat them uniformly, especially when
it comes to local domains.

This patch extends the existing normalization (IDNA, keeping domains as
UTF8 internally) to include case conversion and NFC form for
consistency.

chasquid.go +8 -5
internal/auth/auth.go +3 -4
internal/envelope/envelope.go +0 -26
internal/normalize/normalize.go +46 -3
internal/normalize/normalize_test.go +32 -0
test/t-07-smtputf8/content +2 -2
test/t-07-smtputf8/run.sh +11 -1

diff --git a/chasquid.go b/chasquid.go
index e7b39ad..a747bc6 100644
--- a/chasquid.go
+++ b/chasquid.go
@@ -114,9 +114,12 @@ func main() {
 	// Load domains from "domains/".
 	glog.Infof("Domain config paths:")
 	for _, info := range mustReadDir("domains/") {
-		name := info.Name()
-		dir := filepath.Join("domains", name)
-		loadDomain(name, dir, s)
+		domain, err := normalize.Domain(info.Name())
+		if err != nil {
+			glog.Fatalf("Invalid name %+q: %v", info.Name(), err)
+		}
+		dir := filepath.Join("domains", info.Name())
+		loadDomain(domain, dir, s)
 	}
 
 	// Always include localhost as local domain.
@@ -665,7 +668,7 @@ func (c *Conn) MAIL(params string) (code int, msg string) {
 				"SPF check failed: %v", c.spfError)
 		}
 
-		addr, err = envelope.IDNAToUnicode(addr)
+		addr, err = normalize.DomainToUnicode(addr)
 		if err != nil {
 			return 501, "malformed address (IDNA conversion failed)"
 		}
@@ -724,7 +727,7 @@ func (c *Conn) RCPT(params string) (code int, msg string) {
 		return 501, "malformed address"
 	}
 
-	addr, err := envelope.IDNAToUnicode(e.Address)
+	addr, err := normalize.DomainToUnicode(e.Address)
 	if err != nil {
 		return 501, "malformed address (IDNA conversion failed)"
 	}
diff --git a/internal/auth/auth.go b/internal/auth/auth.go
index 2c7caec..0beaf82 100644
--- a/internal/auth/auth.go
+++ b/internal/auth/auth.go
@@ -8,8 +8,6 @@ import (
 	"strings"
 	"time"
 
-	"golang.org/x/net/idna"
-
 	"blitiri.com.ar/go/chasquid/internal/normalize"
 	"blitiri.com.ar/go/chasquid/internal/userdb"
 )
@@ -77,12 +75,13 @@ func DecodeResponse(response string) (user, domain, passwd string, err error) {
 
 	// Normalize the user and domain. This is so users can write the username
 	// in their own style and still can log in.  For the domain, we use IDNA
-	// to turn it to utf8 which is what we use internally.
+	// and relevant transformations to turn it to utf8 which is what we use
+	// internally.
 	user, err = normalize.User(user)
 	if err != nil {
 		return
 	}
-	domain, err = idna.ToUnicode(domain)
+	domain, err = normalize.Domain(domain)
 	if err != nil {
 		return
 	}
diff --git a/internal/envelope/envelope.go b/internal/envelope/envelope.go
index c05ec6e..d0eb1d2 100644
--- a/internal/envelope/envelope.go
+++ b/internal/envelope/envelope.go
@@ -6,8 +6,6 @@ import (
 	"fmt"
 	"strings"
 
-	"golang.org/x/net/idna"
-
 	"blitiri.com.ar/go/chasquid/internal/set"
 )
 
@@ -50,27 +48,3 @@ func AddHeader(data []byte, k, v string) []byte {
 	header := []byte(fmt.Sprintf("%s: %s\n", k, v))
 	return append(header, data...)
 }
-
-// Take an address with a potentially unicode domain, and convert it to ASCII
-// as per IDNA.
-// The user part is unchanged.
-func IDNAToASCII(addr string) (string, error) {
-	if addr == "<>" {
-		return addr, nil
-	}
-	user, domain := Split(addr)
-	domain, err := idna.ToASCII(domain)
-	return user + "@" + domain, err
-}
-
-// Take an address with an ASCII domain, and convert it to Unicode as per
-// IDNA.
-// The user part is unchanged.
-func IDNAToUnicode(addr string) (string, error) {
-	if addr == "<>" {
-		return addr, nil
-	}
-	user, domain := Split(addr)
-	domain, err := idna.ToUnicode(domain)
-	return user + "@" + domain, err
-}
diff --git a/internal/normalize/normalize.go b/internal/normalize/normalize.go
index 1dce214..569dc76 100644
--- a/internal/normalize/normalize.go
+++ b/internal/normalize/normalize.go
@@ -1,12 +1,17 @@
-// Package normalize contains functions to normalize usernames and addresses.
+// Package normalize contains functions to normalize usernames, domains and
+// addresses.
 package normalize
 
 import (
+	"strings"
+
 	"blitiri.com.ar/go/chasquid/internal/envelope"
+	"golang.org/x/net/idna"
 	"golang.org/x/text/secure/precis"
+	"golang.org/x/text/unicode/norm"
 )
 
-// User normalices an username using PRECIS.
+// User normalizes a username using PRECIS.
 // On error, it will also return the original username to simplify callers.
 func User(user string) (string, error) {
 	norm, err := precis.UsernameCaseMapped.String(user)
@@ -17,7 +22,27 @@ func User(user string) (string, error) {
 	return norm, nil
 }
 
-// Name normalices an email address using PRECIS.
+// Domain normalizes a DNS domain into a cleaned UTF-8 form.
+// On error, it will also return the original domain to simplify callers.
+func Domain(domain string) (string, error) {
+	// For now, we just convert the domain to lower case and make sure it's in
+	// NFC form for consistency.
+	// There are other possible transformations (like nameprep) but for our
+	// purposes these should be enough.
+	// https://tools.ietf.org/html/rfc5891#section-5.2
+	// https://blog.golang.org/normalization
+	d, err := idna.ToUnicode(domain)
+	if err != nil {
+		return domain, err
+	}
+
+	d = norm.NFC.String(d)
+	d = strings.ToLower(d)
+	return d, nil
+}
+
+// Addr normalizes an email address, applying User and Domain to its
+// respective components.
 // On error, it will also return the original address to simplify callers.
 func Addr(addr string) (string, error) {
 	user, domain := envelope.Split(addr)
@@ -27,5 +52,23 @@ func Addr(addr string) (string, error) {
 		return addr, err
 	}
 
+	domain, err = Domain(domain)
+	if err != nil {
+		return addr, err
+	}
+
 	return user + "@" + domain, nil
 }
+
+// DomainToUnicode takes an address and converts its domain to Unicode as per
+// IDNA, applying the same normalization as Domain.
+// The user part is unchanged; the null address "<>" is returned as is.
+func DomainToUnicode(addr string) (string, error) {
+	if addr == "<>" {
+		return addr, nil
+	}
+	user, domain := envelope.Split(addr)
+
+	domain, err := Domain(domain)
+	return user + "@" + domain, err
+}
diff --git a/internal/normalize/normalize_test.go b/internal/normalize/normalize_test.go
index 96f0300..dd59a29 100644
--- a/internal/normalize/normalize_test.go
+++ b/internal/normalize/normalize_test.go
@@ -33,10 +33,42 @@ func TestUser(t *testing.T) {
 	}
 }
 
+func TestDomain(t *testing.T) {
+	valid := []struct{ user, norm string }{
+		{"ÑAndÚ", "ñandú"},
+		{"Pingüino", "pingüino"},
+		{"xn--aca-6ma", "ñaca"},
+		{"xn--lca", "ñ"}, // Punycode is for 'Ñ'.
+		{"e\u0301", "é"}, // Transform to NFC form.
+	}
+	for _, c := range valid {
+		nu, err := Domain(c.user)
+		if nu != c.norm {
+			t.Errorf("%q normalized to %q, expected %q", c.user, nu, c.norm)
+		}
+		if err != nil {
+			t.Errorf("%q error: %v", c.user, err)
+		}
+
+	}
+
+	invalid := []string{"xn---", "xn--xyz-ñ"}
+	for _, u := range invalid {
+		nu, err := Domain(u)
+		if err == nil {
+			t.Errorf("expected Domain(%+q) to fail, but did not", u)
+		}
+		if nu != u {
+			t.Errorf("%+q failed norm, but returned %+q", u, nu)
+		}
+	}
+}
+
 func TestAddr(t *testing.T) {
 	valid := []struct{ user, norm string }{
 		{"ÑAndÚ@pampa", "ñandú@pampa"},
 		{"Pingüino@patagonia", "pingüino@patagonia"},
+		{"pe\u0301@le\u0301a", "pé@léa"}, // Transform to NFC form.
 	}
 	for _, c := range valid {
 		nu, err := Addr(c.user)
diff --git a/test/t-07-smtputf8/content b/test/t-07-smtputf8/content
index 8fbedb8..349de60 100644
--- a/test/t-07-smtputf8/content
+++ b/test/t-07-smtputf8/content
@@ -1,5 +1,5 @@
-From: ñandú@ñoños
-To: ñangapirí@ñoños
+From: ñandú@ñoÑos
+To: Ñangapirí@Ñoños
 Subject: Arañando el test
 
 Crece desde el test el futuro
diff --git a/test/t-07-smtputf8/run.sh b/test/t-07-smtputf8/run.sh
index 9c2a650..d79ca30 100755
--- a/test/t-07-smtputf8/run.sh
+++ b/test/t-07-smtputf8/run.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
 
+# Test UTF8 support, including usernames and domains.
+# Also test normalization: the destinations will have non-matching
+# capitalizations.
+
 set -e
 . $(dirname ${0})/../util/lib.sh
 
@@ -8,7 +12,10 @@ init
 skip_if_python_is_too_old
 
 generate_certs_for ñoños
-add_user ñoños ñangapirí antaño
+
+# Intentionally use upper case in the config directory name; this should be
+# normalized to lowercase internally (and match the cert accordingly).
+add_user ñoñOS ñangapirí antaño
 
 # Python doesn't support UTF8 for auth, use an ascii user and domain.
 add_user nada nada nada
@@ -17,9 +24,12 @@ mkdir -p .logs
 chasquid -v=2 --log_dir=.logs --config_dir=config &
 wait_until_ready 1025
 
+# The envelope from and to are taken from the content, and use a mix of upper
+# and lower case.
 smtpc.py --server=localhost:1025 --user=nada@nada --password=nada \
 	< content
 
+# The MDA should see the normalized users and domains, in lower case.
 wait_for_file .mail/ñangapirí@ñoños
 mail_diff content .mail/ñangapirí@ñoños