author | Alberto Bertogli
<albertito@blitiri.com.ar> 2023-12-23 02:38:07 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2023-12-26 18:08:11 UTC |
parent | 5c4d2f980859e7e42b4da2bea19b04bb79eedd54 |
internal/courier/mda.go | +7 | -1 |
internal/normalize/normalize.go | +21 | -0 |
internal/normalize/normalize_test.go | +16 | -0 |
internal/smtpsrv/conn.go | +16 | -29 |
internal/smtpsrv/conn_test.go | +0 | -53 |
internal/smtpsrv/dotreader.go | +111 | -0 |
internal/smtpsrv/dotreader_test.go | +89 | -0 |
test/t-12-minor_dialogs/bad_data_dot.cmy | +29 | -0 |
test/t-12-minor_dialogs/bad_data_dot_2.cmy | +29 | -0 |
test/t-12-minor_dialogs/bad_data_dot_on_message_too_big.cmy | +37 | -0 |
test/t-12-minor_dialogs/config/chasquid.conf | +3 | -0 |
test/t-12-minor_dialogs/data_dot_stuffing.cmy | +28 | -0 |
test/t-12-minor_dialogs/data_dot_stuffing.cmy.verify | +11 | -0 |
test/t-12-minor_dialogs/message_too_big.cmy | +28 | -0 |
test/util/chamuyero | +6 | -3 |
diff --git a/internal/courier/mda.go b/internal/courier/mda.go index 72d3454..269f7fa 100644 --- a/internal/courier/mda.go +++ b/internal/courier/mda.go @@ -11,6 +11,7 @@ import ( "unicode" "blitiri.com.ar/go/chasquid/internal/envelope" + "blitiri.com.ar/go/chasquid/internal/normalize" "blitiri.com.ar/go/chasquid/internal/trace" ) @@ -60,7 +61,12 @@ func (p *MDA) Deliver(from string, to string, data []byte) (error, bool) { ctx, cancel := context.WithTimeout(context.Background(), p.Timeout) defer cancel() cmd := exec.CommandContext(ctx, p.Binary, args...) - cmd.Stdin = bytes.NewReader(data) + + // Pass the email data via stdin. Normalize it to CRLF which is what the + // RFC-compliant representation require. By doing this at this end, we can + // keep a simpler internal representation and ensure there won't be any + // inconsistencies in newlines within the message (e.g. added headers). + cmd.Stdin = bytes.NewReader(normalize.ToCRLF(data)) output, err := cmd.CombinedOutput() if ctx.Err() == context.DeadlineExceeded { diff --git a/internal/normalize/normalize.go b/internal/normalize/normalize.go index df68226..ef29066 100644 --- a/internal/normalize/normalize.go +++ b/internal/normalize/normalize.go @@ -3,6 +3,7 @@ package normalize import ( + "bytes" "strings" "blitiri.com.ar/go/chasquid/internal/envelope" @@ -72,3 +73,23 @@ func DomainToUnicode(addr string) (string, error) { domain, err := Domain(domain) return user + "@" + domain, err } + +// ToCRLF converts the given buffer to CRLF line endings. If a line has a +// preexisting CRLF, it leaves it be. It assumes that CR is never used on its +// own. +func ToCRLF(in []byte) []byte { + b := bytes.NewBuffer(nil) + b.Grow(len(in)) + for _, c := range in { + switch c { + case '\r': + // Ignore CR, we'll add it back later. It should never appear + // alone in the contexts where this function is used. + case '\n': + b.Write([]byte("\r\n")) + default: + b.WriteByte(c) + } + } + return b.Bytes() +} diff --git a/internal/normalize/normalize_test.go b/internal/normalize/normalize_test.go index 5b2bd87..b1c5215 100644 --- a/internal/normalize/normalize_test.go +++ b/internal/normalize/normalize_test.go @@ -129,6 +129,22 @@ func TestDomainToUnicode(t *testing.T) { } } +func TestToCRLF(t *testing.T) { + cases := []struct { + in, out string + }{ + {"", ""}, + {"a\nb", "a\r\nb"}, + {"a\r\nb", "a\r\nb"}, + } + for _, c := range cases { + got := string(ToCRLF([]byte(c.in))) + if got != c.out { + t.Errorf("ToCRLF(%q) = %q, expected %q", c.in, got, c.out) + } + } +} + func FuzzUser(f *testing.F) { f.Fuzz(func(t *testing.T, user string) { User(user) diff --git a/internal/smtpsrv/conn.go b/internal/smtpsrv/conn.go index f58bc3f..78ef7a6 100644 --- a/internal/smtpsrv/conn.go +++ b/internal/smtpsrv/conn.go @@ -11,7 +11,6 @@ import ( "math/rand" "net" "net/mail" - "net/textproto" "os" "os/exec" "strconv" @@ -312,6 +311,12 @@ loop: if err != nil { break } + } else if code < 0 { + // Negative code means that we have to break the connection. + // TODO: This is hacky, it's probably worth it at this point to + // refactor this into using a custom response type. + c.tr.Errorf("%s closed the connection: %s", cmd, msg) + break } } @@ -638,19 +643,19 @@ func (c *Conn) DATA(params string) (code int, msg string) { // one, we don't want the command timeout to interfere. c.conn.SetDeadline(c.deadline) - // Create a dot reader, limited to the maximum size. - dotr := textproto.NewReader(bufio.NewReader( - io.LimitReader(c.reader, c.maxDataSize))).DotReader() - c.data, err = io.ReadAll(dotr) + // Read the data. Enforce CRLF correctness, and maximum size. + c.data, err = readUntilDot(c.reader, c.maxDataSize) if err != nil { - if err == io.ErrUnexpectedEOF { - // Message is too big already. But we need to keep reading until we see - // the "\r\n.\r\n", otherwise we will treat the remanent data that - // the user keeps sending as commands, and that's a security - // issue. - readUntilDot(c.reader) + if err == errMessageTooLarge { + // Message is too big; excess data has already been discarded. return 552, "5.3.4 Message too big" } + if err == errInvalidLineEnding { + // We can't properly recover from this, so we have to drop the + // connection. + c.writeResponse(521, "5.5.2 Error reading DATA: invalid line ending") + return -1, "Invalid line ending, closing connection" + } return 554, fmt.Sprintf("5.4.0 Error reading DATA: %v", err) } @@ -952,24 +957,6 @@ func boolToStr(b bool) string { return "0" } -func readUntilDot(r *bufio.Reader) { - prevMore := false - for { - // The reader will not read more than the size of the buffer, - // so this doesn't cause increased memory consumption. - // The reader's data deadline will prevent this from continuing - // forever. - l, more, err := r.ReadLine() - if err != nil { - break - } - if !more && !prevMore && string(l) == "." { - break - } - prevMore = more - } -} - // STARTTLS SMTP command handler. func (c *Conn) STARTTLS(params string) (code int, msg string) { if c.onTLS { diff --git a/internal/smtpsrv/conn_test.go b/internal/smtpsrv/conn_test.go index 17412a2..0453fe2 100644 --- a/internal/smtpsrv/conn_test.go +++ b/internal/smtpsrv/conn_test.go @@ -1,10 +1,8 @@ package smtpsrv import ( - "bufio" "net" "os" - "strings" "testing" "blitiri.com.ar/go/chasquid/internal/domaininfo" @@ -87,57 +85,6 @@ func TestIsHeader(t *testing.T) { } } -func TestReadUntilDot(t *testing.T) { - // This must be > than the minimum buffer size for bufio.Reader, which - // unfortunately is not available to us. The current value is 16, these - // tests will break if it gets increased, and the nonfinal cases will need - // to be adjusted. - size := 20 - xs := "12345678901234567890" - - final := []string{ - "", ".", "..", - ".\r\n", "\r\n.", "\r\n.\r\n", - ".\n", "\n.", "\n.\n", - ".\r", "\r.", "\r.\r", - xs + "\r\n.\r\n", - xs + "1234\r\n.\r\n", - xs + xs + "\r\n.\r\n", - xs + xs + xs + "\r\n.\r\n", - xs + "." + xs + "\n.", - xs + ".\n" + xs + "\n.", - } - for _, s := range final { - t.Logf("testing %q", s) - buf := bufio.NewReaderSize(strings.NewReader(s), size) - readUntilDot(buf) - if r := buf.Buffered(); r != 0 { - t.Errorf("%q: there are %d remaining bytes", s, r) - } - } - - nonfinal := []struct { - s string - r int - }{ - {".\na", 1}, - {"\n.\na", 1}, - {"\n.\nabc", 3}, - {"\n.\n12345678", 8}, - {"\n.\n" + xs, size - 3}, - {"\n.\n" + xs + xs, size - 3}, - {"\n.\n.\n", 2}, - } - for _, c := range nonfinal { - t.Logf("testing %q", c.s) - buf := bufio.NewReaderSize(strings.NewReader(c.s), size) - readUntilDot(buf) - if r := buf.Buffered(); r != c.r { - t.Errorf("%q: expected %d remaining bytes, got %d", c.s, c.r, r) - } - } -} - func TestAddrLiteral(t *testing.T) { // TCP addresses. casesTCP := []struct { diff --git a/internal/smtpsrv/dotreader.go b/internal/smtpsrv/dotreader.go new file mode 100644 index 0000000..846d9eb --- /dev/null +++ b/internal/smtpsrv/dotreader.go @@ -0,0 +1,111 @@ +package smtpsrv + +import ( + "bufio" + "bytes" + "errors" + "io" +) + +var ( + // TODO: Include the line number and specific error, and have the + // caller add them to the trace. + errMessageTooLarge = errors.New("message too large") + errInvalidLineEnding = errors.New("invalid line ending") +) + +// readUntilDot reads from r until it encounters a dot-terminated line, or we +// read max bytes. It enforces that input lines are terminated by "\r\n", and +// that there are not "lonely" "\r" or "\n"s in the input. +// It returns \n-terminated lines, which is what we use for our internal +// representation for convenience (same as textproto DotReader does). +func readUntilDot(r *bufio.Reader, max int64) ([]byte, error) { + buf := make([]byte, 0, 1024) + n := int64(0) + + // Little state machine. + const ( + prevOther = iota + prevCR + prevCRLF + ) + // Start as if we just came from a '\r\n'; that way we avoid the need + // for special-casing the dot-stuffing at the very beginning. + prev := prevCRLF + last4 := make([]byte, 4) + skip := false + +loop: + for { + b, err := r.ReadByte() + if err == io.EOF { + return buf, io.ErrUnexpectedEOF + } else if err != nil { + return buf, err + } + n++ + + switch b { + case '\r': + if prev == prevCR { + return buf, errInvalidLineEnding + } + prev = prevCR + // We return a LF-terminated line, so skip the CR. This simplifies + // internal representation and makes it easier/less error prone to + // work with. It is converted back to CRLF on endpoints (e.g. in + // the couriers). + skip = true + case '\n': + if prev != prevCR { + return buf, errInvalidLineEnding + } + // If we come from a '\r\n.\r', we're done. + if bytes.Equal(last4, []byte("\r\n.\r")) { + break loop + } + + // If we are only starting and see ".\r\n", we're also done; in + // that case the message is empty. + if n == 3 && bytes.Equal(last4, []byte("\x00\x00.\r")) { + return []byte{}, nil + } + prev = prevCRLF + default: + if prev == prevCR { + return buf, errInvalidLineEnding + } + if b == '.' && prev == prevCRLF { + // We come from "\r\n" and got a "."; as per dot-stuffing + // rules, we should skip that '.' in the output. + // https://www.rfc-editor.org/rfc/rfc5321#section-4.5.2 + skip = true + } + prev = prevOther + } + + // Keep the last 4 bytes separately, because they may not be in buf on + // messages that are too large. + copy(last4, last4[1:]) + last4[3] = b + + if int64(len(buf)) < max && !skip { + buf = append(buf, b) + } + skip = false + } + + // Return an error if the message is too large. It is important to do this + // _outside_ the loop, because we need to keep reading until we get to the + // final "." before we return an error, so the SMTP dialog can continue + // properly after that. + // If we return too early, the remainder of the email is interpreted as + // part of the SMTP dialog (and exposing ourselves to smuggling attacks). + if n > max { + return buf, errMessageTooLarge + } + + // If we made it this far, buf naturally ends in "\n" because we skipped + // the '.' due to dot-stuffing, and skip "\r"s. + return buf, nil +} diff --git a/internal/smtpsrv/dotreader_test.go b/internal/smtpsrv/dotreader_test.go new file mode 100644 index 0000000..bd31f3f --- /dev/null +++ b/internal/smtpsrv/dotreader_test.go @@ -0,0 +1,89 @@ +package smtpsrv + +import ( + "bufio" + "bytes" + "io" + "strings" + "testing" +) + +func TestReadUntilDot(t *testing.T) { + cases := []struct { + input string + max int64 + want string + wantErr error + }{ + // EOF before any input -> unexpected EOF. + {"", 0, "", io.ErrUnexpectedEOF}, + {"", 1, "", io.ErrUnexpectedEOF}, + + // EOF after exceeding max -> unexpected EOF. + {"abcdef", 2, "ab", io.ErrUnexpectedEOF}, + + // \n at the beginning of the buffer are just as invalid, and the + // error takes precedence over the unexpected EOF. + {"\n", 0, "", errInvalidLineEnding}, + {"\n", 1, "", errInvalidLineEnding}, + {"\n", 2, "", errInvalidLineEnding}, + {"\n\r\n.\r\n", 10, "", errInvalidLineEnding}, + + // \r and then EOF -> unexpected EOF, because we never had a chance to + // assess if the line ending is valid or not. + {"\r", 2, "", io.ErrUnexpectedEOF}, + + // Lonely \r -> invalid line ending. + {"abc\rdef", 10, "abc", errInvalidLineEnding}, + {"abc\r\rdef", 10, "abc", errInvalidLineEnding}, + + // Lonely \n -> invalid line ending. + {"abc\ndef", 10, "abc", errInvalidLineEnding}, + + // Various valid cases. + {"abc\r\n.\r\n", 10, "abc\n", nil}, + {"\r\n.\r\n", 10, "\n", nil}, + + // Start with the final dot - the smallest "message" (empty). + {".\r\n", 10, "", nil}, + + // Max bytes reached -> message too large. + {"abc\r\n.\r\n", 5, "abc\n", errMessageTooLarge}, + {"abcdefg\r\n.\r\n", 5, "abcde", errMessageTooLarge}, + {"ab\r\ncdefg\r\n.\r\n", 5, "ab\ncd", errMessageTooLarge}, + + // Dot-stuffing. + // https://www.rfc-editor.org/rfc/rfc5321#section-4.5.2 + {"abc\r\n.def\r\n.\r\n", 20, "abc\ndef\n", nil}, + {"abc\r\n..def\r\n.\r\n", 20, "abc\n.def\n", nil}, + {"abc\r\n..\r\n.\r\n", 20, "abc\n.\n", nil}, + {".x\r\n.\r\n", 20, "x\n", nil}, + {"..\r\n.\r\n", 20, ".\n", nil}, + } + + for i, c := range cases { + r := bufio.NewReader(strings.NewReader(c.input)) + got, err := readUntilDot(r, c.max) + if err != c.wantErr { + t.Errorf("case %d %q: got error %v, want %v", i, c.input, err, c.wantErr) + } + if !bytes.Equal(got, []byte(c.want)) { + t.Errorf("case %d %q: got %q, want %q", i, c.input, got, c.want) + } + } +} + +type badBuffer bytes.Buffer + +func (b *badBuffer) Read(p []byte) (int, error) { + // Return an arbitrary non-EOF error for testing. + return 0, io.ErrNoProgress +} + +func TestReadUntilDotReadError(t *testing.T) { + r := bufio.NewReader(&badBuffer{}) + _, err := readUntilDot(r, 10) + if err != io.ErrNoProgress { + t.Errorf("got error %v, want %v", err, io.ErrNoProgress) + } +} diff --git a/test/t-12-minor_dialogs/bad_data_dot.cmy b/test/t-12-minor_dialogs/bad_data_dot.cmy new file mode 100644 index 0000000..42d1b57 --- /dev/null +++ b/test/t-12-minor_dialogs/bad_data_dot.cmy @@ -0,0 +1,29 @@ + +c tcp_connect localhost:1025 + +c <~ 220 +c -> EHLO localhost +c <... 250 HELP +c -> MAIL FROM: <> +c <~ 250 +c -> RCPT TO: user@testserver +c <~ 250 +c -> DATA +c <~ 354 +c -> From: Mailer daemon <somewhere@horns.com> +c -> Subject: I've come to haunt you +c -> +c -> Muahahahaha +c -> + +# An MTA must not accept isolated line breaks, otherwise it may fall victim to +# an SMTP smuggling attack. See readUntilDot for more details. +# This test triggers that condition with an invalid dot-ending, so we verify +# the server returns an error in this case. +c ~> '.\n' + +c -> That was a bad line ending, this is a good one. +c ~> 'xxx\r\n.\r\n' + +c <- 521 5.5.2 Error reading DATA: invalid line ending + diff --git a/test/t-12-minor_dialogs/bad_data_dot_2.cmy b/test/t-12-minor_dialogs/bad_data_dot_2.cmy new file mode 100644 index 0000000..f76514c --- /dev/null +++ b/test/t-12-minor_dialogs/bad_data_dot_2.cmy @@ -0,0 +1,29 @@ + +c tcp_connect localhost:1025 + +c <~ 220 +c -> EHLO localhost +c <... 250 HELP +c -> MAIL FROM: <> +c <~ 250 +c -> RCPT TO: user@testserver +c <~ 250 +c -> DATA +c <~ 354 +c -> From: Mailer daemon <somewhere@horns.com> +c -> Subject: I've come to haunt you +c -> +c -> Muahahahaha +c -> + +# An MTA must not accept isolated line breaks, otherwise it may fall victim to +# an SMTP smuggling attack. See readUntilDot for more details. +# This test triggers that condition with an invalid dot-ending, so we verify +# the server returns an error in this case. +c ~> 'xxx\n.\n' + +c -> That was a bad line ending, this is a good one. +c ~> '\r\n.\r\n' + +c <- 521 5.5.2 Error reading DATA: invalid line ending + diff --git a/test/t-12-minor_dialogs/bad_data_dot_on_message_too_big.cmy b/test/t-12-minor_dialogs/bad_data_dot_on_message_too_big.cmy new file mode 100644 index 0000000..4b8a6f6 --- /dev/null +++ b/test/t-12-minor_dialogs/bad_data_dot_on_message_too_big.cmy @@ -0,0 +1,37 @@ + +c tcp_connect localhost:1025 + +c <~ 220 +c -> EHLO localhost +c <... 250 HELP +c -> MAIL FROM: <> +c <~ 250 +c -> RCPT TO: user@testserver +c <~ 250 +c -> DATA +c <~ 354 +c -> Subject: Message too big +c -> + +# Max message size is 1 MiB. Note this includes line endings but converted to +# \n (as per textproto.DotReader), and excluding the final ".". +# We already sent (in the header) 26. +# Send lines of len 900 to stay under the limit. +# (1024 * 1024 - 26) - (900 * 1165) = 50 +c ~> ('a' * 899 + '\r\n') * 1165 + +# We have 50 characters left before the message is too big. +c ~> 'b' * 55 + '\r\n' + +# At this point the message is too big. The remainder data should be +# discarded. +# We use a "bad ." to try to do an SMTP smuggling attack. +c ~> '.\n' +c -> HELP +c -> HELP + +# And now the "good .". +c -> . + +c <- 521 5.5.2 Error reading DATA: invalid line ending + diff --git a/test/t-12-minor_dialogs/config/chasquid.conf b/test/t-12-minor_dialogs/config/chasquid.conf index 9a35641..3680c99 100644 --- a/test/t-12-minor_dialogs/config/chasquid.conf +++ b/test/t-12-minor_dialogs/config/chasquid.conf @@ -13,3 +13,6 @@ mail_log_path: "../.logs/mail_log" suffix_separators: "+-" drop_characters: "._" + +# Small max data size so we can reach it more easily in the tests. +max_data_size_mb: 1 diff --git a/test/t-12-minor_dialogs/data_dot_stuffing.cmy b/test/t-12-minor_dialogs/data_dot_stuffing.cmy new file mode 100644 index 0000000..c4ef68d --- /dev/null +++ b/test/t-12-minor_dialogs/data_dot_stuffing.cmy @@ -0,0 +1,28 @@ + +c tcp_connect localhost:1025 + +c <~ 220 +c -> EHLO localhost +c <... 250 HELP +c -> MAIL FROM: <> +c <~ 250 +c -> RCPT TO: user@testserver +c <~ 250 +c -> DATA +c <~ 354 +c -> .From: Mailer daemon <somewhere@horns.com> +c -> Subject: I've come to haunt you +c -> +c -> .Muahahahaha +c -> +c -> ..x +c -> +c -> .. +c -> +c -> .This is stuffy. +c -> +c -> . +c <~ 250 +c -> QUIT +c <~ 221 + diff --git a/test/t-12-minor_dialogs/data_dot_stuffing.cmy.verify b/test/t-12-minor_dialogs/data_dot_stuffing.cmy.verify new file mode 100644 index 0000000..541354d --- /dev/null +++ b/test/t-12-minor_dialogs/data_dot_stuffing.cmy.verify @@ -0,0 +1,11 @@ +From: Mailer daemon <somewhere@horns.com> +Subject: I've come to haunt you + +Muahahahaha + +.x + +. + +This is stuffy. + diff --git a/test/t-12-minor_dialogs/message_too_big.cmy b/test/t-12-minor_dialogs/message_too_big.cmy new file mode 100644 index 0000000..ae15d41 --- /dev/null +++ b/test/t-12-minor_dialogs/message_too_big.cmy @@ -0,0 +1,28 @@ + +c tcp_connect localhost:1025 + +c <~ 220 +c -> EHLO localhost +c <... 250 HELP +c -> MAIL FROM: <> +c <~ 250 +c -> RCPT TO: user@testserver +c <~ 250 +c -> DATA +c <~ 354 +c -> Subject: Message too big +c -> + +# Max message size is 1 MiB. Note this includes line endings but converted to +# \n (as per textproto.DotReader), and excluding the final ".". +# We already sent (in the header) 26. +# Send lines of len 900 to stay under the limit. +# (1024 * 1024 - 26) - (900 * 1166) = -850 +c ~> ('a' * 899 + '\r\n') * 1166 + +c -> . + +c <~ 552 5.3.4 Message too big +c -> QUIT +c <~ 221 + diff --git a/test/util/chamuyero b/test/util/chamuyero index 0c5bc56..3f41d4f 100755 --- a/test/util/chamuyero +++ b/test/util/chamuyero @@ -204,12 +204,15 @@ class Interpreter (object): sock.connect() self.procs[proc] = sock - # -> Send to a process stdin, with a \n at the end. - # .> Send to a process stdin, no \n at the end. + # -> Send to a process stdin, with a \r\n at the end. + # .> Send to a process stdin, no \r\n at the end. + # ~> Send to a process stdin, string is python-evaluated. elif op == "->": - self.procs[proc].write(params + "\n") + self.procs[proc].write(params + "\r\n") elif op == ".>": self.procs[proc].write(params) + elif op == "~>": + self.procs[proc].write(eval(params)) # <- Read from the process, expect matching input. # <~ Read from the process, match input using regexp.