diff --git a/smx509/parser.go b/smx509/parser.go index c412d57..95671a5 100644 --- a/smx509/parser.go +++ b/smx509/parser.go @@ -58,7 +58,21 @@ func isPrintable(b byte) bool { func parseASN1String(tag cryptobyte_asn1.Tag, value []byte) (string, error) { switch tag { case cryptobyte_asn1.T61String: - return string(value), nil + // T.61 is a defunct ITU 8-bit character encoding which preceded Unicode. + // T.61 uses a code page layout that _almost_ exactly maps to the code + // page layout of the ISO 8859-1 (Latin-1) character encoding, with the + // exception that a number of characters in Latin-1 are not present + // in T.61. + // + // Instead of mapping which characters are present in Latin-1 but not T.61, + // we just treat these strings as being encoded using Latin-1. This matches + // what most of the world does, including BoringSSL. + buf := make([]byte, 0, len(value)) + for _, v := range value { + // All the 1-byte UTF-8 runes map 1-1 with Latin-1. + buf = utf8.AppendRune(buf, rune(v)) + } + return string(buf), nil case cryptobyte_asn1.PrintableString: for _, b := range value { if !isPrintable(b) { @@ -72,6 +86,14 @@ func parseASN1String(tag cryptobyte_asn1.Tag, value []byte) (string, error) { } return string(value), nil case cryptobyte_asn1.Tag(asn1.TagBMPString): + // BMPString uses the defunct UCS-2 16-bit character encoding, which + // covers the Basic Multilingual Plane (BMP). UTF-16 was an extension of + // UCS-2, containing all of the same code points, but also including + // multi-code point characters (by using surrogate code points). We can + // treat a UCS-2 encoded string as a UTF-16 encoded string, as long as + // we reject out the UTF-16 specific code points. This matches the + // BoringSSL behavior. + if len(value)%2 != 0 { return "", errors.New("invalid BMPString") } @@ -83,7 +105,16 @@ func parseASN1String(tag cryptobyte_asn1.Tag, value []byte) (string, error) { s := make([]uint16, 0, len(value)/2) for len(value) > 0 { - s = append(s, uint16(value[0])<<8+uint16(value[1])) + point := uint16(value[0])<<8 + uint16(value[1]) + // Reject UTF-16 code points that are permanently reserved + // noncharacters (0xfffe, 0xffff, and 0xfdd0-0xfdef) and surrogates + // (0xd800-0xdfff). + if point == 0xfffe || point == 0xffff || + (point >= 0xfdd0 && point <= 0xfdef) || + (point >= 0xd800 && point <= 0xdfff) { + return "", errors.New("invalid BMPString") + } + s = append(s, point) value = value[2:] } diff --git a/smx509/parser_test.go b/smx509/parser_test.go index be0e707..fb97025 100644 --- a/smx509/parser_test.go +++ b/smx509/parser_test.go @@ -19,8 +19,8 @@ func TestParseASN1String(t *testing.T) { { name: "T61String", tag: cryptobyte_asn1.T61String, - value: []byte{80, 81, 82}, - expected: string("PQR"), + value: []byte{0xbf, 0x61, 0x3f}, + expected: string("¿a?"), }, { name: "PrintableString", @@ -58,6 +58,30 @@ func TestParseASN1String(t *testing.T) { value: []byte{255}, expectedErr: "invalid BMPString", }, + { + name: "BMPString (invalid surrogate)", + tag: cryptobyte_asn1.Tag(asn1.TagBMPString), + value: []byte{80, 81, 216, 1}, + expectedErr: "invalid BMPString", + }, + { + name: "BMPString (invalid noncharacter 0xfdd1)", + tag: cryptobyte_asn1.Tag(asn1.TagBMPString), + value: []byte{80, 81, 253, 209}, + expectedErr: "invalid BMPString", + }, + { + name: "BMPString (invalid noncharacter 0xffff)", + tag: cryptobyte_asn1.Tag(asn1.TagBMPString), + value: []byte{80, 81, 255, 255}, + expectedErr: "invalid BMPString", + }, + { + name: "BMPString (invalid noncharacter 0xfffe)", + tag: cryptobyte_asn1.Tag(asn1.TagBMPString), + value: []byte{80, 81, 255, 254}, + expectedErr: "invalid BMPString", + }, { name: "IA5String", tag: cryptobyte_asn1.IA5String,