// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package strings_test import ( "bytes" "fmt" . "strings" "testing" ) var htmlEscaper = NewReplacer( "&", "&", "<", "<", ">", ">", `"`, """, "'", "'", ) var htmlUnescaper = NewReplacer( "&", "&", "<", "<", ">", ">", """, `"`, "'", "'", ) // The http package's old HTML escaping function. func oldHTMLEscape(s string) string { s = Replace(s, "&", "&", -1) s = Replace(s, "<", "<", -1) s = Replace(s, ">", ">", -1) s = Replace(s, `"`, """, -1) s = Replace(s, "'", "'", -1) return s } var capitalLetters = NewReplacer("a", "A", "b", "B") // TestReplacer tests the replacer implementations. func TestReplacer(t *testing.T) { type testCase struct { r *Replacer in, out string } var testCases []testCase // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8. str := func(b byte) string { return string([]byte{b}) } var s []string // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00". s = nil for i := 0; i < 256; i++ { s = append(s, str(byte(i)), str(byte(i+1))) } inc := NewReplacer(s...) // Test cases with 1-byte old strings, 1-byte new strings. testCases = append(testCases, testCase{capitalLetters, "brad", "BrAd"}, testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, testCase{capitalLetters, "", ""}, testCase{inc, "brad", "csbe"}, testCase{inc, "\x00\xff", "\x01\x00"}, testCase{inc, "", ""}, testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"}, ) // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ... s = nil for i := 0; i < 256; i++ { n := i + 1 - 'a' if n < 1 { n = 1 } s = append(s, str(byte(i)), Repeat(str(byte(i)), n)) } repeat := NewReplacer(s...) // Test cases with 1-byte old strings, variable length new strings. 
testCases = append(testCases, testCase{htmlEscaper, "No changes", "No changes"}, testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, testCase{htmlEscaper, "&&&", "&&&"}, testCase{htmlEscaper, "", ""}, testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"}, testCase{repeat, "abba", "abbbba"}, testCase{repeat, "", ""}, testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"}, ) // The remaining test cases have variable length old strings. testCases = append(testCases, testCase{htmlUnescaper, "&amp;", "&"}, testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"}, testCase{htmlUnescaper, "", ""}, testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"}, testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"}, testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"}, ) // gen1 has multiple old strings of variable length. There is no // overall non-empty common prefix, but some pairwise common prefixes. gen1 := NewReplacer( "aaa", "3[aaa]", "aa", "2[aa]", "a", "1[a]", "i", "i", "longerst", "most long", "longer", "medium", "long", "short", "xx", "xx", "x", "X", "X", "Y", "Y", "Z", ) testCases = append(testCases, testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"}, testCase{gen1, "long, longerst, longer", "short, most long, medium"}, testCase{gen1, "xxxxx", "xxxxX"}, testCase{gen1, "XiX", "YiY"}, testCase{gen1, "", ""}, ) // gen2 has multiple old strings with no pairwise common prefix. gen2 := NewReplacer( "roses", "red", "violets", "blue", "sugar", "sweet", ) testCases = append(testCases, testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."}, testCase{gen2, "", ""}, ) // gen3 has multiple old strings with an overall common prefix. 
gen3 := NewReplacer( "abracadabra", "poof", "abracadabrakazam", "splat", "abraham", "lincoln", "abrasion", "scrape", "abraham", "isaac", ) testCases = append(testCases, testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"}, testCase{gen3, "abrasion abracad", "scrape abracad"}, testCase{gen3, "abba abram abrasive", "abba abram abrasive"}, testCase{gen3, "", ""}, ) // foo{1,2,3,4} have multiple old strings with an overall common prefix // and 1- or 2- byte extensions from the common prefix. foo1 := NewReplacer( "foo1", "A", "foo2", "B", "foo3", "C", ) foo2 := NewReplacer( "foo1", "A", "foo2", "B", "foo31", "C", "foo32", "D", ) foo3 := NewReplacer( "foo11", "A", "foo12", "B", "foo31", "C", "foo32", "D", ) foo4 := NewReplacer( "foo12", "B", "foo32", "D", ) testCases = append(testCases, testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"}, testCase{foo1, "", ""}, testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"}, testCase{foo2, "", ""}, testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"}, testCase{foo3, "", ""}, testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"}, testCase{foo4, "", ""}, ) // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things. allBytes := make([]byte, 256) for i := range allBytes { allBytes[i] = byte(i) } allString := string(allBytes) genAll := NewReplacer( allString, "[all]", "\xff", "[ff]", "\x00", "[00]", ) testCases = append(testCases, testCase{genAll, allString, "[all]"}, testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"}, testCase{genAll, "", ""}, ) // Test cases with empty old strings. 
blankToX1 := NewReplacer("", "X") blankToX2 := NewReplacer("", "X", "", "") blankHighPriority := NewReplacer("", "X", "o", "O") blankLowPriority := NewReplacer("o", "O", "", "X") blankNoOp1 := NewReplacer("", "") blankNoOp2 := NewReplacer("", "", "", "A") blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z") testCases = append(testCases, testCase{blankToX1, "foo", "XfXoXoX"}, testCase{blankToX1, "", "X"}, testCase{blankToX2, "foo", "XfXoXoX"}, testCase{blankToX2, "", "X"}, testCase{blankHighPriority, "oo", "XOXOX"}, testCase{blankHighPriority, "ii", "XiXiX"}, testCase{blankHighPriority, "oiio", "XOXiXiXOX"}, testCase{blankHighPriority, "iooi", "XiXOXOXiX"}, testCase{blankHighPriority, "", "X"}, testCase{blankLowPriority, "oo", "OOX"}, testCase{blankLowPriority, "ii", "XiXiX"}, testCase{blankLowPriority, "oiio", "OXiXiOX"}, testCase{blankLowPriority, "iooi", "XiOOXiX"}, testCase{blankLowPriority, "", "X"}, testCase{blankNoOp1, "foo", "foo"}, testCase{blankNoOp1, "", ""}, testCase{blankNoOp2, "foo", "foo"}, testCase{blankNoOp2, "", ""}, testCase{blankFoo, "foobarfoobaz", "XRXZX"}, testCase{blankFoo, "foobar-foobaz", "XRX-XZX"}, testCase{blankFoo, "", "X"}, ) // single string replacer abcMatcher := NewReplacer("abc", "[match]") testCases = append(testCases, testCase{abcMatcher, "", ""}, testCase{abcMatcher, "ab", "ab"}, testCase{abcMatcher, "abc", "[match]"}, testCase{abcMatcher, "abcd", "[match]d"}, testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"}, ) // Issue 6659 cases (more single string replacer) noHello := NewReplacer("Hello", "") testCases = append(testCases, testCase{noHello, "Hello", ""}, testCase{noHello, "Hellox", "x"}, testCase{noHello, "xHello", "x"}, testCase{noHello, "xHellox", "xx"}, ) // No-arg test cases. nop := NewReplacer() testCases = append(testCases, testCase{nop, "abc", "abc"}, testCase{nop, "", ""}, ) // Run the test cases. for i, tc := range testCases { if s := tc.r.Replace(tc.in); s != tc.out { t.Errorf("%d. 
Replace(%q) = %q, want %q", i, tc.in, s, tc.out) } var buf bytes.Buffer n, err := tc.r.WriteString(&buf, tc.in) if err != nil { t.Errorf("%d. WriteString: %v", i, err) continue } got := buf.String() if got != tc.out { t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out) continue } if n != len(tc.out) { t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)", i, tc.in, n, len(tc.out), tc.out) } } } var algorithmTestCases = []struct { r *Replacer want string }{ {capitalLetters, "*strings.byteReplacer"}, {htmlEscaper, "*strings.byteStringReplacer"}, {NewReplacer("12", "123"), "*strings.singleStringReplacer"}, {NewReplacer("1", "12"), "*strings.byteStringReplacer"}, {NewReplacer("", "X"), "*strings.genericReplacer"}, {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"}, } // TestPickAlgorithm tests that NewReplacer picks the correct algorithm. func TestPickAlgorithm(t *testing.T) { for i, tc := range algorithmTestCases { got := fmt.Sprintf("%T", tc.r.Replacer()) if got != tc.want { t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want) } } } type errWriter struct{} func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } // TestWriteStringError tests that WriteString returns an error // received from the underlying io.Writer. func TestWriteStringError(t *testing.T) { for i, tc := range algorithmTestCases { n, err := tc.r.WriteString(errWriter{}, "abc") if n != 0 || err == nil || err.Error() != "unwritable" { t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err) } } } // TestGenericTrieBuilding verifies the structure of the generated trie. There // is one node per line, and the key ending with the current line is in the // trie if it ends with a "+". 
func TestGenericTrieBuilding(t *testing.T) { testCases := []struct{ in, out string }{ {"abc;abdef;abdefgh;xx;xy;z", `- a- .b- ..c+ ..d- ...ef+ .....gh+ x- .x+ .y+ z+ `}, {"abracadabra;abracadabrakazam;abraham;abrasion", `- a- .bra- ....c- .....adabra+ ...........kazam+ ....h- .....am+ ....s- .....ion+ `}, {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- X+ Y+ a+ .a+ ..a+ i+ l- .ong+ ....er+ ......st+ x+ .x+ `}, {"foo;;foo;foo1", `+ f- .oo+ ...1+ `}, } for _, tc := range testCases { keys := Split(tc.in, ";") args := make([]string, len(keys)*2) for i, key := range keys { args[i*2] = key } got := NewReplacer(args...).PrintTrie() // Remove tabs from tc.out wantbuf := make([]byte, 0, len(tc.out)) for i := 0; i < len(tc.out); i++ { if tc.out[i] != '\t' { wantbuf = append(wantbuf, tc.out[i]) } } want := string(wantbuf) if got != want { t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) } } } func BenchmarkGenericNoMatch(b *testing.B) { str := Repeat("A", 100) + Repeat("B", 100) generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic for i := 0; i < b.N; i++ { generic.Replace(str) } } func BenchmarkGenericMatch1(b *testing.B) { str := Repeat("a", 100) + Repeat("b", 100) generic := NewReplacer("a", "A", "b", "B", "12", "123") for i := 0; i < b.N; i++ { generic.Replace(str) } } func BenchmarkGenericMatch2(b *testing.B) { str := Repeat("It's <b>HTML</b>!", 100) for i := 0; i < b.N; i++ { htmlUnescaper.Replace(str) } } func benchmarkSingleString(b *testing.B, pattern, text string) { r := NewReplacer(pattern, "[match]") b.SetBytes(int64(len(text))) b.ResetTimer() for i := 0; i < b.N; i++ { r.Replace(text) } } func BenchmarkSingleMaxSkipping(b *testing.B) { benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000)) } func BenchmarkSingleLongSuffixFail(b *testing.B) { benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002)) } func BenchmarkSingleMatch(b *testing.B) { benchmarkSingleString(b, "abcdef", 
Repeat("abcdefghijklmno", 1000)) } func BenchmarkByteByteNoMatch(b *testing.B) { str := Repeat("A", 100) + Repeat("B", 100) for i := 0; i < b.N; i++ { capitalLetters.Replace(str) } } func BenchmarkByteByteMatch(b *testing.B) { str := Repeat("a", 100) + Repeat("b", 100) for i := 0; i < b.N; i++ { capitalLetters.Replace(str) } } func BenchmarkByteStringMatch(b *testing.B) { str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">" for i := 0; i < b.N; i++ { htmlEscaper.Replace(str) } } func BenchmarkHTMLEscapeNew(b *testing.B) { str := "I <3 to escape HTML & other text too." for i := 0; i < b.N; i++ { htmlEscaper.Replace(str) } } func BenchmarkHTMLEscapeOld(b *testing.B) { str := "I <3 to escape HTML & other text too." for i := 0; i < b.N; i++ { oldHTMLEscape(str) } } func BenchmarkByteStringReplacerWriteString(b *testing.B) { str := Repeat("I <3 to escape HTML & other text too.", 100) buf := new(bytes.Buffer) for i := 0; i < b.N; i++ { htmlEscaper.WriteString(buf, str) buf.Reset() } } func BenchmarkByteReplacerWriteString(b *testing.B) { str := Repeat("abcdefghijklmnopqrstuvwxyz", 100) buf := new(bytes.Buffer) for i := 0; i < b.N; i++ { capitalLetters.WriteString(buf, str) buf.Reset() } } // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. func BenchmarkByteByteReplaces(b *testing.B) { str := Repeat("a", 100) + Repeat("b", 100) for i := 0; i < b.N; i++ { Replace(Replace(str, "a", "A", -1), "b", "B", -1) } } // BenchmarkByteByteMap compares byteByteImpl against Map. 
func BenchmarkByteByteMap(b *testing.B) {
	// upperAB capitalises 'a' and 'b' and passes every other rune through.
	upperAB := func(c rune) rune {
		if c == 'a' {
			return 'A'
		}
		if c == 'b' {
			return 'B'
		}
		return c
	}

	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		Map(upperAB, input)
	}
}

// mapdata holds the named sample inputs used by BenchmarkMap: one made of
// ASCII letters and one made of Greek letters, each separated by spaces.
var mapdata = []struct{ name, data string }{
	{"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
	{"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
}

// BenchmarkMap measures Map over every mapdata sample, first with a mapping
// that returns each rune unchanged ("identity") and then with one that shifts
// lower-case ASCII and Greek letters to upper case ("change").
func BenchmarkMap(b *testing.B) {
	// runSamples registers one sub-benchmark per mapdata entry, named after
	// the entry, applying mapping to that entry's data.
	runSamples := func(b *testing.B, mapping func(rune) rune) {
		for _, sample := range mapdata {
			b.Run(sample.name, func(b *testing.B) {
				for n := 0; n < b.N; n++ {
					Map(mapping, sample.data)
				}
			})
		}
	}

	unchanged := func(r rune) rune {
		return r
	}
	b.Run("identity", func(b *testing.B) {
		runSamples(b, unchanged)
	})

	shifted := func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z':
			return r + 'A' - 'a'
		case 'α' <= r && r <= 'ω':
			return r + 'Α' - 'α'
		}
		return r
	}
	b.Run("change", func(b *testing.B) {
		runSamples(b, shifted)
	})
}