// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package csv import ( "io" "reflect" "strings" "testing" ) var readTests = []struct { Name string Input string Output [][]string UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 // These fields are copied into the Reader Comma rune Comment rune FieldsPerRecord int LazyQuotes bool TrailingComma bool TrimLeadingSpace bool Error string Line int // Expected error line if != 0 Column int // Expected error column if line != 0 }{ { Name: "Simple", Input: "a,b,c\n", Output: [][]string{{"a", "b", "c"}}, }, { Name: "CRLF", Input: "a,b\r\nc,d\r\n", Output: [][]string{{"a", "b"}, {"c", "d"}}, }, { Name: "BareCR", Input: "a,b\rc,d\r\n", Output: [][]string{{"a", "b\rc", "d"}}, }, { Name: "RFC4180test", UseFieldsPerRecord: true, Input: `#field1,field2,field3 "aaa","bb b","ccc" "a,a","b""bb","ccc" zzz,yyy,xxx `, Output: [][]string{ {"#field1", "field2", "field3"}, {"aaa", "bb\nb", "ccc"}, {"a,a", `b"bb`, "ccc"}, {"zzz", "yyy", "xxx"}, }, }, { Name: "NoEOLTest", Input: "a,b,c", Output: [][]string{{"a", "b", "c"}}, }, { Name: "Semicolon", Comma: ';', Input: "a;b;c\n", Output: [][]string{{"a", "b", "c"}}, }, { Name: "MultiLine", Input: `"two line","one line","three line field"`, Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, }, { Name: "BlankLine", Input: "a,b,c\n\nd,e,f\n\n", Output: [][]string{ {"a", "b", "c"}, {"d", "e", "f"}, }, }, { Name: "BlankLineFieldCount", Input: "a,b,c\n\nd,e,f\n\n", UseFieldsPerRecord: true, Output: [][]string{ {"a", "b", "c"}, {"d", "e", "f"}, }, }, { Name: "TrimSpace", Input: " a, b, c\n", TrimLeadingSpace: true, Output: [][]string{{"a", "b", "c"}}, }, { Name: "LeadingSpace", Input: " a, b, c\n", Output: [][]string{{" a", " b", " c"}}, }, { Name: "Comment", Comment: '#', Input: "#1,2,3\na,b,c\n#comment", Output: [][]string{{"a", "b", "c"}}, }, { Name: "NoComment", Input: "#1,2,3\na,b,c", Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, }, { Name: "LazyQuotes", LazyQuotes: true, Input: `a "word","1"2",a","b`, Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, }, { Name: "BareQuotes", LazyQuotes: true, Input: `a "word","1"2",a"`, Output: [][]string{{`a "word"`, `1"2`, `a"`}}, }, { Name: "BareDoubleQuotes", LazyQuotes: true, Input: `a""b,c`, Output: [][]string{{`a""b`, `c`}}, }, { Name: "BadDoubleQuotes", Input: `a""b,c`, Error: `bare " in non-quoted-field`, Line: 1, Column: 1, }, { Name: "TrimQuote", Input: ` "a"," b",c`, TrimLeadingSpace: true, Output: [][]string{{"a", " b", "c"}}, }, { Name: "BadBareQuote", Input: `a "word","b"`, Error: `bare " in non-quoted-field`, Line: 1, Column: 2, }, { Name: "BadTrailingQuote", Input: `"a word",b"`, Error: `bare " in non-quoted-field`, Line: 1, Column: 10, }, { Name: "ExtraneousQuote", Input: `"a "word","b"`, Error: `extraneous " in field`, Line: 1, Column: 3, }, { Name: "BadFieldCount", UseFieldsPerRecord: true, Input: "a,b,c\nd,e", Error: "wrong number of fields", Line: 2, }, { Name: "BadFieldCount1", UseFieldsPerRecord: true, FieldsPerRecord: 2, Input: `a,b,c`, Error: "wrong number of fields", Line: 1, }, { Name: "FieldCount", Input: "a,b,c\nd,e", Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, }, { Name: "TrailingCommaEOF", Input: "a,b,c,", Output: [][]string{{"a", "b", "c", ""}}, }, { Name: "TrailingCommaEOL", Input: "a,b,c,\n", Output: [][]string{{"a", "b", "c", ""}}, }, { Name: "TrailingCommaSpaceEOF", TrimLeadingSpace: true, Input: "a,b,c, ", Output: [][]string{{"a", "b", "c", ""}}, }, { Name: "TrailingCommaSpaceEOL", TrimLeadingSpace: true, Input: "a,b,c, \n", Output: [][]string{{"a", "b", "c", ""}}, }, { Name: "TrailingCommaLine3", TrimLeadingSpace: true, Input: "a,b,c\nd,e,f\ng,hi,", Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, }, { Name: "NotTrailingComma3", Input: "a,b,c, \n", Output: [][]string{{"a", "b", "c", " "}}, }, { Name: "CommaFieldTest", TrailingComma: true, Input: `x,y,z,w x,y,z, x,y,, x,,, ,,, "x","y","z","w" "x","y","z","" "x","y","","" "x","","","" "","","","" `, Output: [][]string{ {"x", "y", "z", "w"}, {"x", "y", "z", ""}, {"x", "y", "", ""}, {"x", "", "", ""}, {"", "", "", ""}, {"x", "y", "z", "w"}, {"x", "y", "z", ""}, {"x", "y", "", ""}, {"x", "", "", ""}, {"", "", "", ""}, }, }, { Name: "TrailingCommaIneffective1", TrailingComma: true, TrimLeadingSpace: true, Input: "a,b,\nc,d,e", Output: [][]string{ {"a", "b", ""}, {"c", "d", "e"}, }, }, { Name: "TrailingCommaIneffective2", TrailingComma: false, TrimLeadingSpace: true, Input: "a,b,\nc,d,e", Output: [][]string{ {"a", "b", ""}, {"c", "d", "e"}, }, }, } func TestRead(t *testing.T) { for _, tt := range readTests { r := NewReader(strings.NewReader(tt.Input)) r.Comment = tt.Comment if tt.UseFieldsPerRecord { r.FieldsPerRecord = tt.FieldsPerRecord } else { r.FieldsPerRecord = -1 } r.LazyQuotes = tt.LazyQuotes r.TrailingComma = tt.TrailingComma r.TrimLeadingSpace = tt.TrimLeadingSpace if tt.Comma != 0 { r.Comma = tt.Comma } out, err := r.ReadAll() perr, _ := err.(*ParseError) if tt.Error != "" { if err == nil || !strings.Contains(err.Error(), tt.Error) { t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error) } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) { t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column) } } else if err != nil { t.Errorf("%s: unexpected error %v", tt.Name, err) } else if !reflect.DeepEqual(out, tt.Output) { t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output) } } } // nTimes is an io.Reader which yields the string s n times. type nTimes struct { s string n int off int } func (r *nTimes) Read(p []byte) (n int, err error) { for { if r.n <= 0 || r.s == "" { return n, io.EOF } n0 := copy(p, r.s[r.off:]) p = p[n0:] n += n0 r.off += n0 if r.off == len(r.s) { r.off = 0 r.n-- } if len(p) == 0 { return } } } // benchmarkRead measures reading the provided CSV rows data. // initReader, if non-nil, modifies the Reader before it's used. func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { b.ReportAllocs() r := NewReader(&nTimes{s: rows, n: b.N}) if initReader != nil { initReader(r) } for { _, err := r.Read() if err == io.EOF { break } if err != nil { b.Fatal(err) } } } const benchmarkCSVData = `x,y,z,w x,y,z, x,y,, x,,, ,,, "x","y","z","w" "x","y","z","" "x","y","","" "x","","","" "","","","" ` func BenchmarkRead(b *testing.B) { benchmarkRead(b, nil, benchmarkCSVData) } func BenchmarkReadWithFieldsPerRecord(b *testing.B) { benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) } func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) } func BenchmarkReadLargeFields(b *testing.B) { benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv `, 3)) }