Golang程序  |  443行  |  12.18 KB

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

import (
	"bytes"
	"fmt"
	"os"
	"strings"
	"testing"
)

func TestScanner(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode")
	}

	src, err := os.Open("parser.go")
	if err != nil {
		t.Fatal(err)
	}
	defer src.Close()

	var s scanner
	s.init(src, nil, 0)
	for {
		s.next()
		if s.tok == _EOF {
			break
		}
		switch s.tok {
		case _Name:
			fmt.Println(s.line, s.tok, "=>", s.lit)
		case _Operator:
			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
		default:
			fmt.Println(s.line, s.tok)
		}
	}
}

func TestTokens(t *testing.T) {
	// make source
	var buf bytes.Buffer
	for i, s := range sampleTokens {
		buf.WriteString("\t\t\t\t"[:i&3])           // leading indentation
		buf.WriteString(s.src)                      // token
		buf.WriteString("        "[:i&7])           // trailing spaces
		buf.WriteString("/*line foo:1 */ // bar\n") // comments (don't crash w/o directive handler)
	}

	// scan source
	var got scanner
	got.init(&buf, nil, 0)
	got.next()
	for i, want := range sampleTokens {
		nlsemi := false

		if got.line != uint(i+linebase) {
			t.Errorf("got line %d; want %d", got.line, i+linebase)
		}

		if got.tok != want.tok {
			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
			continue
		}

		switch want.tok {
		case _Semi:
			if got.lit != "semicolon" {
				t.Errorf("got %s; want semicolon", got.lit)
			}

		case _Name, _Literal:
			if got.lit != want.src {
				t.Errorf("got lit = %q; want %q", got.lit, want.src)
				continue
			}
			nlsemi = true

		case _Operator, _AssignOp, _IncOp:
			if got.op != want.op {
				t.Errorf("got op = %s; want %s", got.op, want.op)
				continue
			}
			if got.prec != want.prec {
				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
				continue
			}
			nlsemi = want.tok == _IncOp

		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
			nlsemi = true
		}

		if nlsemi {
			got.next()
			if got.tok != _Semi {
				t.Errorf("got tok = %s; want ;", got.tok)
				continue
			}
			if got.lit != "newline" {
				t.Errorf("got %s; want newline", got.lit)
			}
		}

		got.next()
	}

	if got.tok != _EOF {
		t.Errorf("got %q; want _EOF", got.tok)
	}
}

var sampleTokens = [...]struct {
	tok  token
	src  string
	op   Operator
	prec int
}{
	// name samples
	{_Name, "x", 0, 0},
	{_Name, "X123", 0, 0},
	{_Name, "foo", 0, 0},
	{_Name, "Foo123", 0, 0},
	{_Name, "foo_bar", 0, 0},
	{_Name, "_", 0, 0},
	{_Name, "_foobar", 0, 0},
	{_Name, "a۰۱۸", 0, 0},
	{_Name, "foo६४", 0, 0},
	{_Name, "bar9876", 0, 0},
	{_Name, "ŝ", 0, 0},
	{_Name, "ŝfoo", 0, 0},

	// literal samples
	{_Literal, "0", 0, 0},
	{_Literal, "1", 0, 0},
	{_Literal, "12345", 0, 0},
	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
	{_Literal, "01234567", 0, 0},
	{_Literal, "0x0", 0, 0},
	{_Literal, "0xcafebabe", 0, 0},
	{_Literal, "0.", 0, 0},
	{_Literal, "0.e0", 0, 0},
	{_Literal, "0.e-1", 0, 0},
	{_Literal, "0.e+123", 0, 0},
	{_Literal, ".0", 0, 0},
	{_Literal, ".0E00", 0, 0},
	{_Literal, ".0E-0123", 0, 0},
	{_Literal, ".0E+12345678901234567890", 0, 0},
	{_Literal, ".45e1", 0, 0},
	{_Literal, "3.14159265", 0, 0},
	{_Literal, "1e0", 0, 0},
	{_Literal, "1e+100", 0, 0},
	{_Literal, "1e-100", 0, 0},
	{_Literal, "2.71828e-1000", 0, 0},
	{_Literal, "0i", 0, 0},
	{_Literal, "1i", 0, 0},
	{_Literal, "012345678901234567889i", 0, 0},
	{_Literal, "123456789012345678890i", 0, 0},
	{_Literal, "0.i", 0, 0},
	{_Literal, ".0i", 0, 0},
	{_Literal, "3.14159265i", 0, 0},
	{_Literal, "1e0i", 0, 0},
	{_Literal, "1e+100i", 0, 0},
	{_Literal, "1e-100i", 0, 0},
	{_Literal, "2.71828e-1000i", 0, 0},
	{_Literal, "'a'", 0, 0},
	{_Literal, "'\\000'", 0, 0},
	{_Literal, "'\\xFF'", 0, 0},
	{_Literal, "'\\uff16'", 0, 0},
	{_Literal, "'\\U0000ff16'", 0, 0},
	{_Literal, "`foobar`", 0, 0},
	{_Literal, "`foo\tbar`", 0, 0},
	{_Literal, "`\r`", 0, 0},

	// operators
	{_Operator, "||", OrOr, precOrOr},

	{_Operator, "&&", AndAnd, precAndAnd},

	{_Operator, "==", Eql, precCmp},
	{_Operator, "!=", Neq, precCmp},
	{_Operator, "<", Lss, precCmp},
	{_Operator, "<=", Leq, precCmp},
	{_Operator, ">", Gtr, precCmp},
	{_Operator, ">=", Geq, precCmp},

	{_Operator, "+", Add, precAdd},
	{_Operator, "-", Sub, precAdd},
	{_Operator, "|", Or, precAdd},
	{_Operator, "^", Xor, precAdd},

	{_Star, "*", Mul, precMul},
	{_Operator, "/", Div, precMul},
	{_Operator, "%", Rem, precMul},
	{_Operator, "&", And, precMul},
	{_Operator, "&^", AndNot, precMul},
	{_Operator, "<<", Shl, precMul},
	{_Operator, ">>", Shr, precMul},

	// assignment operations
	{_AssignOp, "+=", Add, precAdd},
	{_AssignOp, "-=", Sub, precAdd},
	{_AssignOp, "|=", Or, precAdd},
	{_AssignOp, "^=", Xor, precAdd},

	{_AssignOp, "*=", Mul, precMul},
	{_AssignOp, "/=", Div, precMul},
	{_AssignOp, "%=", Rem, precMul},
	{_AssignOp, "&=", And, precMul},
	{_AssignOp, "&^=", AndNot, precMul},
	{_AssignOp, "<<=", Shl, precMul},
	{_AssignOp, ">>=", Shr, precMul},

	// other operations
	{_IncOp, "++", Add, precAdd},
	{_IncOp, "--", Sub, precAdd},
	{_Assign, "=", 0, 0},
	{_Define, ":=", 0, 0},
	{_Arrow, "<-", 0, 0},

	// delimiters
	{_Lparen, "(", 0, 0},
	{_Lbrack, "[", 0, 0},
	{_Lbrace, "{", 0, 0},
	{_Rparen, ")", 0, 0},
	{_Rbrack, "]", 0, 0},
	{_Rbrace, "}", 0, 0},
	{_Comma, ",", 0, 0},
	{_Semi, ";", 0, 0},
	{_Colon, ":", 0, 0},
	{_Dot, ".", 0, 0},
	{_DotDotDot, "...", 0, 0},

	// keywords
	{_Break, "break", 0, 0},
	{_Case, "case", 0, 0},
	{_Chan, "chan", 0, 0},
	{_Const, "const", 0, 0},
	{_Continue, "continue", 0, 0},
	{_Default, "default", 0, 0},
	{_Defer, "defer", 0, 0},
	{_Else, "else", 0, 0},
	{_Fallthrough, "fallthrough", 0, 0},
	{_For, "for", 0, 0},
	{_Func, "func", 0, 0},
	{_Go, "go", 0, 0},
	{_Goto, "goto", 0, 0},
	{_If, "if", 0, 0},
	{_Import, "import", 0, 0},
	{_Interface, "interface", 0, 0},
	{_Map, "map", 0, 0},
	{_Package, "package", 0, 0},
	{_Range, "range", 0, 0},
	{_Return, "return", 0, 0},
	{_Select, "select", 0, 0},
	{_Struct, "struct", 0, 0},
	{_Switch, "switch", 0, 0},
	{_Type, "type", 0, 0},
	{_Var, "var", 0, 0},
}

func TestComments(t *testing.T) {
	type comment struct {
		line, col uint // 0-based
		text      string
	}

	for _, test := range []struct {
		src  string
		want comment
	}{
		// no comments
		{"no comment here", comment{0, 0, ""}},
		{" /", comment{0, 0, ""}},
		{"\n /*/", comment{0, 0, ""}},

		//-style comments
		{"// line comment\n", comment{0, 0, "// line comment"}},
		{"package p // line comment\n", comment{0, 10, "// line comment"}},
		{"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}},
		{"\n\n//\n", comment{2, 0, "//"}},
		{"//", comment{0, 0, "//"}},

		/*-style comments */
		{"/* regular comment */", comment{0, 0, "/* regular comment */"}},
		{"package p /* regular comment", comment{0, 0, ""}},
		{"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}},
		{"\n\n/**/", comment{2, 0, "/**/"}},
		{"/*", comment{0, 0, ""}},
	} {
		var s scanner
		var got comment
		s.init(strings.NewReader(test.src),
			func(line, col uint, msg string) {
				if msg[0] != '/' {
					// error
					if msg != "comment not terminated" {
						t.Errorf("%q: %s", test.src, msg)
					}
					return
				}
				got = comment{line - linebase, col - colbase, msg} // keep last one
			}, comments)

		for {
			s.next()
			if s.tok == _EOF {
				break
			}
		}

		want := test.want
		if got.line != want.line || got.col != want.col {
			t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col)
		}
		if got.text != want.text {
			t.Errorf("%q: got %q; want %q", test.src, got.text, want.text)
		}
	}
}

func TestScanErrors(t *testing.T) {
	for _, test := range []struct {
		src, msg  string
		line, col uint // 0-based
	}{
		// Note: Positions for lexical errors are the earliest position
		// where the error is apparent, not the beginning of the respective
		// token.

		// rune-level errors
		{"fo\x00o", "invalid NUL character", 0, 2},
		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 2, 0},

		// token-level errors
		{"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0},
		{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */},
		{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0},
		{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */},

		{"x + ~y", "invalid character U+007E '~'", 0, 4},
		{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
		{"const x = 0xyz", "malformed hex constant", 0, 12},
		{"0123456789", "malformed octal constant", 0, 10},
		{"0123456789. /* foobar", "comment not terminated", 0, 12},   // valid float constant
		{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
		{"var a, b = 08, 07\n", "malformed octal constant", 0, 13},
		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10},

		{`''`, "empty character literal or unescaped ' in character literal", 0, 1},
		{"'\n", "newline in character literal", 0, 1},
		{`'\`, "invalid character literal (missing closing ')", 0, 0},
		{`'\'`, "invalid character literal (missing closing ')", 0, 0},
		{`'\x`, "invalid character literal (missing closing ')", 0, 0},
		{`'\x'`, "non-hex character in escape sequence: '", 0, 3},
		{`'\y'`, "unknown escape sequence", 0, 2},
		{`'\x0'`, "non-hex character in escape sequence: '", 0, 4},
		{`'\00'`, "non-octal character in escape sequence: '", 0, 4},
		{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
		{`'\378`, "non-octal character in escape sequence: 8", 0, 4},
		{`'\400'`, "octal escape value > 255: 256", 0, 5},
		{`'xx`, "invalid character literal (missing closing ')", 0, 0},
		{`'xx'`, "invalid character literal (more than one character)", 0, 0},

		{"\"\n", "newline in string", 0, 1},
		{`"`, "string not terminated", 0, 0},
		{`"foo`, "string not terminated", 0, 0},
		{"`", "string not terminated", 0, 0},
		{"`foo", "string not terminated", 0, 0},
		{"/*/", "comment not terminated", 0, 0},
		{"/*\n\nfoo", "comment not terminated", 0, 0},
		{`"\`, "string not terminated", 0, 0},
		{`"\"`, "string not terminated", 0, 0},
		{`"\x`, "string not terminated", 0, 0},
		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
		{`"\y"`, "unknown escape sequence", 0, 2},
		{`"\x0"`, "non-hex character in escape sequence: \"", 0, 4},
		{`"\00"`, "non-octal character in escape sequence: \"", 0, 4},
		{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
		{`"\378"`, "non-octal character in escape sequence: 8", 0, 4},
		{`"\400"`, "octal escape value > 255: 256", 0, 5},

		{`s := "foo\z"`, "unknown escape sequence", 0, 10},
		{`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10},
		{`"\x`, "string not terminated", 0, 0},
		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},

		// former problem cases
		{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
	} {
		var s scanner
		nerrors := 0
		s.init(strings.NewReader(test.src), func(line, col uint, msg string) {
			nerrors++
			// only check the first error
			if nerrors == 1 {
				if msg != test.msg {
					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
				}
				if line != test.line+linebase {
					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
				}
				if col != test.col+colbase {
					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
				}
			} else if nerrors > 1 {
				// TODO(gri) make this use position info
				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
			}
		}, 0)

		for {
			s.next()
			if s.tok == _EOF {
				break
			}
		}

		if nerrors == 0 {
			t.Errorf("%q: got no error; want %q", test.src, test.msg)
		}
	}
}

func TestIssue21938(t *testing.T) {
	s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"

	var got scanner
	got.init(strings.NewReader(s), nil, 0)
	got.next()

	if got.tok != _Literal || got.lit != ".5" {
		t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5")
	}
}