普通文本  |  319行  |  7.74 KB

# -*- coding: utf-8 -*-

import unittest
import textwrap
import antlr3
import antlr3.tree
import testbase
import sys
from StringIO import StringIO

class T(testbase.ANTLRTest):
    def setUp(self):
        self.oldPath = sys.path[:]
        sys.path.insert(0, self.baseDir)


    def tearDown(self):
        sys.path = self.oldPath


    def testOverrideMain(self):
        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
              }

            @main {
            def main(argv):
                raise RuntimeError("no")
            }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)


        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        try:
            lexerMod.main(
            ['lexer.py']
            )
            self.fail()
        except RuntimeError:
            pass


    def testLexerFromFile(self):
        input = "foo bar"
        inputPath = self.writeFile("input.txt", input)

        grammar = textwrap.dedent(
            r"""lexer grammar T1;
            options {
              language = Python;
              }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)


        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', inputPath],
            stdout=stdout
            )

        self.failUnlessEqual(len(stdout.getvalue().splitlines()), 3)


    def testLexerFromStdIO(self):
        input = "foo bar"

        grammar = textwrap.dedent(
            r"""lexer grammar T2;
            options {
              language = Python;
              }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)


        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py'],
            stdin=StringIO(input),
            stdout=stdout
            )

        self.failUnlessEqual(len(stdout.getvalue().splitlines()), 3)


    def testLexerEncoding(self):
        input = u"föö bär".encode('utf-8')

        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
              }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)


        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', '--encoding', 'utf-8'],
            stdin=StringIO(input),
            stdout=stdout
            )

        self.failUnlessEqual(len(stdout.getvalue().splitlines()), 3)


    def testCombined(self):
        input = "foo bar"

        grammar = textwrap.dedent(
            r"""grammar T4;
            options {
              language = Python;
              }

            r returns [res]: (ID)+ EOF { $res = $text; };

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)


        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(input),
            stdout=stdout
            )

        stdout = stdout.getvalue()
        self.failUnlessEqual(len(stdout.splitlines()), 1, stdout)


    def testCombinedOutputAST(self):
        input = "foo + bar"

        grammar = textwrap.dedent(
            r"""grammar T5;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            """)


        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(input),
            stdout=stdout
            )

        stdout = stdout.getvalue().strip()
        self.failUnlessEqual(stdout, "(+ foo bar)")


    def testTreeParser(self):
        grammar = textwrap.dedent(
            r'''grammar T6;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T6Walker;
            options {
            language=Python;
            ASTLabelType=CommonTree;
            tokenVocab=T6;
            }
            r returns [res]: ^(OP a=ID b=ID)
              { $res = "\%s \%s \%s" \% ($a.text, $OP.text, $b.text) }
              ;
            ''')

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r', '--parser', 'T6Parser', '--parser-rule', 'r', '--lexer', 'T6Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        stdout = stdout.getvalue().strip()
        self.failUnlessEqual(stdout, "u'a + b'")


    def testTreeParserRewrite(self):
        grammar = textwrap.dedent(
            r'''grammar T7;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T7Walker;
            options {
              language=Python;
              ASTLabelType=CommonTree;
              tokenVocab=T7;
              output=AST;
            }
            tokens {
              ARG;
            }
            r: ^(OP a=ID b=ID) -> ^(OP ^(ARG ID) ^(ARG ID));
            ''')

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r', '--parser', 'T7Parser', '--parser-rule', 'r', '--lexer', 'T7Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        stdout = stdout.getvalue().strip()
        self.failUnlessEqual(stdout, "(+ (ARG a) (ARG b))")



    def testGrammarImport(self):
        slave = textwrap.dedent(
            r'''
            parser grammar T8S;
            options {
              language=Python;
            }

            a : B;
            ''')

        parserName = self.writeInlineGrammar(slave)[0]
        # slave parsers are imported as normal python modules
        # to force reloading current version, purge module from sys.modules
        try:
            del sys.modules[parserName+'Parser']
        except KeyError:
            pass

        master = textwrap.dedent(
            r'''
            grammar T8M;
            options {
              language=Python;
            }
            import T8S;
            s returns [res]: a { $res = $a.text };
            B : 'b' ; // defines B from inherited token space
            WS : (' '|'\n') {self.skip()} ;
            ''')

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(master, returnModule=True)
        parserMod.main(
            ['import.py', '--rule', 's'],
            stdin=StringIO("b"),
            stdout=stdout
            )

        stdout = stdout.getvalue().strip()
        self.failUnlessEqual(stdout, "u'b'")


if __name__ == '__main__':
    unittest.main()