lexer grammar t012lexerXMLLexer;
options {
  language =Cpp;
}

@lexer::includes
{
#include "UserTestTraits.hpp"
#include <iostream>
}
@lexer::namespace
{ Antlr3Test }

@lexer::context {
ImplTraits::StringStreamType outbuf;

void output(const char* line)
{
    outbuf << line << "\r\n";
}

void output(const char* line1, const char *line2)
{
    outbuf << line1 << line2 << "\r\n";
}

void output(const char* line1, ImplTraits::StringType const& line2)
{
    outbuf << line1 << line2 << "\r\n";
}

void appendArribute(const char* prefix, ImplTraits::StringType const& name, ImplTraits::StringType const& value)
{
    outbuf << prefix << name << '=' << value << "\r\n";
}

void appendString(const char* name, ImplTraits::StringType const& value)
{
    outbuf << name << '"' << value << '"' << "\r\n";
}

}
DOCUMENT
    :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS? 
    ;

fragment DOCTYPE
    :
        '<!DOCTYPE' WS rootElementName=GENERIC_ID 
        { output("ROOTELEMENT: ", $rootElementName.text);}
        WS
        ( 
            ( 'SYSTEM' WS sys1=VALUE
                {output("SYSTEM: ", $sys1.text);}
                
            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                {output("PUBLIC: ", $pub.text);}
                {output("SYSTEM: ", $sys2.text);}   
            )
            ( WS )?
        )?
        ( dtd=INTERNAL_DTD
            {output("INTERNAL DTD: ", $dtd.text);}
        )?
		'>'
	;

fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

fragment PI :
        '<?' target=GENERIC_ID WS? 
          {output("PI: ", $target.text);}
        ( ATTRIBUTE WS? )*  '?>'
	;

fragment XMLDECL :
        '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS? 
          {output("XML declaration");}
        ( ATTRIBUTE WS? )*  '?>'
	;


fragment ELEMENT
    : ( START_TAG
            (ELEMENT
            | t=PCDATA
                {appendString("PCDATA: ", $t.text);}
            | t=CDATA
                {appendString("CDATA: ", $t.text);}
            | t=COMMENT
                {appendString("Comment: ", $t.text);}
            | pi=PI
            )*
            END_TAG
        | EMPTY_ELEMENT
        )
    ;

fragment START_TAG 
    : '<' WS? name=GENERIC_ID WS?
          {output("Start Tag: ", $name.text);}
        ( ATTRIBUTE WS? )* '>'
    ;

fragment EMPTY_ELEMENT 
    : '<' WS? name=GENERIC_ID WS?
          {output("Empty Element: ", $name.text);}
        ( ATTRIBUTE WS? )* '/>'
    ;

fragment ATTRIBUTE 
    : name=GENERIC_ID WS? '=' WS? value=VALUE
        {appendArribute("Attr: ", $name.text, $value.text);}
    ;

fragment END_TAG 
    : '</' WS? name=GENERIC_ID WS? '>'
        {output("End Tag: ", $name.text);}
    ;

fragment COMMENT
	:	'<!--' (options {greedy=false;} : .)* '-->'
	;

fragment CDATA
	:	'<![CDATA[' (options {greedy=false;} : .)* ']]>'
	;

fragment PCDATA : (~'<')+ ; 

fragment VALUE : 
        ( '\"' (~'\"')* '\"'
        | '\'' (~'\'')* '\''
        )
	;

fragment GENERIC_ID 
    : ( LETTER | '_' | ':') 
        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
	;

fragment LETTER
	: 'a'..'z' 
	| 'A'..'Z'
	;

fragment WS  :
        (   ' '
        |   '\t'
        |  ( '\n'
            |	'\r\n'
            |	'\r'
            )
        )+
    ;