// ANTLRLexer.m - Android社区 - https://www.androidos.net.cn/

// [The "BSD licence"]
// Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#import <ANTLR/antlr.h>
#import "ANTLRLexer.h"

@implementation ANTLRLexer

// Bind the two properties to instance variables of the same name (no
// underscore prefix). Explicit -input and -setInput: implementations also
// appear later in this file.
@synthesize input;
@synthesize ruleNestingLevel;
#pragma mark Initializer

/**
 * Creates a lexer over anInput, backed by a newly allocated
 * ANTLRRecognizerSharedState.
 *
 * The stream is retained. If the shared state already carries a token, that
 * token is pointed at the same stream. ruleNestingLevel starts at zero.
 *
 * NOTE(review): the state object is alloc'ed here and never released in this
 * method; confirm that -initWithState: takes over that ownership.
 *
 * @param anInput the character stream to read from
 * @return the initialized lexer, or nil when the superclass initializer fails
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput
{
    ANTLRRecognizerSharedState *freshState = [[ANTLRRecognizerSharedState alloc] init];
    self = [super initWithState:freshState];
    if ( self == nil ) {
        return nil;
    }
    input = [anInput retain];
    id pendingToken = state.token;
    if ( pendingToken != nil ) {
        [(ANTLRCommonToken *)pendingToken setInput:anInput];
    }
    ruleNestingLevel = 0;
    return self;
}

/**
 * Creates a lexer over anInput that uses the caller-supplied shared state.
 *
 * The stream is retained. If the shared state already carries a token, that
 * token is pointed at the same stream. ruleNestingLevel starts at zero.
 *
 * @param anInput the character stream to read from
 * @param aState  the recognizer state handed on to the superclass initializer
 * @return the initialized lexer, or nil when the superclass initializer fails
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput State:(ANTLRRecognizerSharedState *)aState
{
    self = [super initWithState:aState];
    if ( self == nil ) {
        return nil;
    }
    input = [anInput retain];
    id pendingToken = state.token;
    if ( pendingToken != nil ) {
        [(ANTLRCommonToken *)pendingToken setInput:anInput];
    }
    ruleNestingLevel = 0;
    return self;
}

/** Gives up this lexer's ownership of the input stream, then tears down the superclass. */
- (void) dealloc
{
    // Sending -release to nil does nothing, so no explicit nil test is required.
    [input release];
    [super dealloc];
}

/**
 * Produces a copy of this lexer.
 *
 * The copy is allocated from aZone using the receiver's own class and set up
 * with plain -init (the superclass -copyWithZone: is not used). It is then
 * given the same input stream object, assigned through the input setter, and
 * the same ruleNestingLevel.
 *
 * @param aZone the zone to allocate the copy from
 * @return the new lexer; per NSCopying the caller owns it
 */
- (id) copyWithZone:(NSZone *)aZone
{
    ANTLRLexer *duplicate = [[[self class] allocWithZone:aZone] init];

    if ( input != nil ) {
        duplicate.input = input;
    }
    duplicate.ruleNestingLevel = ruleNestingLevel;
    return duplicate;
}

/**
 * Returns the lexer to its starting condition.
 *
 * Order of work: the superclass resets the recognizer-level state, the input
 * stream (if any) is rewound to index 0, and finally the lexer-specific
 * fields of the shared state are cleared. With no shared state only the
 * first two steps happen.
 */
- (void) reset
{
    [super reset];

    // Rewind the stream; when input is nil this message is simply ignored.
    [input seek:0];

    if ( state != nil ) {
        state.token = nil;
        state.type = ANTLRCommonToken.INVALID_TOKEN_TYPE;
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = -1;
        state.tokenStartCharPositionInLine = -1;
        state.tokenStartLine = -1;
        state.text = nil;
    }
}

// token stuff
#pragma mark Tokens

/** Returns whatever the shared state's -getToken reports as the current token. */
- (id<ANTLRToken>)getToken
{
    return [state getToken]; 
}

/**
 * Stores aToken in the shared state unless it is already the current token.
 *
 * NOTE(review): aToken is retained here before being handed to state.token;
 * if that setter retains as well this is one retain too many -- confirm
 * against ANTLRRecognizerSharedState.
 *
 * @param aToken the token to record; may be nil
 */
- (void) setToken: (id<ANTLRToken>) aToken
{
    if ( state.token == aToken ) {
        return; // nothing to change
    }
    [aToken retain];
    state.token = aToken;
}


/**
 * Scans the input and returns the next token.
 *
 * Each pass clears the current token and records where the token starts.
 * At end of input an EOF token positioned at the current index is returned.
 * Otherwise -mTokens is run: if it left no token, one is emitted
 * automatically; if it left the shared skip token, scanning starts over.
 *
 * An ANTLRNoViableAltException is reported and recovered from here; any other
 * ANTLRRecognitionException is only reported. In both cases the loop goes
 * round again.
 *
 * A generated filtering lexer may override this method.
 */
- (id<ANTLRToken>) nextToken
{
    for (;;) {
        // Start-of-token bookkeeping.
        [self setToken:nil];
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = input.index;
        state.tokenStartCharPositionInLine = input.charPositionInLine;
        state.tokenStartLine = input.line;
        state.text = nil;

        if ( [input LA:1] == ANTLRCharStreamEOF ) {
            ANTLRCommonToken *eofToken = [ANTLRCommonToken newToken:input
                                                               Type:ANTLRTokenTypeEOF
                                                            Channel:ANTLRCommonToken.DEFAULT_CHANNEL
                                                              Start:input.index
                                                               Stop:input.index];
            [eofToken setLine:input.line];
            [eofToken setCharPositionInLine:input.charPositionInLine];
            return eofToken;
        }

        @try {
            [self mTokens];
            id<ANTLRToken> matched = state.token;
            if ( matched == nil ) {
                [self emit];
            }
            else if ( matched == [ANTLRCommonToken skipToken] ) {
                continue; // rule asked for this text to be skipped
            }
            return state.token;
        }
        @catch (ANTLRNoViableAltException *noViableAlt) {
            [self reportError:noViableAlt];
            [self recover:noViableAlt];
        }
        @catch (ANTLRRecognitionException *recognitionError) {
            [self reportError:recognitionError];
        }
    }
}

/**
 * Abstract entry point for token matching; the generated lexer source is
 * expected to define it. This base version only sends
 * -doesNotRecognizeSelector: for its own selector.
 */
- (void) mTokens
{   // abstract, defined in generated source as a starting point for matching
    [self doesNotRecognizeSelector:_cmd];
}

/**
 * Marks the current token as skipped by storing the shared skip token in the
 * state; -nextToken compares against that same object and restarts scanning.
 */
- (void) skip
{
    state.token = [ANTLRCommonToken skipToken];
}

/** Returns the character stream this lexer reads from (may be nil). */
- (id<ANTLRCharStream>) input
{
    return input; 
}

/**
 * Replaces the character stream and resets the lexer.
 *
 * The lexer is reset while the input ivar is temporarily nil, so -reset does
 * not rewind either the old or the new stream.
 *
 * Ownership: the new stream is retained before the old one is released.
 * Assigning the stream that is already installed therefore leaves its retain
 * count unchanged (the earlier code retained it again without a matching
 * release, leaking one reference per call).
 *
 * @param anInput the new character stream; may be nil
 */
- (void) setInput:(id<ANTLRCharStream>) anInput
{
    id<ANTLRCharStream> previous = input;

    [anInput retain];   // take ownership first so previous == anInput is safe
    input = nil;        // keep -reset from seeking on any stream
    [self reset];
    input = anInput;
    [previous release];
}

/** Records aToken as the current token in the shared state.
 *
 *  Currently does not support multiple emits per nextToken invocation
 *  for efficiency reasons.  Subclass and override this method and
 *  nextToken (to push tokens into a list and pull from that list rather
 *  than a single variable as this implementation does).
 *
 *  @param aToken the token to store as the current token
 */
- (void) emit:(id<ANTLRToken>)aToken
{
	state.token = aToken;
}

/** Builds a token for the text matched so far and emits it.
 *
 *  The token takes its type and channel from the shared state and spans the
 *  recorded token start index up to the character just before the current
 *  input index. Its line and column are the recorded token start position,
 *  and its text comes from -text (so a text override, if set, wins).
 *  Override this method to emit custom token objects.
 *
 *  If you are building trees, then you should also override
 *  Parser or TreeParser.getMissingSymbol().
 *
 *  NOTE(review): the token is retained once more after +newToken:... and never
 *  released in this method; confirm the intended ownership.
 */
- (void) emit
{
    id<ANTLRToken> freshToken = [ANTLRCommonToken newToken:input
                                                      Type:state.type
                                                   Channel:state.channel
                                                     Start:state.tokenStartCharIndex
                                                      Stop:input.index-1];
    // Position of the first character of the token.
    [freshToken setLine:state.tokenStartLine];
    [freshToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    [freshToken setText:[self text]];

    [freshToken retain];
    [self emit:freshToken];
}

// matching
#pragma mark Matching
/**
 * Matches the input against aString, one character at a time.
 *
 * Every matching character is consumed and state.failed is cleared. On the
 * first mismatch:
 *   - while backtracking, state.failed is set and the method returns;
 *   - otherwise an ANTLRMismatchedTokenException carrying the expected and the
 *     actual character is built, -recover: is invoked, and the exception is
 *     thrown.
 *
 * An empty or nil string matches without touching the input or state.failed.
 *
 * The length and index are NSUInteger, the type -length returns, so the
 * length is no longer narrowed to unsigned int.
 *
 * @param aString the literal text expected next in the input
 */
- (void) matchString:(NSString *)aString
{
    NSUInteger stringLength = [aString length];
    NSUInteger i;

    for ( i = 0; i < stringLength; i++ ) {
        unichar expected = [aString characterAtIndex:i];
        unichar c = [input LA:1];

        if ( c != expected ) {
            if ( [state getBacktracking] > 0 ) {
                state.failed = YES;
                return;
            }
            ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:expected Stream:input];
            mte.c = c;
            [self recover:mte];
            @throw mte;
        }
        [input consume];
        state.failed = NO;
    }
}

/** Matches any single character by consuming one from the input, unconditionally. */
- (void) matchAny
{
	[input consume];
}

/**
 * Matches exactly one character.
 *
 * When the next input character equals aChar it is consumed and state.failed
 * is cleared. Otherwise, while backtracking, state.failed is set and the
 * method returns; outside backtracking an ANTLRMismatchedTokenException
 * carrying the expected and the actual character is built, -recover: is
 * invoked, and the exception is thrown.
 *
 * @param aChar the character expected next in the input
 */
- (void) matchChar:(unichar) aChar
{
    // TODO: -LA: is returning an int because it sometimes is used in the generated parser to compare lookahead with a tokentype.
    //       try to change all those occurrences to -LT: if possible (i.e. if ANTLR can be made to generate LA only for lexer code)
    unichar lookahead = [input LA:1];

    if ( lookahead == aChar ) {
        [input consume];
        state.failed = NO;
        return;
    }

    if ( [state getBacktracking] > 0 ) {
        state.failed = YES;
        return;
    }

    ANTLRMismatchedTokenException *mismatch = [ANTLRMismatchedTokenException newExceptionChar:aChar Stream:input];
    mismatch.c = lookahead;
    [self recover:mismatch];
    @throw mismatch;
}

/**
 * Matches one character in the inclusive range fromChar..toChar.
 *
 * When the next input character lies in the range it is consumed and
 * state.failed is cleared. Otherwise, while backtracking, state.failed is set
 * and the method returns; outside backtracking an
 * ANTLRMismatchedRangeException is built, -recover: is invoked, and the
 * exception is thrown.
 *
 * The exception's NSRange is built as (location = fromChar,
 * length = toChar - fromChar + 1). The previous code passed toChar itself as
 * the length, so -getErrorMessage:TokenNames:, which prints
 * location + length - 1 as the upper bound, reported the wrong character.
 *
 * @param fromChar lowest acceptable character
 * @param toChar   highest acceptable character
 */
- (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
{
    unichar charLA = (unichar)[input LA:1];

    if ( charLA < fromChar || charLA > toChar ) {
        if ( [state getBacktracking] > 0 ) {
            state.failed = YES;
            return;
        }
        // NSMakeRange takes (location, length); an inverted range gets length 0.
        NSUInteger span = (toChar >= fromChar) ? (NSUInteger)(toChar - fromChar) + 1 : 0;
        ANTLRMismatchedRangeException *mre = [ANTLRMismatchedRangeException
                    newException:NSMakeRange((NSUInteger)fromChar, span)
                          stream:input];
        mre.c = charLA;
        [self recover:mre];
        @throw mre;
    }
    [input consume];
    state.failed = NO;
}

	// info
#pragma mark Informational

/** Returns the line value reported by the input stream. */
- (NSUInteger) line
{
	return input.line;
}

/** Returns the character-position-in-line value reported by the input stream. */
- (NSUInteger) charPositionInLine
{
	return input.charPositionInLine;
}

/**
 * Returns the current index of the input stream.
 *
 * This used to return a constant 0 regardless of how far the lexer had read;
 * it now delegates to the stream in the same way -line and
 * -charPositionInLine do. With no input stream the result is still 0.
 */
- (NSInteger) index
{
    return input.index;
}

/**
 * Returns the text of the token being matched.
 *
 * A text override stored in the shared state takes precedence. Without one,
 * the result is the slice of the input from the recorded token start index up
 * to (not including) the current input index.
 */
- (NSString *) text
{
    NSString *overrideText = state.text;
    if ( overrideText != nil ) {
        return overrideText;
    }

    NSRange matchedSpan = NSMakeRange(state.tokenStartCharIndex,
                                      input.index-state.tokenStartCharIndex);
    return [input substringWithRange:matchedSpan];
}

/**
 * Stores theText as the text override in the shared state; -text returns it
 * in place of the matched input slice whenever it is non-nil.
 */
- (void) setText:(NSString *) theText
{
    state.text = theText;
}

	// error handling
/**
 * Reports a recognition error by passing this lexer's token names and the
 * exception to -displayRecognitionError:Exception:.
 *
 * @param e the recognition exception to report
 */
- (void) reportError:(ANTLRRecognitionException *)e
{
    /** TODO: not thought about recovery in lexer yet.
     *  The disabled sketch below would suppress repeated reports until a
     *  token has been matched successfully again.
     *
     // if we've already reported an error and have not matched a token
     // yet successfully, don't report any errors.
     if ( errorRecovery ) {
     //System.err.print("[SPURIOUS] ");
     return;
     }
     errorRecovery = true;
     */
    
    [self displayRecognitionError:[self getTokenNames] Exception:e];
}

/**
 * Builds the message text for a lexer recognition error.
 *
 * The exception class decides the wording. The checks run in a fixed order:
 * mismatched token, no viable alternative, early exit, mismatched not-set,
 * mismatched set, mismatched range. Any other exception is delegated to the
 * superclass.
 *
 * @param e          the exception to describe
 * @param tokenNames not used by this implementation; the superclass fallback
 *                   is given [self getTokenNames] instead
 * @return the message string
 */
- (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)tokenNames
{
    if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
        ANTLRMismatchedTokenException *tokenMismatch = (ANTLRMismatchedTokenException *)e;
        NSString *found = [self getCharErrorDisplay:tokenMismatch.c];
        NSString *wanted = [self getCharErrorDisplay:tokenMismatch.expecting];
        return [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
                found, wanted];
    }

    if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
        // and "(decision="+nvae.decisionNumber+") and
        // "state "+nvae.stateNumber
        ANTLRNoViableAltException *noViableAlt = (ANTLRNoViableAltException *)e;
        NSString *found = [self getCharErrorDisplay:(noViableAlt.c)];
        return [NSString stringWithFormat:@"no viable alternative at character \"%@\"",
                found];
    }

    if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
        // for development, can add "(decision="+eee.decisionNumber+")"
        ANTLREarlyExitException *earlyExit = (ANTLREarlyExitException *)e;
        NSString *found = [self getCharErrorDisplay:(earlyExit.c)];
        return [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
                found];
    }

    if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class]] ) {
        ANTLRMismatchedNotSetException *notSetMismatch = (ANTLRMismatchedNotSetException *)e;
        NSString *found = [self getCharErrorDisplay:(notSetMismatch.c)];
        return [NSString stringWithFormat:@"mismatched character \"%@\"  expecting set \"%@\"",
                found, notSetMismatch.expecting];
    }

    if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
        ANTLRMismatchedSetException *setMismatch = (ANTLRMismatchedSetException *)e;
        NSString *found = [self getCharErrorDisplay:(setMismatch.c)];
        return [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
                found, setMismatch.expecting];
    }

    if ( [e isKindOfClass:[ANTLRMismatchedRangeException class]] ) {
        ANTLRMismatchedRangeException *rangeMismatch = (ANTLRMismatchedRangeException *)e;
        NSString *found = [self getCharErrorDisplay:(rangeMismatch.c)];
        NSString *low = [self getCharErrorDisplay:(rangeMismatch.range.location)];
        NSString *high = [self getCharErrorDisplay:(rangeMismatch.range.location+rangeMismatch.range.length-1)];
        return [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
                found, low, high];
    }

    return [super getErrorMessage:e TokenNames:[self getTokenNames]];
}

/**
 * Returns a printable form of a character for use in error messages.
 *
 * ANTLRTokenTypeEOF becomes "<EOF>"; newline, tab and carriage return become
 * the two-character escapes \n, \t and \r. Every other value is formatted as
 * a single UTF-16 code unit.
 *
 * The default case formats with %C and a unichar argument. The previous
 * %c / (char) pair kept only the low 8 bits, so any character above 0xFF was
 * shown as an unrelated byte.
 *
 * @param c the character value, or ANTLRTokenTypeEOF
 * @return the display string
 */
- (NSString *)getCharErrorDisplay:(NSInteger)c
{
    switch ( c ) {
        case ANTLRTokenTypeEOF :
            return @"<EOF>";
        case '\n' :
            return @"\\n";
        case '\t' :
            return @"\\t";
        case '\r' :
            return @"\\r";
        default:
            return [NSString stringWithFormat:@"%C", (unichar)c];
    }
}

/** Recovers from a recognition error by consuming one input character.
 *
 *  Lexers can normally match any char in its vocabulary after matching
 *  a token, so do the easy thing and just kill a character and hope
 *  it all works out.  You can instead use the rule invocation stack
 *  to do sophisticated error recovery if you are in a fragment rule.
 *
 *  @param re the exception being recovered from; not inspected here
 */
- (void)recover:(ANTLRRecognitionException *)re
{
    //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
    //re.printStackTrace();
    [input consume];
}

/**
 * Trace hook for rule entry.
 *
 * Formats the next input symbol together with the stream's current line and
 * character position, and forwards that string to the superclass
 * -traceIn:Index:Object:.
 *
 * Each format argument is cast to the exact type its conversion expects
 * (%c takes int, %lu takes unsigned long), so the call stays well-defined
 * whatever integer widths -LT:, line and charPositionInLine are declared with.
 *
 * @param ruleName  name of the rule being entered
 * @param ruleIndex index of the rule being entered
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n",
                             (int)[input LT:1],
                             (unsigned long)input.line,
                             (unsigned long)input.charPositionInLine];
    [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
}

/**
 * Trace hook for rule exit.
 *
 * Formats the next input symbol together with the stream's current line and
 * character position, and forwards that string to the superclass
 * -traceOut:Index:Object:.
 *
 * Each format argument is cast to the exact type its conversion expects
 * (%c takes int, %lu takes unsigned long), so the call stays well-defined
 * whatever integer widths -LT:, line and charPositionInLine are declared with.
 *
 * @param ruleName  name of the rule being left
 * @param ruleIndex index of the rule being left
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n",
                             (int)[input LT:1],
                             (unsigned long)input.line,
                             (unsigned long)input.charPositionInLine];
    [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
}

@end