"""ANTLR3 runtime package""" # begin[licence] # # [The "BSD licence"] # Copyright (c) 2005-2008 Terence Parr # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. The name of the author may not be used to endorse or promote products # derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # end[licence] from antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE ############################################################################ # # basic token interface # ############################################################################ class Token(object): """@brief Abstract token baseclass.""" def getText(self): """@brief Get the text of the token. Using setter/getter methods is deprecated. Use o.text instead. """ raise NotImplementedError def setText(self, text): """@brief Set the text of the token. Using setter/getter methods is deprecated. Use o.text instead. """ raise NotImplementedError def getType(self): """@brief Get the type of the token. Using setter/getter methods is deprecated. Use o.type instead.""" raise NotImplementedError def setType(self, ttype): """@brief Get the type of the token. Using setter/getter methods is deprecated. Use o.type instead.""" raise NotImplementedError def getLine(self): """@brief Get the line number on which this token was matched Lines are numbered 1..n Using setter/getter methods is deprecated. Use o.line instead.""" raise NotImplementedError def setLine(self, line): """@brief Set the line number on which this token was matched Using setter/getter methods is deprecated. Use o.line instead.""" raise NotImplementedError def getCharPositionInLine(self): """@brief Get the column of the tokens first character, Columns are numbered 0..n-1 Using setter/getter methods is deprecated. Use o.charPositionInLine instead.""" raise NotImplementedError def setCharPositionInLine(self, pos): """@brief Set the column of the tokens first character, Using setter/getter methods is deprecated. Use o.charPositionInLine instead.""" raise NotImplementedError def getChannel(self): """@brief Get the channel of the token Using setter/getter methods is deprecated. Use o.channel instead.""" raise NotImplementedError def setChannel(self, channel): """@brief Set the channel of the token Using setter/getter methods is deprecated. Use o.channel instead.""" raise NotImplementedError def getTokenIndex(self): """@brief Get the index in the input stream. An index from 0..n-1 of the token object in the input stream. This must be valid in order to use the ANTLRWorks debugger. Using setter/getter methods is deprecated. Use o.index instead.""" raise NotImplementedError def setTokenIndex(self, index): """@brief Set the index in the input stream. Using setter/getter methods is deprecated. Use o.index instead.""" raise NotImplementedError def getInputStream(self): """@brief From what character stream was this token created. You don't have to implement but it's nice to know where a Token comes from if you have include files etc... on the input.""" raise NotImplementedError def setInputStream(self, input): """@brief From what character stream was this token created. You don't have to implement but it's nice to know where a Token comes from if you have include files etc... on the input.""" raise NotImplementedError ############################################################################ # # token implementations # # Token # +- CommonToken # \- ClassicToken # ############################################################################ class CommonToken(Token): """@brief Basic token implementation. This implementation does not copy the text from the input stream upon creation, but keeps start/stop pointers into the stream to avoid unnecessary copy operations. """ def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None, input=None, start=None, stop=None, oldToken=None): Token.__init__(self) if oldToken is not None: self.type = oldToken.type self.line = oldToken.line self.charPositionInLine = oldToken.charPositionInLine self.channel = oldToken.channel self.index = oldToken.index self._text = oldToken._text self.input = oldToken.input if isinstance(oldToken, CommonToken): self.start = oldToken.start self.stop = oldToken.stop else: self.type = type self.input = input self.charPositionInLine = -1 # set to invalid position self.line = 0 self.channel = channel #What token number is this from 0..n-1 tokens; < 0 implies invalid index self.index = -1 # We need to be able to change the text once in a while. If # this is non-null, then getText should return this. Note that # start/stop are not affected by changing this. self._text = text # The char position into the input buffer where this token starts self.start = start # The char position into the input buffer where this token stops # This is the index of the last char, *not* the index after it! self.stop = stop def getText(self): if self._text is not None: return self._text if self.input is None: return None if self.start < self.input.size() and self.stop < self.input.size(): return self.input.substring(self.start, self.stop) return '<EOF>' def setText(self, text): """ Override the text for this token. getText() will return this text rather than pulling from the buffer. Note that this does not mean that start/stop indexes are not valid. It means that that input was converted to a new string in the token object. """ self._text = text text = property(getText, setText) def getType(self): return self.type def setType(self, ttype): self.type = ttype def getTypeName(self): return str(self.type) typeName = property(lambda s: s.getTypeName()) def getLine(self): return self.line def setLine(self, line): self.line = line def getCharPositionInLine(self): return self.charPositionInLine def setCharPositionInLine(self, pos): self.charPositionInLine = pos def getChannel(self): return self.channel def setChannel(self, channel): self.channel = channel def getTokenIndex(self): return self.index def setTokenIndex(self, index): self.index = index def getInputStream(self): return self.input def setInputStream(self, input): self.input = input def __str__(self): if self.type == EOF: return "<EOF>" channelStr = "" if self.channel > 0: channelStr = ",channel=" + str(self.channel) txt = self.text if txt is not None: txt = txt.replace("\n","\\\\n") txt = txt.replace("\r","\\\\r") txt = txt.replace("\t","\\\\t") else: txt = "<no text>" return "[@%d,%d:%d=%r,<%s>%s,%d:%d]" % ( self.index, self.start, self.stop, txt, self.typeName, channelStr, self.line, self.charPositionInLine ) class ClassicToken(Token): """@brief Alternative token implementation. A Token object like we'd use in ANTLR 2.x; has an actual string created and associated with this object. These objects are needed for imaginary tree nodes that have payload objects. We need to create a Token object that has a string; the tree node will point at this token. CommonToken has indexes into a char stream and hence cannot be used to introduce new strings. """ def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL, oldToken=None ): Token.__init__(self) if oldToken is not None: self.text = oldToken.text self.type = oldToken.type self.line = oldToken.line self.charPositionInLine = oldToken.charPositionInLine self.channel = oldToken.channel self.text = text self.type = type self.line = None self.charPositionInLine = None self.channel = channel self.index = None def getText(self): return self.text def setText(self, text): self.text = text def getType(self): return self.type def setType(self, ttype): self.type = ttype def getLine(self): return self.line def setLine(self, line): self.line = line def getCharPositionInLine(self): return self.charPositionInLine def setCharPositionInLine(self, pos): self.charPositionInLine = pos def getChannel(self): return self.channel def setChannel(self, channel): self.channel = channel def getTokenIndex(self): return self.index def setTokenIndex(self, index): self.index = index def getInputStream(self): return None def setInputStream(self, input): pass def toString(self): channelStr = "" if self.channel > 0: channelStr = ",channel=" + str(self.channel) txt = self.text if txt is None: txt = "<no text>" return "[@%r,%r,<%r>%s,%r:%r]" % (self.index, txt, self.type, channelStr, self.line, self.charPositionInLine ) __str__ = toString __repr__ = toString INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE) # In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR # will avoid creating a token for this symbol and try to fetch another. SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)