/* C++ source  |  1403 lines  |  38.75 KB */

/** \file
 * Implementation of the ANTLR3 string and string factory classes
 */

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include    <antlr3string.h>

/* Factory API
 */
static    pANTLR3_STRING    newRaw8	(pANTLR3_STRING_FACTORY factory);
static    pANTLR3_STRING    newRawUTF16	(pANTLR3_STRING_FACTORY factory);
static    pANTLR3_STRING    newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static    pANTLR3_STRING    newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static    pANTLR3_STRING    newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static    void		    destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static    pANTLR3_STRING    printable8	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static    pANTLR3_STRING    printableUTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static    void		    closeFactory(pANTLR3_STRING_FACTORY factory);

/* String API
 */
static    pANTLR3_UINT8	    set8	(pANTLR3_STRING string, const char * chars);
static    pANTLR3_UINT8	    setUTF16_8	(pANTLR3_STRING string, const char * chars);
static    pANTLR3_UINT8	    setUTF16_UTF16	(pANTLR3_STRING string, const char * chars);
static    pANTLR3_UINT8	    append8	(pANTLR3_STRING string, const char * newbit);
static    pANTLR3_UINT8	    appendUTF16_8	(pANTLR3_STRING string, const char * newbit);
static    pANTLR3_UINT8	    appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit);
static	  pANTLR3_UINT8	    insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
static	  pANTLR3_UINT8	    insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
static	  pANTLR3_UINT8	    insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);

static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars);
static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit);
static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);

static    pANTLR3_UINT8	    addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c);
static    pANTLR3_UINT8	    addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c);
static    pANTLR3_UINT8	    addi8	(pANTLR3_STRING string, ANTLR3_INT32 i);
static    pANTLR3_UINT8	    addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i);
static	  pANTLR3_UINT8	    inserti8	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
static	  pANTLR3_UINT8	    insertiUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);

static    ANTLR3_UINT32     compare8	(pANTLR3_STRING string, const char * compStr);
static    ANTLR3_UINT32     compareUTF16_8	(pANTLR3_STRING string, const char * compStr);
static    ANTLR3_UINT32     compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
static    ANTLR3_UINT32     compareS	(pANTLR3_STRING string, pANTLR3_STRING compStr);
static    ANTLR3_UCHAR      charAt8	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
static    ANTLR3_UCHAR      charAtUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
static    pANTLR3_STRING    subString8	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
static    pANTLR3_STRING    subStringUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
static	  ANTLR3_INT32	    toInt32_8	(pANTLR3_STRING string);
static	  ANTLR3_INT32	    toInt32_UTF16  (pANTLR3_STRING string);
static	  pANTLR3_STRING    to8_8		(pANTLR3_STRING string);
static	  pANTLR3_STRING    to8_UTF16		(pANTLR3_STRING string);
static	pANTLR3_STRING		toUTF8_8	(pANTLR3_STRING string);
static	pANTLR3_STRING		toUTF8_UTF16	(pANTLR3_STRING string);

/* Local helpers
 */
static	void			stringInit8	(pANTLR3_STRING string);
static	void			stringInitUTF16	(pANTLR3_STRING string);
static	void	ANTLR3_CDECL	stringFree	(pANTLR3_STRING string);

/**
 * Creates a new string factory and installs the string creation API that
 * matches the supplied input encoding.
 *
 * \param[in] encoding - One of the ANTLR3_ENC_xxx encoding codes.
 * \return Pointer to the new factory, or NULL if the factory or its tracking
 *         vector could not be allocated.
 */
ANTLR3_API pANTLR3_STRING_FACTORY 
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
{
	pANTLR3_STRING_FACTORY  factory;

	/* Allocate zeroed memory, so any API entry that is not installed below is NULL
	*/
	factory	= (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));

	if	(factory == NULL)
	{
		return	NULL;
	}

	/* Now we make a new list to track the strings.
	*/
	factory->strings	= antlr3VectorNew(0);
	factory->index	= 0;

	if	(factory->strings == NULL)
	{
		ANTLR3_FREE(factory);
		return	NULL;
	}

	// Releasing strings and closing the factory does not depend on the
	// encoding, so these entries are installed for every factory. Without
	// them a factory created for one of the UTF32 encodings could never be
	// closed, as its close pointer would be NULL.
	//
	factory->destroy	=  destroy;
	factory->close		=  closeFactory;

	// Install the encoding specific API
	//
	// TODO: The UTF32 encodings need functions equivalent to the UTF16 and
	// 8 bit ones if they are ever going to be supported by the STRING code.
	// The STRING code was intended as a quick and dirty convenience for people
	// that did not want to worry about memory and performance very much. Its use
	// is discouraged, as it is much more efficient to use the pointers within the
	// tokens directly, so it is not implemented for the newer encodings.
	// The standard 8 and 16 bit functions are installed for UTF8 and UTF16, but
	// they will not be useful beyond returning the text.
	//
	switch(encoding)
	{
		case    ANTLR3_ENC_UTF32:
		case    ANTLR3_ENC_UTF32BE:
		case    ANTLR3_ENC_UTF32LE:

			// No string creation API is available for 32 bit encodings
			//
			break;

		case    ANTLR3_ENC_UTF16BE:
		case    ANTLR3_ENC_UTF16LE:
		case    ANTLR3_ENC_UTF16:

			factory->newRaw	    =  newRawUTF16;
			factory->newSize	=  newSizeUTF16;
			factory->newPtr	    =  newPtrUTF16_UTF16;
			factory->newPtr8	=  newPtrUTF16_8;
			factory->newStr	    =  newStrUTF16_UTF16;
			factory->newStr8	=  newStrUTF16_8;
			factory->printable	=  printableUTF16;
			break;

		case    ANTLR3_ENC_UTF8:
		case    ANTLR3_ENC_EBCDIC:
		case    ANTLR3_ENC_8BIT:
		default:

			factory->newRaw	    =  newRaw8;
			factory->newSize	=  newSize8;
			factory->newPtr	    =  newPtr8;
			factory->newPtr8	=  newPtr8;
			factory->newStr	    =  newStr8;
			factory->newStr8	=  newStr8;
			factory->printable	=  printable8;
			break;
	}
	return  factory;
}


/**
 * Allocates an empty 8 bit string, installs the 8 bit string API on it and
 * records it in the factory's tracking vector with stringFree as its
 * release function.
 *
 * \param factory - The string factory that will own the new string.
 * \return The new string, or NULL if the structure could not be allocated.
 */
static    pANTLR3_STRING    
newRaw8	(pANTLR3_STRING_FACTORY factory)
{
    pANTLR3_STRING  created = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));

    if	(created != NULL)
    {
        /* Fill in the 8 bit API and remember who owns us
         */
        stringInit8(created);
        created->factory = factory;

        /* Track the string at the next free slot of the factory vector,
         * then advance the slot counter.
         */
        factory->strings->set(factory->strings, factory->index, (void *) created, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
        created->index  = factory->index;
        factory->index++;
    }

    return created;
}
/**
 * Allocates an empty UTF16 string, installs the UTF16 string API on it and
 * records it in the factory's tracking vector with stringFree as its
 * release function.
 *
 * \param factory - The string factory that will own the new string.
 * \return The new string, or NULL if the structure could not be allocated.
 */
static    pANTLR3_STRING    
newRawUTF16	(pANTLR3_STRING_FACTORY factory)
{
    pANTLR3_STRING  created = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));

    if	(created != NULL)
    {
        /* Fill in the UTF16 API and remember who owns us
         */
        stringInitUTF16(created);
        created->factory = factory;

        /* Track the string at the next free slot of the factory vector,
         * then advance the slot counter.
         */
        factory->strings->set(factory->strings, factory->index, (void *) created, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
        created->index  = factory->index;
        factory->index++;
    }

    return created;
}
/**
 * Release function registered with the factory vector for every string:
 * frees the character buffer (when there is one) and then the string
 * structure itself.
 *
 * \param string - The string to release.
 */
static	 
void	ANTLR3_CDECL stringFree  (pANTLR3_STRING string)
{
    /* The buffer only exists once something was stored in the string
     */
    if	(string->chars != NULL)
    {
        ANTLR3_FREE(string->chars);
    }

    ANTLR3_FREE(string);
}
/**
 * Resets a string to the empty state and installs the 8 bit implementation
 * of the string API.
 *
 * \param string - The string structure to initialize.
 */
static	void
stringInit8  (pANTLR3_STRING string)
{
    /* Empty buffer, tagged as 8 bit
     */
    string->chars		= NULL;
    string->len			= 0;
    string->size		= 0;
    string->encoding	= ANTLR3_ENC_8BIT ;

    /* For an 8 bit string the native and the explicit 8 bit entry points
     * are the same functions.
     */
    string->set		= set8;
    string->set8	= set8;
    string->append	= append8;
    string->append8	= append8;
    string->insert	= insert8;
    string->insert8	= insert8;
    string->compare	= compare8;
    string->compare8	= compare8;

    /* Character and number level operations
     */
    string->addc	= addc8;
    string->addi	= addi8;
    string->inserti	= inserti8;
    string->charAt	= charAt8;
    string->subString	= subString8;

    /* Conversions
     */
    string->toInt32	= toInt32_8;
    string->to8		= to8_8;
    string->toUTF8	= toUTF8_8;

    /* Operations taking another string; these are shared by all encodings
     */
    string->setS	= setS;
    string->appendS	= appendS;
    string->insertS	= insertS;
    string->compareS	= compareS;
}
/**
 * Resets a string to the empty state and installs the UTF16 implementation
 * of the string API.
 *
 * \param string - The string structure to initialize.
 */
static	void
stringInitUTF16  (pANTLR3_STRING string)
{
    string->len		= 0;
    string->size	= 0;
    string->chars	= NULL;

    /* The buffer holds 16 bit characters, so tag it as UTF16, not as 8 bit
     */
    string->encoding	= ANTLR3_ENC_UTF16;

    /* API for UTF16 strings: the plain entry points take UTF16 input, the
     * ones suffixed 8 take 8 bit input and widen it.
     */
    string->set		= setUTF16_UTF16;
    string->set8	= setUTF16_8;
    string->append	= appendUTF16_UTF16;
    string->append8	= appendUTF16_8;
    string->insert	= insertUTF16_UTF16;
    string->insert8	= insertUTF16_8;
    string->addi	= addiUTF16;
    string->inserti	= insertiUTF16;
    string->addc	= addcUTF16;
    string->charAt	= charAtUTF16;
    string->compare	= compareUTF16_UTF16;
    string->compare8	= compareUTF16_8;
    string->subString	= subStringUTF16;
    string->toInt32	= toInt32_UTF16;
    string->to8		= to8_UTF16;
    string->toUTF8	= toUTF8_UTF16;

    /* Operations taking another string; these are shared by all encodings
     */
    string->compareS	= compareS;
    string->setS	= setS;
    string->appendS	= appendS;
    string->insertS	= insertS;
}
/**
 * Placeholder initializer for UTF-8 strings. It only resets the buffer
 * state; no API functions are installed.
 *
 * \param string - The string structure to initialize.
 *
 * TODO: Implement UTF-8
 */
static	void
stringInitUTF8  (pANTLR3_STRING string)
{
    /* Start with no buffer at all
     */
    string->chars   = NULL;
    string->size    = 0;
    string->len	    = 0;

    /* API: nothing to install yet */
}

// Produce the UTF8 form of an 8 bit string. No assumptions are made about
// the 8 bit encoding, so this is simply a copy of the existing characters
// made through the owning factory.
//
static	pANTLR3_STRING		
toUTF8_8	(pANTLR3_STRING string)
{
	pANTLR3_STRING	copy;

	copy = string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);

	return copy;
}

// Convert a UTF16 string into a UTF8 representation using the Unicode.org
// supplied C algorithms, which are contained within the ANTLR3 C runtime
// as permitted by the Unicode license (see antlr3convertutf.c/.h).
// UCS2 has the same encoding as UTF16 so the UTF16 converter can be used.
//
// Returns a new string from the owning factory whose buffer holds the
// converted bytes followed by a '\0', or NULL if the string could not be
// created. If the conversion buffer cannot be allocated, the new string is
// returned empty. Conversion is lenient and stops at the first point where
// it cannot continue; whatever was converted up to then is returned.
//
static	pANTLR3_STRING	
toUTF8_UTF16	(pANTLR3_STRING string)
{

    UTF8	      * outputEnd;	
    UTF16	      * inputEnd;
    pANTLR3_UINT8	buffer;
    pANTLR3_STRING	utf8String;

    utf8String	= string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");

    if	(utf8String == NULL)
    {
        return  NULL;
    }

    // The output needs to accommodate up to 3 bytes for each 16 bit input
    // unit, plus one byte for the terminator. Allocate before touching the
    // string, so that a failure leaves it as a valid empty string.
    //
    buffer  = (pANTLR3_UINT8)ANTLR3_MALLOC((size_t)(string->len) * 3 + 1);

    if	(buffer == NULL)
    {
        return  utf8String;
    }

    // Swap the existing allocation for the conversion buffer
    //
    ANTLR3_FREE(utf8String->chars);
    utf8String->chars	= buffer;
    utf8String->size	= string->len * 3;

    inputEnd  = (UTF16 *)	(string->chars);
    outputEnd = (UTF8 *)	buffer;

    // Call the Unicode converter. The whole of the 3x area is offered as the
    // target; the extra byte allocated above is reserved for the terminator.
    // We don't really care if things failed or not here, we just convert
    // everything that was vaguely possible and stop when it wasn't. It is
    // up to the grammar programmer to verify that the input is sensible.
    //
    (void)ConvertUTF16toUTF8
        (
        (const UTF16**)&inputEnd, 
        ((const UTF16 *)(string->chars)) + string->len, 
        &outputEnd, 
        ((UTF8 *)buffer) + utf8String->size,
        lenientConversion
        );

    // outputEnd now points one past the last byte written, which is both
    // the length of the result and the place where the terminator belongs.
    //
    utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);

    *outputEnd = '\0';		// Always null terminate

    return utf8String;
}

/**
 * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
 *
 * \param[in] factory - Pointer to the string factory that owns strings
 * \param[in] size - In characters
 * \return pointer to the new, empty string, or NULL if the string or its
 *         buffer could not be allocated.
 */
static    pANTLR3_STRING    
newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
{
    pANTLR3_STRING  string;
    pANTLR3_UINT8   buffer;

    string  = factory->newRaw(factory);

    if	(string == NULL)
    {
        return	NULL;
    }

    /* Always add one more byte for a terminator ;-)
    */
    buffer  = (pANTLR3_UINT8) ANTLR3_MALLOC(sizeof(ANTLR3_UINT8) * ((size_t)size + 1));

    if	(buffer == NULL)
    {
        /* The raw string is left untouched and stays registered with the
         * factory; the caller is told that no usable string could be
         * produced rather than being handed one without a buffer.
         */
        return	NULL;
    }

    *buffer		= '\0';
    string->chars	= buffer;
    string->size	= size + 1;

    return string;
}
/**
 * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
 *
 * \param[in] factory - Pointer to the string factory that owns strings
 * \param[in] size - In characters (count double for surrogate pairs!!!)
 * \return pointer to the new, empty string, or NULL if the string or its
 *         buffer could not be allocated.
 */
static    pANTLR3_STRING    
newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
{
    pANTLR3_STRING  string;
    pANTLR3_UINT8   buffer;

    string  = factory->newRaw(factory);

    if	(string == NULL)
    {
        return	NULL;
    }

    /* Always add one more 16 bit character for a terminator ;-)
    */	
    buffer  = (pANTLR3_UINT8) ANTLR3_MALLOC(sizeof(ANTLR3_UINT16) * ((size_t)size + 1));

    if	(buffer == NULL)
    {
        /* The raw string is left untouched and stays registered with the
         * factory; the caller is told that no usable string could be
         * produced rather than being handed one without a buffer.
         */
        return	NULL;
    }

    /* The terminator is a full 16 bit zero, so both of its bytes are written
    */
    *((ANTLR3_UINT16 *)buffer)	= 0;
    string->chars	= buffer;
    string->size	= size+1;	/* Size is always in characters, as is len */

    return string;
}

/** Builds a new 8 bit string holding a copy of size 8 bit characters
 *  taken from the supplied pointer.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to 8 bit encoded characters; when NULL the new string is left empty
 * \param[in] size - Number of characters to copy
 * \return pointer to the new string, or NULL if it could not be created
 */
static    pANTLR3_STRING    
newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
	pANTLR3_STRING  result;

	result  = factory->newSize(factory, size);

	/* Only copy when there is a string to copy into and something to copy
	*/
	if	(result != NULL && size != 0 && ptr != NULL)
	{
		ANTLR3_MEMMOVE(result->chars, (const void *)ptr, size);

		/* Terminate, as these strings usually end up in token streams and printing
		*/
		*(result->chars + size) = '\0';
		result->len = size;
	}

	return  result;
}

/** Builds a new UTF16 string from size 8 bit characters taken from the
 *  supplied pointer, widening each one to 16 bits.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to 8 bit encoded characters; when NULL the new string is left empty
 * \param[in] size - Number of characters to convert
 * \return pointer to the new string, or NULL if it could not be created
 */
static    pANTLR3_STRING    
newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
	pANTLR3_STRING  result;

	/* newSize accepts size in characters, not bytes
	*/
	result  = factory->newSize(factory, size);

	if	(result != NULL && size != 0 && ptr != NULL)
	{
		pANTLR3_UINT16	widened;
		ANTLR3_INT32    total;
		ANTLR3_INT32    at;

		widened = (pANTLR3_UINT16)(result->chars);
		total   = (ANTLR3_INT32)size;

		/* One 16 bit character out for every 8 bit character in
		*/
		for	(at = 0; at < total; at++)
		{
			widened[at] = (ANTLR3_UINT16)(ptr[at]);
		}

		/* Terminate, as these strings usually end up in token streams and printing
		*/
		*(((pANTLR3_UINT16)(result->chars)) + size) = '\0';

		result->len = size;
	}

	return  result;
}

/** Builds a new UTF16 string holding a copy of size UTF16 characters
 *  taken from the supplied pointer.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to UTF16 encoded characters; when NULL the new string is left empty
 * \param[in] size - Number of 16 bit characters to copy
 * \return pointer to the new string, or NULL if it could not be created
 */
static    pANTLR3_STRING    
newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
	pANTLR3_STRING  result;

	result  = factory->newSize(factory, size);

	if	(result != NULL && size != 0 && ptr != NULL)
	{
		/* size counts characters, the copy needs bytes
		*/
		ANTLR3_MEMMOVE(result->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));

		/* Terminate, as these strings usually end up in token streams and printing
		*/
		*(((pANTLR3_UINT16)(result->chars)) + size) = '\0';	    
		result->len = size;
	}

	return  result;
}

/** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the 8 bit encoded string; NULL is treated as a zero length string
 * \return Pointer to the newly initialized string
 */
static    pANTLR3_STRING    
newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    ANTLR3_UINT32   len;

    /* strlen() must not be handed a NULL pointer; a missing string has no characters
     */
    len = (ptr == NULL) ? 0 : (ANTLR3_UINT32)strlen((const char *)ptr);

    return factory->newPtr8(factory, ptr, len);
}

/** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the 8 bit encoded string; NULL is treated as a zero length string
 * \return Pointer to the newly initialized string
 */
static    pANTLR3_STRING    
newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    ANTLR3_UINT32   len;

    /* strlen() must not be handed a NULL pointer; a missing string has no characters
     */
    len = (ptr == NULL) ? 0 : (ANTLR3_UINT32)strlen((const char *)ptr);

    return factory->newPtr8(factory, ptr, len);
}

/** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the UTF16 encoded string; NULL is treated as a zero length string
 * \return Pointer to the newly initialized string
 */
static    pANTLR3_STRING    
newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    pANTLR3_UINT16  in;
    ANTLR3_UINT32   count;

    /** First, determine the length of the input string in 16 bit
     *  characters. A NULL pointer cannot be scanned and counts as empty.
     */
    in	    = (pANTLR3_UINT16)ptr;
    count   = 0;

    if	(in != NULL)
    {
        while   (*in++ != '\0')
        {
            count++;
        }
    }
    return factory->newPtr(factory, ptr, count);
}

/**
 * Removes a single string from its factory before the factory is closed.
 *
 * The entry is deleted from the factory's tracking vector, the factory's
 * next-index counter is stepped back, and every string that followed the
 * deleted one is renumbered to match its new position.
 * NOTE(review): this relies on the vector's del closing the gap left by the
 * removed entry - confirm against the vector implementation.
 *
 * \param factory - The factory that owns the string.
 * \param string  - The string to remove.
 */
static    void		    
destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
{
    ANTLR3_UINT32   removedAt;
    ANTLR3_UINT32   slot;

    // Note the position before the entry goes away
    //
    removedAt = string->index;

    factory->strings->del(factory->strings, removedAt);

    // One string fewer, so the next string allocated must reuse the
    // last position.
    //
    factory->index--;

    // Everything from the deleted position onwards now sits one slot lower
    // than its recorded index. When the deleted string was the last one
    // there is nothing to renumber and the loop does not run.
    //
    for (slot = removedAt; slot < factory->index; slot++)
    {
        ((pANTLR3_STRING)(factory->strings->elements[slot].element))->index = slot;
    }
}

/**
 * Creates a copy of an 8 bit string that is safe to print: newline and
 * carriage return become the two character sequences \n and \r, and any
 * other character that isprint() rejects becomes '?'.
 *
 * \param[in] factory - The factory used to create the result.
 * \param[in] instr   - The string to make printable.
 * \return The new string, or NULL if it could not be allocated.
 */
static    pANTLR3_STRING    
printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
{
    pANTLR3_STRING  string;
    
    /* We don't need to be too efficient here, this is mostly for error messages and so on.
     */
    pANTLR3_UINT8   scannedText;
    ANTLR3_UINT32   i;

    /* Every input character expands to at most two output characters
     */
    string  = factory->newSize(factory, instr->len *2 + 1);

    /* Without a string and a buffer there is nowhere to write the result
     */
    if	(string == NULL || string->chars == NULL)
    {
        return  NULL;
    }

    /* Scan through and replace unprintable (in terms of this routine)
     * characters
     */
    scannedText = string->chars;

    for	(i = 0; i < instr->len; i++)
    {
		if (*(instr->chars + i) == '\n')
		{
			*scannedText++ = '\\';
			*scannedText++ = 'n';
		}
		else if (*(instr->chars + i) == '\r')
		{
			*scannedText++ = '\\';
			*scannedText++ = 'r';
		}
		else if	(!isprint(*(instr->chars +i)))
		{
			*scannedText++ = '?';
		}
		else
		{
			*scannedText++ = *(instr->chars + i);
		}
    }
    *scannedText  = '\0';

    string->len	= (ANTLR3_UINT32)(scannedText - string->chars);
    
    return  string;
}

/**
 * Creates a copy of a UTF16 string that is safe to print: newline and
 * carriage return become the two character sequences \n and \r, and any
 * other character in the range 0..0xFF that isprint() rejects becomes '?'.
 * Characters above 0xFF are copied unchanged, because isprint() is only
 * defined for values that fit in an unsigned char and so cannot classify
 * them.
 *
 * \param[in] factory - The factory used to create the result.
 * \param[in] instr   - The string to make printable.
 * \return The new string, or NULL if it could not be allocated.
 */
static    pANTLR3_STRING    
printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
{
    pANTLR3_STRING  string;
    
    /* We don't need to be too efficient here, this is mostly for error messages and so on.
     */
    pANTLR3_UINT16  scannedText;
    pANTLR3_UINT16  inText;
    ANTLR3_UINT32   i;
    ANTLR3_UINT32   outLen;
    ANTLR3_UINT32   c;

    /* Every input character expands to at most two output characters
     */
    string  = factory->newSize(factory, instr->len *2 + 1);

    /* Without a string and a buffer there is nowhere to write the result
     */
    if	(string == NULL || string->chars == NULL)
    {
        return  NULL;
    }

    /* Scan through and replace unprintable (in terms of this routine)
     * characters
     */
    scannedText = (pANTLR3_UINT16)(string->chars);
    inText	= (pANTLR3_UINT16)(instr->chars);
    outLen	= 0;

    for	(i = 0; i < instr->len; i++)
    {
		c = *(inText + i);

		if (c == '\n')
		{
			*scannedText++   = '\\';
			*scannedText++   = 'n';
			outLen	    += 2;
		}
		else if (c == '\r')
		{
			*scannedText++   = '\\';
			*scannedText++   = 'r';
			outLen	    += 2;
		}
		else if	(c <= 0xFF && !isprint((int)c))
		{
			/* Only values isprint() is defined for are ever passed to it
			 */
			*scannedText++ = '?';
			outLen++;
		}
		else
		{
			*scannedText++ = *(inText + i);
			outLen++;
		}
    }
    *scannedText  = '\0';

    string->len	= outLen;
    
    return  string;
}

/** Shuts down a string factory.
 *
 *  Frees the vector used to track the strings and then the factory
 *  structure itself. Each string was registered in that vector together
 *  with its release function, so freeing the vector is what disposes of
 *  the strings. The factory pointer must not be used afterwards.
 *
 * \param factory - The factory to close.
 */
static    void		    
closeFactory	(pANTLR3_STRING_FACTORY factory)
{
    /* First the tracking vector, which takes the tracked strings with it
     */
    factory->strings->free(factory->strings);

    /* Then the factory structure
     */
    ANTLR3_FREE((void *)factory);
}

/**
 * Appends a null terminated 8 bit string to an 8 bit string, growing the
 * buffer when required.
 *
 * \param[in] string - The string to append to.
 * \param[in] newbit - The characters to append; NULL appends nothing.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static    pANTLR3_UINT8   
append8	(pANTLR3_STRING string, const char * newbit)
{
    ANTLR3_UINT32   len;
    ANTLR3_UINT32   needed;

    if	(newbit == NULL)
    {
        return  string->chars;
    }

    len	    = (ANTLR3_UINT32)strlen(newbit);
    needed  = string->len + len + 1;

    if	(string->size < needed)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, needed);

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= needed;
    }

    /* Note we copy one more byte than the strlen in order to get the trailing '\0'
     */
    ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, len + 1);
    string->len	+= len;

    return string->chars;
}

/**
 * Appends a null terminated 8 bit string to a UTF16 string, widening each
 * character to 16 bits and growing the buffer when required.
 *
 * \param[in] string - The UTF16 string to append to.
 * \param[in] newbit - The 8 bit characters to append; NULL appends nothing.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static    pANTLR3_UINT8   
appendUTF16_8	(pANTLR3_STRING string, const char * newbit)
{
    ANTLR3_UINT32   len;
    ANTLR3_UINT32   needed;
    pANTLR3_UINT16  apPoint;
    ANTLR3_UINT32   count;

    if	(newbit == NULL)
    {
        return  string->chars;
    }

    len	    = (ANTLR3_UINT32)strlen(newbit);
    needed  = string->len + len + 1;

    if	(string->size < needed)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         * Sizes are tracked in characters, the allocation is in bytes.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, sizeof(ANTLR3_UINT16) * needed);

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= needed;
    }

    apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
    string->len	+= len;

    for	(count = 0; count < len; count++)
    {
        /* Go through unsigned char so that bytes of 0x80 and above are not
         * sign extended where plain char is a signed type.
         */
        *apPoint++   = (ANTLR3_UINT16)(unsigned char)(*(newbit + count));
    }
    *apPoint = '\0';

    return string->chars;
}

/**
 * Appends a zero terminated UTF16 string to a UTF16 string, growing the
 * buffer when required.
 *
 * \param[in] string - The UTF16 string to append to.
 * \param[in] newbit - Pointer to the UTF16 characters to append (passed as
 *                     const char * to fit the common API); NULL appends nothing.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static    pANTLR3_UINT8   
appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit)
{
    ANTLR3_UINT32   len;
    ANTLR3_UINT32   needed;
    pANTLR3_UINT16  in;

    if	(newbit == NULL)
    {
        return  string->chars;
    }

    /** First, determine the length of the input string in 16 bit characters
     */
    in	    = (pANTLR3_UINT16)newbit;
    len   = 0;

    while   (*in++ != '\0')
    {
        len++;
    }

    needed  = string->len + len + 1;

    if	(string->size < needed)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         * Sizes are tracked in characters, the allocation is in bytes.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, sizeof(ANTLR3_UINT16) * needed);

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= needed;
    }

    /* Note we copy one more character than the length in order to get the trailing delimiter
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, sizeof(ANTLR3_UINT16) * (len + 1));
    string->len	+= len;

    return string->chars;
}

/**
 * Replaces the contents of an 8 bit string with a copy of a null
 * terminated 8 bit string, growing the buffer when required.
 *
 * \param[in] string - The string to assign to.
 * \param[in] chars  - The new contents; must not be NULL.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static    pANTLR3_UINT8   
set8	(pANTLR3_STRING string, const char * chars)
{
    ANTLR3_UINT32	len;

    len = (ANTLR3_UINT32)strlen(chars);
    if	(string->size < len + 1)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, len + 1);

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= len + 1;
    }

    /* Note we copy one more byte than the strlen in order to get the trailing '\0'
     */
    ANTLR3_MEMMOVE((void *)(string->chars), chars, len + 1);
    string->len	    = len;

    return  string->chars;

}

/**
 * Replaces the contents of a UTF16 string with a null terminated 8 bit
 * string, widening each character to 16 bits and growing the buffer when
 * required.
 *
 * \param[in] string - The UTF16 string to assign to.
 * \param[in] chars  - The new contents as 8 bit characters; must not be NULL.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static    pANTLR3_UINT8   
setUTF16_8	(pANTLR3_STRING string, const char * chars)
{
    ANTLR3_UINT32	len;
    ANTLR3_UINT32	count;
    pANTLR3_UINT16	apPoint;

    len = (ANTLR3_UINT32)strlen(chars);
    if	(string->size < len + 1)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         * Sizes are tracked in characters, the allocation is in bytes.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, sizeof(ANTLR3_UINT16) * (len + 1));

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= len + 1;
    }
    apPoint = ((pANTLR3_UINT16)string->chars);
    string->len	= len;

    for	(count = 0; count < string->len; count++)
    {
        /* Go through unsigned char so that bytes of 0x80 and above are not
         * sign extended where plain char is a signed type.
         */
        *apPoint++   = (ANTLR3_UINT16)(unsigned char)(*(chars + count));
    }
    *apPoint = '\0';

    return  string->chars;
}

/**
 * Replaces the contents of a UTF16 string with a copy of a zero terminated
 * UTF16 string, growing the buffer when required.
 *
 * \param[in] string - The UTF16 string to assign to.
 * \param[in] chars  - Pointer to the new UTF16 contents (passed as
 *                     const char * to fit the common API); must not be NULL.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static    pANTLR3_UINT8   
setUTF16_UTF16    (pANTLR3_STRING string, const char * chars)
{
    ANTLR3_UINT32   len;
    pANTLR3_UINT16  in;

    /** First, determine the length of the input string in 16 bit characters
     */
    in	    = (pANTLR3_UINT16)chars;
    len   = 0;

    while   (*in++ != '\0')
    {
        len++;
    }

    if	(string->size < len + 1)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         * Sizes are tracked in characters, the allocation is in bytes.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, sizeof(ANTLR3_UINT16) * (len + 1));

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= len + 1;
    }

    /* Note we copy one more character than the length in order to get the trailing '\0'
     */
    ANTLR3_MEMMOVE((void *)(string->chars), chars, (len + 1) * sizeof(ANTLR3_UINT16));
    string->len	    = len;

    return  string->chars;

}

/**
 * Appends a single character to an 8 bit string, growing the buffer when
 * required. The character is truncated to 8 bits.
 *
 * \param[in] string - The string to append to.
 * \param[in] c      - The character to append.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static    pANTLR3_UINT8   
addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c)
{
    /* Room is needed for the new character and the terminator
     */
    if	(string->size < string->len + 2)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, string->len + 2);

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= string->len + 2;
    }
    *(string->chars + string->len)	= (ANTLR3_UINT8)c;
    *(string->chars + string->len + 1)	= '\0';
    string->len++;

    return  string->chars;
}

/**
 * Appends a single character to a UTF16 string, growing the buffer when
 * required. The character is truncated to 16 bits.
 *
 * \param[in] string - The UTF16 string to append to.
 * \param[in] c      - The character to append.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static    pANTLR3_UINT8   
addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c)
{
    pANTLR3_UINT16  ptr;

    /* Room is needed for the new character and the terminator
     */
    if	(string->size < string->len + 2)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         * Sizes are tracked in characters, the allocation is in bytes.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, sizeof(ANTLR3_UINT16) * (string->len + 2));

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= string->len + 2;
    }
    ptr	= (pANTLR3_UINT16)(string->chars);

    *(ptr + string->len)	= (ANTLR3_UINT16)c;
    *(ptr + string->len + 1)	= '\0';
    string->len++;

    return  string->chars;
}

/**
 * Appends the decimal text of a 32 bit signed integer to an 8 bit string.
 *
 * \param[in] string - The string to append to.
 * \param[in] i      - The number to append.
 * \return Whatever the string's append8 function returns.
 */
static    pANTLR3_UINT8   
addi8	(pANTLR3_STRING string, ANTLR3_INT32 i)
{
    char    digits[32];

    /* Format the number as 8 bit text, then append that text
     */
    sprintf(digits, "%d", i);

    return  string->append8(string, digits);
}
/**
 * Appends the decimal text of a 32 bit signed integer to a UTF16 string.
 * The digits are produced as 8 bit text and handed to the string's
 * append8 function.
 *
 * \param[in] string - The string to append to.
 * \param[in] i      - The number to append.
 * \return Whatever the string's append8 function returns.
 */
static    pANTLR3_UINT8   
addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i)
{
    char    digits[32];

    sprintf(digits, "%d", i);

    return  string->append8(string, digits);
}

/**
 * Inserts the decimal text of a 32 bit signed integer into an 8 bit string.
 *
 * \param[in] string - The string to insert into.
 * \param[in] point  - Character position to insert at.
 * \param[in] i      - The number to insert.
 * \return Whatever the string's insert8 function returns.
 */
static	  pANTLR3_UINT8
inserti8    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
{
    char    digits[32];

    /* Format the number as 8 bit text, then insert that text
     */
    sprintf(digits, "%d", i);

    return  string->insert8(string, point, digits);
}
/**
 * Inserts the decimal text of a 32 bit signed integer into a UTF16 string.
 * The digits are produced as 8 bit text and handed to the string's
 * insert8 function.
 *
 * \param[in] string - The string to insert into.
 * \param[in] point  - Character position to insert at.
 * \param[in] i      - The number to insert.
 * \return Whatever the string's insert8 function returns.
 */
static	  pANTLR3_UINT8
insertiUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
{
    char    digits[32];

    sprintf(digits, "%d", i);

    return  string->insert8(string, point, digits);
}

/**
 * Inserts a null terminated 8 bit string into an 8 bit string before the
 * character at the given position, growing the buffer when required.
 *
 * \param[in] string - The string to insert into.
 * \param[in] point  - Character position to insert at; a position at or
 *                     beyond the end of the string results in an append.
 * \param[in] newbit - The characters to insert; NULL inserts nothing.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static	pANTLR3_UINT8
insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    ANTLR3_UINT32	len;
    ANTLR3_UINT32	needed;

    if	(newbit == NULL)
    {
        return	string->chars;
    }

    if	(point >= string->len)
    {
        return	string->append(string, newbit);
    }
 
    len	= (ANTLR3_UINT32)strlen(newbit);

    if	(len == 0)
    {
        return	string->chars;
    }

    needed  = string->len + len + 1;

    if	(string->size < needed)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, needed);

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= needed;
    }

    /* Move the characters we are inserting before, including the delimiter
     */
    ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), string->len - point + 1);

    /* Note we copy the exact number of bytes, without the delimiter
     */
    ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, len);
    
    string->len += len;

    return  string->chars;
}

/**
 * Inserts a null terminated 8 bit string into a UTF16 string before the
 * character at the given position, widening each character to 16 bits and
 * growing the buffer when required.
 *
 * \param[in] string - The UTF16 string to insert into.
 * \param[in] point  - Character position to insert at; a position at or
 *                     beyond the end of the string results in an append.
 * \param[in] newbit - The 8 bit characters to insert; NULL inserts nothing.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static	pANTLR3_UINT8
insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    ANTLR3_UINT32	len;
    ANTLR3_UINT32	needed;
    ANTLR3_UINT32	count;
    pANTLR3_UINT16	inPoint;

    if	(newbit == NULL)
    {
        return	string->chars;
    }

    if	(point >= string->len)
    {
        return	string->append8(string, newbit);
    }
 
    len	= (ANTLR3_UINT32)strlen(newbit);

    if	(len == 0)
    {
        return	string->chars;
    }

    needed  = string->len + len + 1;

    if	(string->size < needed)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         * Sizes are tracked in characters, the allocation is in bytes.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, sizeof(ANTLR3_UINT16) * needed);

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= needed;
    }

    /* Move the characters we are inserting before, including the delimiter
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), sizeof(ANTLR3_UINT16) * (string->len - point + 1));

    string->len += len;
    
    inPoint = ((pANTLR3_UINT16)(string->chars))+point;
    for	(count = 0; count<len; count++)
    {
        /* Go through unsigned char so that bytes of 0x80 and above are not
         * sign extended where plain char is a signed type.
         */
        *(inPoint + count) = (ANTLR3_UINT16)(unsigned char)(*(newbit+count));
    }

    return  string->chars;
}

/**
 * Inserts a zero terminated UTF16 string into a UTF16 string before the
 * character at the given position, growing the buffer when required.
 *
 * \param[in] string - The UTF16 string to insert into.
 * \param[in] point  - Character position to insert at; a position at or
 *                     beyond the end of the string results in an append.
 * \param[in] newbit - Pointer to the UTF16 characters to insert (passed as
 *                     const char * to fit the common API); NULL inserts nothing.
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be enlarged, in which case the string is unchanged.
 */
static	pANTLR3_UINT8
insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    ANTLR3_UINT32	len;
    ANTLR3_UINT32	needed;
    pANTLR3_UINT16	in;

    if	(newbit == NULL)
    {
        return	string->chars;
    }

    if	(point >= string->len)
    {
        return	string->append(string, newbit);
    }
 
    /** First, determine the length of the input string in 16 bit characters
     */
    in	    = (pANTLR3_UINT16)newbit;
    len	    = 0;

    while   (*in++ != '\0')
    {
        len++;
    }

    if	(len == 0)
    {
        return	string->chars;
    }

    needed  = string->len + len + 1;

    if	(string->size < needed)
    {
        /* Keep the old pointer until the reallocation is known to have
         * worked, otherwise a failure would lose (and leak) the buffer.
         * Sizes are tracked in characters, the allocation is in bytes.
         */
        pANTLR3_UINT8   grown;

        grown   = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, sizeof(ANTLR3_UINT16) * needed);

        if  (grown == NULL)
        {
            return  NULL;
        }
        string->chars	= grown;
        string->size	= needed;
    }

    /* Move the characters we are inserting before, including the delimiter
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), sizeof(ANTLR3_UINT16) * (string->len - point + 1));

    /* Note we copy the exact number of characters, without the delimiter
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, sizeof(ANTLR3_UINT16) * len);
    
    string->len += len;

    return  string->chars;
}

/* Sets the contents of string from another string object, by handing that
 * object's character buffer to string->set() and returning its result.
 */
static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars)
{
    const char *    text;

    text    = (const char *)(chars->chars);

    return  string->set(string, text);
}

/* Appends the text of another string object to string via string->append().
 * A NULL or empty newbit (no length, no size or no buffer) leaves string
 * unchanged and simply returns its current character buffer.
 */
static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit)
{
    /* Only delegate when there is genuinely something to append
     */
    if	(newbit != NULL && newbit->len != 0 && newbit->size != 0 && newbit->chars != NULL)
    {
		return  string->append(string, (const char *)(newbit->chars));
    }

    return  string->chars;
}

/* Inserts the text of another string object into string at the given point,
 * by handing that object's character buffer to string->insert() and
 * returning its result.
 */
static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
{
    const char *    text;

    text    = (const char *)(newbit->chars);

    return  string->insert(string, point, text);
}

/* Compares the text of an 8 bit string with the supplied 8 bit character
 * string using strcmp(). The int result of strcmp() is returned converted to
 * the unsigned return type of this function.
 */
static ANTLR3_UINT32   
compare8	(pANTLR3_STRING string, const char * compStr)
{
    int	    order;

    order   = strcmp((const char *)(string->chars), compStr);

    return  (ANTLR3_UINT32)order;
}

/* Compares the text of a UTF16 string with the supplied zero terminated
 * 8 bit character string and returns a result a la strcmp().
 *
 * string  - the string whose chars buffer is read as 16 bit units
 * compStr - the 8 bit string to compare against; each byte is read as an
 *           unsigned value (0..255) so that bytes above 0x7F are not sign
 *           extended on platforms where plain char is signed
 *
 * Returns zero when the two match unit for unit, otherwise the difference
 * (ours minus theirs) at the first mismatch, wrapped into the unsigned
 * return type.
 */
static ANTLR3_UINT32   
compareUTF16_8	(pANTLR3_STRING string, const char * compStr)
{
    pANTLR3_UINT16  ourString;
    ANTLR3_UINT32   ours;
    ANTLR3_UINT32   theirs;

    ourString	= (pANTLR3_UINT16)(string->chars);

    for	(;;)
    {
		ours	= (ANTLR3_UINT32)(*ourString);
		theirs	= (ANTLR3_UINT32)((unsigned char)(*compStr));

		/* Stop at the first terminator on either side or the first mismatch
		 */
		if  (ours == 0 || theirs == 0 || ours != theirs)
		{
			break;
		}

		ourString++;
		compStr++;
    }

    return  ours - theirs;
}

/* Compares the text of a UTF16 string with the supplied character string,
 * which is read as zero terminated 16 bit units (i.e. assumed to be in the
 * same encoding as the string itself), and returns a result a la strcmp():
 * zero when equal, otherwise the difference (ours minus theirs) at the first
 * mismatch, wrapped into the unsigned return type.
 */
static ANTLR3_UINT32   
compareUTF16_UTF16	(pANTLR3_STRING string, const char * compStr8)
{
    pANTLR3_UINT16  mine;
    pANTLR3_UINT16  other;
    ANTLR3_UINT32   left;
    ANTLR3_UINT32   right;

    mine    = (pANTLR3_UINT16)(string->chars);
    other   = (pANTLR3_UINT16)(compStr8);

    for	(;;)
    {
		left	= (ANTLR3_UINT32)(*mine);
		right	= (ANTLR3_UINT32)(*other);

		/* Finish when either side is terminated or the units differ
		 */
		if  (left == 0 || right == 0 || left != right)
		{
			break;
		}

		mine++;
		other++;
    }

    return  left - right;
}

/* Compares the text of a string with the text of another string object, by
 * handing that object's character buffer to string->compare() and returning
 * its result. The buffer is passed as is, so it is assumed to be in the same
 * encoding as the string itself.
 */
static ANTLR3_UINT32   
compareS    (pANTLR3_STRING string, pANTLR3_STRING compStr)
{
    const char *    other;

    other   = (const char *)compStr->chars;

    return  string->compare(string, other);
}


/* Returns the 8 bit character found at the supplied offset, widened to
 * ANTLR3_UCHAR. Offsets greater than the string length yield '\0'; an offset
 * equal to the length reads the element at chars[len] (presumably the
 * terminator).
 */
static ANTLR3_UCHAR    
charAt8	    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
{
    if	(offset <= string->len)
    {
		return  (ANTLR3_UCHAR)(string->chars[offset]);
    }

    return  (ANTLR3_UCHAR)'\0';
}

/* Returns the 16 bit unit found at the supplied offset (counted in 16 bit
 * units), widened to ANTLR3_UCHAR. Offsets greater than the string length
 * yield '\0'; an offset equal to the length reads the unit at index len
 * (presumably the terminator).
 */
static ANTLR3_UCHAR    
charAtUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
{
    pANTLR3_UINT16  units;

    if	(offset <= string->len)
    {
		units	= (pANTLR3_UINT16)(string->chars);

		return  (ANTLR3_UCHAR)(units[offset]);
    }

    return  (ANTLR3_UCHAR)'\0';
}

/* Returns a substring of the supplied 8 bit string a la .subString(s,e)
 * in java runtimes.
 *
 * string     - the string to take the substring from
 * startIndex - offset of the first character wanted
 * endIndex   - offset one past the last character wanted; a value beyond the
 *              string length is pulled back to len + 1
 *
 * The new string is whatever the factory's newPtr() returns for the selected
 * range. If startIndex lies beyond the (clamped) endIndex the range is
 * treated as empty and newPtr() is called with a zero count.
 * NOTE(review): presumably newPtr() yields an empty string for a zero count -
 * confirm against the factory implementation.
 */
static pANTLR3_STRING
subString8   (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
{
    if	(endIndex > string->len)
    {
		endIndex = string->len + 1;
    }

    /* The indexes are unsigned, so an inverted range would wrap round to an
     * enormous count; collapse it to an empty range instead.
     */
    if	(startIndex > endIndex)
    {
		startIndex = endIndex;
    }

    return  string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
}

/* Returns a substring of the supplied UTF16 string a la .subString(s,e)
 * in java runtimes.
 *
 * string     - the string to take the substring from; its buffer is indexed
 *              in 16 bit units
 * startIndex - index of the first 16 bit unit wanted
 * endIndex   - index one past the last unit wanted; a value beyond the string
 *              length is pulled back to len + 1
 *
 * The new string is whatever the factory's newPtr() returns for the selected
 * range. If startIndex lies beyond the (clamped) endIndex the range is
 * treated as empty and newPtr() is called with a zero count.
 * NOTE(review): presumably newPtr() yields an empty string for a zero count -
 * confirm against the factory implementation.
 */
static pANTLR3_STRING
subStringUTF16  (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
{
    if	(endIndex > string->len)
    {
		endIndex = string->len + 1;
    }

    /* The indexes are unsigned, so an inverted range would wrap round to an
     * enormous count; collapse it to an empty range instead.
     */
    if	(startIndex > endIndex)
    {
		startIndex = endIndex;
    }

    return  string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
}

/* Function that can convert the characters in the string to an integer
 */
static ANTLR3_INT32
toInt32_8	    (struct ANTLR3_STRING_struct * string)
{
    return  atoi((const char *)(string->chars));
}

/* Function that can convert the characters in the string to an integer
 */
static ANTLR3_INT32
toInt32_UTF16       (struct ANTLR3_STRING_struct * string)
{
    pANTLR3_UINT16  input;
    ANTLR3_INT32   value;
    ANTLR3_BOOLEAN  negate;

    value   = 0;
    input   = (pANTLR3_UINT16)(string->chars);
    negate  = ANTLR3_FALSE;

    if	(*input == (ANTLR3_UCHAR)'-')
    {
		negate = ANTLR3_TRUE;
		input++;
    }
    else if (*input == (ANTLR3_UCHAR)'+')
    {
		input++;
    }

    while   (*input != '\0' && isdigit(*input))
    {
		value	 = value * 10;
		value	+= ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
		input++;
    }

    return negate ? -value : value;
}

/* Returns an 8 bit version of the string. The string is already in an
 * 8 bit encoding, so no conversion or copy is made and the very same
 * string object is handed back.
 */
static	  pANTLR3_STRING	    to8_8	(pANTLR3_STRING string)
{
    pANTLR3_STRING  same;

    same    = string;

    return  same;
}

/* Returns a new 8 bit version of a UTF16 string: every 16 bit unit is
 * narrowed to a single byte, and units with a value above 255 are replaced
 * with '_'.
 *
 * The new string is obtained from newRaw8() on the source string's factory
 * and is given a freshly allocated buffer of len + 1 bytes, which is zero
 * terminated.
 *
 * Returns the new string, or NULL if either the string or its buffer could
 * not be allocated.
 * NOTE(review): when the buffer allocation fails, the string obtained from
 * newRaw8() is left exactly as newRaw8() created it; presumably the factory
 * still tracks it - confirm.
 */
static	  pANTLR3_STRING    to8_UTF16	(pANTLR3_STRING string)
{
	pANTLR3_STRING  newStr;
	pANTLR3_UINT8   buffer;
	pANTLR3_UINT16  source;
	ANTLR3_UINT32   i;

	/* Create a new 8 bit string
	*/
	newStr  = newRaw8(string->factory);

	if	(newStr == NULL)
	{
		return	NULL;
	}

	/* Always add one more byte for a terminator, and make sure we really
	* got the memory before anything is written through the pointer.
	*/
	buffer  = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));

	if	(buffer == NULL)
	{
		return	NULL;
	}

	newStr->chars   = buffer;
	newStr->size    = string->len + 1;
	newStr->len	    = string->len;

	/* Now copy each UTF16 character, making it an 8 bit character of
	* some sort.
	*/
	source  = (pANTLR3_UINT16)(string->chars);

	for	(i=0; i<string->len; i++)
	{
		ANTLR3_UCHAR	c;

		c = *(source + i);

		*(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
	}

	/* Terminate
	*/
	*(newStr->chars + newStr->len) = '\0';

	return newStr;
}