// UTFConvert.cpp
#include "StdAfx.h"
#include "MyTypes.h"
#include "UTFConvert.h"
// Lead-byte thresholds for multi-byte UTF-8 sequences, as used by the
// routines in this file: kUtf8Limits[n - 1] is the smallest lead byte that
// introduces n continuation bytes (0xC0 -> 1, 0xE0 -> 2, 0xF0 -> 3,
// 0xF8 -> 4, 0xFC -> 5). Decoders subtract the entry from the lead byte to
// get its payload bits; the encoder adds the entry to the top payload bits.
static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
bool CheckUTF8(const char *src) throw()
{
for (;;)
{
Byte c;
unsigned numAdds;
c = *src++;
if (c == 0)
return true;
if (c < 0x80)
continue;
if (c < 0xC0)
return false;
for (numAdds = 1; numAdds < 5; numAdds++)
if (c < kUtf8Limits[numAdds])
break;
UInt32 value = (c - kUtf8Limits[numAdds - 1]);
do
{
Byte c2 = *src++;
if (c2 < 0x80 || c2 >= 0xC0)
return false;
value <<= 6;
value |= (c2 - 0x80);
}
while (--numAdds);
if (value >= 0x110000)
return false;
}
}
// Decodes `srcLen` bytes of UTF-8 from `src` into UTF-16 code units.
//
//   dest    - output buffer, or NULL to only count the units that would be
//             produced (nothing is written in that case).
//   destLen - receives the number of code units produced (or counted).
//   src     - input bytes; not required to be NUL-terminated.
//   srcLen  - number of input bytes to process.
//
// Values below 0x10000 produce one unit; larger values are written as a
// surrogate pair (two units).
//
// Returns True when all `srcLen` bytes were consumed. Returns False, with
// *destLen set to the number of units produced before the offending
// sequence, when the input contains:
//   - a continuation byte (0x80..0xBF) in lead position,
//   - a multi-byte sequence that is cut short by the end of input or by a
//     byte that is not a continuation byte,
//   - a decoded value above 0x10FFFF.
static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) throw()
{
  size_t destPos = 0, srcPos = 0;
  for (;;)
  {
    Byte c;
    unsigned numAdds;
    if (srcPos == srcLen)
    {
      *destLen = destPos;
      return True;
    }
    c = (Byte)src[srcPos++];
    if (c < 0x80)
    {
      if (dest)
        dest[destPos] = (wchar_t)c;
      destPos++;
      continue;
    }
    if (c < 0xC0)
      break;

    // Number of continuation bytes announced by the lead byte.
    for (numAdds = 1; numAdds < 5; numAdds++)
      if (c < kUtf8Limits[numAdds])
        break;
    UInt32 value = (c - kUtf8Limits[numAdds - 1]);

    do
    {
      if (srcPos == srcLen)
        break;
      // Peek first: a byte that is not a continuation byte is not consumed.
      const Byte c2 = (Byte)src[srcPos];
      if (c2 < 0x80 || c2 >= 0xC0)
        break;
      srcPos++;
      value <<= 6;
      value |= (c2 - 0x80);
    }
    while (--numAdds);

    // numAdds reaches 0 only when every expected continuation byte was read.
    // A nonzero count means the inner loop was left early, so the sequence
    // is incomplete: report an error instead of emitting a partial value.
    if (numAdds != 0)
      break;

    if (value < 0x10000)
    {
      if (dest)
        dest[destPos] = (wchar_t)value;
      destPos++;
    }
    else
    {
      value -= 0x10000;
      if (value >= 0x100000)
        break;            // original value was above 0x10FFFF
      if (dest)
      {
        dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
        dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
      }
      destPos += 2;
    }
  }
  *destLen = destPos;
  return False;
}
// Encodes `srcLen` wide characters from `src` as UTF-8 bytes.
//
//   dest    - output buffer, or NULL to only count the bytes that would be
//             produced (nothing is written in that case).
//   destLen - receives the number of bytes produced (or counted).
//   src     - input characters; not required to be NUL-terminated.
//   srcLen  - number of input characters to process.
//
// A high surrogate (0xD800..0xDBFF) followed by a low surrogate
// (0xDC00..0xDFFF) is combined into one value before encoding.
//
// Returns True when all input was consumed. Returns False, with *destLen set
// to the number of bytes produced so far, on a low surrogate without a
// preceding high surrogate, or a high surrogate that is last in the input or
// not followed by a low surrogate.
static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
{
  size_t outPos = 0;
  size_t inPos = 0;

  while (inPos != srcLen)
  {
    UInt32 code = src[inPos++];

    if (code < 0x80)
    {
      // Single-byte (ASCII) form.
      if (dest)
        dest[outPos] = (char)code;
      outPos++;
      continue;
    }

    if (code >= 0xD800 && code < 0xE000)
    {
      // Must be a high surrogate with at least one more unit available.
      if (code >= 0xDC00 || inPos == srcLen)
      {
        *destLen = outPos;
        return False;
      }
      const UInt32 low = src[inPos++];
      if (low < 0xDC00 || low >= 0xE000)
      {
        *destLen = outPos;
        return False;
      }
      code = (((code - 0xD800) << 10) | (low - 0xDC00)) + 0x10000;
    }

    // Smallest number of continuation bytes whose capacity
    // (5 * tail + 6 bits) holds the value; capped at 5.
    unsigned tail = 1;
    while (tail < 5 && code >= (((UInt32)1) << (tail * 5 + 6)))
      tail++;

    // Lead byte: marker bits plus the topmost payload bits.
    if (dest)
      dest[outPos] = (char)(kUtf8Limits[tail - 1] + (code >> (6 * tail)));
    outPos++;

    // Continuation bytes, most significant 6-bit group first.
    while (tail != 0)
    {
      tail--;
      if (dest)
        dest[outPos] = (char)(0x80 + ((code >> (6 * tail)) & 0x3F));
      outPos++;
    }
  }

  *destLen = outPos;
  return True;
}
bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
{
dest.Empty();
size_t destLen = 0;
Utf8_To_Utf16(NULL, &destLen, src, src.Len());
Bool res = Utf8_To_Utf16(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len());
dest.ReleaseBuffer((unsigned)destLen);
return res ? true : false;
}
bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
{
dest.Empty();
size_t destLen = 0;
Utf16_To_Utf8(NULL, &destLen, src, src.Len());
Bool res = Utf16_To_Utf8(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len());
dest.ReleaseBuffer((unsigned)destLen);
return res ? true : false;
}