From 4cc54e7dcd19f78a3ebaaa3c6f05885029c53b01 Mon Sep 17 00:00:00 2001
From: David 'Digit' Turner <digit@google.com>
Date: Wed, 12 Feb 2014 20:04:18 +0800
Subject: [PATCH 01/12] android: Add locale support.

This is based on the Bionic <ctype.h> declarations. Note that
unfortunately, the _ctype_ table exposed by this header has a bug
so a fixed copy is included here instead.

See src/support/android/locale_android.cpp for details.
---
 include/__locale                       |  17 +++++-
 src/locale.cpp                         |  25 +++++++-
 src/support/android/locale_android.cpp | 101 +++++++++++++++++++++++++++++++++
 3 files changed, 140 insertions(+), 3 deletions(-)
 create mode 100644 src/support/android/locale_android.cpp

diff --git a/include/__locale b/include/__locale
index fb5b196..c793cfe 100644
--- a/include/__locale
+++ b/include/__locale
@@ -375,7 +375,20 @@ public:
     static const mask punct  = _ISPUNCT;
     static const mask xdigit = _ISXDIGIT;
     static const mask blank  = _ISBLANK;
-#else  // __GLIBC__ || _WIN32 || __APPLE__ || __FreeBSD__ || __EMSCRIPTEN__ || __sun__
+#elif defined(__ANDROID__)
+    typedef unsigned short mask;
+    static const mask space  = _S;
+    static const mask print  = _P | _U | _L | _N | _B;
+    static const mask cntrl  = _C;
+    static const mask upper  = _U;
+    static const mask lower  = _L;
+    static const mask alpha  = _U | _L;
+    static const mask digit  = _N;
+    static const mask punct  = _P;
+    static const mask xdigit = _N | _X;
+    // See src/support/android/locale_android.cpp for details!
+    static const mask blank  = 0x100;
+#else  // __ANDROID__
     typedef unsigned long mask;
     static const mask space  = 1<<0;
     static const mask print  = 1<<1;
@@ -387,7 +400,7 @@ public:
     static const mask punct  = 1<<7;
     static const mask xdigit = 1<<8;
     static const mask blank  = 1<<9;
-#endif  // __GLIBC__ || _WIN32 || __APPLE__ || __FreeBSD__
+#endif  // __GLIBC__ || _WIN32 || __APPLE__ || __FreeBSD__ || __EMSCRIPTEN__ || __sun__ || __ANDROID__
     static const mask alnum  = alpha | digit;
     static const mask graph  = alnum | punct;
 
diff --git a/src/locale.cpp b/src/locale.cpp
index 4877f2b..66aaca9 100644
--- a/src/locale.cpp
+++ b/src/locale.cpp
@@ -814,6 +814,8 @@ ctype<wchar_t>::do_toupper(char_type c) const
     return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__)
     return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
+#elif defined(__ANDROID__)
+    return isascii(c) ? _toupper_tab_[c + 1] : c;
 #else
     return (isascii(c) && iswlower_l(c, __cloc())) ? c-L'a'+L'A' : c;
 #endif
@@ -828,6 +830,8 @@ ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__)
         *low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low]
                              : *low;
+#elif defined(__ANDROID__)
+        *low = isascii(*low) ? _toupper_tab_[*low + 1] : *low;
 #else
         *low = (isascii(*low) && islower_l(*low, __cloc())) ? (*low-L'a'+L'A') : *low;
 #endif
@@ -841,6 +845,8 @@ ctype<wchar_t>::do_tolower(char_type c) const
     return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__)
     return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
+#elif defined(__ANDROID__)
+    return isascii(c) ? _tolower_tab_[c + 1] : c;
 #else
     return (isascii(c) && isupper_l(c, __cloc())) ? c-L'A'+'a' : c;
 #endif
@@ -855,6 +861,8 @@ ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__)
         *low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low]
                              : *low;
+#elif defined(__ANDROID__)
+        *low = isascii(*low) ? _tolower_tab_[*low + 1] : *low;
 #else
         *low = (isascii(*low) && isupper_l(*low, __cloc())) ? *low-L'A'+L'a' : *low;
 #endif
@@ -924,6 +932,8 @@ ctype<char>::do_toupper(char_type c) const
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
     return isascii(c) ? 
       static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
+#elif defined(__ANDROID__)
+    return isascii(c) ? _toupper_tab_[c + 1] : c;
 #else
     return (isascii(c) && islower_l(c, __cloc())) ? c-'a'+'A' : c;
 #endif
@@ -941,6 +951,8 @@ ctype<char>::do_toupper(char_type* low, const char_type* high) const
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
         *low = isascii(*low) ?
           static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
+#elif defined(__ANDROID__)
+        *low = isascii(*low) ? _toupper_tab_[*low + 1] : *low;
 #else
         *low = (isascii(*low) && islower_l(*low, __cloc())) ? *low-'a'+'A' : *low;
 #endif
@@ -958,6 +970,8 @@ ctype<char>::do_tolower(char_type c) const
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__)
     return isascii(c) ?
       static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
+#elif defined(__ANDROID__)
+    return isascii(c) ? _tolower_tab_[c + 1] : c;
 #else
     return (isascii(c) && isupper_l(c, __cloc())) ? c-'A'+'a' : c;
 #endif
@@ -973,6 +987,8 @@ ctype<char>::do_tolower(char_type* low, const char_type* high) const
         *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
         *low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
+#elif defined(__ANDROID__)
+        *low = isascii(*low) ? _tolower_tab_[*low + 1] : *low;
 #else
         *low = (isascii(*low) && isupper_l(*low, __cloc())) ? *low-'A'+'a' : *low;
 #endif
@@ -1018,6 +1034,11 @@ extern "C" const int ** __ctype_tolower_loc();
 extern "C" const int ** __ctype_toupper_loc();
 #endif
 
+#if defined(__ANDROID__)
+// See src/support/android/android_locale.cpp
+extern "C" const unsigned short* const _ctype_android;
+#endif
+
 const ctype<char>::mask*
 ctype<char>::classic_table()  _NOEXCEPT
 {
@@ -1035,6 +1056,8 @@ ctype<char>::classic_table()  _NOEXCEPT
 // going to end up dereferencing it later...
 #elif defined(__EMSCRIPTEN__)
     return *__ctype_b_loc();
+#elif defined(__ANDROID__)
+    return _ctype_android;
 #elif defined(_AIX)
     return (const unsigned int *)__lc_ctype_ptr->obj->mask;
 #else
@@ -1422,7 +1445,7 @@ locale::id codecvt<wchar_t, char, mbstate_t>::id;
 
 codecvt<wchar_t, char, mbstate_t>::codecvt(size_t refs)
     : locale::facet(refs),
-      __l(_LIBCPP_GET_C_LOCALE)
+      __l(0)
 {
 }
 
diff --git a/src/support/android/locale_android.cpp b/src/support/android/locale_android.cpp
new file mode 100644
index 0000000..7193028
--- /dev/null
+++ b/src/support/android/locale_android.cpp
@@ -0,0 +1,101 @@
+// -*- C++ -*-
+//===-------------------- support/win32/locale_win32.cpp ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <ctype.h>
+
+// Bionic exports the non-standard _ctype_ array in <ctype.h>,
+// unfortunately, cannot be used directly for libc++ because it doesn't
+// have a proper bit-flag for blank characters.
+//
+// Note that the header does define a _B flag (as 0x80), but it
+// is only set on the space (32) character, and used to implement
+// isprint() properly. The implementation of isblank() relies on
+// direct comparisons with 9 and 32 instead.
+//
+// The following is a local copy of the Bionic _ctype_ array that has
+// been modified in the following way:
+//
+//   - It stores 16-bit unsigned values, instead of 8-bit char ones.
+//
+//   - Bit flag _BLANK (0x100) is used to indicate blank characters.
+//     It is only set for indices 9 (TAB) and 32 (SPACE).
+//
+//   - Support signed char properly for indexing.
+
+// Used to tag blank characters, this doesn't appear in <ctype.h> nor
+// the original Bionic _ctype_ array.
+#define _BLANK  0x100
+
+// NOTE: A standalone forward declaration is required to ensure that this
+// variable is properly exported with a C name. In other words, this does
+// _not_ work:
+//
+//  extern "C" {
+//  const char* const _ctype_android = ...;
+//  }
+//
+extern "C" const unsigned short* const _ctype_android;
+
+static const unsigned short ctype_android_tab[256+128] = {
+       /* -128..-1 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C, /* 80 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C, /* 88 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C, /* 90 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C, /* 98 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* A0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* A8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* B0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* B8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* C0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* C8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* D0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* D8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* E0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* E8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* F0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* F8 */
+       /* 0..127 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C,
+        _C,     _C|_S|_BLANK, _C|_S,  _C|_S,  _C|_S,  _C|_S,  _C,     _C,
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C,
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C,
+  _S|_B|_BLANK, _P,           _P,     _P,     _P,     _P,     _P,     _P,
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P,
+        _N,     _N,           _N,     _N,     _N,     _N,     _N,     _N,
+        _N,     _N,           _P,     _P,     _P,     _P,     _P,     _P,
+        _P,     _U|_X,        _U|_X,  _U|_X,  _U|_X,  _U|_X,  _U|_X,  _U,
+        _U,     _U,           _U,     _U,     _U,     _U,     _U,     _U,
+        _U,     _U,           _U,     _U,     _U,     _U,     _U,     _U,
+        _U,     _U,           _U,     _P,     _P,     _P,     _P,     _P,
+        _P,     _L|_X,        _L|_X,  _L|_X,  _L|_X,  _L|_X,  _L|_X,  _L,
+        _L,     _L,           _L,     _L,     _L,     _L,     _L,     _L,
+        _L,     _L,           _L,     _L,     _L,     _L,     _L,     _L,
+        /* determine printability based on the IS0 8859 8-bit standard */
+        _L,     _L,           _L,     _P,     _P,     _P,     _P,     _C,
+        /* 128..255, same as -128..127 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C, /* 80 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C, /* 88 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C, /* 90 */
+        _C,     _C,           _C,     _C,     _C,     _C,     _C,     _C, /* 98 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* A0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* A8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* B0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* B8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* C0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* C8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* D0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* D8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* E0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* E8 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* F0 */
+        _P,     _P,           _P,     _P,     _P,     _P,     _P,     _P, /* F8 */
+};
+
+const unsigned short* const _ctype_android = ctype_android_tab + 128;
-- 
1.9.1.423.g4596e3a