# Plain text  |  407 lines  |  11.22 KB

#!/usr/bin/python
#
# Copyright (C) 2010 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Generates icudtXXl-default.dat from icudtXXl-all.dat and icu-data-default.txt.
#
# Usage:
#    icu_dat_generator.py [-v|--verbose] [--very-verbose] [-h|--help]
#
# Sample usage:
#   $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose

import getopt
import glob
import os
import os.path
import re
import shutil
import subprocess
import sys


def PrintHelpAndExit():
  """Print the command-line usage summary and terminate the script.

  The usage line lists every option accepted by main(), including
  --very-verbose. The parenthesised single-argument print form is used so the
  text is emitted identically under Python 2 and Python 3.

  Raises:
    SystemExit: always, with exit status 1.
  """
  print("Usage:")
  print("  icu_dat_generator.py [-v|--verbose] [--very-verbose] [-h|--help]")
  print("Example:")
  print("  $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py")
  sys.exit(1)


def InvokeIcuTool(tool, working_dir, args):
  """Run one of the pre-built ICU tools and abort the script if it fails.

  Reads the script globals ICU_PREBUILT_DIR (where the tool lives) and VERBOSE
  (whether to echo the command before running it).

  Args:
    tool: file name of the tool inside ICU_PREBUILT_DIR, e.g. "icupkg".
    working_dir: directory to run the tool in, or None to stay in the current
      working directory.
    args: list of command-line argument strings passed to the tool.

  Raises:
    SystemExit: if the tool returns a non-zero exit status.
  """
  command_list = [os.path.join(ICU_PREBUILT_DIR, tool)]
  command_list.extend(args)

  if VERBOSE:
    print("[%s] %s" % (working_dir, " ".join(command_list)))

  ret = subprocess.call(command_list, cwd=working_dir)
  if ret != 0:
    # A string argument makes sys.exit() print it on stderr and exit with
    # status 1, so the failure names the command and the tool's own status
    # instead of dumping the repr of a list.
    sys.exit("%s failed with exit status %d" % (" ".join(command_list), ret))

def ExtractAllResourceFilesToTmpDir():
  """Unpack the items of icudtXXl-all.dat into TMP_DAT_PATH.

  The full data file is first copied to icudtXXl.dat inside the stubdata
  directory, and icupkg is then run on that copy with "-x *" and
  "-d TMP_DAT_PATH".
  """
  stubdata_dir = os.path.join(ICU4C_DIR, "stubdata")
  full_dat = os.path.join(stubdata_dir, ICU_DATA + "-all.dat")
  working_dat = os.path.join(stubdata_dir, ICU_DATA + ".dat")

  # Work on a copy named icudtXXl.dat rather than on the -all.dat file itself.
  shutil.copyfile(full_dat, working_dat)

  extract_args = [working_dat, "-x", "*", "-d", TMP_DAT_PATH]
  InvokeIcuTool("icupkg", None, extract_args)


def MakeDat(input_file, stubdata_dir):
  """Build icudtXXl.dat inside TMP_DAT_PATH from one input list.

  Args:
    input_file: path of the item list to package (such as
      icu-data-default.txt). The script exits with status 1 if this is not
      an existing regular file.
    stubdata_dir: directory passed on to CopyAndroidCnvFiles().
  """
  print("------ Processing '%s'..." % (input_file))
  input_is_file = os.path.isfile(input_file)
  if not input_is_file:
    print("%s not a file!" % input_file)
    sys.exit(1)

  # Both steps populate TMP_DAT_PATH before the packaging run below.
  GenResIndex(input_file)
  CopyAndroidCnvFiles(stubdata_dir)

  # Equivalent command line, run from inside TMP_DAT_PATH:
  #   icupkg -tl -s <TMP_DAT_PATH> -a add_list.txt new icudtXXl.dat
  output_name = ICU_DATA + ".dat"
  icupkg_args = ["-tl", "-s", TMP_DAT_PATH, "-a", "add_list.txt", "new"]
  icupkg_args.append(output_name)
  InvokeIcuTool("icupkg", TMP_DAT_PATH, icupkg_args)


def ResFilesToLocales(res_files):
  """Map resource file names to the locale names they stand for.

  An entry containing a '/' (something like 'coll/en_US.res') contributes its
  second path component with every '.res' removed. An entry without a '/' is
  passed through unchanged.

  Args:
    res_files: iterable of resource file names.

  Returns:
    A list with one name per input entry, in iteration order.
  """
  def _locale_of(name):
    if '/' in name:
      return name.split('/')[1].replace('.res', '')
    return name

  return [_locale_of(name) for name in res_files]


def WriteIndex(path, locales):
  """Write a res_index source file listing the given locales.

  The file contains a "res_index:table(nofallback)" table with a single
  "InstalledLocales" sub-table holding one `<locale> {""}` entry per locale,
  in sorted order.

  Args:
    path: file to create or overwrite.
    locales: iterable of locale name strings.
  """
  empty_value = " {\"\"}\n"  # key-value pair for all locale entries

  lines = ["res_index:table(nofallback) {\n", "  InstalledLocales {\n"]
  lines.extend(locale + empty_value for locale in sorted(locales))
  lines.append("  }\n")
  lines.append("}\n")

  # The with-block closes the file even if a write fails part-way through.
  with open(path, "w") as f:
    f.writelines(lines)


def AddResFile(collection, path):
  """Add the resource name found in one input line to a collection.

  There are two consumers of the input .txt file: this script and icupkg.
  We only care about .res files, but icupkg needs the files they depend on
  too, so lines without ".res" are ignored here rather than treated as errors.

  Args:
    collection: set that receives the text between the first "/" (or the
      start of the line when there is none) and the first ".res".
    path: one line of the input file.
  """
  res_at = path.find(".res")
  if res_at <= 0:
    # No ".res" at all, or nothing in front of it: not an entry for us.
    return
  name_start = path.find("/") + 1
  collection.add(path[name_start:res_at])


def AddAllResFiles(collection, dir_name, language):
  """Collect the resources available for one language in one data directory.

  Looks under <ICU4C_DIR>/data/<dir_name> for "<language>.txt" and
  "<language>_*.txt". Paths containing 'TRADITIONAL' or 'PHONEBOOK' are
  skipped.

  Args:
    collection: set that receives the entries. For dir_name 'locales' an
      entry is the bare file name without '.txt'; otherwise it is
      '<parent directory>/<file name>' with '.txt' replaced by '.res'.
    dir_name: name of the sub-directory of the ICU data directory.
    language: language code used as the file name or file name prefix.
  """
  data_dir = '%s/data/%s' % (ICU4C_DIR, dir_name)
  exact_matches = glob.glob('%s/%s.txt' % (data_dir, language))
  variant_matches = glob.glob('%s/%s_*.txt' % (data_dir, language))

  for txt_path in exact_matches + variant_matches:
    if any(tag in txt_path for tag in ('TRADITIONAL', 'PHONEBOOK')):
      continue
    components = txt_path.split('/')
    file_name = components[-1]
    if dir_name == 'locales':
      entry = file_name.replace('.txt', '')
    else:
      entry = components[-2] + '/' + file_name.replace('.txt', '.res')
    collection.add(entry)


def DumpFile(filename):
  """Print the contents of a file to stdout between two banner lines.

  The file is read directly instead of shelling out to "cat", so file names
  containing spaces or shell metacharacters work, and the contents go through
  the same stdout stream as the banners.

  This is a debugging aid: a file that cannot be read is reported on stderr
  and the closing banner is still printed.

  Args:
    filename: path of the file to show.
  """
  print(' ----------------- %s' % filename)
  try:
    with open(filename, "r") as f:
      sys.stdout.write(f.read())
  except (IOError, OSError) as e:
    sys.stderr.write("%s\n" % e)
  print(' ----------------- END')


def GenResIndex(input_file):
  """Generate add_list.txt and the res_index.res files under TMP_DAT_PATH.

  Opens the input file (such as icu-data-default.txt), merges its entries with
  the resources found on disk for a fixed list of languages, and then:

    * writes TMP_DAT_PATH/add_list.txt, the list later given to "icupkg -a";
    * writes a res_index.txt in TMP_DAT_PATH (locales) and in each of the
      brkitr, coll, curr, lang, region and zone sub-directories;
    * runs genrb on each res_index.txt.

  Lines read from the input file have their line terminator stripped before
  they are stored, so add_list.txt gets exactly one entry per line instead of
  a blank line after every human-listed entry.

  Args:
    input_file: path of the human-edited list of items to package.
  """
  brkitrs = set()
  colls = set()
  currs = set()
  langs = set()
  locales = set()
  regions = set()
  zones = set()

  languages = [
    # Group 0.
    'en',

    # Group 1.
    'ar',
    'zh',
    'nl',
    'fr',
    'de',
    'it',
    'ja',
    'ko',
    'pl',
    'pt',
    'ru',
    'es',
    'th',
    'tr',

    # Group 2.
    'bg',
    'ca',
    'hr',
    'cs',
    'da',
    'fil','tl',
    'fi',
    'el',
    'iw','he',
    'hi',
    'hu',
    'id','in',
    'lv',
    'lt',
    'nb',
    'ro',
    'sr',
    'sk',
    'sl',
    'sv',
    'uk',
    'vi',
    'fa',

    # Group 3.
    'af',
    'am',
    'bn',
    'et',
    'is',
    'ms',
    'mr',
    'sw',
    'ta',
    'zu',

    # Group 4.
    'eu',
    'gl',
    'gu',
    'kn',
    'ml',
    'te',
    'ur',

    # Group 5.
    'km',
    'lo',
    'ne',
    'si',
    'ka',
    'hy',
    'mn',
    'cy',

    # Others.
    'az',
    'be',
    'rm',
  ]

  for language in languages:
    AddAllResFiles(brkitrs, 'brkitr', language)
    AddAllResFiles(colls, 'coll', language)
    AddAllResFiles(currs, 'curr', language)
    AddAllResFiles(langs, 'lang', language)
    AddAllResFiles(regions, 'region', language)
    AddAllResFiles(zones, 'zone', language)
    AddAllResFiles(locales, 'locales', language)

  # We need to merge the human-edited icu-data-default.txt with the
  # machine-generated list of files needed to support the various languages.
  new_add_list = []

  # The with-block closes the input file once every line has been classified.
  with open(input_file, "r") as input_lines:
    for line in input_lines:
      # Drop the line terminator; a "\n" is added back when the list is
      # written out below.
      line = line.rstrip("\r\n")
      new_add_list.append(line)
      if "root." in line or "res_index" in line or "_.res" in line:
        continue
      if "brkitr/" in line:
        AddResFile(brkitrs, line)
      elif "coll/" in line:
        AddResFile(colls, line)
      elif "curr/" in line:
        AddResFile(currs, line)
      elif "lang/" in line:
        AddResFile(langs, line)
      elif "region/" in line:
        AddResFile(regions, line)
      elif "zone/" in line:
        AddResFile(zones, line)
      elif ".res" in line:
        # TODO: these should all now be misc resources!
        # We need to determine the resource is locale resource or misc
        # resource. To determine the locale resource, we assume max script
        # length is 3.
        end = line.find(".res")
        if end <= 3 or (line.find("_") <= 3 and line.find("_") > 0):
          locales.add(line[:end])

  kind_to_res_files = {
      "brkitr": brkitrs,
      "coll": colls,
      "curr": currs,
      "lang": langs,
      "locales": locales,
      "region": regions,
      "zone": zones
  }

  # Merge the machine-generated list into the human-generated list.
  for res_files in kind_to_res_files.values():
    for res_file in sorted(res_files):
      # Entries without any extension get '.res' appended.
      if '.' not in res_file:
        res_file = res_file + '.res'
      new_add_list.append(res_file)

  if VERBOSE:
    for kind, res_files in kind_to_res_files.items():
      print("%s=%s" % (kind, sorted(res_files)))

  # Write the genrb input files.

  # First add_list.txt, the argument to icupkg -a...
  with open(os.path.join(TMP_DAT_PATH, "add_list.txt"), "w") as f:
    for line in new_add_list:
      # Comment lines from the human-edited file are not passed on.
      if line.startswith('#'):
        continue
      f.write("%s\n" % line)

  # Second res_index.txt, used below by genrb.
  res_index = "res_index.txt"
  WriteIndex(os.path.join(TMP_DAT_PATH, res_index), locales)
  for kind, res_files in kind_to_res_files.items():
    if kind == "locales":
      # The locales index lives in TMP_DAT_PATH itself and was written above.
      continue
    res_index_filename = os.path.join(TMP_DAT_PATH, kind, res_index)
    WriteIndex(res_index_filename, ResFilesToLocales(res_files))
    if VERY_VERBOSE:
      DumpFile(res_index_filename)

  # Useful if you need to see the temporary input files we generated.
  if VERY_VERBOSE:
    DumpFile('%s/add_list.txt' % TMP_DAT_PATH)
    DumpFile('%s/res_index.txt' % TMP_DAT_PATH)

  # Call genrb to generate new res_index.res.
  InvokeIcuTool("genrb", TMP_DAT_PATH, [res_index])
  for kind in kind_to_res_files:
    if kind == "locales":
      continue
    InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, kind), [res_index])


def CopyAndroidCnvFiles(stubdata_dir):
  """Copy the Android-specific .cnv files into TMP_DAT_PATH.

  Args:
    stubdata_dir: directory whose "cnv" sub-directory holds the files.
  """
  android_specific_cnv = (
      "gsm-03.38-2000.cnv",
      "iso-8859_16-2001.cnv",
      "docomo-shift_jis-2012.cnv",
      "kddi-jisx-208-2007.cnv",
      "kddi-shift_jis-2012.cnv",
      "softbank-jisx-208-2007.cnv",
      "softbank-shift_jis-2012.cnv",
  )
  cnv_dir = os.path.join(stubdata_dir, "cnv")
  for cnv_name in android_specific_cnv:
    source = os.path.join(cnv_dir, cnv_name)
    target = os.path.join(TMP_DAT_PATH, cnv_name)
    shutil.copyfile(source, target)
    if VERBOSE:
      print("copy " + source + " " + target)


def main():
  """Parse the command line and generate icudtXXl-default.dat.

  Sets the script-wide globals used by the other functions, extracts
  icudtXXl-all.dat into a temporary directory, builds the default data file
  from icu-data-default.txt, copies it into the stubdata directory and removes
  the temporary files.

  Raises:
    SystemExit: with status 1 on bad usage, when $ANDROID_BUILD_TOP is unset,
      when no icudt*.dat file or ICU version can be found, when the pre-built
      ICU tool directory is missing, or when icudtXXl-all.dat is absent.
  """
  global ANDROID_BUILD_TOP  # $ANDROID_BUILD_TOP
  global ICU4C_DIR          # $ANDROID_BUILD_TOP/external/icu4c
  global ICU_PREBUILT_DIR   # Directory containing pre-built ICU tools.
  global ICU_DATA           # e.g. "icudt50l"
  global TMP_DAT_PATH       # Temporary directory to store all resource files and
                            # intermediate dat files.
  global VERBOSE, VERY_VERBOSE

  VERBOSE = VERY_VERBOSE = False

  show_help = False
  try:
    opts, args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose", "very-verbose"])
  except getopt.error:
    PrintHelpAndExit()
  for opt, _ in opts:
    if opt in ("-h", "--help"):
      show_help = True
    elif opt in ("-v", "--verbose"):
      VERBOSE = True
    elif opt == "--very-verbose":
      # Compared with ==: ("--very-verbose") is a plain string, so "in" on it
      # would be a substring test rather than tuple membership.
      VERY_VERBOSE = VERBOSE = True
  if args:
    show_help = True

  if show_help:
    PrintHelpAndExit()

  ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP")
  if not ANDROID_BUILD_TOP:
    print("$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'.")
    sys.exit(1)
  ICU4C_DIR = os.path.join(ANDROID_BUILD_TOP, "external", "icu4c")
  stubdata_dir = os.path.join(ICU4C_DIR, "stubdata")

  # Work out the ICU version from the source .dat filename, so we can find the
  # appropriate pre-built ICU tools.
  dat_candidates = glob.glob(os.path.join(stubdata_dir, "icudt*.dat"))
  if not dat_candidates:
    print("No icudt*.dat file found in %s." % stubdata_dir)
    sys.exit(1)
  source_dat = os.path.basename(dat_candidates[0])
  # Keep only the digits of the file name, e.g. "icudt50l-all.dat" -> "50".
  icu_version = re.sub(r"([^0-9])", "", source_dat)
  if len(icu_version) < 2:
    print("Cannot work out the ICU version from %s." % source_dat)
    sys.exit(1)
  ICU_PREBUILT_DIR = os.path.join(ANDROID_BUILD_TOP,
      "prebuilts", "misc", "linux-x86_64", "icu-%s%s" % (icu_version[0], icu_version[1]))
  if not os.path.exists(ICU_PREBUILT_DIR):
    # Stop here: every later step needs the tools in this directory, and
    # carrying on would leave a stray icudtXXl.dat copy and tmp directory.
    print("%s does not exist!" % ICU_PREBUILT_DIR)
    sys.exit(1)

  ICU_DATA = "icudt" + icu_version + "l"

  # Check that icudtXXl-all.dat exists (since we build the other .dat files from that).
  full_data_filename = os.path.join(stubdata_dir, ICU_DATA + "-all.dat")
  if not os.path.isfile(full_data_filename):
    print("%s not present." % full_data_filename)
    sys.exit(1)

  # Create a temporary working directory, discarding any left-over one.
  TMP_DAT_PATH = os.path.join(ICU4C_DIR, "tmp")
  if os.path.exists(TMP_DAT_PATH):
    shutil.rmtree(TMP_DAT_PATH)
  os.mkdir(TMP_DAT_PATH)

  # Extract resource files from icudtXXl-all.dat to TMP_DAT_PATH.
  ExtractAllResourceFilesToTmpDir()

  input_file = os.path.join(stubdata_dir, "icu-data-default.txt")
  output_file = os.path.join(stubdata_dir, ICU_DATA + "-default.dat")
  MakeDat(input_file, stubdata_dir)
  shutil.copyfile(os.path.join(TMP_DAT_PATH, ICU_DATA + ".dat"), output_file)
  print("Generated ICU data: %s" % output_file)

  # Cleanup temporary working directory and icudtXXl.dat
  shutil.rmtree(TMP_DAT_PATH)
  os.remove(os.path.join(stubdata_dir, ICU_DATA + ".dat"))

# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
  main()