#!/neo/opt/bin/python
"""Extract translatable strings from templates and generate localized output.

The tool walks the ``tmpl`` tree under the configured root, pulls text out of
ClearSilver (.cs/.cst), HTML (.html/.htm) and HDF (.hdf) files, registers each
unique string through ``db_trans``, writes per-language HDF string tables and
writes copies of the templates under ``<root>/gen`` in which every extracted
string is replaced by a ``<?cs var:Lang.Extracted... ?>`` reference.

With ``--load FILE`` it instead loads translated strings from an HDF file
into the maps table for the language given by ``--lang``.
"""

import sys
import string
import os
import getopt
import pwd
import signal
import time
import re
import fcntl
import errno
import shutil

# NOTE(review): tstart is never referenced below; presumably it is imported
# for its side effects and must stay ahead of the project imports -- confirm.
import tstart

import db_trans
from log import *
import neo_cgi
import neo_util
import odb


class eTransError(Exception):
    """Base error raised by the translation tool.

    This used to be a string exception; string exceptions cannot be raised
    on Python 2.6 and later, and a class still works with
    ``except eTransError``.
    """


class eNoString(eTransError):
    """Raised when a string id has neither a translation nor a source row."""


DONE = 0

DEBUG = 0

# Divisors used to spread string ids over a three level HDF hierarchy.
TIER2_DIV = 11
TIER1_DIV = 11 * TIER2_DIV

if not DEBUG:
    LOGGING_STATUS[DEV_UPDATE] = 0

# HTML entities replaced before checking whether a string contains words.
# '&amp;' is handled last so that e.g. '&amp;lt;' is not decoded twice.
_HTML_ENTITIES = (
    ('&nbsp;', ' '),
    ('&quot;', '"'),
    ('&copy;', ''),
    ('&lt;', '<'),
    ('&gt;', '>'),
    ('&amp;', '&'),
)


def handleSignal(*arg):
    """Signal handler: set the module level DONE flag."""
    global DONE
    DONE = 1


def usage(progname=None):
    """Print the usage message.

    progname is accepted (and ignored) because main() passes argv[0].
    """
    print("usage info!!")


def exceptionString():
    """Return the traceback of the exception being handled as a string."""
    import StringIO, traceback

    sfp = StringIO.StringIO()
    try:
        traceback.print_exc(file=sfp)
        return sfp.getvalue()
    finally:
        sfp.close()


def _tieredName(prefix, s_id):
    """Return the tiered HDF name '<prefix>.<id/T1>.<id/T2>.<id>' for s_id."""
    n = int(s_id)
    return "%s.%d.%d.%s" % (prefix, n // TIER1_DIV, n // TIER2_DIV, s_id)


def _ensureDir(path):
    """Create directory path (and parents); an existing directory is fine."""
    try:
        os.makedirs(path)
    except OSError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise


def _readFile(path):
    """Return the full contents of path, closing the file afterwards."""
    fp = open(path, 'r')
    try:
        return fp.read()
    finally:
        fp.close()


def _writeFile(path, data):
    """Replace the contents of path with data, closing the file afterwards."""
    fp = open(path, 'w')
    try:
        fp.write(data)
    finally:
        fp.close()


class TransLoc:
    """One occurrence of a string: its id, the file, and where in the file.

    location is either "ofs:<offset>:<length>" or "hdf:<hdf name>".
    """

    def __init__(self, string_id, filename, location):
        self.string_id = string_id
        self.filename = filename
        self.location = location

    def __repr__(self):
        return "TransLoc(%r, %r, %r)" % (self.string_id, self.filename,
                                         self.location)


class Translator:
    """Extracts strings from templates and writes the translated artifacts."""

    _HTML_TAG_REGEX = '<[^!][^>]*?>'
    _HTML_TAG_RE = re.compile(_HTML_TAG_REGEX, re.MULTILINE | re.DOTALL)
    _HTML_CMT_REGEX = '<!--.*?-->'
    _HTML_CMT_RE = re.compile(_HTML_CMT_REGEX, re.MULTILINE | re.DOTALL)
    _CS_TAG_REGEX = '<\\?.+?\\?>'
    _CS_TAG_RE = re.compile(_CS_TAG_REGEX, re.MULTILINE | re.DOTALL)

    # Matches one attribute (optionally with a value) inside an HTML tag.
    _ATTR_RE = re.compile(
        r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
        r'(\'[^\']*\'|"[^"]*"|[^ \t\n<>]*))?')

    # Values of self._html_state: what parseHTML was inside of when the
    # previous chunk of data ran out.
    _IN_TEXT = 0
    _IN_TAG = 1
    _IN_COMMENT = 2

    def __init__(self):
        self.tdb = db_trans.trans_connect()

        # configuration data ......
        #  - we should stop hardcoding this... - jeske
        self.root = "testroot"
        self.languages = ['es', 'en']

        self.ignore_paths = ['tmpl/m']          # common place for mockups
        self.ignore_files = ['blah_ignore.cs']  # ignore clearsilver file

        # ignore clearsilver javascript files
        self.ignore_patterns = ['tmpl/[^ ]*_js.cs']
        # ............................

        if self.root is None:
            raise eTransError("Unable to determine installation root")

        self._html_state = Translator._IN_TEXT

    def parseHTMLTag(self, data):
        """Return translatable strings found inside one complete HTML tag.

        data is the text between '<' and '>'.  Only the value attribute of
        <input type="submit"> / <input type="button"> is extracted.  Returns
        a list of (string, offset within data, 1) tuples.
        """
        # this is only called if we see a full tag in one parse...
        if len(data) == 0:
            return []
        if data[0] in '/?':
            return []
        i = 0
        while i < len(data) and data[i] not in ' \n\r\t>':
            i = i + 1
        if i == len(data):
            # a bare tag name, there are no attributes to look at
            return []
        tag = data[:i].lower()

        attrs = {}
        attrs_beg = {}
        k = i
        while k < len(data):
            match = Translator._ATTR_RE.match(data, k)
            if not match:
                break
            attrname, rest, attrvalue = match.group(1, 2, 3)
            if not rest:
                # attribute without a value, e.g. <input disabled>
                attrvalue = attrname
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                    attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
            attrname = attrname.lower()
            if attrname in attrs:
                log("Can't handle duplicate attrs: %s" % attrname)
            attrs[attrname] = attrvalue
            attrs_beg[attrname] = match.start(3)
            k = match.end(0)

        find_l = []
        if tag == "input":
            if attrs.get('type', "").lower() in ["submit", "button"]:
                find_l.append((attrs.get('value', ''),
                               attrs_beg.get('value', 0)))

        results = []
        for (s, beg) in find_l:
            if s:
                # locate the unquoted value at or after its attribute start
                x = data[beg:].find(s)
                if x != -1:
                    results.append((s, x + beg, 1))
        return results

    def _textChunk(self, data, beg, end):
        """Return (text, offset, 1) for data[beg:end] with outer whitespace
        trimmed; the offset points at the first kept character."""
        x = beg
        y = end - 1
        while x < y and data[x] in string.whitespace:
            x = x + 1
        while y > x and data[y] in string.whitespace:
            y = y - 1
        return (data[x:y + 1], x, 1)

    def parseHTML(self, data, reset=1):
        """Split data into the text runs between HTML tags and comments.

        Returns a list of (string, offset within data, 1) tuples, including
        strings parseHTMLTag() extracts from inside tags.  When reset is
        false, a tag or comment left unterminated by the previous call is
        first skipped up to its terminator.
        """
        if reset:
            self._html_state = Translator._IN_TEXT
        if DEBUG:
            print("- %d ---------\n%s\n- E ---------" % (self._html_state, data))

        results = []
        i = 0
        n = len(data)

        # if we had state from the last parse... find it
        if self._html_state:
            if self._html_state == Translator._IN_COMMENT:
                closer = '-->'
            else:
                closer = '>'
            x = data.find(closer)
            if x == -1:
                return results
            i = x + len(closer)
            self._html_state = Translator._IN_TEXT

        while i < n:
            if DEBUG:
                print("MATCHING>%s<MATCHING" % data[i:])
            tag_b = data.find('<', i)
            if tag_b == -1:
                # no more markup: the rest is text
                results.append(self._textChunk(data, i, n))
                break
            results.append(self._textChunk(data, i, tag_b))

            # The terminators are searched for from the opening marker on.
            # Searching from i (as was done before) let a stray '>' or '-->'
            # in the preceding text end the tag early, producing duplicate,
            # overlapping text chunks.
            if data[tag_b:tag_b + 4] == '<!--':
                cmt_e = data.find('-->', tag_b + 2)
                if cmt_e == -1:
                    # partial comment
                    self._html_state = Translator._IN_COMMENT
                    break
                i = cmt_e + 3
            else:
                tag_e = data.find('>', tag_b + 1)
                if tag_e == -1:
                    # partial tag
                    self._html_state = Translator._IN_TAG
                    break
                base = tag_b + 1
                for (s, ofs, ishtml) in self.parseHTMLTag(data[base:tag_e]):
                    results.append((s, ofs + base, ishtml))
                i = tag_e + 1
        return results

    def parseCS(self, data):
        """Extract strings from ClearSilver template text.

        The <?cs ... ?> tags are cut out and the segments between them are
        run through parseHTML() with the HTML state carried from one segment
        to the next.  Returns (string, offset within data, 1) tuples.
        """
        segments = []
        i = 0
        n = len(data)
        while i < n:
            m = Translator._CS_TAG_RE.search(data, i)
            if not m:
                # search for a partial...
                x = data.find('<?', i)
                if x == -1:
                    segments.append((data[i:], i))
                else:
                    # x is an absolute index; the old code sliced with an
                    # index relative to i and returned the wrong span
                    segments.append((data[i:x], i))
                break
            (b, e) = m.span()
            if i != b:
                segments.append((data[i:b], i))
            i = e

        # Start every file with a clean HTML state so an unterminated tag in
        # a previously parsed file cannot swallow the start of this one.
        self._html_state = Translator._IN_TEXT
        self._html_in = 0  # kept for compatibility; not read in this module

        t_results = []
        for (s, base) in segments:
            for (text, ofs, ishtml) in self.parseHTML(s, reset=0):
                t_results.append((text, ofs + base, ishtml))
        return t_results

    def descendHDF(self, obj, prefix):
        """Collect values carrying a "Lang" attribute from obj, its siblings
        and all their descendants.

        Returns a list of (value, dotted HDF name, 0) tuples.
        """
        results = []
        while obj is not None:
            if prefix:
                name = "%s.%s" % (prefix, obj.name())
            else:
                name = "%s" % (obj.name())
            if obj.value():
                attr_names = [attr[0] for attr in obj.attrs()]
                if "Lang" in attr_names:
                    results.append((obj.value(), name, 0))
            if obj.child():
                results = results + self.descendHDF(obj.child(), name)
            obj = obj.next()
        return results

    def parseHDF(self, data):
        """Extract the translatable elements of HDF text."""
        # Ok, we handle HDF files specially.. the theory is, we only
        # extract entire HDF elements which have the attribute Lang
        hdf = neo_util.HDF()
        hdf.readString(data, 1)
        return self.descendHDF(hdf, "")

    def handleFile(self, file):
        """Return the strings extracted from file (a path relative to root).

        Ignored files, files without an extension and files with an unknown
        extension yield an empty list.
        """
        if file in self.ignore_files:
            return []
        for a_re in self.ignore_patterns:
            if re.match(a_re, file):
                return []

        x = file.rfind('.')
        if x == -1:
            return []
        ext = file[x:]
        if ext in ['.cst', '.cs']:
            parser = self.parseCS
        elif ext in ['.html', '.htm']:
            parser = self.parseHTML
        elif ext in ['.hdf']:
            parser = self.parseHDF
        else:
            # unknown type: do not even read the file
            return []

        strings = parser(_readFile(self.root + '/' + file))
        if len(strings):
            print("Found %d strings in %s" % (len(strings), file))
            return strings
        return []

    def walkDirectory(self, path):
        """Recursively extract strings from every file below root/path.

        Dot files, ignored paths and directories named "release" are skipped.
        Returns a list of (relative file name, strings) pairs for the files
        that yielded at least one string.
        """
        if path in self.ignore_paths:
            return []
        fpath = self.root + '/' + path
        names = os.listdir(fpath)
        names.sort()  # listdir order is arbitrary; keep runs reproducible

        dirs = []
        results = []
        for name in names:
            if name[:1] == '.':
                continue
            if os.path.isdir(fpath + '/' + name):
                dirs.append(name)
            else:
                strings = self.handleFile(path + '/' + name)
                if len(strings):
                    results.append((path + '/' + name, strings))
        for name in dirs:
            if name not in ["release"]:
                results = results + self.walkDirectory(path + '/' + name)
        return results

    def cleanHtmlString(self, s):
        """Collapse every whitespace run in s to one space and strip s."""
        return re.sub(r"\s+", " ", s).strip()

    def containsWords(self, s, ishtml):
        """Return 1 if s contains an ASCII letter or digit, else 0.

        For HTML strings common entities are replaced first, so that a
        string made up only of entities such as "&nbsp;" does not count.
        """
        if ishtml:
            for (entity, replacement) in _HTML_ENTITIES:
                s = s.replace(entity, replacement)
        for ch in s:
            n = ord(ch)
            if (47 < n < 58) or (64 < n < 91) or (96 < n < 123):
                return 1
        return 0

    def findString(self, s):
        """Return the string_id of s, creating its strings row if needed.

        Raises eTransError if s is stored more than once.
        """
        rows = self.tdb.strings.fetchRows(('string', s))
        if len(rows) > 1:
            raise eTransError("String %s exists multiple times!" % s)
        if len(rows) == 1:
            return rows[0].string_id
        row = self.tdb.strings.newRow()
        row.string = s
        row.save()
        return row.string_id

    def loadStrings(self, one_file=None, verbose=0):
        """Extract strings, write the "map" file and register the strings.

        With one_file only that file is processed, otherwise the whole
        'tmpl' tree.  Returns a list of TransLoc, one per occurrence.
        verbose is accepted but not used.
        """
        if one_file is not None:
            results = [(one_file, self.handleFile(one_file))]
        else:
            results = self.walkDirectory('tmpl')

        uniq = {}      # cleaned string -> [(file, offset or hdf name, len)]
        seen_hdf = {}  # hdf name -> (string, file) of its first occurrence
        cnt = 0
        for (fname, strings) in results:
            for (s, ofs, ishtml) in strings:
                if not (s and s.strip()):
                    continue
                # the length of the span as it appears in the file, taken
                # before cleaning, since dumpFiles() replaces that span
                l = len(s)
                if ishtml:
                    s = self.cleanHtmlString(s)
                if not self.containsWords(s, ishtml):
                    continue
                if isinstance(ofs, str):  # HDF
                    if ofs in seen_hdf:
                        if seen_hdf[ofs][0] != s:
                            log("Duplicate HDF Name %s:\n\t file %s = %s\n\t file %s = %s" % (ofs, seen_hdf[ofs][1], seen_hdf[ofs][0], fname, s))
                    else:
                        seen_hdf[ofs] = (s, fname)
                try:
                    uniq[s].append((fname, ofs, l))
                except KeyError:
                    uniq[s] = [(fname, ofs, l)]
                cnt = cnt + 1
        print("%d strings, %d unique" % (cnt, len(uniq.keys())))

        # sorted so that the map file and the database inserts are
        # reproducible from run to run
        entries = list(uniq.items())
        entries.sort()

        fp = open("map", 'w')
        try:
            for (s, locs) in entries:
                where = ["%s:%s:%d" % loc for loc in locs]
                fp.write('#: %s\n' % (','.join(where)))
                fp.write('msgid=%s\n\n' % repr(s))
        finally:
            fp.close()

        log("Loading strings/locations into database")
        locations = []
        for (s, locs) in entries:
            s_id = self.findString(s)
            for (fname, ofs, l) in locs:
                if isinstance(ofs, str):  # ie, its HDF
                    location = "hdf:%s" % ofs
                else:
                    location = "ofs:%d:%d" % (ofs, l)
                locations.append(TransLoc(s_id, fname, location))
        return locations

    def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0):
        """Build an HDF dataset holding the strings referenced by locations.

        Each string id is stored once under prefix, using the translation
        for lang when the maps table has one and the source string
        otherwise.  With exist set, only strings lacking a translation are
        stored (and nothing at all for 'en').  With tiered set the tiered
        naming scheme is used.  locations is not modified.
        """
        hdf = neo_util.HDF()
        if exist and lang == 'en':
            return hdf

        maps_d = {}
        for map_r in self.tdb.maps.fetchRows(('lang', lang)):
            maps_d[int(map_r.string_id)] = map_r
        strings_d = {}
        for string_r in self.tdb.strings.fetchRows():
            strings_d[int(string_r.string_id)] = string_r

        # Visit every id once, in numeric order.  The old code sorted the
        # caller's list of TransLoc instances in place, which reordered the
        # caller's data in an arbitrary way.
        ids = {}
        for loc in locations:
            ids[int(loc.string_id)] = 1
        id_list = list(ids.keys())
        id_list.sort()

        count = 0
        for s_id in id_list:
            if s_id in maps_d:
                if exist:
                    continue
                s_row = maps_d[s_id]
            elif s_id in strings_d:
                s_row = strings_d[s_id]
            else:
                log("Missing string_id %d, skipping" % s_id)
                continue
            count = count + 1
            if tiered:
                hdf.setValue(_tieredName(prefix, s_id), s_row.string)
            else:
                hdf.setValue("%s.%s" % (prefix, s_id), s_row.string)
        if exist == 1:
            log("Missing %d strings for lang %s" % (count, lang))
        return hdf

    def dumpStrings(self, locations, lang=None):
        """Write strings_<lang>.hdf and, where strings lack a translation,
        strings_missing_<lang>.hdf.

        Without lang this is done for 'en' plus every language found in the
        maps table.
        """
        log("Dumping strings to HDF")
        if lang is None:
            langs = ['en']
            sql = "select lang from nt_trans_maps group by lang"
            cursor = self.tdb.defaultCursor()
            cursor.execute(sql)
            for row in cursor.fetchall():
                # the table may itself contain 'en'; do not dump it twice
                if row[0] not in langs:
                    langs.append(row[0])
        else:
            langs = [lang]

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang)
            hdf.writeFile("strings_%s.hdf" % a_lang)

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang, exist=1)
            if hdf.child():
                hdf.writeFile("strings_missing_%s.hdf" % a_lang)

    def fetchString(self, s_id, lang):
        """Return the replacement text for string s_id in language lang.

        For the pseudo language "hdf" this is a ClearSilver variable
        reference; otherwise the translation, falling back to the source
        string.  Raises eNoString if the id is unknown.
        """
        if lang == "hdf":
            return "<?cs var:%s ?>" % _tieredName('Lang.Extracted', s_id)

        rows = self.tdb.maps.fetchRows([('string_id', s_id), ('lang', lang)])
        if len(rows):
            return rows[0].string
        try:
            row = self.tdb.strings.fetchRow(('string_id', s_id))
        except odb.eNoMatchingRows:
            log("Unable to find string id %s" % s_id)
            raise eNoString("Unable to find string id %s" % s_id)
        if lang != 'en':
            log("Untranslated string for id %s" % s_id)
        return row.string

    def _substitute(self, file, data, ofs, lang):
        """Return data with each (offset, length, string_id) span listed in
        the sorted list ofs replaced by fetchString(string_id, lang)."""
        out = []
        x = 0
        for (beg, length, s_id) in ofs:
            if beg < x:
                # overlaps the previous span; give up on the remainder
                log("How did we get here? %s x=%d ofs=%d sid=%s" % (file, x, beg, s_id))
                log("Data[x:20]: %s" % data[x:x + 20])
                log("Data[ofs:20]: %s" % data[beg:beg + 20])
                break
            if beg > x:
                out.append(data[x:beg])
            out.append(self.fetchString(s_id, lang))
            x = beg + length
        if len(data) > x:
            out.append(data[x:])
        return ''.join(out)

    def dumpFiles(self, locations, lang):
        """Regenerate <root>/gen for lang.

        Every non HDF file with extracted strings is written under
        <root>/gen with those strings substituted.  Then, for each output
        language (self.languages when lang is "hdf"), the tiered string
        table lang_<lang>.hdf and a lang_map.hdf mapping the original HDF
        names to the strings are written.
        """
        log("Dumping files for %s" % lang)
        files = {}
        for row in locations:
            try:
                files[row.filename].append(row)
            except KeyError:
                files[row.filename] = [row]

        hdf_map = []

        # start from scratch; like "rm -rf" a missing tree is not an error
        shutil.rmtree("%s/gen/tmpl" % (self.root), 1)

        for file in files.keys():
            fname = "%s/gen/%s" % (self.root, file)
            _ensureDir(os.path.dirname(fname))

            x = file.rfind('.')
            do_hdf = (x != -1 and file[x:] == '.hdf')

            ofs = []
            for loc in files[file]:
                parts = loc.location.split(':')
                if len(parts) == 3 and parts[0] == 'ofs' and not do_hdf:
                    ofs.append((int(parts[1]), int(parts[2]), loc.string_id))
                elif len(parts) == 2 and parts[0] == 'hdf' and do_hdf:
                    hdf_map.append((parts[1], loc.string_id))
                else:
                    # TransLoc has no loc_id, so report what we do know
                    log("Invalid location %s for string_id %s in %s" % (loc.location, loc.string_id, file))

            if not do_hdf:
                ofs.sort()
                data = _readFile(self.root + '/' + file)
                _writeFile(fname, self._substitute(file, data, ofs, lang))

        if lang == "hdf":
            langs = self.languages
        else:
            langs = [lang]

        for d_lang in langs:
            # dumping the extracted strings
            hdf = self.stringsHDF('Lang.Extracted', locations, d_lang, tiered=1)
            fname = "%s/gen/tmpl/lang_%s.hdf" % (self.root, d_lang)
            # the directory is missing when no file under tmpl was written
            _ensureDir(os.path.dirname(fname))
            hdf.writeFile(fname)
            data = _readFile(fname)
            _writeFile(fname,
                       '## AUTOMATICALLY GENERATED -- DO NOT EDIT\n\n'
                       + data
                       + '\n#include "lang_map.hdf"\n')

            # dumping the hdf strings file
            if d_lang == "en":
                map_file = "%s/gen/tmpl/lang_map.hdf" % (self.root)
            else:
                map_file = "%s/gen/tmpl/%s/lang_map.hdf" % (self.root, d_lang)
            _ensureDir(os.path.dirname(map_file))
            map_hdf = neo_util.HDF()
            for (name, s_id) in hdf_map:
                value = hdf.getValue(_tieredName('Lang.Extracted', s_id), '')
                map_hdf.setValue(name, value)
            map_hdf.writeFile(map_file)

    def loadMap(self, file, prefix, lang):
        """Load translations for lang from the children of prefix in the
        HDF file, creating or updating rows of the maps table."""
        log("Loading map for language %s" % lang)
        hdf = neo_util.HDF()
        hdf.readFile(file)
        obj = hdf.getChild(prefix)
        updates = 0
        new_r = 0
        while obj is not None:
            s_id = obj.name()
            value = obj.value()

            try:
                map_r = self.tdb.maps.fetchRow(
                    [('string_id', s_id), ('lang', lang)])
            except odb.eNoMatchingRows:
                map_r = self.tdb.maps.newRow()
                map_r.string_id = s_id
                map_r.lang = lang
                new_r = new_r + 1

            if map_r.string != value:
                updates = updates + 1
                map_r.string = value
                map_r.save()

            obj = obj.next()
        log("New maps: %d Updates: %d" % (new_r, updates - new_r))


def main(argv):
    """Command line entry point.

    Options: -f FILE (process one file), -v LEVEL, --load FILE (load a
    translation map), --lang LANG (language for --load, default 'en'),
    --help.  Returns -1 after printing usage, otherwise None.
    """
    try:
        alist, args = getopt.getopt(argv[1:], "f:v:",
                                    ["help", "load=", "lang="])
    except getopt.error:
        print("%s" % sys.exc_info()[1])
        usage(argv[0])
        return -1

    one_file = None
    verbose = 0
    load_file = None
    lang = 'en'
    for (field, val) in alist:
        if field == "--help":
            usage(argv[0])
            return -1
        if field == "-f":
            one_file = val
        if field == "-v":
            verbose = int(val)
        if field == "--load":
            load_file = val
        if field == "--lang":
            lang = val

    # NOTE: handleSignal is not installed; the registration was already
    # commented out:
    #signal.signal(signal.SIGTERM, handleSignal)
    #signal.signal(signal.SIGINT, handleSignal)

    log("trans: start")

    try:
        t = Translator()
        if load_file:
            t.loadMap(load_file, 'S', lang)
        else:
            locations = t.loadStrings(one_file, verbose=verbose)
            t.dumpStrings(locations)
            t.dumpFiles(locations, 'hdf')
    except KeyboardInterrupt:
        pass
    except:
        # top level boundary: hand everything else to the project's handler
        import handle_error
        handle_error.handleException("Translation Error")


if __name__ == "__main__":
    main(sys.argv)