#! /usr/bin/python # Copyright (C) 2009-2012, International Business Machines Corporation, Google and Others. # All rights reserved. # # Script to check and fix svn property settings for CLDR source files. # This script is a modified version of ICU's icu-svnprops-check.py. # Also check for the correct line endings on files with svn:eol-style = native # # THIS SCRIPT DOES NOT WORK ON WINDOWS # It only works correctly on platforms where the native line ending is a plain \n # # usage: # cldr-svnprops-check.py [options] # # options: # -f | --fix Fix any problems that are found # -h | --help Print a usage line and exit. # # The tool operates recursively on the directory from which it is run. # Only files from the svn repository are checked. # No changes are made to the repository; only the working copy will be altered. import sys import os import os.path import re import getopt # # svn autoprops definitions. # Copy and paste here the ICU recommended auto-props from # http://icu-project.org/docs/subversion_howto/index.html # # This program will parse this autoprops string, and verify that files in # the repository have the recommeded properties set. # svn_auto_props = """ ### Section for configuring automatic properties. [auto-props] ### The format of the entries is: ### file-name-pattern = propname[=value][;propname[=value]...] ### The file-name-pattern can contain wildcards (such as '*' and ### '?'). All entries which match will be applied to the file. ### Note that auto-props functionality must be enabled, which ### is typically done by setting the 'enable-auto-props' option. *.c = svn:eol-style=native *.cc = svn:eol-style=native *.cpp = svn:eol-style=native *.h = svn:eol-style=native *.rc = svn:eol-style=native *.dsp = svn:eol-style=native *.dsw = svn:eol-style=native *.sln = svn:eol-style=native *.vcproj = svn:eol-style=native configure = svn:eol-style=native;svn:executable *.sh = svn:eol-style=native;svn:executable *.pl = svn:eol-style=native;svn:executable *.py = svn:eol-style=native;svn:executable *.txt = svn:mime-type=text/plain;svn:eol-style=native *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8 *.ucm = svn:eol-style=native *.html = svn:eol-style=native;svn:mime-type=text/html *.htm = svn:eol-style=native;svn:mime-type=text/html *.xml = svn:eol-style=native Makefile = svn:eol-style=native *.in = svn:eol-style=native *.mak = svn:eol-style=native *.mk = svn:eol-style=native *.png = svn:mime-type=image/png *.jpeg = svn:mime-type=image/jpeg *.jpg = svn:mime-type=image/jpeg *.bin = svn:mime-type=application/octet-stream *.brk = svn:mime-type=application/octet-stream *.cnv = svn:mime-type=application/octet-stream *.dat = svn:mime-type=application/octet-stream *.icu = svn:mime-type=application/octet-stream *.res = svn:mime-type=application/octet-stream *.spp = svn:mime-type=application/octet-stream # new additions 2007-dec-5 srl *.rtf = mime-type=text/rtf *.pdf = mime-type=application/pdf # changed 2008-04-08: modified .txt, above, adding mime-type # changed 2010-11-09: modified .java, adding mime-type # Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1 """ # file_types: The parsed form of the svn auto-props specification. # A list of file types - .cc, .cpp, .txt, etc. # each element is a [type, proplist] # "type" is a regular expression string that will match a file name # prop list is another list, one element per property. # Each property item is a two element list, [prop name, prop value] file_types = list() def parse_auto_props(): aprops = svn_auto_props.splitlines() for propline in aprops: if re.match("\s*(#.*)?$", propline): # Match comment and blank lines continue if re.match("\s*\[auto-props\]", propline): # Match the [auto-props] line. continue if not re.match("\s*[^\s]+\s*=", propline): # minimal syntax check for <file-type> = print "Bad line from autoprops definitions: " + propline continue file_type, string_proplist = propline.split("=", 1) #transform the file type expression from autoprops into a normal regular expression. # e.g. "*.cpp" ==> ".*\.cpp$" file_type = file_type.strip() file_type = file_type.replace(".", "\.") file_type = file_type.replace("*", ".*") file_type = file_type + "$" # example string_proplist at this point: " svn:eol-style=native;svn:executable" # split on ';' into a list of properties. The negative lookahead and lookbehind # in the split regexp are to prevent matching on ';;', which is an escaped ';' # within a property value. string_proplist = re.split("(?<!;);(?!;)", string_proplist) proplist = list() for prop in string_proplist: if prop.find("=") >= 0: prop_name, prop_val = prop.split("=", 1) else: # properties with no explicit value, e.g. svn:executable prop_name, prop_val = prop, "" prop_name = prop_name.strip() prop_val = prop_val.strip() # unescape any ";;" in a property value, e.g. the mime-type from # *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8 prop_val = prop_val.replace(";;", ";"); proplist.append((prop_name, prop_val)) file_types.append((file_type, proplist)) # print file_types def runCommand(cmd): output_file = os.popen(cmd); output_text = output_file.read(); exit_status = output_file.close(); if exit_status: print >>sys.stderr, '"', cmd, '" failed. Exiting.' sys.exit(exit_status) return output_text def usage(): print "usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]" # # UTF-8 file check. For text files, add a charset to the mime-type if their contents are UTF-8 # file_name: name of a text file. # base_mime_type: svn:mime-type property value from the auto-props file (no charset= part) # actual_mime_type: existing svn:mime-type property value for the file. # return: svn:mime-type property value, with charset added when appropriate. # def check_utf8(file_name, base_mime_type, actual_mime_type): # If the file already has a charset in its mime-type, don't make any change. if actual_mime_type.find("charset=") > 0: return actual_mime_type; f = open(file_name, 'r') bytes = f.read() f.close() if all(ord(byte) < 128 for byte in bytes): # pure ASCII. # print "Pure ASCII " + file_name return base_mime_type try: bytes.decode("UTF-8") except UnicodeDecodeError: print "warning: %s: not ASCII, not UTF-8" % file_name return base_mime_type if ord(bytes[0]) == 0xef: print "UTF-8 file with BOM: " + file_name # Append charset=utf-8. return base_mime_type + ';charset=utf-8' def main(argv): fix_problems = False; try: opts, args = getopt.getopt(argv, "fh", ("fix", "help")) except getopt.GetoptError: print "unrecognized option: " + argv[0] usage() sys.exit(2) for opt, arg in opts: if opt in ("-h", "--help"): usage() sys.exit() if opt in ("-f", "--fix"): fix_problems = True if args: print "unexpected command line argument" usage() sys.exit() parse_auto_props() output = runCommand("svn ls -R "); file_list = output.splitlines() for f in file_list: if os.path.isdir(f): # print "Skipping dir " + f continue if not os.path.isfile(f): print "Repository file not in working copy: " + f continue; for file_pattern, props in file_types: if re.match(file_pattern, f): # print "doing " + f for propname, propval in props: actual_propval = runCommand("svn propget --strict " + propname + " '" + f + "'") #print propname + ": " + actual_propval if propname == "svn:mime-type" and propval.find("text/") == 0: # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8 propval = check_utf8(f, propval, actual_propval) if not (propval == actual_propval or (propval == "" and actual_propval == "*")): print "svn propset %s '%s' '%s'" % (propname, propval, f) if fix_problems: os.system("svn propset %s '%s' '%s'" % (propname, propval, f)) if propname == "svn:eol-style" and propval == "native": if os.system("grep -q -v \r '" + f + "'"): if fix_problems: print f + ": Removing DOS CR characters." os.system("sed -i s/\r// '" + f + "'"); else: print f + " contains DOS CR characters." if __name__ == "__main__": main(sys.argv[1:])