#!/usr/bin/python

# Copyright (c) 2008 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Usage: strip_save_dsym <whatever-arguments-you-would-pass-to-strip>
#
# strip_save_dsym is a wrapper around the standard strip utility.  Given an
# input Mach-O file, strip_save_dsym will save a copy of the file in a "fake"
# .dSYM bundle for debugging, and then call strip to strip the Mach-O file.
# Note that the .dSYM file is a "fake" in that it's not a self-contained
# .dSYM bundle, it just contains a copy of the original (unstripped) Mach-O
# file, and therefore contains references to object files on the filesystem.
# The generated .dSYM bundle is therefore unsuitable for debugging in the
# absence of these .o files.
#
# If a .dSYM already exists and has a newer timestamp than the Mach-O file,
# this utility does nothing.  That allows strip_save_dsym to be run on a file
# that has already been stripped without trashing the .dSYM.
#
# Rationale: the "right" way to generate dSYM bundles, dsymutil, is incredibly
# slow.  On the other hand, doing a file copy (which is really all that
# dsymutil does) is comparatively fast.  Since we usually just want to strip
# a release-mode executable but still be able to debug it, and we don't care
# so much about generating a hermetic dSYM bundle, we'll prefer the file copy.
# If a real dSYM is ever needed, it's still possible to create one by running
# dsymutil and pointing it at the original Mach-O file inside the "fake"
# bundle, provided that the object files are available.

import errno
import os
import re
import shutil
import subprocess
import sys
import time

# Returns a list of architectures contained in a Mach-O file.  The file can be
# a universal (fat) file, in which case there will be one list element for
# each contained architecture, or it can be a thin single-architecture Mach-O
# file, in which case the list will contain a single element identifying the
# architecture.  On error (unrecognized output from file, or file exiting with
# a nonzero status), prints a message to stderr and returns an empty list.
# Determines the architecture list by calling file.
def macho_archs(macho):
  macho_types = ["executable",
                 "dynamically linked shared library",
                 "bundle"]
  macho_types_re = "Mach-O (?:64-bit )?(?:" + "|".join(macho_types) + ")"

  # A thin file is described on a single line ending in the architecture.
  thin_re = re.compile("^%s (.*)$" % macho_types_re)
  # A universal file announces the number of architectures on the first line
  # and then describes each architecture on a line of its own.
  fat_re = re.compile(
      "^Mach-O universal binary with ([0-9]+) architectures$")
  fat_arch_re = re.compile(
      r"^.* \(for architecture (.*)\):\t%s .*$" % macho_types_re)

  file_cmd = subprocess.Popen(["/usr/bin/file", "-b", "--", macho],
                              stdout=subprocess.PIPE)

  archs = []

  type_line = file_cmd.stdout.readline()
  thin_match = thin_re.match(type_line)
  if thin_match:
    archs.append(thin_match.group(1))
  else:
    fat_match = fat_re.match(type_line)
    if fat_match:
      for _ in range(int(fat_match.group(1))):
        arch_match = fat_arch_re.match(file_cmd.stdout.readline())
        if arch_match:
          archs.append(arch_match.group(1))

  # Always reap the child, including for thin files, so that a failing file
  # invocation is reported as an error rather than silently trusted.
  exit_status = file_cmd.wait()
  file_cmd.stdout.close()
  if exit_status != 0:
    archs = []

  if not archs:
    sys.stderr.write("No architectures in %s\n" % macho)

  return archs

# Scans the lines of "otool -l" output in otool_lines (any iterable of lines)
# for the first LC_UUID load command and returns its UUID as a string in the
# traditional 8-4-4-4-12 form, with whatever letter case otool used.  Returns
# None if no LC_UUID load command is found or if it cannot be parsed.
def _lc_uuid_from_otool(otool_lines):
  # SCANNING is when nothing UUID-related has been seen yet.  LOAD_COMMAND is
  # entered after a load command begins, but it may not be an LC_UUID load
  # command.  WANT_CMDSIZE, WANT_UUID, and WANT_UUID_TAIL are intermediate
  # states while reading an LC_UUID command.  DONE is the terminal state for
  # a successful LC_UUID read.  FAILED is the error state.
  (SCANNING, LOAD_COMMAND, WANT_CMDSIZE, WANT_UUID, WANT_UUID_TAIL,
   DONE, FAILED) = range(7)

  load_command_re = re.compile("^Load command .*$")
  cmd_uuid_re = re.compile("^     cmd LC_UUID$")
  cmdsize_re = re.compile("^ cmdsize 24$")

  # The UUID display format changed in the version of otool shipping with
  # the Xcode 3.2.2 prerelease.  The new format is traditional:
  #    uuid 4D7135B2-9C56-C5F5-5F49-A994258E0955
  # and with Xcode 3.2.6, the line is indented one more space:
  #     uuid 4D7135B2-9C56-C5F5-5F49-A994258E0955
  # The old format, from cctools-750 and older's otool, breaks the UUID up
  # into a sequence of bytes spread over two lines:
  #    uuid 0x4d 0x71 0x35 0xb2 0x9c 0x56 0xc5 0xf5
  #         0x5f 0x49 0xa9 0x94 0x25 0x8e 0x09 0x55
  new_uuid_re = re.compile("^ {3,4}uuid (.{8}-.{4}-.{4}-.{4}-.{12})$")
  old_uuid_head_re = re.compile("^   uuid 0x(..) 0x(..) 0x(..) 0x(..) "
                                "0x(..) 0x(..) 0x(..) 0x(..)$")
  old_uuid_tail_re = re.compile("^        0x(..) 0x(..) 0x(..) 0x(..) "
                                "0x(..) 0x(..) 0x(..) 0x(..)$")

  state = SCANNING
  uuid = ""
  # Every line is consumed, even once DONE or FAILED has been reached, so
  # that a pipe feeding otool_lines is always read to the end.
  for line in otool_lines:
    if state == SCANNING:
      if load_command_re.match(line):
        state = LOAD_COMMAND
    elif state == LOAD_COMMAND:
      if cmd_uuid_re.match(line):
        state = WANT_CMDSIZE
      else:
        state = SCANNING
    elif state == WANT_CMDSIZE:
      if cmdsize_re.match(line):
        state = WANT_UUID
      else:
        state = FAILED
    elif state == WANT_UUID:
      new_match = new_uuid_re.match(line)
      if new_match:
        # The new format fits on one line, there is no second line to read.
        uuid = new_match.group(1)
        state = DONE
        continue
      head_match = old_uuid_head_re.match(line)
      if head_match:
        head = head_match.groups()
        uuid = "".join(head[0:4]) + "-" + \
               "".join(head[4:6]) + "-" + \
               "".join(head[6:8]) + "-"
        state = WANT_UUID_TAIL
      else:
        state = FAILED
    elif state == WANT_UUID_TAIL:
      tail_match = old_uuid_tail_re.match(line)
      if tail_match:
        tail = tail_match.groups()
        uuid += "".join(tail[0:2]) + "-" + "".join(tail[2:8])
        state = DONE
      else:
        state = FAILED

  if state == DONE:
    return uuid
  return None

# Returns a dictionary mapping architectures contained in the file as returned
# by macho_archs to the LC_UUID load command for that architecture, as an
# uppercase string.  Architectures with no LC_UUID load command, or for which
# otool exits with a nonzero status, are omitted from the dictionary.
# Determines the UUID value by calling otool.
def macho_uuids(macho):
  uuids = {}

  archs = macho_archs(macho)
  if not archs:
    # macho_archs has already printed a diagnostic in this case.
    return uuids

  for arch in archs:
    if arch == "":
      continue

    otool_cmd = subprocess.Popen(["/usr/bin/otool", "-arch", arch, "-l", "-",
                                  macho],
                                 stdout=subprocess.PIPE)
    uuid = _lc_uuid_from_otool(otool_cmd.stdout)

    # wait is called unconditionally so that the child is always reaped; a
    # parsed UUID is only trusted if otool itself reported success.
    if otool_cmd.wait() == 0 and uuid is not None:
      uuids[arch] = uuid.upper()

  if not uuids:
    sys.stderr.write("No UUIDs in %s\n" % macho)

  return uuids

# Works out where the .dSYM bundle for the Mach-O file macho should live,
# consulting the environment as well as the path itself, and returns that
# location as a string ending in ".dSYM".
def dsym_path(macho):
  env = os.environ

  # Without WRAPPER_NAME there is no bundle being built, so the .dSYM simply
  # sits beside the Mach-O file and is named after it.
  if "WRAPPER_NAME" not in env:
    return macho + ".dSYM"

  # If building a bundle, the .dSYM should be placed next to the bundle.  If
  # called from xcodebuild, WRAPPER_NAME will be set to the name of the
  # bundle.  When BUILT_PRODUCTS_DIR is also available, the bundle name is
  # resolved relative to that directory.
  wrapper = env["WRAPPER_NAME"]
  if "BUILT_PRODUCTS_DIR" in env:
    wrapper = os.path.join(env["BUILT_PRODUCTS_DIR"], wrapper)

  return wrapper + ".dSYM"

# Creates a fake .dSYM bundle at dsym for macho, a Mach-O image.  The
# architectures and UUIDs recorded in the bundle's Info.plist are read from
# macho by calling macho_uuids.  The bundle contains an unstripped copy of
# macho in Contents/Resources/DWARF.  Returns True on success.  Returns False,
# without writing anything, if no UUIDs could be determined.  Filesystem
# errors other than the bundle directories already existing are raised.
def make_fake_dsym(macho, dsym):
  # Imported locally because nothing else in this file needs it.
  from xml.sax.saxutils import escape

  uuids = macho_uuids(macho)
  if not uuids:
    return False

  dwarf_dir = os.path.join(dsym, "Contents", "Resources", "DWARF")
  dwarf_file = os.path.join(dwarf_dir, os.path.basename(macho))
  try:
    os.makedirs(dwarf_dir)
  except OSError as e:
    # An existing bundle directory is fine, its contents are overwritten.
    if e.errno != errno.EEXIST:
      raise
  shutil.copyfile(macho, dwarf_file)

  # info_template is the same as what dsymutil would have written, with the
  # addition of the fake_dsym key.
  info_template = \
'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
	<dict>
		<key>CFBundleDevelopmentRegion</key>
		<string>English</string>
		<key>CFBundleIdentifier</key>
		<string>com.apple.xcode.dsym.%(root_name)s</string>
		<key>CFBundleInfoDictionaryVersion</key>
		<string>6.0</string>
		<key>CFBundlePackageType</key>
		<string>dSYM</string>
		<key>CFBundleSignature</key>
		<string>????</string>
		<key>CFBundleShortVersionString</key>
		<string>1.0</string>
		<key>CFBundleVersion</key>
		<string>1</string>
		<key>dSYM_UUID</key>
		<dict>
%(uuid_dict)s		</dict>
		<key>fake_dsym</key>
		<true/>
	</dict>
</plist>
'''

  # whatever.dSYM without .dSYM.  The suffix is only removed when it is
  # actually present, so an unexpected name is never truncated.
  root_name = os.path.basename(dsym)
  if root_name.endswith(".dSYM"):
    root_name = root_name[:-len(".dSYM")]

  # Values are XML-escaped so that names containing characters such as "&"
  # or "<" still produce a well-formed property list.
  uuid_dict = "".join(
      "\t\t\t<key>%s</key>\n\t\t\t<string>%s</string>\n" %
      (escape(arch), escape(uuids[arch]))
      for arch in sorted(uuids))
  info_dict = {
    "root_name": escape(root_name),
    "uuid_dict": uuid_dict,
  }
  info_contents = info_template % info_dict
  info_file = os.path.join(dsym, "Contents", "Info.plist")
  # The with statement closes the file even if the write fails.
  with open(info_file, "w") as info_fd:
    info_fd.write(info_contents)

  return True

# For a Mach-O file, determines where the .dSYM bundle should be located.  If
# the bundle does not exist or has a modification time older than the Mach-O
# file, calls make_fake_dsym to create a fake .dSYM bundle there, then strips
# the Mach-O file and sets the modification time on the .dSYM bundle and Mach-O
# file to be identical.  If the bundle is already up to date, does nothing.
#
# strip is taken from SYSTEM_DEVELOPER_BIN_DIR if that is set in the
# environment and from /usr/bin otherwise, and is passed the arguments that
# this script itself was invoked with (sys.argv[1:]).
#
# Returns True on success or when there was nothing to do.  Returns False if
# the .dSYM bundle could not be created or if strip exited with a nonzero
# status; in the latter case the .dSYM bundle is removed again and the Mach-O
# file's modification time is left alone.
def strip_and_make_fake_dsym(macho):
  dsym = dsym_path(macho)
  macho_stat = os.stat(macho)
  dsym_stat = None
  try:
    dsym_stat = os.stat(dsym)
  except OSError as e:
    # A missing bundle just means that one needs to be created.
    if e.errno != errno.ENOENT:
      raise

  if dsym_stat is not None and dsym_stat.st_mtime >= macho_stat.st_mtime:
    # The bundle is at least as new as the Mach-O file, which has presumably
    # been stripped already.  Leave both alone.
    return True

  # Make a .dSYM bundle
  if not make_fake_dsym(macho, dsym):
    return False

  # Strip the Mach-O file
  strip_dir = os.environ.get("SYSTEM_DEVELOPER_BIN_DIR", "/usr/bin")
  strip_cmdline = [os.path.join(strip_dir, "strip")] + sys.argv[1:]

  stripped = False
  try:
    # Print the strip invocation so that it's obvious something is happening.
    # Flush so that the line appears ahead of any output from strip itself.
    print(" ".join(strip_cmdline))
    sys.stdout.flush()
    stripped = subprocess.call(strip_cmdline) == 0
  finally:
    # Don't leave behind a bundle for a file that was not stripped, whether
    # strip failed or could not be run at all.
    if not stripped:
      shutil.rmtree(dsym)

  if not stripped:
    # The bundle is gone, so there are no timestamps to synchronize.
    return False

  # Update modification time on the Mach-O file and .dSYM bundle
  now = time.time()
  os.utime(macho, (now, now))
  os.utime(dsym, (now, now))

  return True

# Entry point.  argv is a full argument vector including the program name; it
# defaults to sys.argv.  Picks the single file to be stripped out of the
# arguments and hands it to strip_and_make_fake_dsym.  Returns 0 on success
# and 1 on failure, including when no file or more than one file is named.
def main(argv=None):
  if argv is None:
    argv = sys.argv

  # strip has these switches accept an argument:
  switches_with_argument = ("-s", "-R", "-d", "-o", "-arch")

  # This only supports operating on one file at a time.  Look at the arguments
  # to strip to figure out what the source to be stripped is.  Arguments are
  # processed in the same way that strip does, although to reduce complexity,
  # this doesn't do all of the same checking as strip.  For example, strip
  # has no -Z switch and would treat -Z on the command line as an error.  For
  # the purposes this is needed for, that's fine.
  macho = None
  process_switches = True
  ignore_argument = False
  for arg in argv[1:]:
    if ignore_argument:
      # This is the value belonging to the preceding switch.
      ignore_argument = False
      continue
    if process_switches:
      if arg == "-":
        # A lone "-" ends switch processing; everything after it is a file.
        process_switches = False
      if arg in switches_with_argument:
        ignore_argument = True
      # startswith rather than arg[0] so that an empty argument is treated as
      # a (bogus) file name instead of raising IndexError.
      if arg.startswith("-"):
        continue
    if macho is not None:
      sys.stderr.write("Too many things to strip\n")
      return 1
    macho = arg

  if macho is None:
    sys.stderr.write("Nothing to strip\n")
    return 1

  if not strip_and_make_fake_dsym(macho):
    return 1

  return 0

# When run as a script, pass the process arguments to main and use its return
# value as the exit status.
if __name__ == "__main__":
  sys.exit(main(sys.argv))