普通文本  |  682行  |  20.17 KB

// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: Alfred Peng

#include <demangle.h>
#include <fcntl.h>
#include <gelf.h>
#include <link.h>
#include <sys/mman.h>
#include <stab.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <functional>
#include <map>
#include <vector>

#include "common/scoped_ptr.h"
#include "common/solaris/dump_symbols.h"
#include "common/solaris/file_id.h"
#include "common/solaris/guid_creator.h"

// This namespace contains helper functions.
namespace {

using std::make_pair;

#if defined(_LP64)
typedef Elf64_Sym   Elf_Sym;
#else
typedef Elf32_Sym   Elf_Sym;
#endif

// Symbol table entry from stabs. Sun CC specific.
struct slist {
  // String table index.
  unsigned int n_strx;
  // Stab type. 
  unsigned char n_type;
  char n_other;
  short n_desc;
  unsigned long n_value;
};

// Symbol table entry
struct SymbolEntry {
  // Offset from the start of the file.
  GElf_Addr offset;
  // Function size.
  GElf_Word size;
};

// Infomation of a line.
struct LineInfo {
  // Offset from start of the function.
  // Load from stab symbol.
  GElf_Off rva_to_func;
  // Offset from base of the loading binary.
  GElf_Off rva_to_base;
  // Size of the line.
  // The first line: equals to rva_to_func.
  // The other lines: the difference of rva_to_func of the line and
  // rva_to_func of the previous N_SLINE.
  uint32_t size;
  // Line number.
  uint32_t line_num;
};

// Information of a function.
struct FuncInfo {
  // Name of the function.
  const char *name;
  // Offset from the base of the loading address.
  GElf_Off rva_to_base;
  // Virtual address of the function.
  // Load from stab symbol.
  GElf_Addr addr;
  // Size of the function.
  // Equal to rva_to_func of the last function line.
  uint32_t size;
  // Total size of stack parameters.
  uint32_t stack_param_size;
  // Line information array.
  std::vector<struct LineInfo> line_info;
};

// Information of a source file.
struct SourceFileInfo {
  // Name of the source file.
  const char *name;
  // Starting address of the source file.
  GElf_Addr addr;
  // Id of the source file.
  int source_id;
  // Functions information.
  std::vector<struct FuncInfo> func_info;
};

struct CompareString {
  bool operator()(const char *s1, const char *s2) const {
    return strcmp(s1, s2) < 0;
  }
};

typedef std::map<const char *, struct SymbolEntry *, CompareString> SymbolMap;

// Information of a symbol table.
// This is the root of all types of symbol.
struct SymbolInfo {
  std::vector<struct SourceFileInfo> source_file_info;
  // Symbols information.
  SymbolMap symbol_entries;
};

// Stab section name.
const char *kStabName = ".stab";

// Stab str section name.
const char *kStabStrName = ".stabstr";

// Symtab section name.
const char *kSymtabName = ".symtab";

// Strtab section name.
const char *kStrtabName = ".strtab";

// Default buffer lenght for demangle.
const int demangleLen = 20000;

// Offset to the string table.
uint64_t stringOffset = 0;

// Update the offset to the start of the string index of the next
// object module for every N_ENDM stabs.
inline void RecalculateOffset(struct slist* cur_list, char *stabstr) {
  while ((--cur_list)->n_strx == 0) ;
  stringOffset += cur_list->n_strx;

  char *temp = stabstr + stringOffset;
  while (*temp != '\0') {
    ++stringOffset;
    ++temp;
  }
  // Skip the extra '\0'
  ++stringOffset;
}

// Demangle using demangle library on Solaris.
std::string Demangle(const char *mangled) {
  int status = 0;
  std::string str(mangled);
  char *demangled = (char *)malloc(demangleLen);

  if (!demangled) {
    fprintf(stderr, "no enough memory.\n");
    goto out;
  }

  if ((status = cplus_demangle(mangled, demangled, demangleLen)) ==
      DEMANGLE_ESPACE) {
    fprintf(stderr, "incorrect demangle.\n");
    goto out;
  }

  str = demangled;
  free(demangled);

out:
  return str; 
}

bool WriteFormat(int fd, const char *fmt, ...) {
  va_list list;
  char buffer[4096];
  ssize_t expected, written;
  va_start(list, fmt);
  vsnprintf(buffer, sizeof(buffer), fmt, list);
  expected = strlen(buffer);
  written = write(fd, buffer, strlen(buffer));
  va_end(list);
  return expected == written;
}

bool IsValidElf(const GElf_Ehdr *elf_header) {
  return memcmp(elf_header, ELFMAG, SELFMAG) == 0;
}

static bool FindSectionByName(Elf *elf, const char *name,
                              int shstrndx,
                              GElf_Shdr *shdr) {
  assert(name != NULL);

  if (strlen(name) == 0)
    return false;

  Elf_Scn *scn = NULL;

  while ((scn = elf_nextscn(elf, scn)) != NULL) {
    if (gelf_getshdr(scn, shdr) == (GElf_Shdr *)0) {
      fprintf(stderr, "failed to read section header: %s\n", elf_errmsg(0));
      return false;
    }

    const char *section_name = elf_strptr(elf, shstrndx, shdr->sh_name);
    if (!section_name) {
      fprintf(stderr, "Section name error: %s\n", elf_errmsg(-1));
      continue;
    }

    if (strcmp(section_name, name) == 0)
      return true;
  }

  return false;
}

// The parameter size is used for FPO-optimized code, and
// this is all tied up with the debugging data for Windows x86.
// Set it to 0 on Solaris.
int LoadStackParamSize(struct slist *list,
                       struct slist *list_end,
                       struct FuncInfo *func_info) {
  struct slist *cur_list = list;
  int step = 1;
  while (cur_list < list_end && cur_list->n_type == N_PSYM) {
    ++cur_list;
    ++step;
  }

  func_info->stack_param_size = 0;
  return step;
}

int LoadLineInfo(struct slist *list,
                 struct slist *list_end,
                 struct FuncInfo *func_info) {
  struct slist *cur_list = list;
  do {
    // Skip non line information.
    while (cur_list < list_end && cur_list->n_type != N_SLINE) {
      // Only exit when got another function, or source file, or end stab.
      if (cur_list->n_type == N_FUN || cur_list->n_type == N_SO ||
          cur_list->n_type == N_ENDM) {
        return cur_list - list;
      }
      ++cur_list;
    }
    struct LineInfo line;
    while (cur_list < list_end && cur_list->n_type == N_SLINE) {
      line.rva_to_func = cur_list->n_value;
      // n_desc is a signed short
      line.line_num = (unsigned short)cur_list->n_desc;
      func_info->line_info.push_back(line);
      ++cur_list;
    }
    if (cur_list == list_end && cur_list->n_type == N_ENDM)
      break;
  } while (list < list_end);

  return cur_list - list;
}

int LoadFuncSymbols(struct slist *list,
                    struct slist *list_end,
                    char *stabstr,
                    GElf_Word base,
                    struct SourceFileInfo *source_file_info) {
  struct slist *cur_list = list;
  assert(cur_list->n_type == N_SO);
  ++cur_list;

  source_file_info->func_info.clear();
  while (cur_list < list_end) {
    // Go until the function symbol.
    while (cur_list < list_end && cur_list->n_type != N_FUN) {
      if (cur_list->n_type == N_SO) {
        return cur_list - list;
      }
      ++cur_list;
      if (cur_list->n_type == N_ENDM)
        RecalculateOffset(cur_list, stabstr);
      continue;
    }
    while (cur_list->n_type == N_FUN) {
      struct FuncInfo func_info;
      memset(&func_info, 0, sizeof(func_info));
      func_info.name = stabstr + cur_list->n_strx + stringOffset;
      // The n_value field is always 0 from stab generated by Sun CC.
      // TODO(Alfred): Find the correct value.
      func_info.addr = cur_list->n_value;
      ++cur_list;
      if (cur_list->n_type == N_ENDM)
        RecalculateOffset(cur_list, stabstr);
      if (cur_list->n_type != N_ESYM && cur_list->n_type != N_ISYM &&
          cur_list->n_type != N_FUN) {
        // Stack parameter size.
        cur_list += LoadStackParamSize(cur_list, list_end, &func_info);
        // Line info.
        cur_list += LoadLineInfo(cur_list, list_end, &func_info);
      }
      if (cur_list < list_end && cur_list->n_type == N_ENDM)
        RecalculateOffset(cur_list, stabstr);
      // Functions in this module should have address bigger than the module
      // starting address.
      //
      // These two values are always 0 with Sun CC.
      // TODO(Alfred): Get the correct value or remove the condition statement.
      if (func_info.addr >= source_file_info->addr) {
        source_file_info->func_info.push_back(func_info);
      }
    }
  }
  return cur_list - list;
}

// Compute size and rva information based on symbols loaded from stab section.
bool ComputeSizeAndRVA(struct SymbolInfo *symbols) {
  std::vector<struct SourceFileInfo> *sorted_files =
    &(symbols->source_file_info);
  SymbolMap *symbol_entries = &(symbols->symbol_entries);
  for (size_t i = 0; i < sorted_files->size(); ++i) {
    struct SourceFileInfo &source_file = (*sorted_files)[i];
    std::vector<struct FuncInfo> *sorted_functions = &(source_file.func_info);
    int func_size = sorted_functions->size();

    for (size_t j = 0; j < func_size; ++j) {
      struct FuncInfo &func_info = (*sorted_functions)[j];
      int line_count = func_info.line_info.size();

      // Discard the ending part of the name.
      std::string func_name(func_info.name);
      std::string::size_type last_colon = func_name.find_first_of(':');
      if (last_colon != std::string::npos)
        func_name = func_name.substr(0, last_colon);

      // Fine the symbol offset from the loading address and size by name.
      SymbolMap::const_iterator it = symbol_entries->find(func_name.c_str());
      if (it->second) {
        func_info.rva_to_base = it->second->offset;
        func_info.size = (line_count == 0) ? 0 : it->second->size;
      } else {
        func_info.rva_to_base = 0;
        func_info.size = 0;
      }

      // Compute function and line size.
      for (size_t k = 0; k < line_count; ++k) {
        struct LineInfo &line_info = func_info.line_info[k];

        line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base;
        if (k == line_count - 1) {
          line_info.size = func_info.size - line_info.rva_to_func;
        } else {
          struct LineInfo &next_line = func_info.line_info[k + 1];
          line_info.size = next_line.rva_to_func - line_info.rva_to_func;
        }
      }  // for each line.
    }  // for each function.
  }  // for each source file.
  for (SymbolMap::iterator it = symbol_entries->begin();
       it != symbol_entries->end(); ++it) {
    free(it->second);
  }
  return true;
}

bool LoadAllSymbols(const GElf_Shdr *stab_section,
                    const GElf_Shdr *stabstr_section,
                    GElf_Word base,
                    struct SymbolInfo *symbols) {
  if (stab_section == NULL || stabstr_section == NULL)
    return false;

  char *stabstr = 
    reinterpret_cast<char *>(stabstr_section->sh_offset + base);
  struct slist *lists =
    reinterpret_cast<struct slist *>(stab_section->sh_offset + base);
  int nstab = stab_section->sh_size / sizeof(struct slist);
  int source_id = 0;

  // First pass, load all symbols from the object file.
  for (int i = 0; i < nstab; ) {
    int step = 1;
    struct slist *cur_list = lists + i;
    if (cur_list->n_type == N_SO) {
      // FUNC <address> <size> <param_stack_size> <function>
      struct SourceFileInfo source_file_info;
      source_file_info.name = stabstr + cur_list->n_strx + stringOffset;
      // The n_value field is always 0 from stab generated by Sun CC.
      // TODO(Alfred): Find the correct value.
      source_file_info.addr = cur_list->n_value;
      if (strchr(source_file_info.name, '.'))
        source_file_info.source_id = source_id++;
      else
        source_file_info.source_id = -1;
      step = LoadFuncSymbols(cur_list, lists + nstab - 1, stabstr,
                             base, &source_file_info);
      symbols->source_file_info.push_back(source_file_info);
    }
    i += step;
  }
  // Second pass, compute the size of functions and lines.
  return ComputeSizeAndRVA(symbols);
}

bool LoadSymbols(Elf *elf, GElf_Ehdr *elf_header, struct SymbolInfo *symbols,
                 void *obj_base) {
  GElf_Word base = reinterpret_cast<GElf_Word>(obj_base);

  const GElf_Shdr *sections =
    reinterpret_cast<GElf_Shdr *>(elf_header->e_shoff + base);
  GElf_Shdr stab_section;
  if (!FindSectionByName(elf, kStabName, elf_header->e_shstrndx,
                         &stab_section)) {
    fprintf(stderr, "Stab section not found.\n");
    return false;
  }
  GElf_Shdr stabstr_section;
  if (!FindSectionByName(elf, kStabStrName, elf_header->e_shstrndx,
                         &stabstr_section)) {
    fprintf(stderr, "Stabstr section not found.\n");
    return false;
  }
  GElf_Shdr symtab_section;
  if (!FindSectionByName(elf, kSymtabName, elf_header->e_shstrndx,
                         &symtab_section)) {
    fprintf(stderr, "Symtab section not found.\n");
    return false;
  }
  GElf_Shdr strtab_section;
  if (!FindSectionByName(elf, kStrtabName, elf_header->e_shstrndx,
                         &strtab_section)) {
    fprintf(stderr, "Strtab section not found.\n");
    return false;
  }

  Elf_Sym *symbol = (Elf_Sym *)((char *)base + symtab_section.sh_offset);
  for (int i = 0; i < symtab_section.sh_size/symtab_section.sh_entsize; ++i) {
    struct SymbolEntry *symbol_entry =
        (struct SymbolEntry *)malloc(sizeof(struct SymbolEntry));
    const char *name = reinterpret_cast<char *>(
        strtab_section.sh_offset + (GElf_Word)base + symbol->st_name);
    symbol_entry->offset = symbol->st_value;
    symbol_entry->size = symbol->st_size;
    symbols->symbol_entries.insert(make_pair(name, symbol_entry));
    ++symbol;
  }


  // Load symbols.
  return LoadAllSymbols(&stab_section, &stabstr_section, base, symbols);
}

bool WriteModuleInfo(int fd, GElf_Half arch, const std::string &obj_file) {
  const char *arch_name = NULL;
  if (arch == EM_386)
    arch_name = "x86";
  else if (arch == EM_X86_64)
    arch_name = "x86_64";
  else if (arch == EM_SPARC32PLUS)
    arch_name = "SPARC_32+";
  else {
    printf("Please add more ARCH support\n");
    return false;
  }

  unsigned char identifier[16];
  google_breakpad::FileID file_id(obj_file.c_str());
  if (file_id.ElfFileIdentifier(identifier)) {
    char identifier_str[40];
    file_id.ConvertIdentifierToString(identifier,
                                      identifier_str, sizeof(identifier_str));
    std::string filename = obj_file;
    size_t slash_pos = obj_file.find_last_of("/");
    if (slash_pos != std::string::npos)
      filename = obj_file.substr(slash_pos + 1);
    return WriteFormat(fd, "MODULE solaris %s %s %s\n", arch_name,
                       identifier_str, filename.c_str());
  }
  return false;
}

bool WriteSourceFileInfo(int fd, const struct SymbolInfo &symbols) {
  for (size_t i = 0; i < symbols.source_file_info.size(); ++i) {
    if (symbols.source_file_info[i].source_id != -1) {
      const char *name = symbols.source_file_info[i].name;
      if (!WriteFormat(fd, "FILE %d %s\n",
                       symbols.source_file_info[i].source_id, name))
        return false;
    }
  }
  return true;
}

bool WriteOneFunction(int fd, int source_id,
                      const struct FuncInfo &func_info){
  // Discard the ending part of the name.
  std::string func_name(func_info.name);
  std::string::size_type last_colon = func_name.find_last_of(':');
  if (last_colon != std::string::npos)
    func_name = func_name.substr(0, last_colon);
  func_name = Demangle(func_name.c_str());

  if (func_info.size <= 0)
    return true;

  // rva_to_base could be unsigned long(32 bit) or unsigned long long(64 bit).
  if (WriteFormat(fd, "FUNC %llx %x %d %s\n",
                  (long long)func_info.rva_to_base,
                  func_info.size,
                  func_info.stack_param_size,
                  func_name.c_str())) {
    for (size_t i = 0; i < func_info.line_info.size(); ++i) {
      const struct LineInfo &line_info = func_info.line_info[i];
      if (line_info.line_num == 0)
        return true;
      if (!WriteFormat(fd, "%llx %x %d %d\n",
                       (long long)line_info.rva_to_base,
                       line_info.size,
                       line_info.line_num,
                       source_id))
        return false;
    }
    return true;
  }
  return false;
}

bool WriteFunctionInfo(int fd, const struct SymbolInfo &symbols) {
  for (size_t i = 0; i < symbols.source_file_info.size(); ++i) {
    const struct SourceFileInfo &file_info = symbols.source_file_info[i];
    for (size_t j = 0; j < file_info.func_info.size(); ++j) {
      const struct FuncInfo &func_info = file_info.func_info[j];
      if (!WriteOneFunction(fd, file_info.source_id, func_info))
        return false;
    }
  }
  return true;
}

bool DumpStabSymbols(int fd, const struct SymbolInfo &symbols) {
  return WriteSourceFileInfo(fd, symbols) &&
    WriteFunctionInfo(fd, symbols);
}

//
// FDWrapper
//
// Wrapper class to make sure opened file is closed.
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) :
    fd_(fd) {
    }
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }
  int get() {
    return fd_;
  }
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }
 private:
  int fd_;
};

//
// MmapWrapper
//
// Wrapper class to make sure mapped regions are unmapped.
//
class MmapWrapper {
 public:
  MmapWrapper(void *mapped_address, size_t mapped_size) :
    base_(mapped_address), size_(mapped_size) {
  }
  ~MmapWrapper() {
    if (base_ != NULL) {
      assert(size_ > 0);
      munmap((char *)base_, size_);
    }
  }
  void release() {
    base_ = NULL;
    size_ = 0;
  }

 private:
  void *base_;
  size_t size_;
};

}  // namespace

namespace google_breakpad {

class AutoElfEnder {
 public:
  AutoElfEnder(Elf *elf) : elf_(elf) {}
  ~AutoElfEnder() { if (elf_) elf_end(elf_); }
 private:
  Elf *elf_;
};


bool DumpSymbols::WriteSymbolFile(const std::string &obj_file, int sym_fd) {
  if (elf_version(EV_CURRENT) == EV_NONE) {
    fprintf(stderr, "elf_version() failed: %s\n", elf_errmsg(0));
    return false;
  }

  int obj_fd = open(obj_file.c_str(), O_RDONLY);
  if (obj_fd < 0)
    return false;
  FDWrapper obj_fd_wrapper(obj_fd);
  struct stat st;
  if (fstat(obj_fd, &st) != 0 && st.st_size <= 0)
    return false;
  void *obj_base = mmap(NULL, st.st_size,
                        PROT_READ, MAP_PRIVATE, obj_fd, 0);
  if (obj_base == MAP_FAILED)
    return false;
  MmapWrapper map_wrapper(obj_base, st.st_size);
  GElf_Ehdr elf_header;
  Elf *elf = elf_begin(obj_fd, ELF_C_READ, NULL);
  AutoElfEnder elfEnder(elf);

  if (gelf_getehdr(elf, &elf_header) == (GElf_Ehdr *)NULL) {
    fprintf(stderr, "failed to read elf header: %s\n", elf_errmsg(-1));
    return false;
  }

  if (!IsValidElf(&elf_header)) {
    fprintf(stderr, "header magic doesn't match\n");
    return false;
  }
  struct SymbolInfo symbols;
  if (!LoadSymbols(elf, &elf_header, &symbols, obj_base))
    return false;
  // Write to symbol file.
  if (WriteModuleInfo(sym_fd, elf_header.e_machine, obj_file) &&
      DumpStabSymbols(sym_fd, symbols))
    return true;

  return false;
}

}  // namespace google_breakpad