/*--------------------------------------------------------------------*/ /*--- Reading of syms & debug info from Mach-O files. ---*/ /*--- readmacho.c ---*/ /*--------------------------------------------------------------------*/ /* This file is part of Valgrind, a dynamic binary instrumentation framework. Copyright (C) 2005-2012 Apple Inc. Greg Parker gparker@apple.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. The GNU General Public License is contained in the file COPYING. */ #if defined(VGO_darwin) #include "pub_core_basics.h" #include "pub_core_vki.h" #include "pub_core_libcbase.h" #include "pub_core_libcprint.h" #include "pub_core_libcassert.h" #include "pub_core_libcfile.h" #include "pub_core_libcproc.h" #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */ #include "pub_core_machine.h" /* VG_ELF_CLASS */ #include "pub_core_options.h" #include "pub_core_oset.h" #include "pub_core_tooliface.h" /* VG_(needs) */ #include "pub_core_xarray.h" #include "pub_core_clientstate.h" #include "pub_core_debuginfo.h" #include "priv_d3basics.h" #include "priv_misc.h" #include "priv_tytypes.h" #include "priv_storage.h" #include "priv_readmacho.h" #include "priv_readdwarf.h" #include "priv_readdwarf3.h" #include "priv_readstabs.h" /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ #include <mach-o/loader.h> #include <mach-o/nlist.h> #include <mach-o/fat.h> /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ #if VG_WORDSIZE == 4 # define MAGIC MH_MAGIC # define MACH_HEADER mach_header # define LC_SEGMENT_CMD LC_SEGMENT # define SEGMENT_COMMAND segment_command # define SECTION section # define NLIST nlist #else # define MAGIC MH_MAGIC_64 # define MACH_HEADER mach_header_64 # define LC_SEGMENT_CMD LC_SEGMENT_64 # define SEGMENT_COMMAND segment_command_64 # define SECTION section_64 # define NLIST nlist_64 #endif /*------------------------------------------------------------*/ /*--- ---*/ /*--- Mach-O file mapping/unmapping helpers ---*/ /*--- ---*/ /*------------------------------------------------------------*/ typedef struct { /* These two describe the entire mapped-in ("primary") image, fat headers, kitchen sink, whatnot: the entire file. The image is mapped into img[0 .. img_szB-1]. */ UChar* img; SizeT img_szB; /* These two describe the Mach-O object of interest, which is presumably somewhere inside the primary image. map_image_aboard() below, which generates this info, will carefully check that the macho_ fields denote a section of memory that falls entirely inside img[0 .. img_szB-1]. */ UChar* macho_img; SizeT macho_img_szB; } ImageInfo; Bool ML_(is_macho_object_file)( const void* buf, SizeT szB ) { /* (JRS: the Mach-O headers might not be in this mapped data, because we only mapped a page for this initial check, or at least not very much, and what's at the start of the file is in general a so-called fat header. The Mach-O object we're interested in could be arbitrarily far along the image, and so we can't assume its header will fall within this page.) */ /* But we can say that either it's a fat object, in which case it begins with a fat header, or it's unadorned Mach-O, in which case it starts with a normal header. At least do what checks we can to establish whether or not we're looking at something sane. */ const struct fat_header* fh_be = buf; const struct MACH_HEADER* mh = buf; vg_assert(buf); if (szB < sizeof(struct fat_header)) return False; if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC) return True; if (szB < sizeof(struct MACH_HEADER)) return False; if (mh->magic == MAGIC) return True; return False; } /* Unmap an image mapped in by map_image_aboard. */ static void unmap_image ( /*MOD*/ImageInfo* ii ) { SysRes sres; vg_assert(ii->img); vg_assert(ii->img_szB > 0); sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB ); /* Do we care if this fails? I suppose so; it would indicate some fairly serious snafu with the mapping of the file. */ vg_assert( !sr_isError(sres) ); VG_(memset)(ii, 0, sizeof(*ii)); } /* Map a given fat or thin object aboard, find the thin part if necessary, do some checks, and write details of both the fat and thin parts into *ii. Returns False (and leaves the file unmapped) on failure. Guarantees to return pointers to a valid(ish) Mach-O image if it succeeds. */ static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */ /*OUT*/ImageInfo* ii, UChar* filename ) { VG_(memset)(ii, 0, sizeof(*ii)); /* First off, try to map the thing in. */ { SizeT size; SysRes fd, sres; struct vg_stat stat_buf; fd = VG_(stat)(filename, &stat_buf); if (sr_isError(fd)) { ML_(symerr)(di, True, "Can't stat image (to determine its size)?!"); return False; } size = stat_buf.size; fd = VG_(open)(filename, VKI_O_RDONLY, 0); if (sr_isError(fd)) { ML_(symerr)(di, True, "Can't open image to read symbols?!"); return False; } sres = VG_(am_mmap_file_float_valgrind) ( size, VKI_PROT_READ, sr_Res(fd), 0 ); if (sr_isError(sres)) { ML_(symerr)(di, True, "Can't mmap image to read symbols?!"); return False; } VG_(close)(sr_Res(fd)); ii->img = (UChar*)sr_Res(sres); ii->img_szB = size; } /* Now it's mapped in and we have .img and .img_szB set. Look for the embedded Mach-O object. If not findable, unmap and fail. */ { struct fat_header* fh_be; struct fat_header fh; struct MACH_HEADER* mh; // Assume initially that we have a thin image, and update // these if it turns out to be fat. ii->macho_img = ii->img; ii->macho_img_szB = ii->img_szB; // Check for fat header. if (ii->img_szB < sizeof(struct fat_header)) { ML_(symerr)(di, True, "Invalid Mach-O file (0 too small)."); goto unmap_and_fail; } // Fat header is always BIG-ENDIAN fh_be = (struct fat_header *)ii->img; fh.magic = VG_(ntohl)(fh_be->magic); fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch); if (fh.magic == FAT_MAGIC) { // Look for a good architecture. struct fat_arch *arch_be; struct fat_arch arch; Int f; if (ii->img_szB < sizeof(struct fat_header) + fh.nfat_arch * sizeof(struct fat_arch)) { ML_(symerr)(di, True, "Invalid Mach-O file (1 too small)."); goto unmap_and_fail; } for (f = 0, arch_be = (struct fat_arch *)(fh_be+1); f < fh.nfat_arch; f++, arch_be++) { Int cputype; # if defined(VGA_ppc) cputype = CPU_TYPE_POWERPC; # elif defined(VGA_ppc64) cputype = CPU_TYPE_POWERPC64; # elif defined(VGA_x86) cputype = CPU_TYPE_X86; # elif defined(VGA_amd64) cputype = CPU_TYPE_X86_64; # else # error "unknown architecture" # endif arch.cputype = VG_(ntohl)(arch_be->cputype); arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype); arch.offset = VG_(ntohl)(arch_be->offset); arch.size = VG_(ntohl)(arch_be->size); if (arch.cputype == cputype) { if (ii->img_szB < arch.offset + arch.size) { ML_(symerr)(di, True, "Invalid Mach-O file (2 too small)."); goto unmap_and_fail; } ii->macho_img = ii->img + arch.offset; ii->macho_img_szB = arch.size; break; } } if (f == fh.nfat_arch) { ML_(symerr)(di, True, "No acceptable architecture found in fat file."); goto unmap_and_fail; } } /* Sanity check what we found. */ /* assured by logic above */ vg_assert(ii->img_szB >= sizeof(struct fat_header)); if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) { ML_(symerr)(di, True, "Invalid Mach-O file (3 too small)."); goto unmap_and_fail; } if (ii->macho_img_szB > ii->img_szB) { ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat)."); goto unmap_and_fail; } if (ii->macho_img >= ii->img && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) { /* thin entirely within fat, as expected */ } else { ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat)."); goto unmap_and_fail; } mh = (struct MACH_HEADER *)ii->macho_img; if (mh->magic != MAGIC) { ML_(symerr)(di, True, "Invalid Mach-O file (bad magic)."); goto unmap_and_fail; } if (ii->macho_img_szB < sizeof(struct MACH_HEADER) + mh->sizeofcmds) { ML_(symerr)(di, True, "Invalid Mach-O file (4 too small)."); goto unmap_and_fail; } } vg_assert(ii->img); vg_assert(ii->macho_img); vg_assert(ii->img_szB > 0); vg_assert(ii->macho_img_szB > 0); vg_assert(ii->macho_img >= ii->img); vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB); return True; /* success */ /*NOTREACHED*/ unmap_and_fail: unmap_image(ii); return False; /* bah! */ } /*------------------------------------------------------------*/ /*--- ---*/ /*--- Mach-O symbol table reading ---*/ /*--- ---*/ /*------------------------------------------------------------*/ /* Read a symbol table (nlist). Add the resulting candidate symbols to 'syms'; the caller will post-process them and hand them off to ML_(addSym) itself. */ static void read_symtab( /*OUT*/XArray* /* DiSym */ syms, struct _DebugInfo* di, struct NLIST* o_symtab, UInt o_symtab_count, UChar* o_strtab, UInt o_strtab_sz ) { Int i; Addr sym_addr; DiSym disym; UChar* name; static UChar* s_a_t_v = NULL; /* do not make non-static */ for (i = 0; i < o_symtab_count; i++) { struct NLIST *nl = o_symtab+i; if ((nl->n_type & N_TYPE) == N_SECT) { sym_addr = di->text_bias + nl->n_value; /*} else if ((nl->n_type & N_TYPE) == N_ABS) { GrP fixme don't ignore absolute symbols? sym_addr = nl->n_value; */ } else { continue; } if (di->trace_symtab) VG_(printf)("nlist raw: avma %010lx %s\n", sym_addr, o_strtab + nl->n_un.n_strx ); /* If no part of the symbol falls within the mapped range, ignore it. */ if (sym_addr <= di->text_avma || sym_addr >= di->text_avma+di->text_size) { continue; } /* skip names which point outside the string table; following these risks segfaulting Valgrind */ name = o_strtab + nl->n_un.n_strx; if (name < o_strtab || name >= o_strtab + o_strtab_sz) continue; /* skip nameless symbols; these appear to be common, but useless */ if (*name == 0) continue; disym.addr = sym_addr; disym.tocptr = 0; disym.pri_name = ML_(addStr)(di, name, -1); disym.sec_names = NULL; disym.size = // let canonicalize fix it di->text_avma+di->text_size - sym_addr; disym.isText = True; disym.isIFunc = False; // Lots of user function names get prepended with an underscore. Eg. the // function 'f' becomes the symbol '_f'. And the "below main" // function is called "start". So we skip the leading underscore, and // if we see 'start' and --show-below-main=no, we rename it as // "start_according_to_valgrind", which makes it easy to spot later // and display as "(below main)". if (disym.pri_name[0] == '_') { disym.pri_name++; } else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) { if (s_a_t_v == NULL) s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1); vg_assert(s_a_t_v); disym.pri_name = s_a_t_v; } vg_assert(disym.pri_name); VG_(addToXA)( syms, &disym ); } } /* Compare DiSyms by their start address, and for equal addresses, use the primary name as a secondary sort key. */ static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 ) { DiSym* s1 = (DiSym*)v1; DiSym* s2 = (DiSym*)v2; if (s1->addr < s2->addr) return -1; if (s1->addr > s2->addr) return 1; return VG_(strcmp)(s1->pri_name, s2->pri_name); } /* 'cand' is a bunch of candidate symbols obtained by reading nlist-style symbol table entries. Their ends may overlap, so sort them and truncate them accordingly. The code in this routine is copied almost verbatim from read_symbol_table() in readxcoff.c. */ static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms, Bool trace_symtab ) { Word nsyms, i, j, k, m; nsyms = VG_(sizeXA)(syms); VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name); VG_(sortXA)(syms); /* We only know for sure the start addresses (actual VMAs) of symbols, and an overestimation of their end addresses. So sort by start address, then clip each symbol so that its end address does not overlap with the next one along. There is a small refinement: if a group of symbols have the same address, treat them as a group: find the next symbol along that has a higher start address, and clip all of the group accordingly. This clips the group as a whole so as not to overlap following symbols. This leaves prefersym() in storage.c, which is not nlist-specific, to later decide which of the symbols in the group to keep. Another refinement is that we need to get rid of symbols which, after clipping, have identical starts, ends, and names. So the sorting uses the name as a secondary key. */ for (i = 0; i < nsyms; i++) { for (k = i+1; k < nsyms && ((DiSym*)VG_(indexXA)(syms,i))->addr == ((DiSym*)VG_(indexXA)(syms,k))->addr; k++) ; /* So now [i .. k-1] is a group all with the same start address. Clip their ending addresses so they don't overlap [k]. In the normal case (no overlaps), k == i+1. */ if (k < nsyms) { DiSym* next = (DiSym*)VG_(indexXA)(syms,k); for (m = i; m < k; m++) { DiSym* here = (DiSym*)VG_(indexXA)(syms,m); vg_assert(here->addr < next->addr); if (here->addr + here->size > next->addr) here->size = next->addr - here->addr; } } i = k-1; vg_assert(i <= nsyms); } j = 0; if (nsyms > 0) { j = 1; for (i = 1; i < nsyms; i++) { DiSym *s_j1, *s_j, *s_i; vg_assert(j <= i); s_j1 = (DiSym*)VG_(indexXA)(syms, j-1); s_j = (DiSym*)VG_(indexXA)(syms, j); s_i = (DiSym*)VG_(indexXA)(syms, i); if (s_i->addr != s_j1->addr || s_i->size != s_j1->size || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) { *s_j = *s_i; j++; } else { if (trace_symtab) VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n", s_i->addr, s_i->pri_name ); } } } vg_assert(j >= 0 && j <= nsyms); VG_(dropTailXA)(syms, nsyms - j); } /*------------------------------------------------------------*/ /*--- ---*/ /*--- Mach-O top-level processing ---*/ /*--- ---*/ /*------------------------------------------------------------*/ #if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY) #define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/" #endif static Bool file_exists_p(const Char *path) { struct vg_stat sbuf; SysRes res = VG_(stat)(path, &sbuf); return sr_isError(res) ? False : True; } /* Search for an existing dSYM file as a possible separate debug file. Adapted from gdb. */ static Char * find_separate_debug_file (const Char *executable_name) { Char *basename_str; Char *dot_ptr; Char *slash_ptr; Char *dsymfile; /* Make sure the object file name itself doesn't contain ".dSYM" in it or we will end up with an infinite loop where after we add a dSYM symbol file, it will then enter this function asking if there is a debug file for the dSYM file itself. */ if (VG_(strcasestr) (executable_name, ".dSYM") == NULL) { /* Check for the existence of a .dSYM file for a given executable. */ basename_str = VG_(basename) (executable_name); dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile", VG_(strlen) (executable_name) + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY) + VG_(strlen) (basename_str) + 1 ); /* First try for the dSYM in the same directory as the original file. */ VG_(strcpy) (dsymfile, executable_name); VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY); VG_(strcat) (dsymfile, basename_str); if (file_exists_p (dsymfile)) return dsymfile; /* Now search for any parent directory that has a '.' in it so we can find Mac OS X applications, bundles, plugins, and any other kinds of files. Mac OS X application bundles wil have their program in "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with ".bundle" or ".plugin" for other types of bundles). So we look for any prior '.' character and try appending the apple dSYM extension and subdirectory and see if we find an existing dSYM file (in the above MyApp example the dSYM would be at either: "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */ VG_(strcpy) (dsymfile, VG_(dirname) (executable_name)); while ((dot_ptr = VG_(strrchr) (dsymfile, '.'))) { /* Find the directory delimiter that follows the '.' character since we now look for a .dSYM that follows any bundle extension. */ slash_ptr = VG_(strchr) (dot_ptr, '/'); if (slash_ptr) { /* NULL terminate the string at the '/' character and append the path down to the dSYM file. */ *slash_ptr = '\0'; VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY); VG_(strcat) (slash_ptr, basename_str); if (file_exists_p (dsymfile)) return dsymfile; } /* NULL terminate the string at the '.' character and append the path down to the dSYM file. */ *dot_ptr = '\0'; VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY); VG_(strcat) (dot_ptr, basename_str); if (file_exists_p (dsymfile)) return dsymfile; /* NULL terminate the string at the '.' locatated by the strrchr() function again. */ *dot_ptr = '\0'; /* We found a previous extension '.' character and did not find a dSYM file so now find previous directory delimiter so we don't try multiple times on a file name that may have a version number in it such as "/some/path/MyApp.6.0.4.app". */ slash_ptr = VG_(strrchr) (dsymfile, '/'); if (!slash_ptr) break; /* NULL terminate the string at the previous directory character and search again. */ *slash_ptr = '\0'; } } return NULL; } static UChar *getsectdata(UChar* base, SizeT size, Char *segname, Char *sectname, /*OUT*/Word *sect_size) { struct MACH_HEADER *mh = (struct MACH_HEADER *)base; struct load_command *cmd; Int c; for (c = 0, cmd = (struct load_command *)(mh+1); c < mh->ncmds; c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd)) { if (cmd->cmd == LC_SEGMENT_CMD) { struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd; if (0 == VG_(strncmp(seg->segname, segname, sizeof(seg->segname)))) { struct SECTION *sects = (struct SECTION *)(seg+1); Int s; for (s = 0; s < seg->nsects; s++) { if (0 == VG_(strncmp(sects[s].sectname, sectname, sizeof(sects[s].sectname)))) { if (sect_size) *sect_size = sects[s].size; return (UChar *)(base + sects[s].offset); } } } } } if (sect_size) *sect_size = 0; return 0; } /* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */ static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid ) { Word i; UChar* img = (UChar*)imgA; UChar first = uuid[0]; if (n_img < 16) return False; for (i = 0; i < n_img-16; i++) { if (img[i] != first) continue; if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 )) return True; } return False; } /* Heuristic kludge: return True if this looks like an installed standard library; hence we shouldn't consider automagically running dsymutil on it. */ static Bool is_systemish_library_name ( UChar* name ) { vg_assert(name); if (0 == VG_(strncasecmp)(name, "/usr/", 5) || 0 == VG_(strncasecmp)(name, "/bin/", 5) || 0 == VG_(strncasecmp)(name, "/sbin/", 6) || 0 == VG_(strncasecmp)(name, "/opt/", 5) || 0 == VG_(strncasecmp)(name, "/sw/", 4) || 0 == VG_(strncasecmp)(name, "/System/", 8) || 0 == VG_(strncasecmp)(name, "/Library/", 9) || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) { return True; } else { return False; } } Bool ML_(read_macho_debug_info)( struct _DebugInfo* di ) { struct symtab_command *symcmd = NULL; struct dysymtab_command *dysymcmd = NULL; HChar* dsymfilename = NULL; Bool have_uuid = False; UChar uuid[16]; ImageInfo ii; /* main file */ ImageInfo iid; /* auxiliary .dSYM file */ Bool ok; Word i; struct _DebugInfoMapping* rx_map = NULL; struct _DebugInfoMapping* rw_map = NULL; /* mmap the object file to look for di->soname and di->text_bias and uuid and nlist and STABS */ /* This should be ensured by our caller (that we're in the accept state). */ vg_assert(di->fsm.have_rx_map); vg_assert(di->fsm.have_rw_map); for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) { struct _DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i); if (map->rx && !rx_map) rx_map = map; if (map->rw && !rw_map) rw_map = map; if (rx_map && rw_map) break; } vg_assert(rx_map); vg_assert(rw_map); if (VG_(clo_verbosity) > 1) VG_(message)(Vg_DebugMsg, "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename, rx_map->avma, rw_map->avma ); VG_(memset)(&ii, 0, sizeof(ii)); VG_(memset)(&iid, 0, sizeof(iid)); VG_(memset)(&uuid, 0, sizeof(uuid)); ok = map_image_aboard( di, &ii, di->fsm.filename ); if (!ok) goto fail; vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0); /* Poke around in the Mach-O header, to find some important stuff. */ // Find LC_SYMTAB and LC_DYSYMTAB, if present. // Read di->soname from LC_ID_DYLIB if present, // or from LC_ID_DYLINKER if present, // or use "NONE". // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT // Get uuid for later dsym search di->text_bias = 0; { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img; struct load_command *cmd; Int c; for (c = 0, cmd = (struct load_command *)(mh+1); c < mh->ncmds; c++, cmd = (struct load_command *)(cmd->cmdsize + (unsigned long)cmd)) { if (cmd->cmd == LC_SYMTAB) { symcmd = (struct symtab_command *)cmd; } else if (cmd->cmd == LC_DYSYMTAB) { dysymcmd = (struct dysymtab_command *)cmd; } else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) { // GrP fixme bundle? struct dylib_command *dcmd = (struct dylib_command *)cmd; UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd; UChar *soname = VG_(strrchr)(dylibname, '/'); if (!soname) soname = dylibname; else soname++; di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname", soname); } else if (cmd->cmd==LC_ID_DYLINKER && mh->filetype==MH_DYLINKER) { struct dylinker_command *dcmd = (struct dylinker_command *)cmd; UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd; UChar *soname = VG_(strrchr)(dylinkername, '/'); if (!soname) soname = dylinkername; else soname++; di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername", soname); } // A comment from Julian about why varinfo[35] fail: // // My impression is, from comparing the output of otool -l for these // executables with the logic in ML_(read_macho_debug_info), // specifically the part that begins "else if (cmd->cmd == // LC_SEGMENT_CMD) {", that it's a complete hack which just happens // to work ok for text symbols. In particular, it appears to assume // that in a "struct load_command" of type LC_SEGMENT_CMD, the first // "struct SEGMENT_COMMAND" inside it is going to contain the info we // need. However, otool -l shows, and also the Apple docs state, // that a struct load_command may contain an arbitrary number of // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely // snarf the first. But I'm not sure about this. // // The "Try for __DATA" block below simply adds acquisition of data // svma/bias values using the same assumption. It also needs // (probably) to deal with bss sections, but I don't understand how // this all ties together really, so it requires further study. // // If you can get your head around the relationship between MachO // segments, sections and load commands, this might be relatively // easy to fix properly. // // Basically we need to come up with plausible numbers for di-> // {text,data,bss}_{avma,svma}, from which the _bias numbers are // then trivially derived. Then I think the debuginfo reader should // work pretty well. else if (cmd->cmd == LC_SEGMENT_CMD) { struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd; /* Try for __TEXT */ if (!di->text_present && 0 == VG_(strcmp)(seg->segname, "__TEXT") /* DDD: is the next line a kludge? -- JRS */ && seg->fileoff == 0 && seg->filesize != 0) { di->text_present = True; di->text_svma = (Addr)seg->vmaddr; di->text_avma = rx_map->avma; di->text_size = seg->vmsize; di->text_bias = di->text_avma - di->text_svma; /* Make the _debug_ values be the same as the svma/bias for the primary object, since there is no secondary (debuginfo) object, but nevertheless downstream biasing of Dwarf3 relies on the _debug_ values. */ di->text_debug_svma = di->text_svma; di->text_debug_bias = di->text_bias; } /* Try for __DATA */ if (!di->data_present && 0 == VG_(strcmp)(seg->segname, "__DATA") /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) { di->data_present = True; di->data_svma = (Addr)seg->vmaddr; di->data_avma = rw_map->avma; di->data_size = seg->vmsize; di->data_bias = di->data_avma - di->data_svma; di->data_debug_svma = di->data_svma; di->data_debug_bias = di->data_bias; } } else if (cmd->cmd == LC_UUID) { struct uuid_command *uuid_cmd = (struct uuid_command *)cmd; VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid)); have_uuid = True; } } } if (!di->soname) { di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE"); } if (di->trace_symtab) { VG_(printf)("\n"); VG_(printf)("SONAME = %s\n", di->soname); VG_(printf)("\n"); } /* Now we have the base object to hand. Read symbols from it. */ if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) { /* Read nlist symbol table */ struct NLIST *syms; UChar *strs; XArray* /* DiSym */ candSyms = NULL; Word nCandSyms; if (ii.macho_img_szB < symcmd->stroff + symcmd->strsize || ii.macho_img_szB < symcmd->symoff + symcmd->nsyms * sizeof(struct NLIST)) { ML_(symerr)(di, False, "Invalid Mach-O file (5 too small)."); goto fail; } if (dysymcmd->ilocalsym + dysymcmd->nlocalsym > symcmd->nsyms || dysymcmd->iextdefsym + dysymcmd->nextdefsym > symcmd->nsyms) { ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table)."); goto fail; } syms = (struct NLIST *)(ii.macho_img + symcmd->symoff); strs = (UChar *)(ii.macho_img + symcmd->stroff); if (VG_(clo_verbosity) > 1) VG_(message)(Vg_DebugMsg, " reading syms from primary file (%d %d)\n", dysymcmd->nextdefsym, dysymcmd->nlocalsym ); /* Read candidate symbols into 'candSyms', so we can truncate overlapping ends and generally tidy up, before presenting them to ML_(addSym). */ candSyms = VG_(newXA)( ML_(dinfo_zalloc), "di.readmacho.candsyms.1", ML_(dinfo_free), sizeof(DiSym) ); vg_assert(candSyms); // extern symbols read_symtab(candSyms, di, syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym, strs, symcmd->strsize); // static and private_extern symbols read_symtab(candSyms, di, syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym, strs, symcmd->strsize); /* tidy up the cand syms -- trim overlapping ends. May resize candSyms. */ tidy_up_cand_syms( candSyms, di->trace_symtab ); /* and finally present them to ML_(addSym) */ nCandSyms = VG_(sizeXA)( candSyms ); for (i = 0; i < nCandSyms; i++) { DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i ); vg_assert(cand->pri_name != NULL); vg_assert(cand->sec_names == NULL); if (di->trace_symtab) VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n", cand->addr, cand->addr + cand->size - 1, cand->pri_name ); ML_(addSym)( di, cand ); } VG_(deleteXA)( candSyms ); } /* If there's no UUID in the primary, don't even bother to try and read any DWARF, since we won't be able to verify it matches. Our policy is not to load debug info unless we can verify that it matches the primary. Just declare success at this point. And don't complain to the user, since that would cause us to complain on objects compiled without -g. (Some versions of XCode are observed to omit a UUID entry for object linked(?) without -g. Others don't appear to omit it.) */ if (!have_uuid) goto success; /* mmap the dSYM file to look for DWARF debug info. If successful, use the .macho_img and .macho_img_szB in iid. */ dsymfilename = find_separate_debug_file( di->fsm.filename ); /* Try to load it. */ if (dsymfilename) { Bool valid; if (VG_(clo_verbosity) > 1) VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename); ok = map_image_aboard( di, &iid, dsymfilename ); if (!ok) goto fail; /* check it has the right uuid. */ vg_assert(have_uuid); valid = iid.macho_img && iid.macho_img_szB > 0 && check_uuid_matches( (Addr)iid.macho_img, iid.macho_img_szB, uuid ); if (valid) goto read_the_dwarf; if (VG_(clo_verbosity) > 1) VG_(message)(Vg_DebugMsg, " dSYM does not have " "correct UUID (out of date?)\n"); } /* There was no dsym file, or it doesn't match. We'll have to try regenerating it, unless --dsymutil=no, in which case just complain instead. */ /* If this looks like a lib that we shouldn't run dsymutil on, just give up. (possible reasons: is system lib, or in /usr etc, or the dsym dir would not be writable by the user, or we're running as root) */ vg_assert(di->fsm.filename); if (is_systemish_library_name(di->fsm.filename)) goto success; if (!VG_(clo_dsymutil)) { if (VG_(clo_verbosity) == 1) { VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename); } if (VG_(clo_verbosity) > 0) VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using " "--dsymutil=yes\n", VG_(clo_verbosity) > 1 ? " " : "", dsymfilename ? "has wrong UUID" : "is missing"); goto success; } /* Run dsymutil */ { Int r; HChar* dsymutil = "/usr/bin/dsymutil "; HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1", VG_(strlen)(dsymutil) + VG_(strlen)(di->fsm.filename) + 32 /* misc */ ); VG_(strcpy)(cmd, dsymutil); if (0) VG_(strcat)(cmd, "--verbose "); VG_(strcat)(cmd, "\""); VG_(strcat)(cmd, di->fsm.filename); VG_(strcat)(cmd, "\""); VG_(message)(Vg_DebugMsg, "run: %s\n", cmd); r = VG_(system)( cmd ); if (r) VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil); ML_(dinfo_free)(cmd); dsymfilename = find_separate_debug_file(di->fsm.filename); } /* Try again to load it. */ if (dsymfilename) { Bool valid; if (VG_(clo_verbosity) > 1) VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename); ok = map_image_aboard( di, &iid, dsymfilename ); if (!ok) goto fail; /* check it has the right uuid. */ vg_assert(have_uuid); valid = iid.macho_img && iid.macho_img_szB > 0 && check_uuid_matches( (Addr)iid.macho_img, iid.macho_img_szB, uuid ); if (!valid) { if (VG_(clo_verbosity) > 0) { VG_(message)(Vg_DebugMsg, "WARNING: did not find expected UUID %02X%02X%02X%02X" "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X" " in dSYM dir\n", (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3], (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7], (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10], (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13], (UInt)uuid[14], (UInt)uuid[15] ); VG_(message)(Vg_DebugMsg, "WARNING: for %s\n", di->fsm.filename); } unmap_image( &iid ); /* unmap_image zeroes the fields, so the following test makes sense. */ goto fail; } } /* Right. Finally we have our best try at the dwarf image, so go on to reading stuff out of it. */ read_the_dwarf: if (iid.macho_img && iid.macho_img_szB > 0) { UChar* debug_info_img = NULL; Word debug_info_sz; UChar* debug_abbv_img; Word debug_abbv_sz; UChar* debug_line_img; Word debug_line_sz; UChar* debug_str_img; Word debug_str_sz; UChar* debug_ranges_img; Word debug_ranges_sz; UChar* debug_loc_img; Word debug_loc_sz; UChar* debug_name_img; Word debug_name_sz; debug_info_img = getsectdata(iid.macho_img, iid.macho_img_szB, "__DWARF", "__debug_info", &debug_info_sz); debug_abbv_img = getsectdata(iid.macho_img, iid.macho_img_szB, "__DWARF", "__debug_abbrev", &debug_abbv_sz); debug_line_img = getsectdata(iid.macho_img, iid.macho_img_szB, "__DWARF", "__debug_line", &debug_line_sz); debug_str_img = getsectdata(iid.macho_img, iid.macho_img_szB, "__DWARF", "__debug_str", &debug_str_sz); debug_ranges_img = getsectdata(iid.macho_img, iid.macho_img_szB, "__DWARF", "__debug_ranges", &debug_ranges_sz); debug_loc_img = getsectdata(iid.macho_img, iid.macho_img_szB, "__DWARF", "__debug_loc", &debug_loc_sz); debug_name_img = getsectdata(iid.macho_img, iid.macho_img_szB, "__DWARF", "__debug_pubnames", &debug_name_sz); if (debug_info_img) { if (VG_(clo_verbosity) > 1) { if (0) VG_(message)(Vg_DebugMsg, "Reading dwarf3 for %s (%#lx) from %s" " (%ld %ld %ld %ld %ld %ld)\n", di->fsm.filename, di->text_avma, dsymfilename, debug_info_sz, debug_abbv_sz, debug_line_sz, debug_str_sz, debug_ranges_sz, debug_loc_sz ); VG_(message)(Vg_DebugMsg, " reading dwarf3 from dsyms file\n"); } /* The old reader: line numbers and unwind info only */ ML_(read_debuginfo_dwarf3) ( di, debug_info_img, debug_info_sz, NULL, 0, debug_abbv_img, debug_abbv_sz, debug_line_img, debug_line_sz, debug_str_img, debug_str_sz, NULL, 0 /* ALT .debug_str */ ); /* The new reader: read the DIEs in .debug_info to acquire information on variable types and locations. But only if the tool asks for it, or the user requests it on the command line. */ if (VG_(needs).var_info /* the tool requires it */ || VG_(clo_read_var_info) /* the user asked for it */) { ML_(new_dwarf3_reader)( di, debug_info_img, debug_info_sz, NULL, 0, debug_abbv_img, debug_abbv_sz, debug_line_img, debug_line_sz, debug_str_img, debug_str_sz, debug_ranges_img, debug_ranges_sz, debug_loc_img, debug_loc_sz, NULL, 0, /* ALT .debug_info */ NULL, 0, /* ALT .debug_abbv */ NULL, 0, /* ALT .debug_line */ NULL, 0 /* ALT .debug_str */ ); } } } if (dsymfilename) ML_(dinfo_free)(dsymfilename); success: if (ii.img) unmap_image(&ii); if (iid.img) unmap_image(&iid); return True; /* NOTREACHED */ fail: ML_(symerr)(di, True, "Error reading Mach-O object."); if (ii.img) unmap_image(&ii); if (iid.img) unmap_image(&iid); return False; } #endif // defined(VGO_darwin) /*--------------------------------------------------------------------*/ /*--- end ---*/ /*--------------------------------------------------------------------*/