/* * Copyright (C) 2008 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Access the contents of a .dex file. */ #include "DexFile.h" #include "DexProto.h" #include "DexCatch.h" #include "Leb128.h" #include "sha1.h" #include "ZipArchive.h" #include <zlib.h> #include <stdlib.h> #include <stddef.h> #include <string.h> #include <fcntl.h> #include <errno.h> // fwd static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader); /* * Verifying checksums is good, but it slows things down and causes us to * touch every page. In the "optimized" world, it doesn't work at all, * because we rewrite the contents. */ static const bool kVerifyChecksum = false; static const bool kVerifySignature = false; /* Compare two '\0'-terminated modified UTF-8 strings, using Unicode * code point values for comparison. This treats different encodings * for the same code point as equivalent, except that only a real '\0' * byte is considered the string terminator. The return value is as * for strcmp(). 
*/ int dexUtf8Cmp(const char* s1, const char* s2) { for (;;) { if (*s1 == '\0') { if (*s2 == '\0') { return 0; } return -1; } else if (*s2 == '\0') { return 1; } int utf1 = dexGetUtf16FromUtf8(&s1); int utf2 = dexGetUtf16FromUtf8(&s2); int diff = utf1 - utf2; if (diff != 0) { return diff; } } } /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */ u4 DEX_MEMBER_VALID_LOW_ASCII[4] = { 0x00000000, // 00..1f low control characters; nothing valid 0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-' 0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_' 0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z' }; /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */ bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) { /* * It's a multibyte encoded character. Decode it and analyze. We * accept anything that isn't (a) an improperly encoded low value, * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high * control character, or (e) a high space, layout, or special * character (U+00a0, U+2000..U+200f, U+2028..U+202f, * U+fff0..U+ffff). */ u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); // Perform follow-up tests based on the high 8 bits. switch (utf16 >> 8) { case 0x00: { // It's only valid if it's above the ISO-8859-1 high space (0xa0). return (utf16 > 0x00a0); } case 0xd8: case 0xd9: case 0xda: case 0xdb: { /* * It's a leading surrogate. Check to see that a trailing * surrogate follows. */ utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); return (utf16 >= 0xdc00) && (utf16 <= 0xdfff); } case 0xdc: case 0xdd: case 0xde: case 0xdf: { // It's a trailing surrogate, which is not valid at this point. return false; } case 0x20: case 0xff: { // It's in the range that has spaces, controls, and specials. switch (utf16 & 0xfff8) { case 0x2000: case 0x2008: case 0x2028: case 0xfff0: case 0xfff8: { return false; } } break; } } return true; } /* Return whether the given string is a valid field or method name. 
*/ bool dexIsValidMemberName(const char* s) { bool angleName = false; switch (*s) { case '\0': { // The empty string is not a valid name. return false; } case '<': { /* * '<' is allowed only at the start of a name, and if present, * means that the name must end with '>'. */ angleName = true; s++; break; } } for (;;) { switch (*s) { case '\0': { return !angleName; } case '>': { return angleName && s[1] == '\0'; } } if (!dexIsValidMemberNameUtf8(&s)) { return false; } } } /* Return whether the given string is a valid type descriptor. */ bool dexIsValidTypeDescriptor(const char* s) { int arrayCount = 0; while (*s == '[') { arrayCount++; s++; } if (arrayCount > 255) { // Arrays may have no more than 255 dimensions. return false; } switch (*(s++)) { case 'B': case 'C': case 'D': case 'F': case 'I': case 'J': case 'S': case 'Z': { // These are all single-character descriptors for primitive types. return (*s == '\0'); } case 'V': { // You can't have an array of void. return (arrayCount == 0) && (*s == '\0'); } case 'L': { // Break out and continue below. break; } default: { // Oddball descriptor character. return false; } } // We just consumed the 'L' that introduces a class name. bool slashOrFirst = true; // first character or just encountered a slash for (;;) { u1 c = (u1) *s; switch (c) { case '\0': { // Premature end. return false; } case ';': { /* * Make sure that this is the end of the string and that * it doesn't end with an empty component (including the * degenerate case of "L;"). */ return (s[1] == '\0') && !slashOrFirst; } case '/': { if (slashOrFirst) { // Slash at start or two slashes in a row. return false; } slashOrFirst = true; s++; break; } default: { if (!dexIsValidMemberNameUtf8(&s)) { return false; } slashOrFirst = false; break; } } } } /* Return whether the given string is a valid reference descriptor. This * is true if dexIsValidTypeDescriptor() returns true and the descriptor * is for a class or array and not a primitive type. 
*/ bool dexIsReferenceDescriptor(const char* s) { if (!dexIsValidTypeDescriptor(s)) { return false; } return (s[0] == 'L') || (s[0] == '['); } /* Return whether the given string is a valid class descriptor. This * is true if dexIsValidTypeDescriptor() returns true and the descriptor * is for a class and not an array or primitive type. */ bool dexIsClassDescriptor(const char* s) { if (!dexIsValidTypeDescriptor(s)) { return false; } return s[0] == 'L'; } /* Return whether the given string is a valid field type descriptor. This * is true if dexIsValidTypeDescriptor() returns true and the descriptor * is for anything but "void". */ bool dexIsFieldDescriptor(const char* s) { if (!dexIsValidTypeDescriptor(s)) { return false; } return s[0] != 'V'; } /* Return the UTF-8 encoded string with the specified string_id index, * also filling in the UTF-16 size (number of 16-bit code points).*/ const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, u4* utf16Size) { const DexStringId* pStringId = dexGetStringId(pDexFile, idx); const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; *utf16Size = readUnsignedLeb128(&ptr); return (const char*) ptr; } /* * Format an SHA-1 digest for printing. tmpBuf must be able to hold at * least kSHA1DigestOutputLen bytes. */ const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); /* * Compute a SHA-1 digest on a range of bytes. */ static void dexComputeSHA1Digest(const unsigned char* data, size_t length, unsigned char digest[]) { SHA1_CTX context; SHA1Init(&context); SHA1Update(&context, data, length); SHA1Final(digest, &context); } /* * Format the SHA-1 digest into the buffer, which must be able to hold at * least kSHA1DigestOutputLen bytes. 
Returns a pointer to the buffer, */ static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) { static const char hexDigit[] = "0123456789abcdef"; char* cp; int i; cp = tmpBuf; for (i = 0; i < kSHA1DigestLen; i++) { *cp++ = hexDigit[digest[i] >> 4]; *cp++ = hexDigit[digest[i] & 0x0f]; } *cp++ = '\0'; assert(cp == tmpBuf + kSHA1DigestOutputLen); return tmpBuf; } /* * Compute a hash code on a UTF-8 string, for use with internal hash tables. * * This may or may not be compatible with UTF-8 hash functions used inside * the Dalvik VM. * * The basic "multiply by 31 and add" approach does better on class names * than most other things tried (e.g. adler32). */ static u4 classDescriptorHash(const char* str) { u4 hash = 1; while (*str != '\0') hash = hash * 31 + *str++; return hash; } /* * Add an entry to the class lookup table. We hash the string and probe * until we find an open slot. */ static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, int stringOff, int classDefOff, int* pNumProbes) { const char* classDescriptor = (const char*) (pDexFile->baseAddr + stringOff); const DexClassDef* pClassDef = (const DexClassDef*) (pDexFile->baseAddr + classDefOff); u4 hash = classDescriptorHash(classDescriptor); int mask = pLookup->numEntries-1; int idx = hash & mask; /* * Find the first empty slot. We oversized the table, so this is * guaranteed to finish. */ int probes = 0; while (pLookup->table[idx].classDescriptorOffset != 0) { idx = (idx + 1) & mask; probes++; } //if (probes > 1) // LOGW("classLookupAdd: probes=%d\n", probes); pLookup->table[idx].classDescriptorHash = hash; pLookup->table[idx].classDescriptorOffset = stringOff; pLookup->table[idx].classDefOffset = classDefOff; *pNumProbes = probes; } /* * Round up to the next highest power of 2. * * Found on http://graphics.stanford.edu/~seander/bithacks.html. 
*/
u4 dexRoundUpPower2(u4 val)
{
    /* Smear the highest set bit of (val - 1) downward, then add one. */
    val--;
    val |= val >> 1;
    val |= val >> 2;
    val |= val >> 4;
    val |= val >> 8;
    val |= val >> 16;
    val++;

    return val;
}

/*
 * Create the class lookup hash table.
 *
 * The table has one entry per class definition in "pDexFile", placed by
 * classLookupAdd().
 *
 * Returns newly-allocated storage (obtained from calloc()), or NULL if
 * the allocation fails.
 */
DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
{
    DexClassLookup* pLookup;
    int allocSize;
    int i, numEntries;
    int numProbes, totalProbes, maxProbes;

    numProbes = totalProbes = maxProbes = 0;

    assert(pDexFile != NULL);

    /*
     * Using a factor of 3 results in far less probing than a factor of 2,
     * but almost doubles the flash storage requirements for the bootstrap
     * DEX files. The overall impact on class loading performance seems
     * to be minor. We could probably get some performance improvement by
     * using a secondary hash.
     */
    numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
    allocSize = offsetof(DexClassLookup, table)
                    + numEntries * sizeof(pLookup->table[0]);

    pLookup = (DexClassLookup*) calloc(1, allocSize);
    if (pLookup == NULL)
        return NULL;
    pLookup->size = allocSize;
    pLookup->numEntries = numEntries;

    for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
        const DexClassDef* pClassDef;
        const char* pString;

        pClassDef = dexGetClassDef(pDexFile, i);
        pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);

        /* Entries are stored as byte offsets from the start of the DEX. */
        classLookupAdd(pDexFile, pLookup,
            (u1*)pString - pDexFile->baseAddr,
            (u1*)pClassDef - pDexFile->baseAddr, &numProbes);

        if (numProbes > maxProbes)
            maxProbes = numProbes;
        totalProbes += numProbes;
    }

    /*
     * numEntries is zero when the file has no class definitions; report 0%
     * occupancy in that case instead of dividing by zero.
     */
    LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
         " total=%d max=%d\n",
        pDexFile->pHeader->classDefsSize, numEntries,
        (numEntries != 0)
            ? (100 * pDexFile->pHeader->classDefsSize) / numEntries : 0,
        allocSize, totalProbes, maxProbes);

    return pLookup;
}

/*
 * Set up the basic raw data pointers of a DexFile. This function isn't
 * meant for general use.
*/ void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { DexHeader *pHeader = (DexHeader*) data; pDexFile->baseAddr = data; pDexFile->pHeader = pHeader; pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); } /* * Parse out an index map entry, advancing "*pData" and reducing "*pSize". */ static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding, u4* pFullCount, u4* pReducedCount, const u2** pMap) { const u4* wordPtr = (const u4*) *pData; u4 size = *pSize; u4 mapCount; if (expanding) { if (size < 4) return false; mapCount = *pReducedCount = *wordPtr++; *pFullCount = (u4) -1; size -= sizeof(u4); } else { if (size < 8) return false; mapCount = *pFullCount = *wordPtr++; *pReducedCount = *wordPtr++; size -= sizeof(u4) * 2; } u4 mapSize = mapCount * sizeof(u2); if (size < mapSize) return false; *pMap = (const u2*) wordPtr; size -= mapSize; /* advance the pointer */ const u1* ptr = (const u1*) wordPtr; ptr += (mapSize + 3) & ~0x3; /* update pass-by-reference values */ *pData = (const u1*) ptr; *pSize = size; return true; } /* * Set up some pointers into the mapped data. * * See analysis/ReduceConstants.c for the data layout description. 
*/ static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size, bool expanding) { if (!parseIndexMapEntry(&data, &size, expanding, &pDexFile->indexMap.classFullCount, &pDexFile->indexMap.classReducedCount, &pDexFile->indexMap.classMap)) { return false; } if (!parseIndexMapEntry(&data, &size, expanding, &pDexFile->indexMap.methodFullCount, &pDexFile->indexMap.methodReducedCount, &pDexFile->indexMap.methodMap)) { return false; } if (!parseIndexMapEntry(&data, &size, expanding, &pDexFile->indexMap.fieldFullCount, &pDexFile->indexMap.fieldReducedCount, &pDexFile->indexMap.fieldMap)) { return false; } if (!parseIndexMapEntry(&data, &size, expanding, &pDexFile->indexMap.stringFullCount, &pDexFile->indexMap.stringReducedCount, &pDexFile->indexMap.stringMap)) { return false; } if (expanding) { /* * The map includes the "reduced" counts; pull the original counts * out of the DexFile so that code has a consistent source. */ assert(pDexFile->indexMap.classFullCount == (u4) -1); assert(pDexFile->indexMap.methodFullCount == (u4) -1); assert(pDexFile->indexMap.fieldFullCount == (u4) -1); assert(pDexFile->indexMap.stringFullCount == (u4) -1); #if 0 // TODO: not available yet -- do later or just skip this pDexFile->indexMap.classFullCount = pDexFile->pHeader->typeIdsSize; pDexFile->indexMap.methodFullCount = pDexFile->pHeader->methodIdsSize; pDexFile->indexMap.fieldFullCount = pDexFile->pHeader->fieldIdsSize; pDexFile->indexMap.stringFullCount = pDexFile->pHeader->stringIdsSize; #endif } LOGI("Class : %u %u %u\n", pDexFile->indexMap.classFullCount, pDexFile->indexMap.classReducedCount, pDexFile->indexMap.classMap[0]); LOGI("Method: %u %u %u\n", pDexFile->indexMap.methodFullCount, pDexFile->indexMap.methodReducedCount, pDexFile->indexMap.methodMap[0]); LOGI("Field : %u %u %u\n", pDexFile->indexMap.fieldFullCount, pDexFile->indexMap.fieldReducedCount, pDexFile->indexMap.fieldMap[0]); LOGI("String: %u %u %u\n", pDexFile->indexMap.stringFullCount, 
pDexFile->indexMap.stringReducedCount, pDexFile->indexMap.stringMap[0]); return true; } /* * Parse some auxillary data tables. * * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup * table. Subsequent versions switched to the "chunk" format. */ static bool parseAuxData(const u1* data, DexFile* pDexFile) { const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset); u4 indexMapType = 0; /* v1.0 format? */ if (*pAux == 0) { LOGV("+++ found OLD dex format\n"); pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1); return true; } LOGV("+++ found NEW dex format\n"); /* process chunks until we see the end marker */ while (*pAux != kDexChunkEnd) { u4 size = *(pAux+1); u1* data = (u1*) (pAux + 2); switch (*pAux) { case kDexChunkClassLookup: pDexFile->pClassLookup = (const DexClassLookup*) data; break; case kDexChunkReducingIndexMap: LOGI("+++ found reducing index map, size=%u\n", size); if (!parseIndexMap(pDexFile, data, size, false)) { LOGE("Failed parsing reducing index map\n"); return false; } indexMapType = *pAux; break; case kDexChunkExpandingIndexMap: LOGI("+++ found expanding index map, size=%u\n", size); if (!parseIndexMap(pDexFile, data, size, true)) { LOGE("Failed parsing expanding index map\n"); return false; } indexMapType = *pAux; break; case kDexChunkRegisterMaps: LOGV("+++ found register maps, size=%u\n", size); pDexFile->pRegisterMapPool = data; break; default: LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n", *pAux, (char) ((*pAux) >> 24), (char) ((*pAux) >> 16), (char) ((*pAux) >> 8), (char) (*pAux), size); break; } /* * Advance pointer, padding to 64-bit boundary. The extra "+8" is * for the type/size header. */ size = (size + 8 + 7) & ~7; pAux += size / sizeof(u4); } #if 0 // TODO: propagate expected map type from the VM through the API /* * If we're configured to expect an index map, and we don't find one, * reject this DEX so we'll regenerate it. 
Also, if we found an * "expanding" map but we're not configured to use it, we have to fail * because the constants aren't usable without translation. */ if (indexMapType != expectedIndexMapType) { LOGW("Incompatible index map configuration: found 0x%04x, need %d\n", indexMapType, DVM_REDUCE_CONSTANTS); return false; } #endif return true; } /* * Parse an optimized or unoptimized .dex file sitting in memory. This is * called after the byte-ordering and structure alignment has been fixed up. * * On success, return a newly-allocated DexFile. */ DexFile* dexFileParse(const u1* data, size_t length, int flags) { DexFile* pDexFile = NULL; const DexHeader* pHeader; const u1* magic; int result = -1; if (length < sizeof(DexHeader)) { LOGE("too short to be a valid .dex\n"); goto bail; /* bad file format */ } pDexFile = (DexFile*) malloc(sizeof(DexFile)); if (pDexFile == NULL) goto bail; /* alloc failure */ memset(pDexFile, 0, sizeof(DexFile)); /* * Peel off the optimized header. */ if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { magic = data; if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { LOGE("bad opt version (0x%02x %02x %02x %02x)\n", magic[4], magic[5], magic[6], magic[7]); goto bail; } pDexFile->pOptHeader = (const DexOptHeader*) data; LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n", pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); /* locate some auxillary data tables */ if (!parseAuxData(data, pDexFile)) goto bail; /* ignore the opt header and appended data from here on out */ data += pDexFile->pOptHeader->dexOffset; length -= pDexFile->pOptHeader->dexOffset; if (pDexFile->pOptHeader->dexLength > length) { LOGE("File truncated? 
stored len=%d, rem len=%d\n", pDexFile->pOptHeader->dexLength, (int) length); goto bail; } length = pDexFile->pOptHeader->dexLength; } dexFileSetupBasicPointers(pDexFile, data); pHeader = pDexFile->pHeader; magic = pHeader->magic; if (memcmp(magic, DEX_MAGIC, 4) != 0) { /* not expected */ LOGE("bad magic number (0x%02x %02x %02x %02x)\n", magic[0], magic[1], magic[2], magic[3]); goto bail; } if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) { LOGE("bad dex version (0x%02x %02x %02x %02x)\n", magic[4], magic[5], magic[6], magic[7]); goto bail; } /* * Verify the checksum(s). This is reasonably quick, but does require * touching every byte in the DEX file. The base checksum changes after * byte-swapping and DEX optimization. */ if (flags & kDexParseVerifyChecksum) { u4 adler = dexComputeChecksum(pHeader); if (adler != pHeader->checksum) { LOGE("ERROR: bad checksum (%08x vs %08x)\n", adler, pHeader->checksum); if (!(flags & kDexParseContinueOnError)) goto bail; } else { LOGV("+++ adler32 checksum (%08x) verified\n", adler); } const DexOptHeader* pOptHeader = pDexFile->pOptHeader; if (pOptHeader != NULL) { adler = dexComputeOptChecksum(pOptHeader); if (adler != pOptHeader->checksum) { LOGE("ERROR: bad opt checksum (%08x vs %08x)\n", adler, pOptHeader->checksum); if (!(flags & kDexParseContinueOnError)) goto bail; } else { LOGV("+++ adler32 opt checksum (%08x) verified\n", adler); } } } /* * Verify the SHA-1 digest. (Normally we don't want to do this -- * the digest is used to uniquely identify the original DEX file, and * can't be computed for verification after the DEX is byte-swapped * and optimized.) 
*/ if (kVerifySignature) { unsigned char sha1Digest[kSHA1DigestLen]; const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + kSHA1DigestLen; dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { char tmpBuf1[kSHA1DigestOutputLen]; char tmpBuf2[kSHA1DigestOutputLen]; LOGE("ERROR: bad SHA1 digest (%s vs %s)\n", dexSHA1DigestToStr(sha1Digest, tmpBuf1), dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); if (!(flags & kDexParseContinueOnError)) goto bail; } else { LOGV("+++ sha1 digest verified\n"); } } if (pHeader->fileSize != length) { LOGE("ERROR: stored file size (%d) != expected (%d)\n", (int) pHeader->fileSize, (int) length); if (!(flags & kDexParseContinueOnError)) goto bail; } if (pHeader->classDefsSize == 0) { LOGE("ERROR: DEX file has no classes in it, failing\n"); goto bail; } /* * Success! */ result = 0; bail: if (result != 0 && pDexFile != NULL) { dexFileFree(pDexFile); pDexFile = NULL; } return pDexFile; } /* * Free up the DexFile and any associated data structures. * * Note we may be called with a partially-initialized DexFile. */ void dexFileFree(DexFile* pDexFile) { if (pDexFile == NULL) return; free(pDexFile); } /* * Look up a class definition entry by descriptor. * * "descriptor" should look like "Landroid/debug/Stuff;". */ const DexClassDef* dexFindClass(const DexFile* pDexFile, const char* descriptor) { const DexClassLookup* pLookup = pDexFile->pClassLookup; u4 hash; int idx, mask; hash = classDescriptorHash(descriptor); mask = pLookup->numEntries - 1; idx = hash & mask; /* * Search until we find a matching entry or an empty slot. 
*/ while (true) { int offset; offset = pLookup->table[idx].classDescriptorOffset; if (offset == 0) return NULL; if (pLookup->table[idx].classDescriptorHash == hash) { const char* str; str = (const char*) (pDexFile->baseAddr + offset); if (strcmp(str, descriptor) == 0) { return (const DexClassDef*) (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); } } idx = (idx + 1) & mask; } } /* * Compute the DEX file checksum for a memory-mapped DEX file. */ u4 dexComputeChecksum(const DexHeader* pHeader) { const u1* start = (const u1*) pHeader; uLong adler = adler32(0L, Z_NULL, 0); const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); } /* * Compute the checksum on the data appended to the DEX file by dexopt. */ static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader) { const u1* start = (const u1*) pOptHeader + pOptHeader->depsOffset; const u1* end = (const u1*) pOptHeader + pOptHeader->auxOffset + pOptHeader->auxLength; uLong adler = adler32(0L, Z_NULL, 0); return (u4) adler32(adler, start, end - start); } /* * Compute the size, in bytes, of a DexCode. */ size_t dexGetDexCodeSize(const DexCode* pCode) { /* * The catch handler data is the last entry. It has a variable number * of variable-size pieces, so we need to create an iterator. 
*/ u4 handlersSize; u4 offset; u4 ui; if (pCode->triesSize != 0) { handlersSize = dexGetHandlersSize(pCode); offset = dexGetFirstHandlerOffset(pCode); } else { handlersSize = 0; offset = 0; } for (ui = 0; ui < handlersSize; ui++) { DexCatchIterator iterator; dexCatchIteratorInit(&iterator, pCode, offset); offset = dexCatchIteratorGetEndOffset(&iterator, pCode); } const u1* handlerData = dexGetCatchHandlerData(pCode); //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n", // pCode, handlerData, offset); /* return the size of the catch handler + everything before it */ return (handlerData - (u1*) pCode) + offset; } /* * =========================================================================== * Debug info * =========================================================================== */ /* * Decode the arguments in a method signature, which looks something * like "(ID[Ljava/lang/String;)V". * * Returns the type signature letter for the next argument, or ')' if * there are no more args. Advances "pSig" to point to the character * after the one returned. */ static char decodeSignature(const char** pSig) { const char* sig = *pSig; if (*sig == '(') sig++; if (*sig == 'L') { /* object ref */ while (*++sig != ';') ; *pSig = sig+1; return 'L'; } if (*sig == '[') { /* array; advance past array type */ while (*++sig == '[') ; if (*sig == 'L') { while (*++sig != ';') ; } *pSig = sig+1; return '['; } if (*sig == '\0') return *sig; /* don't advance further */ *pSig = sig+1; return *sig; } /* * returns the length of a type string, given the start of the * type string. Used for the case where the debug info format * references types that are inside a method type signature. */ static int typeLength (const char *type) { // Assumes any leading '(' has already been gobbled const char *end = type; decodeSignature(&end); return end - type; } /* * Reads a string index as encoded for the debug info format, * returning a string pointer or NULL as appropriate. 
*/ static const char* readStringIdx(const DexFile* pDexFile, const u1** pStream) { u4 stringIdx = readUnsignedLeb128(pStream); // Remember, encoded string indicies have 1 added to them. if (stringIdx == 0) { return NULL; } else { return dexStringById(pDexFile, stringIdx - 1); } } /* * Reads a type index as encoded for the debug info format, returning * a string pointer for its descriptor or NULL as appropriate. */ static const char* readTypeIdx(const DexFile* pDexFile, const u1** pStream) { u4 typeIdx = readUnsignedLeb128(pStream); // Remember, encoded type indicies have 1 added to them. if (typeIdx == 0) { return NULL; } else { return dexStringByTypeIdx(pDexFile, typeIdx - 1); } } /* access_flag value indicating that a method is static */ #define ACC_STATIC 0x0008 typedef struct LocalInfo { const char *name; const char *descriptor; const char *signature; u2 startAddress; bool live; } LocalInfo; static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress, LocalInfo *localInReg, DexDebugNewLocalCb localCb) { if (localCb != NULL && localInReg[reg].live) { localCb(cnxt, reg, localInReg[reg].startAddress, endAddress, localInReg[reg].name, localInReg[reg].descriptor, localInReg[reg].signature == NULL ? 
"" : localInReg[reg].signature ); } } // TODO optimize localCb == NULL case void dexDecodeDebugInfo( const DexFile* pDexFile, const DexCode* pCode, const char* classDescriptor, u4 protoIdx, u4 accessFlags, DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb, void* cnxt) { const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode); u4 line; u4 parametersSize; u4 address = 0; LocalInfo localInReg[pCode->registersSize]; u4 insnsSize = pCode->insnsSize; DexProto proto = { pDexFile, protoIdx }; memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize); if (stream == NULL) { goto end; } line = readUnsignedLeb128(&stream); parametersSize = readUnsignedLeb128(&stream); u2 argReg = pCode->registersSize - pCode->insSize; if ((accessFlags & ACC_STATIC) == 0) { /* * The code is an instance method, which means that there is * an initial this parameter. Also, the proto list should * contain exactly one fewer argument word than the insSize * indicates. */ assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1)); localInReg[argReg].name = "this"; localInReg[argReg].descriptor = classDescriptor; localInReg[argReg].startAddress = 0; localInReg[argReg].live = true; argReg++; } else { assert(pCode->insSize == dexProtoComputeArgsSize(&proto)); } DexParameterIterator iterator; dexParameterIteratorInit(&iterator, &proto); while (parametersSize-- != 0) { const char* descriptor = dexParameterIteratorNextDescriptor(&iterator); const char *name; int reg; if ((argReg >= pCode->registersSize) || (descriptor == NULL)) { goto invalid_stream; } name = readStringIdx(pDexFile, &stream); reg = argReg; switch (descriptor[0]) { case 'D': case 'J': argReg += 2; break; default: argReg += 1; break; } if (name != NULL) { localInReg[reg].name = name; localInReg[reg].descriptor = descriptor; localInReg[reg].signature = NULL; localInReg[reg].startAddress = address; localInReg[reg].live = true; } } for (;;) { u1 opcode = *stream++; u2 reg; switch (opcode) { case DBG_END_SEQUENCE: goto end; 
case DBG_ADVANCE_PC: address += readUnsignedLeb128(&stream); break; case DBG_ADVANCE_LINE: line += readSignedLeb128(&stream); break; case DBG_START_LOCAL: case DBG_START_LOCAL_EXTENDED: reg = readUnsignedLeb128(&stream); if (reg > pCode->registersSize) goto invalid_stream; // Emit what was previously there, if anything emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb); localInReg[reg].name = readStringIdx(pDexFile, &stream); localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream); if (opcode == DBG_START_LOCAL_EXTENDED) { localInReg[reg].signature = readStringIdx(pDexFile, &stream); } else { localInReg[reg].signature = NULL; } localInReg[reg].startAddress = address; localInReg[reg].live = true; break; case DBG_END_LOCAL: reg = readUnsignedLeb128(&stream); if (reg > pCode->registersSize) goto invalid_stream; emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb); localInReg[reg].live = false; break; case DBG_RESTART_LOCAL: reg = readUnsignedLeb128(&stream); if (reg > pCode->registersSize) goto invalid_stream; if (localInReg[reg].name == NULL || localInReg[reg].descriptor == NULL) { goto invalid_stream; } /* * If the register is live, the "restart" is superfluous, * and we don't want to mess with the existing start address. */ if (!localInReg[reg].live) { localInReg[reg].startAddress = address; localInReg[reg].live = true; } break; case DBG_SET_PROLOGUE_END: case DBG_SET_EPILOGUE_BEGIN: case DBG_SET_FILE: break; default: { int adjopcode = opcode - DBG_FIRST_SPECIAL; address += adjopcode / DBG_LINE_RANGE; line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE); if (posCb != NULL) { int done; done = posCb(cnxt, address, line); if (done) { // early exit goto end; } } break; } } } end: { int reg; for (reg = 0; reg < pCode->registersSize; reg++) { emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb); } } return; invalid_stream: IF_LOGE() { char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto); LOGE("Invalid debug info stream. 
class %s; proto %s", classDescriptor, methodDescriptor); free(methodDescriptor); } }