/* * Copyright (C) 2008 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Read-only access to Zip archives, with minimal heap allocation. */ #include "ZipArchive.h" #include <zlib.h> #include <stdlib.h> #include <string.h> #include <fcntl.h> #include <errno.h> /* * Zip file constants. */ #define kEOCDSignature 0x06054b50 #define kEOCDLen 22 #define kEOCDNumEntries 8 // offset to #of entries in file #define kEOCDFileOffset 16 // offset to central directory #define kMaxCommentLen 65535 // longest possible in ushort #define kMaxEOCDSearch (kMaxCommentLen + kEOCDLen) #define kLFHSignature 0x04034b50 #define kLFHLen 30 // excluding variable-len fields #define kLFHNameLen 26 // offset to filename length #define kLFHExtraLen 28 // offset to extra length #define kCDESignature 0x02014b50 #define kCDELen 46 // excluding variable-len fields #define kCDEMethod 10 // offset to compression method #define kCDEModWhen 12 // offset to modification timestamp #define kCDECRC 16 // offset to entry CRC #define kCDECompLen 20 // offset to compressed length #define kCDEUncompLen 24 // offset to uncompressed length #define kCDENameLen 28 // offset to filename length #define kCDEExtraLen 30 // offset to extra length #define kCDECommentLen 32 // offset to comment length #define kCDELocalOffset 42 // offset to local hdr /* * The values we return for ZipEntry use 0 as an invalid value, so we * want to adjust the hash table index by a fixed amount. Using a large * value helps insure that people don't mix & match arguments, e.g. with * entry indices. */ #define kZipEntryAdj 10000 /* * Convert a ZipEntry to a hash table index, verifying that it's in a * valid range. */ static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry) { long ent = ((long) entry) - kZipEntryAdj; if (ent < 0 || ent >= pArchive->mHashTableSize || pArchive->mHashTable[ent].name == NULL) { LOGW("Invalid ZipEntry %p (%ld)\n", entry, ent); return -1; } return ent; } /* * Simple string hash function for non-null-terminated strings. */ static unsigned int computeHash(const char* str, int len) { unsigned int hash = 0; while (len--) hash = hash * 31 + *str++; return hash; } /* * Add a new entry to the hash table. */ static void addToHash(ZipArchive* pArchive, const char* str, int strLen, unsigned int hash) { const int hashTableSize = pArchive->mHashTableSize; int ent = hash & (hashTableSize - 1); /* * We over-allocated the table, so we're guaranteed to find an empty slot. */ while (pArchive->mHashTable[ent].name != NULL) ent = (ent + 1) & (hashTableSize-1); pArchive->mHashTable[ent].name = str; pArchive->mHashTable[ent].nameLen = strLen; } /* * Get 2 little-endian bytes. */ static u2 get2LE(unsigned char const* pSrc) { return pSrc[0] | (pSrc[1] << 8); } /* * Get 4 little-endian bytes. */ static u4 get4LE(unsigned char const* pSrc) { u4 result; result = pSrc[0]; result |= pSrc[1] << 8; result |= pSrc[2] << 16; result |= pSrc[3] << 24; return result; } /* * Parse the Zip archive, verifying its contents and initializing internal * data structures. */ static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap) { #define CHECK_OFFSET(_off) { \ if ((unsigned int) (_off) >= maxOffset) { \ LOGE("ERROR: bad offset %u (max %d): %s\n", \ (unsigned int) (_off), maxOffset, #_off); \ goto bail; \ } \ } bool result = false; const unsigned char* basePtr = (const unsigned char*)pMap->addr; const unsigned char* ptr; size_t length = pMap->length; unsigned int i, numEntries, cdOffset; unsigned int val; /* * The first 4 bytes of the file will either be the local header * signature for the first file (kLFHSignature) or, if the archive doesn't * have any files in it, the end-of-central-directory signature * (kEOCDSignature). */ val = get4LE(basePtr); if (val == kEOCDSignature) { LOGI("Found Zip archive, but it looks empty\n"); goto bail; } else if (val != kLFHSignature) { LOGV("Not a Zip archive (found 0x%08x)\n", val); goto bail; } /* * Find the EOCD. We'll find it immediately unless they have a file * comment. */ ptr = basePtr + length - kEOCDLen; while (ptr >= basePtr) { if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature) break; ptr--; } if (ptr < basePtr) { LOGI("Could not find end-of-central-directory in Zip\n"); goto bail; } /* * There are two interesting items in the EOCD block: the number of * entries in the file, and the file offset of the start of the * central directory. * * (There's actually a count of the #of entries in this file, and for * all files which comprise a spanned archive, but for our purposes * we're only interested in the current file. Besides, we expect the * two to be equivalent for our stuff.) */ numEntries = get2LE(ptr + kEOCDNumEntries); cdOffset = get4LE(ptr + kEOCDFileOffset); /* valid offsets are [0,EOCD] */ unsigned int maxOffset; maxOffset = (ptr - basePtr) +1; LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset); if (numEntries == 0 || cdOffset >= length) { LOGW("Invalid entries=%d offset=%d (len=%zd)\n", numEntries, cdOffset, length); goto bail; } /* * Create hash table. We have a minimum 75% load factor, possibly as * low as 50% after we round off to a power of 2. There must be at * least one unused entry to avoid an infinite loop during creation. */ pArchive->mNumEntries = numEntries; pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3); pArchive->mHashTable = (ZipHashEntry*) calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry)); /* * Walk through the central directory, adding entries to the hash * table. */ ptr = basePtr + cdOffset; for (i = 0; i < numEntries; i++) { unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; const unsigned char* localHdr; unsigned int hash; if (get4LE(ptr) != kCDESignature) { LOGW("Missed a central dir sig (at %d)\n", i); goto bail; } if (ptr + kCDELen > basePtr + length) { LOGW("Ran off the end (at %d)\n", i); goto bail; } localHdrOffset = get4LE(ptr + kCDELocalOffset); CHECK_OFFSET(localHdrOffset); fileNameLen = get2LE(ptr + kCDENameLen); extraLen = get2LE(ptr + kCDEExtraLen); commentLen = get2LE(ptr + kCDECommentLen); //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n", // i, localHdrOffset, fileNameLen, extraLen, commentLen); //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen); /* add the CDE filename to the hash table */ hash = computeHash((const char*)ptr + kCDELen, fileNameLen); addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash); localHdr = basePtr + localHdrOffset; if (get4LE(localHdr) != kLFHSignature) { LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i); goto bail; } ptr += kCDELen + fileNameLen + extraLen + commentLen; CHECK_OFFSET(ptr - basePtr); } result = true; bail: return result; #undef CHECK_OFFSET } /* * Open the specified file read-only. We memory-map the entire thing and * parse the contents. * * This will be called on non-Zip files, especially during VM startup, so * we don't want to be too noisy about certain types of failure. (Do * we want a "quiet" flag?) * * On success, we fill out the contents of "pArchive" and return 0. */ int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive) { int fd, err; LOGV("Opening archive '%s' %p\n", fileName, pArchive); fd = open(fileName, O_RDONLY, 0); if (fd < 0) { err = errno ? errno : -1; LOGV("Unable to open '%s': %s\n", fileName, strerror(err)); return err; } return dexZipPrepArchive(fd, fileName, pArchive); } /* * Prepare to access a ZipArchive in an open file descriptor. */ int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive) { MemMapping map; int err; map.addr = NULL; memset(pArchive, 0, sizeof(*pArchive)); pArchive->mFd = fd; if (sysMapFileInShmem(pArchive->mFd, &map) != 0) { err = -1; LOGW("Map of '%s' failed\n", debugFileName); goto bail; } if (map.length < kEOCDLen) { err = -1; LOGV("File '%s' too small to be zip (%zd)\n", debugFileName,map.length); goto bail; } if (!parseZipArchive(pArchive, &map)) { err = -1; LOGV("Parsing '%s' failed\n", debugFileName); goto bail; } /* success */ err = 0; sysCopyMap(&pArchive->mMap, &map); map.addr = NULL; bail: if (err != 0) dexZipCloseArchive(pArchive); if (map.addr != NULL) sysReleaseShmem(&map); return err; } /* * Close a ZipArchive, closing the file and freeing the contents. * * NOTE: the ZipArchive may not have been fully created. */ void dexZipCloseArchive(ZipArchive* pArchive) { LOGV("Closing archive %p\n", pArchive); if (pArchive->mFd >= 0) close(pArchive->mFd); sysReleaseShmem(&pArchive->mMap); free(pArchive->mHashTable); pArchive->mFd = -1; pArchive->mNumEntries = -1; pArchive->mHashTableSize = -1; pArchive->mHashTable = NULL; } /* * Find a matching entry. * * Returns 0 if not found. */ ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName) { int nameLen = strlen(entryName); unsigned int hash = computeHash(entryName, nameLen); const int hashTableSize = pArchive->mHashTableSize; int ent = hash & (hashTableSize-1); while (pArchive->mHashTable[ent].name != NULL) { if (pArchive->mHashTable[ent].nameLen == nameLen && memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0) { /* match */ return (ZipEntry) (ent + kZipEntryAdj); } ent = (ent + 1) & (hashTableSize-1); } return NULL; } #if 0 /* * Find the Nth entry. * * This currently involves walking through the sparse hash table, counting * non-empty entries. If we need to speed this up we can either allocate * a parallel lookup table or (perhaps better) provide an iterator interface. */ ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx) { if (idx < 0 || idx >= pArchive->mNumEntries) { LOGW("Invalid index %d\n", idx); return NULL; } int ent; for (ent = 0; ent < pArchive->mHashTableSize; ent++) { if (pArchive->mHashTable[ent].name != NULL) { if (idx-- == 0) return (ZipEntry) (ent + kZipEntryAdj); } } return NULL; } #endif /* * Get the useful fields from the zip entry. * * Returns "false" if the offsets to the fields or the contents of the fields * appear to be bogus. */ bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry, int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset, long* pModWhen, long* pCrc32) { int ent = entryToIndex(pArchive, entry); if (ent < 0) return false; /* * Recover the start of the central directory entry from the filename * pointer. */ const unsigned char* basePtr = (const unsigned char*) pArchive->mMap.addr; const unsigned char* ptr = (const unsigned char*) pArchive->mHashTable[ent].name; size_t zipLength = pArchive->mMap.length; ptr -= kCDELen; int method = get2LE(ptr + kCDEMethod); if (pMethod != NULL) *pMethod = method; if (pModWhen != NULL) *pModWhen = get4LE(ptr + kCDEModWhen); if (pCrc32 != NULL) *pCrc32 = get4LE(ptr + kCDECRC); /* * We need to make sure that the lengths are not so large that somebody * trying to map the compressed or uncompressed data runs off the end * of the mapped region. */ unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset); if (localHdrOffset + kLFHLen >= zipLength) { LOGE("ERROR: bad local hdr offset in zip\n"); return false; } const unsigned char* localHdr = basePtr + localHdrOffset; off_t dataOffset = localHdrOffset + kLFHLen + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen); if ((unsigned long) dataOffset >= zipLength) { LOGE("ERROR: bad data offset in zip\n"); return false; } if (pCompLen != NULL) { *pCompLen = get4LE(ptr + kCDECompLen); if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) { LOGE("ERROR: bad compressed length in zip\n"); return false; } } if (pUncompLen != NULL) { *pUncompLen = get4LE(ptr + kCDEUncompLen); if (*pUncompLen < 0) { LOGE("ERROR: negative uncompressed length in zip\n"); return false; } if (method == kCompressStored && (size_t)(dataOffset + *pUncompLen) >= zipLength) { LOGE("ERROR: bad uncompressed length in zip\n"); return false; } } if (pOffset != NULL) { *pOffset = dataOffset; } return true; } /* * Uncompress "deflate" data from one buffer to an open file descriptor. */ static bool inflateToFile(int fd, const void* inBuf, long uncompLen, long compLen) { bool result = false; const int kWriteBufSize = 32768; unsigned char writeBuf[kWriteBufSize]; z_stream zstream; int zerr; /* * Initialize the zlib stream struct. */ memset(&zstream, 0, sizeof(zstream)); zstream.zalloc = Z_NULL; zstream.zfree = Z_NULL; zstream.opaque = Z_NULL; zstream.next_in = (Bytef*)inBuf; zstream.avail_in = compLen; zstream.next_out = (Bytef*) writeBuf; zstream.avail_out = sizeof(writeBuf); zstream.data_type = Z_UNKNOWN; /* * Use the undocumented "negative window bits" feature to tell zlib * that there's no zlib header waiting for it. */ zerr = inflateInit2(&zstream, -MAX_WBITS); if (zerr != Z_OK) { if (zerr == Z_VERSION_ERROR) { LOGE("Installed zlib is not compatible with linked version (%s)\n", ZLIB_VERSION); } else { LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); } goto bail; } /* * Loop while we have more to do. */ do { /* * Expand data. */ zerr = inflate(&zstream, Z_NO_FLUSH); if (zerr != Z_OK && zerr != Z_STREAM_END) { LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", zerr, zstream.next_in, zstream.avail_in, zstream.next_out, zstream.avail_out); goto z_bail; } /* write when we're full or when we're done */ if (zstream.avail_out == 0 || (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf))) { long writeSize = zstream.next_out - writeBuf; int cc = write(fd, writeBuf, writeSize); if (cc != (int) writeSize) { if (cc < 0) { LOGW("write failed in inflate: %s\n", strerror(errno)); } else { LOGW("partial write in inflate (%d vs %ld)\n", cc, writeSize); } goto z_bail; } zstream.next_out = writeBuf; zstream.avail_out = sizeof(writeBuf); } } while (zerr == Z_OK); assert(zerr == Z_STREAM_END); /* other errors should've been caught */ /* paranoia */ if ((long) zstream.total_out != uncompLen) { LOGW("Size mismatch on inflated file (%ld vs %ld)\n", zstream.total_out, uncompLen); goto z_bail; } result = true; z_bail: inflateEnd(&zstream); /* free up any allocated structures */ bail: return result; } /* * Uncompress an entry, in its entirety, to an open file descriptor. * * TODO: this doesn't verify the data's CRC, but probably should (especially * for uncompressed data). */ bool dexZipExtractEntryToFile(const ZipArchive* pArchive, const ZipEntry entry, int fd) { bool result = false; int ent = entryToIndex(pArchive, entry); if (ent < 0) return -1; const unsigned char* basePtr = (const unsigned char*)pArchive->mMap.addr; int method; long uncompLen, compLen; off_t offset; if (!dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen, &offset, NULL, NULL)) { goto bail; } if (method == kCompressStored) { ssize_t actual; actual = write(fd, basePtr + offset, uncompLen); if (actual < 0) { LOGE("Write failed: %s\n", strerror(errno)); goto bail; } else if (actual != uncompLen) { LOGE("Partial write during uncompress (%d of %ld)\n", (int) actual, uncompLen); goto bail; } else { LOGI("+++ successful write\n"); } } else { if (!inflateToFile(fd, basePtr+offset, uncompLen, compLen)) goto bail; } result = true; bail: return result; }