/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Read-only access to Zip archives, with minimal heap allocation.
*/
#include "ZipArchive.h"
#include <zlib.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
/*
* Zip file constants.
*
* Each "k<Record>Signature" is the 32-bit magic number expected at the start
* of that record, each "k<Record>Len" is the size of the record's fixed
* portion, and the remaining values are byte offsets of individual fields
* measured from the start of the record.
*/
/* End Of Central Directory (EOCD) record */
#define kEOCDSignature 0x06054b50
#define kEOCDLen 22
#define kEOCDNumEntries 8 // offset to #of entries in file
#define kEOCDSize 12 // size of the central directory
#define kEOCDFileOffset 16 // offset to central directory
#define kMaxCommentLen 65535 // longest possible in ushort
/* the most we ever need to read from the end of the file to find the EOCD */
#define kMaxEOCDSearch (kMaxCommentLen + kEOCDLen)
/* Local File Header (LFH) */
#define kLFHSignature 0x04034b50
#define kLFHLen 30 // excluding variable-len fields
#define kLFHNameLen 26 // offset to filename length
#define kLFHExtraLen 28 // offset to extra length
/* Central Directory Entry (CDE) */
#define kCDESignature 0x02014b50
#define kCDELen 46 // excluding variable-len fields
#define kCDEMethod 10 // offset to compression method
#define kCDEModWhen 12 // offset to modification timestamp
#define kCDECRC 16 // offset to entry CRC
#define kCDECompLen 20 // offset to compressed length
#define kCDEUncompLen 24 // offset to uncompressed length
#define kCDENameLen 28 // offset to filename length
#define kCDEExtraLen 30 // offset to extra length
#define kCDECommentLen 32 // offset to comment length
#define kCDELocalOffset 42 // offset to local hdr
/*
* The values we return for ZipEntry use 0 as an invalid value, so we
* want to adjust the hash table index by a fixed amount. Using a large
* value helps ensure that people don't mix & match arguments, e.g. with
* entry indices.
*/
#define kZipEntryAdj 10000
/*
 * Map a ZipEntry handle back to its slot in the archive's hash table.
 *
 * Handles are hash table indices biased by kZipEntryAdj. Returns the slot
 * index, or -1 (after logging a warning) when the handle falls outside the
 * table or names a slot that holds no entry.
 */
static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry)
{
    const long slot = ((long) entry) - kZipEntryAdj;
    int usable = (slot >= 0 && slot < pArchive->mHashTableSize);

    /* only look at the slot once we know the index is inside the table */
    if (usable)
        usable = (pArchive->mHashTable[slot].name != NULL);

    if (!usable) {
        LOGW("Zip: invalid ZipEntry %p (%ld)\n", entry, slot);
        return -1;
    }
    return slot;
}
/*
 * Hash a counted (not necessarily NUL-terminated) character string.
 *
 * Starting from zero, each of the "len" characters is folded in as
 * hash = hash * 31 + c. A zero-length string hashes to 0.
 */
static unsigned int computeHash(const char* str, int len)
{
    unsigned int acc = 0;
    const char* cur = str;

    for (; len != 0; len--) {
        acc = acc * 31 + *cur;
        cur++;
    }
    return acc;
}
/*
 * Insert a name into the archive's open-addressed hash table.
 *
 * "str"/"strLen" describe the (not NUL-terminated) name; only the pointer
 * is stored, no copy is made. "hash" is the value computeHash() produced
 * for the name. The index is reduced with a bit mask, which relies on the
 * table size being a power of two.
 */
static void addToHash(ZipArchive* pArchive, const char* str, int strLen,
    unsigned int hash)
{
    const int tableSize = pArchive->mHashTableSize;
    const int mask = tableSize - 1;
    int slot;

    /*
     * Probe linearly from the home slot. The table is allocated with spare
     * capacity, so an empty slot always turns up.
     */
    for (slot = hash & mask;
         pArchive->mHashTable[slot].name != NULL;
         slot = (slot + 1) & mask)
    {
        /* slot taken, try the next one */
    }

    pArchive->mHashTable[slot].name = str;
    pArchive->mHashTable[slot].nameLen = strLen;
}
/*
* Get 2 little-endian bytes.
*/
static u2 get2LE(unsigned char const* pSrc)
{
return pSrc[0] | (pSrc[1] << 8);
}
/*
* Get 4 little-endian bytes.
*/
static u4 get4LE(unsigned char const* pSrc)
{
u4 result;
result = pSrc[0];
result |= pSrc[1] << 8;
result |= pSrc[2] << 16;
result |= pSrc[3] << 24;
return result;
}
/*
 * Find the zip Central Directory and memory-map it.
 *
 * "fd" is the open archive, "debugFileName" is only used in log messages.
 *
 * On success, returns 0 after populating fields from the EOCD area:
 *   mDirectoryOffset
 *   mDirectoryMap
 *   mNumEntries
 *
 * Returns -1 on any failure (file too short, allocation/seek/read failure,
 * no EOCD signature, implausible offsets, empty archive, mapping failure).
 * The file descriptor is never closed here.
 */
static int mapCentralDirectory(int fd, const char* debugFileName,
    ZipArchive* pArchive)
{
    u1* scanBuf = NULL;
    int result = -1;

    /*
     * Get and test file length. A failed lseek() returns -1, which is
     * also rejected by this comparison.
     */
    off_t fileLength = lseek(fd, 0, SEEK_END);
    if (fileLength < kEOCDLen) {
        LOGV("Zip: length %ld is too small to be zip\n", (long) fileLength);
        goto bail;
    }

    /*
     * Perform the traditional EOCD snipe hunt.
     *
     * We're searching for the End of Central Directory magic number,
     * which appears at the start of the EOCD block. It's followed by
     * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
     * need to read the last part of the file into a buffer, dig through
     * it to find the magic number, parse some values out, and use those
     * to determine the extent of the CD.
     *
     * We start by pulling in the last part of the file.
     */
    size_t readAmount = kMaxEOCDSearch;
    if (readAmount > (size_t) fileLength)
        readAmount = fileLength;
    off_t searchStart = fileLength - readAmount;

    scanBuf = (u1*) malloc(readAmount);
    if (scanBuf == NULL) {
        /* don't hand a NULL buffer to read() */
        LOGW("Zip: unable to allocate %zu bytes for EOCD search\n",
            readAmount);
        goto bail;
    }
    if (lseek(fd, searchStart, SEEK_SET) != searchStart) {
        LOGW("Zip: seek %ld failed: %s\n", (long) searchStart, strerror(errno));
        goto bail;
    }
    ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scanBuf, readAmount));
    if (actual != (ssize_t) readAmount) {
        LOGW("Zip: read %zd failed: %s\n", readAmount, strerror(errno));
        goto bail;
    }

    /*
     * Scan backward for the EOCD magic. In an archive without a trailing
     * comment, we'll find it on the first try. (We may want to consider
     * doing an initial minimal read; if we don't find it, retry with a
     * second read as above.)
     *
     * Starting at (readAmount - kEOCDLen) guarantees that a full fixed-size
     * EOCD record is available in the buffer wherever the magic is found.
     */
    int i;
    for (i = readAmount - kEOCDLen; i >= 0; i--) {
        /* cheap first-byte test before assembling the full 32-bit value */
        if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) {
            LOGV("+++ Found EOCD at buf+%d\n", i);
            break;
        }
    }
    if (i < 0) {
        LOGD("Zip: EOCD not found, %s is not zip\n", debugFileName);
        goto bail;
    }

    off_t eocdOffset = searchStart + i;
    const u1* eocdPtr = scanBuf + i;
    assert(eocdOffset < fileLength);

    /*
     * Grab the CD offset and size, and the number of entries in the
     * archive. Verify that they look reasonable: the directory must end
     * at or before the EOCD record. The sum is done in 64 bits so it
     * cannot wrap.
     */
    u4 numEntries = get2LE(eocdPtr + kEOCDNumEntries);
    u4 dirSize = get4LE(eocdPtr + kEOCDSize);
    u4 dirOffset = get4LE(eocdPtr + kEOCDFileOffset);
    if ((long long) dirOffset + (long long) dirSize > (long long) eocdOffset) {
        LOGW("Zip: bad offsets (dir %ld, size %u, eocd %ld)\n",
            (long) dirOffset, dirSize, (long) eocdOffset);
        goto bail;
    }
    if (numEntries == 0) {
        LOGW("Zip: empty archive?\n");
        goto bail;
    }
    LOGV("+++ numEntries=%d dirSize=%d dirOffset=%d\n",
        numEntries, dirSize, dirOffset);

    /*
     * It all looks good. Create a mapping for the CD, and set the fields
     * in pArchive.
     */
    if (sysMapFileSegmentInShmem(fd, dirOffset, dirSize,
            &pArchive->mDirectoryMap) != 0)
    {
        LOGW("Zip: cd map failed\n");
        goto bail;
    }
    pArchive->mNumEntries = numEntries;
    pArchive->mDirectoryOffset = dirOffset;
    result = 0;

bail:
    free(scanBuf);      /* free(NULL) is harmless on the early-exit paths */
    return result;
}
/*
 * Parses the Zip archive's Central Directory. Allocates and populates the
 * hash table.
 *
 * Expects mDirectoryMap, mDirectoryOffset and mNumEntries to have been
 * filled in already. Every read from the mapped directory is bounds-checked
 * against the mapping length before it happens, since the contents come
 * straight from the file.
 *
 * Returns 0 on success, -1 on failure. On failure the hash table may have
 * been allocated and partially filled; it is left in pArchive for the
 * caller to release (dexZipCloseArchive frees it).
 */
static int parseZipArchive(ZipArchive* pArchive)
{
    int result = -1;
    const u1* cdPtr = (const u1*)pArchive->mDirectoryMap.addr;
    size_t cdLength = pArchive->mDirectoryMap.length;
    int numEntries = pArchive->mNumEntries;
    const u1* ptr = cdPtr;
    int i;

    /*
     * Create hash table. Asking for 1 + 4n/3 slots gives a maximum load
     * factor of 75%; rounding the size up to a power of 2 can lower it
     * further. There must be at least one unused entry to avoid an
     * infinite loop during creation and lookup.
     */
    pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3);
    pArchive->mHashTable = (ZipHashEntry*)
        calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry));
    if (pArchive->mHashTable == NULL) {
        LOGW("Zip: unable to allocate hash table (%d slots)\n",
            (int) pArchive->mHashTableSize);
        goto bail;
    }

    /*
     * Walk through the central directory, adding entries to the hash
     * table and verifying values.
     *
     * Invariant: (ptr - cdPtr) <= cdLength at the top of every iteration.
     */
    for (i = 0; i < numEntries; i++) {
        const size_t remaining = cdLength - (size_t)(ptr - cdPtr);

        /* the fixed-size header must fit before we read anything from it */
        if (remaining < kCDELen) {
            LOGW("Zip: ran off the end (at %d)\n", i);
            goto bail;
        }
        if (get4LE(ptr) != kCDESignature) {
            LOGW("Zip: missed a central dir sig (at %d)\n", i);
            goto bail;
        }

        /* file data lives before the central directory */
        long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
        if (localHdrOffset >= pArchive->mDirectoryOffset) {
            LOGW("Zip: bad LFH offset %ld at entry %d\n", localHdrOffset, i);
            goto bail;
        }

        unsigned int fileNameLen, extraLen, commentLen, hash;
        fileNameLen = get2LE(ptr + kCDENameLen);
        extraLen = get2LE(ptr + kCDEExtraLen);
        commentLen = get2LE(ptr + kCDECommentLen);

        /*
         * Make sure the whole entry, including the filename we are about
         * to hash, lies inside the mapping.
         */
        const size_t entryLen =
            (size_t) kCDELen + fileNameLen + extraLen + commentLen;
        if (entryLen > remaining) {
            LOGW("Zip: bad CD advance (%d vs %zd) at entry %d\n",
                (int) ((size_t)(ptr - cdPtr) + entryLen), cdLength, i);
            goto bail;
        }

        /* add the CDE filename to the hash table */
        hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
        addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash);

        ptr += entryLen;
    }
    LOGV("+++ zip good scan %d entries\n", numEntries);
    result = 0;

bail:
    return result;
}
/*
 * Open "fileName" read-only and set it up as a zip archive.
 *
 * This gets called on non-Zip files too, especially during VM startup, so
 * failures are only logged at verbose level. (Do we want a "quiet" flag?)
 *
 * "pArchive" is zeroed first and filled in on success, in which case 0 is
 * returned. If the file can't be opened, the errno value is returned (or -1
 * if errno is unset); otherwise the result of dexZipPrepArchive() is passed
 * through.
 */
int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive)
{
    LOGV("Opening as zip '%s' %p\n", fileName, pArchive);

    memset(pArchive, 0, sizeof(*pArchive));

    const int fd = open(fileName, O_RDONLY, 0);
    if (fd >= 0) {
        /* the archive owns the descriptor from here on */
        return dexZipPrepArchive(fd, fileName, pArchive);
    }

    const int err = (errno != 0) ? errno : -1;
    LOGV("Unable to open '%s': %s\n", fileName, strerror(err));
    return err;
}
/*
 * Set up a ZipArchive on top of an already-open file descriptor.
 *
 * "pArchive" is zeroed, given "fd", and then populated by locating and
 * parsing the central directory. "debugFileName" only appears in log output.
 *
 * Returns 0 on success. On failure returns -1 after tearing the archive
 * down with dexZipCloseArchive(), which also closes "fd".
 */
int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive)
{
    memset(pArchive, 0, sizeof(*pArchive));
    pArchive->mFd = fd;

    if (mapCentralDirectory(fd, debugFileName, pArchive) == 0) {
        if (parseZipArchive(pArchive) == 0) {
            /* success */
            return 0;
        }
        LOGV("Zip: parsing '%s' failed\n", debugFileName);
    }

    /* something went wrong; release whatever was set up so far */
    dexZipCloseArchive(pArchive);
    return -1;
}
/*
 * Tear down a ZipArchive: close its file descriptor, release the central
 * directory mapping and free the hash table.
 *
 * NOTE: the ZipArchive may not have been fully created, so this must cope
 * with fields that were never set up.
 */
void dexZipCloseArchive(ZipArchive* pArchive)
{
    LOGV("Closing archive %p\n", pArchive);

    if (pArchive->mFd >= 0) {
        close(pArchive->mFd);
    }
    sysReleaseShmem(&pArchive->mDirectoryMap);
    free(pArchive->mHashTable);

    /* poison the fields so nobody tries to use the archive after closing */
    pArchive->mHashTable = NULL;
    pArchive->mHashTableSize = -1;
    pArchive->mNumEntries = -1;
    pArchive->mFd = -1;
    pArchive->mDirectoryOffset = -1;
}
/*
 * Look up an entry by its exact name.
 *
 * "entryName" is a NUL-terminated string. Returns the entry's handle (hash
 * table index plus kZipEntryAdj), or NULL if no entry has that name.
 */
ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName)
{
    const int wantLen = strlen(entryName);
    const unsigned int hash = computeHash(entryName, wantLen);
    const int tableSize = pArchive->mHashTableSize;
    const int mask = tableSize - 1;
    int slot;

    /* probe linearly from the home slot until we hit an empty one */
    for (slot = hash & mask;
         pArchive->mHashTable[slot].name != NULL;
         slot = (slot + 1) & mask)
    {
        if (pArchive->mHashTable[slot].nameLen != wantLen)
            continue;
        if (memcmp(pArchive->mHashTable[slot].name, entryName, wantLen) != 0)
            continue;

        /* match */
        return (ZipEntry)(long)(slot + kZipEntryAdj);
    }

    return NULL;
}
#if 0
/*
* Find the Nth entry.
*
* NOTE: this function is compiled out by the enclosing "#if 0".
*
* This currently involves walking through the sparse hash table, counting
* non-empty entries. If we need to speed this up we can either allocate
* a parallel lookup table or (perhaps better) provide an iterator interface.
*
* "idx" counts occupied slots in hash table order. Returns the handle of
* the idx'th occupied slot, or NULL if "idx" is outside [0, mNumEntries)
* or the table holds fewer occupied slots than that.
*/
ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx)
{
/* reject indices that can't correspond to an entry */
if (idx < 0 || idx >= pArchive->mNumEntries) {
LOGW("Invalid index %d\n", idx);
return NULL;
}
int ent;
for (ent = 0; ent < pArchive->mHashTableSize; ent++) {
if (pArchive->mHashTable[ent].name != NULL) {
/* count down over occupied slots; stop at the requested one */
if (idx-- == 0)
/*
* NOTE(review): dexZipFindEntry converts via (long) before casting
* to ZipEntry; this cast does not -- reconcile if re-enabled.
*/
return (ZipEntry) (ent + kZipEntryAdj);
}
}
return NULL;
}
#endif
/*
 * Get the useful fields from the zip entry.
 *
 * Any of the output pointers may be NULL if the caller doesn't want that
 * value. The compression method, timestamp, CRC and lengths come from the
 * entry's central directory record; they are stored before the data offset
 * is validated, so they may have been written even when -1 is returned.
 *
 * Requesting "pOffset" costs a seek and a read on the archive's file
 * descriptor, which changes the descriptor's file position.
 *
 * Returns 0 on success. Returns non-zero if "entry" is invalid or if the
 * contents of the fields (particularly the data offset) appear to be bogus.
 */
int dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
    int* pMethod, size_t* pUncompLen, size_t* pCompLen, off_t* pOffset,
    long* pModWhen, long* pCrc32)
{
    int ent = entryToIndex(pArchive, entry);
    if (ent < 0)
        return -1;

    /*
     * Recover the start of the central directory entry from the filename
     * pointer. The filename is the first entry past the fixed-size data,
     * so we can just subtract back from that.
     */
    const unsigned char* ptr = (const unsigned char*)
        pArchive->mHashTable[ent].name;
    off_t cdOffset = pArchive->mDirectoryOffset;
    ptr -= kCDELen;

    int method = get2LE(ptr + kCDEMethod);
    if (pMethod != NULL)
        *pMethod = method;
    if (pModWhen != NULL)
        *pModWhen = get4LE(ptr + kCDEModWhen);
    if (pCrc32 != NULL)
        *pCrc32 = get4LE(ptr + kCDECRC);

    size_t compLen = get4LE(ptr + kCDECompLen);
    if (pCompLen != NULL)
        *pCompLen = compLen;
    size_t uncompLen = get4LE(ptr + kCDEUncompLen);
    if (pUncompLen != NULL)
        *pUncompLen = uncompLen;

    /*
     * If requested, determine the offset of the start of the data. All we
     * have is the offset to the Local File Header, which is variable size,
     * so we have to read the contents of the struct to figure out where
     * the actual data starts.
     *
     * We also need to make sure that the lengths are not so large that
     * somebody trying to map the compressed or uncompressed data runs
     * off the end of the mapped region.
     *
     * Note we don't verify compLen/uncompLen if they don't request the
     * dataOffset, because dataOffset is expensive to determine. However,
     * if they don't have the file offset, they're not likely to be doing
     * anything with the contents.
     *
     * All range arithmetic below is done in "long long": the operands are
     * 32-bit values taken from the file, and adding them in a 32-bit long,
     * off_t or size_t could wrap and let a bogus value pass the checks.
     */
    if (pOffset != NULL) {
        long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
        /* a negative value means the 32-bit field didn't fit in a long */
        if (localHdrOffset < 0 ||
            (long long) localHdrOffset + kLFHLen >= (long long) cdOffset)
        {
            LOGW("Zip: bad local hdr offset in zip\n");
            return -1;
        }

        u1 lfhBuf[kLFHLen];
        if (lseek(pArchive->mFd, localHdrOffset, SEEK_SET) != localHdrOffset) {
            LOGW("Zip: failed seeking to lfh at offset %ld\n", localHdrOffset);
            return -1;
        }
        ssize_t actual =
            TEMP_FAILURE_RETRY(read(pArchive->mFd, lfhBuf, sizeof(lfhBuf)));
        if (actual != (ssize_t) sizeof(lfhBuf)) {
            LOGW("Zip: failed reading lfh from offset %ld\n", localHdrOffset);
            return -1;
        }

        if (get4LE(lfhBuf) != kLFHSignature) {
            LOGW("Zip: didn't find signature at start of lfh, offset=%ld\n",
                localHdrOffset);
            return -1;
        }

        /* data starts after the fixed header, the name and the extra field */
        long long dataOffset64 = (long long) localHdrOffset + kLFHLen
            + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen);
        if (dataOffset64 >= (long long) cdOffset) {
            LOGW("Zip: bad data offset %ld in zip\n", (long) dataOffset64);
            return -1;
        }
        /* safe to narrow: 0 <= dataOffset64 < cdOffset, which is an off_t */
        off_t dataOffset = (off_t) dataOffset64;

        /* check lengths */
        if (dataOffset64 + (long long) compLen > (long long) cdOffset) {
            LOGW("Zip: bad compressed length in zip (%ld + %zd > %ld)\n",
                (long) dataOffset, compLen, (long) cdOffset);
            return -1;
        }

        /* stored data is read directly, so its full length must fit too */
        if (method == kCompressStored &&
            dataOffset64 + (long long) uncompLen > (long long) cdOffset)
        {
            LOGW("Zip: bad uncompressed length in zip (%ld + %zd > %ld)\n",
                (long) dataOffset, uncompLen, (long) cdOffset);
            return -1;
        }

        *pOffset = dataOffset;
    }
    return 0;
}
/*
 * Inflate raw "deflate" data from one file descriptor to another.
 *
 * Reads "compLen" bytes of compressed data from the current position of
 * "inFd" in chunks, and writes the inflated output to "outFd".
 *
 * Returns 0 if the stream inflated cleanly and produced exactly "uncompLen"
 * bytes, -1 otherwise (allocation, zlib, read or write failure, or a size
 * mismatch).
 */
static int inflateToFile(int inFd, int outFd, size_t uncompLen, size_t compLen)
{
    const size_t kBufSize = 32768;
    unsigned char* inBuf = (unsigned char*) malloc(kBufSize);
    unsigned char* outBuf = (unsigned char*) malloc(kBufSize);
    z_stream strm;
    int status = -1;
    int zerr;

    if (inBuf == NULL || outBuf == NULL)
        goto bail;

    /*
     * Set up the zlib stream: default allocators, no input yet, and the
     * whole output buffer available.
     */
    memset(&strm, 0, sizeof(strm));
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.next_in = NULL;
    strm.avail_in = 0;
    strm.next_out = (Bytef*) outBuf;
    strm.avail_out = kBufSize;
    strm.data_type = Z_UNKNOWN;

    /*
     * Negative window bits tell zlib that the data is a bare deflate
     * stream with no zlib header in front of it.
     */
    zerr = inflateInit2(&strm, -MAX_WBITS);
    if (zerr != Z_OK) {
        if (zerr == Z_VERSION_ERROR) {
            LOGE("Installed zlib is not compatible with linked version (%s)\n",
                ZLIB_VERSION);
        } else {
            LOGW("Call to inflateInit2 failed (zerr=%d)\n", zerr);
        }
        goto bail;
    }

    /*
     * Pump data through zlib until it reports the end of the stream.
     */
    for (;;) {
        /* refill the input buffer once zlib has consumed all of it */
        if (strm.avail_in == 0) {
            size_t chunk = compLen;
            if (chunk > kBufSize)
                chunk = kBufSize;

            ssize_t got = TEMP_FAILURE_RETRY(read(inFd, inBuf, chunk));
            if (got != (ssize_t) chunk) {
                LOGW("Zip: inflate read failed (%d vs %zd)\n",
                    (int)got, chunk);
                goto z_bail;
            }

            compLen -= chunk;
            strm.next_in = inBuf;
            strm.avail_in = chunk;
        }

        /* uncompress the data */
        zerr = inflate(&strm, Z_NO_FLUSH);
        if (!(zerr == Z_OK || zerr == Z_STREAM_END)) {
            LOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
                zerr, strm.next_in, strm.avail_in,
                strm.next_out, strm.avail_out);
            goto z_bail;
        }

        /* flush when the output buffer is full, or at the end if non-empty */
        const int finished = (zerr == Z_STREAM_END);
        if (strm.avail_out == 0 ||
            (finished && strm.avail_out != kBufSize))
        {
            size_t pending = strm.next_out - outBuf;
            if (sysWriteFully(outFd, outBuf, pending, "Zip inflate") != 0)
                goto z_bail;

            strm.next_out = outBuf;
            strm.avail_out = kBufSize;
        }

        if (zerr != Z_OK)
            break;
    }

    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */

    /* paranoia */
    if (strm.total_out != uncompLen) {
        LOGW("Zip: size mismatch on inflated file (%ld vs %zd)\n",
            strm.total_out, uncompLen);
        goto z_bail;
    }

    status = 0;

z_bail:
    inflateEnd(&strm);          /* free up any allocated structures */

bail:
    free(inBuf);
    free(outBuf);
    return status;
}
/*
 * Copy "uncompLen" bytes from the current position of "inFd" to "outFd".
 *
 * The data is moved in chunks through a local buffer. Returns 0 once
 * everything has been copied, or -1 as soon as a read comes up short or a
 * write fails.
 */
static int copyFileToFile(int inFd, int outFd, size_t uncompLen)
{
    const size_t kBufSize = 32768;
    unsigned char buf[kBufSize];
    size_t remaining;

    for (remaining = uncompLen; remaining != 0; ) {
        size_t chunk = remaining;
        if (chunk > kBufSize)
            chunk = kBufSize;

        ssize_t got = TEMP_FAILURE_RETRY(read(inFd, buf, chunk));
        if (got != (ssize_t) chunk) {
            LOGW("Zip: copy read failed (%d vs %zd)\n", (int)got, chunk);
            return -1;
        }

        if (sysWriteFully(outFd, buf, chunk, "Zip copy") != 0)
            return -1;

        remaining -= chunk;
    }

    return 0;
}
/*
 * Extract one entry, in its entirety, to the open file descriptor "fd".
 *
 * Stored entries are copied byte for byte; anything else is inflated.
 * Returns 0 on success, -1 if the entry is invalid, its metadata looks
 * bogus, or seeking/reading/writing fails.
 *
 * TODO: this doesn't verify the data's CRC, but probably should (especially
 * for uncompressed data).
 */
int dexZipExtractEntryToFile(const ZipArchive* pArchive,
    const ZipEntry entry, int fd)
{
    int method;
    size_t uncompLen, compLen;
    off_t dataOffset;
    int status;

    if (entryToIndex(pArchive, entry) < 0) {
        LOGW("Zip: extract can't find entry %p\n", entry);
        return -1;
    }

    if (dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen,
            &dataOffset, NULL, NULL) != 0)
    {
        return -1;
    }

    /* position the archive at the first byte of the entry's data */
    if (lseek(pArchive->mFd, dataOffset, SEEK_SET) != dataOffset) {
        LOGW("Zip: lseek to data at %ld failed\n", (long) dataOffset);
        return -1;
    }

    if (method == kCompressStored)
        status = copyFileToFile(pArchive->mFd, fd, uncompLen);
    else
        status = inflateToFile(pArchive->mFd, fd, uncompLen, compLen);

    return (status != 0) ? -1 : 0;
}