/* * Copyright 2013 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #include "SkPdfNativeDoc.h" #include <stdio.h> #include <string.h> #include <sys/types.h> #include <sys/stat.h> #include "SkPdfMapper_autogen.h" #include "SkPdfNativeObject.h" #include "SkPdfNativeTokenizer.h" #include "SkPdfReporter.h" #include "SkStream.h" // TODO(edisonn): for some reason on mac these files are found here, but are found from headers //#include "SkPdfFileTrailerDictionary_autogen.h" //#include "SkPdfCatalogDictionary_autogen.h" //#include "SkPdfPageObjectDictionary_autogen.h" //#include "SkPdfPageTreeNodeDictionary_autogen.h" #include "SkPdfHeaders_autogen.h" static long getFileSize(const char* filename) { struct stat stat_buf; int rc = stat(filename, &stat_buf); return rc == 0 ? (long)stat_buf.st_size : -1; } static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) { while (current > start && !isPdfEOL(*(current - 1))) { current--; } return current; } static const unsigned char* previousLineHome(const unsigned char* start, const unsigned char* current) { if (current > start && isPdfEOL(*(current - 1))) { current--; } // allows CR+LF, LF+CR but not two CR+CR or LF+LF if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) { current--; } while (current > start && !isPdfEOL(*(current - 1))) { current--; } return current; } static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) { while (current < end && !isPdfEOL(*current)) { current++; } current++; if (current < end && isPdfEOL(*current) && *current != *(current - 1)) { current++; } return current; } SkPdfNativeDoc* gDoc = NULL; SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream) : fAllocator(new SkPdfAllocator()) , fFileContent(NULL) , fContentLength(0) , fRootCatalogRef(NULL) , fRootCatalog(NULL) { size_t size = stream->getLength(); void* ptr = sk_malloc_throw(size); stream->read(ptr, size); init(ptr, size); } SkPdfNativeDoc::SkPdfNativeDoc(const char* path) : fAllocator(new SkPdfAllocator()) , fFileContent(NULL) , fContentLength(0) , fRootCatalogRef(NULL) , fRootCatalog(NULL) { gDoc = this; FILE* file = fopen(path, "r"); // TODO(edisonn): put this in a function that can return NULL if (file) { size_t size = getFileSize(path); void* content = sk_malloc_throw(size); bool ok = (0 != fread(content, size, 1, file)); fclose(file); if (!ok) { sk_free(content); SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "could not read file", NULL, NULL); // TODO(edisonn): not nice to return like this from constructor, create a static // function that can report NULL for failures. return; // Doc will have 0 pages } init(content, size); } } void SkPdfNativeDoc::init(const void* bytes, size_t length) { fFileContent = (const unsigned char*)bytes; fContentLength = length; const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1); const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine); const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine); if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) { SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find startxref", NULL, NULL); } long xrefByteOffset = atol((const char*)xrefByteOffsetLine); bool storeCatalog = true; while (xrefByteOffset >= 0) { const unsigned char* trailerStart = this->readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine); xrefByteOffset = -1; if (trailerStart < xrefstartKeywordLine) { this->readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false); storeCatalog = false; } } // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper if (fRootCatalogRef) { fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) { SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); if (tree && tree->isDictionary() && tree->valid()) { fillPages(tree); } } } if (pages() == 0) { // TODO(edisonn): probably it would be better to return NULL and make a clean document. loadWithoutXRef(); } // TODO(edisonn): corrupted pdf, read it from beginning and rebuild // (xref, trailer, or just read all objects) } void SkPdfNativeDoc::loadWithoutXRef() { const unsigned char* current = fFileContent; const unsigned char* end = fFileContent + fContentLength; // TODO(edisonn): read pdf version current = ignoreLine(current, end); current = skipPdfWhiteSpaces(current, end); while (current < end) { SkPdfNativeObject token; current = nextObject(current, end, &token, NULL, NULL); if (token.isInteger()) { int id = (int)token.intValue(); token.reset(); current = nextObject(current, end, &token, NULL, NULL); // TODO(edisonn): generation ignored for now (used in pdfs with updates) // int generation = (int)token.intValue(); token.reset(); current = nextObject(current, end, &token, NULL, NULL); // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring. if (!token.isKeyword("obj")) { SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find obj", NULL, NULL); continue; } while (fObjects.count() < id + 1) { reset(fObjects.append()); } fObjects[id].fOffset = current - fFileContent; SkPdfNativeObject* obj = fAllocator->allocObject(); current = nextObject(current, end, obj, fAllocator, this); fObjects[id].fResolvedReference = obj; fObjects[id].fObj = obj; fObjects[id].fIsReferenceResolved = true; } else if (token.isKeyword("trailer")) { long dummy; current = readTrailer(current, end, true, &dummy, true); } else if (token.isKeyword("startxref")) { token.reset(); current = nextObject(current, end, &token, NULL, NULL); // ignore startxref } current = skipPdfWhiteSpaces(current, end); } // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we // might not need it. if (!fRootCatalogRef) { for (unsigned int i = 0 ; i < objects(); i++) { SkPdfNativeObject* obj = object(i); SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL; if (root && root->isReference()) { fRootCatalogRef = root; } } } if (fRootCatalogRef) { fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) { SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); if (tree && tree->isDictionary() && tree->valid()) { fillPages(tree); } } } } SkPdfNativeDoc::~SkPdfNativeDoc() { sk_free((void*)fFileContent); delete fAllocator; } const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd) { SkPdfNativeObject xref; const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL); if (!xref.isKeyword("xref")) { SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref", NULL, NULL); return trailerEnd; } SkPdfNativeObject token; while (current < trailerEnd) { token.reset(); const unsigned char* previous = current; current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isInteger()) { SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, "Done readCrossReferenceSection", NULL, NULL); return previous; } int startId = (int)token.intValue(); token.reset(); current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isInteger()) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); return current; } int entries = (int)token.intValue(); for (int i = 0; i < entries; i++) { token.reset(); current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isInteger()) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); return current; } int offset = (int)token.intValue(); token.reset(); current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isInteger()) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); return current; } int generation = (int)token.intValue(); token.reset(); current = nextObject(current, trailerEnd, &token, NULL, NULL); if (!token.isKeyword() || token.lenstr() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection: f or n expected", &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); return current; } this->addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f'); } } SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, "Unexpected end of readCrossReferenceSection", NULL, NULL); return current; } const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword) { *prev = -1; const unsigned char* current = trailerStart; if (!skipKeyword) { SkPdfNativeObject trailerKeyword; // Use null allocator, and let it just fail if memory, it should not crash. current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL); if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() || strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readTrailer: trailer keyword expected", &trailerKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); return current; } } SkPdfNativeObject token; current = nextObject(current, trailerEnd, &token, fAllocator, NULL); if (!token.isDictionary()) { return current; } SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token; if (!trailer->valid()) { return current; } if (storeCatalog) { SkPdfNativeObject* ref = trailer->Root(NULL); if (ref == NULL || !ref->isReference()) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readTrailer: unexpected root reference", ref, SkPdfNativeObject::kReference_PdfObjectType, NULL); return current; } fRootCatalogRef = ref; } if (trailer->has_Prev()) { *prev = (long)trailer->Prev(NULL); } return current; } void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) { // TODO(edisonn): security here, verify id while (fObjects.count() < id + 1) { this->reset(fObjects.append()); } fObjects[id].fOffset = offset; fObjects[id].fObj = NULL; fObjects[id].fResolvedReference = NULL; fObjects[id].fIsReferenceResolved = false; } SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) { long startOffset = fObjects[id].fOffset; //long endOffset = fObjects[id].fOffsetEnd; // TODO(edisonn): use hinted endOffset const unsigned char* current = fFileContent + startOffset; const unsigned char* end = fFileContent + fContentLength; SkPdfNativeTokenizer tokenizer(current, (int) (end - current), fAllocator, this); SkPdfNativeObject idObj; SkPdfNativeObject generationObj; SkPdfNativeObject objKeyword; SkPdfNativeObject* dict = fAllocator->allocObject(); current = nextObject(current, end, &idObj, NULL, NULL); if (current >= end) { SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id", NULL, NULL); return NULL; } current = nextObject(current, end, &generationObj, NULL, NULL); if (current >= end) { SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading generation", NULL, NULL); return NULL; } current = nextObject(current, end, &objKeyword, NULL, NULL); if (current >= end) { SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading keyword obj", NULL, NULL); return NULL; } if (!idObj.isInteger() || id != idObj.intValue()) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id", &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); } // TODO(edisonn): verify that the generation is the right one if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected generation", &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); } if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) { SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected obj keyword", &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); } current = nextObject(current, end, dict, fAllocator, this); // TODO(edisonn): report warning/error - verify that the last token is endobj return dict; } void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) { SkPdfArray* kids = tree->Kids(this); if (kids == NULL) { *fPages.append() = (SkPdfPageObjectDictionary*)tree; return; } int cnt = (int) kids->size(); for (int i = 0; i < cnt; i++) { SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i)); if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) { *fPages.append() = (SkPdfPageObjectDictionary*)obj; } else { // TODO(edisonn): verify that it is a page tree indeed fillPages((SkPdfPageTreeNodeDictionary*)obj); } } } int SkPdfNativeDoc::pages() const { return fPages.count(); } SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) { SkASSERT(page >= 0 && page < fPages.count()); return fPages[page]; } SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) { SkASSERT(page >= 0 && page < fPages.count()); return fPages[page]->Resources(this); } // TODO(edisonn): Partial implemented. // Move the logics directly in the code generator for inheritable and default values? SkRect SkPdfNativeDoc::MediaBox(int page) { SkPdfPageObjectDictionary* current = fPages[page]; while (!current->has_MediaBox() && current->has_Parent()) { current = (SkPdfPageObjectDictionary*)current->Parent(this); } if (current) { return current->MediaBox(this); } return SkRect::MakeEmpty(); } size_t SkPdfNativeDoc::objects() const { return fObjects.count(); } SkPdfNativeObject* SkPdfNativeDoc::object(int i) { SkASSERT(!(i < 0 || i > fObjects.count())); if (i < 0 || i > fObjects.count()) { return NULL; } if (fObjects[i].fObj == NULL) { fObjects[i].fObj = readObject(i); // TODO(edisonn): For perf, when we read the cross reference sections, we should take // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad // stream, and fail quickly, in case we default to sequential stream read. } return fObjects[i].fObj; } const SkPdfMapper* SkPdfNativeDoc::mapper() const { return fMapper; } SkPdfReal* SkPdfNativeDoc::createReal(double value) const { SkPdfNativeObject* obj = fAllocator->allocObject(); SkPdfNativeObject::makeReal(value, obj); TRACK_OBJECT_SRC(obj); return (SkPdfReal*)obj; } SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const { SkPdfNativeObject* obj = fAllocator->allocObject(); SkPdfNativeObject::makeInteger(value, obj); TRACK_OBJECT_SRC(obj); return (SkPdfInteger*)obj; } SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const { SkPdfNativeObject* obj = fAllocator->allocObject(); SkPdfNativeObject::makeString(sz, len, obj); TRACK_OBJECT_SRC(obj); return (SkPdfString*)obj; } SkPdfAllocator* SkPdfNativeDoc::allocator() const { return fAllocator; } SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) { if (ref && ref->isReference()) { int id = ref->referenceId(); // TODO(edisonn): generation/updates not supported now //int gen = ref->referenceGeneration(); // TODO(edisonn): verify id and gen expected if (id < 0 || id >= fObjects.count()) { SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "resolve reference id out of bounds", NULL, NULL); return NULL; } if (fObjects[id].fIsReferenceResolved) { SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity, kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL); return fObjects[id].fResolvedReference; } // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow // here unless we check for resolved reference on next line. // Determine if the pdf is corrupted, or we have a bug here. // Avoids recursive calls fObjects[id].fIsReferenceResolved = true; if (fObjects[id].fObj == NULL) { fObjects[id].fObj = readObject(id); } if (fObjects[id].fObj != NULL && fObjects[id].fResolvedReference == NULL) { if (!fObjects[id].fObj->isReference()) { fObjects[id].fResolvedReference = fObjects[id].fObj; } else { fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj); } } return fObjects[id].fResolvedReference; } return (SkPdfNativeObject*)ref; } size_t SkPdfNativeDoc::bytesUsed() const { return fAllocator->bytesUsed() + fContentLength + fObjects.count() * sizeof(PublicObjectEntry) + fPages.count() * sizeof(SkPdfPageObjectDictionary*) + sizeof(*this); }