// C++ source  |  583 lines  |  20.74 KB

/*
 * Copyright 2013 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkPdfNativeDoc.h"

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "SkPdfMapper_autogen.h"
#include "SkPdfNativeObject.h"
#include "SkPdfNativeTokenizer.h"
#include "SkPdfReporter.h"
#include "SkStream.h"

// TODO(edisonn): for some reason on mac these files are not found when included here, but are
// found when included through SkPdfHeaders_autogen.h
//#include "SkPdfFileTrailerDictionary_autogen.h"
//#include "SkPdfCatalogDictionary_autogen.h"
//#include "SkPdfPageObjectDictionary_autogen.h"
//#include "SkPdfPageTreeNodeDictionary_autogen.h"
#include "SkPdfHeaders_autogen.h"

// Returns the size in bytes that stat() reports for |filename|, or -1 when stat() fails.
static long getFileSize(const char* filename)
{
    struct stat info;
    if (stat(filename, &info) != 0) {
        return -1;
    }
    return (long)info.st_size;
}

// Walks backwards from |current| and returns the first byte of the line that contains it: the
// position right after the closest preceding byte for which isPdfEOL() is true, or |start| when
// no such byte exists.
static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) {
    const unsigned char* home = current;
    for (; home > start; --home) {
        if (isPdfEOL(*(home - 1))) {
            break;
        }
    }
    return home;
}

// Given |current| (expected to be the first byte of a line), returns the first byte of the line
// that precedes it, never moving before |start|.
static const unsigned char* previousLineHome(const unsigned char* start,
                                             const unsigned char* current) {
    const unsigned char* cursor = current;

    // Step over the EOL byte that ends the previous line, if there is one.
    if (cursor > start && isPdfEOL(cursor[-1])) {
        --cursor;
    }

    // Step over a second EOL byte only when it differs from the one just skipped:
    // allows CR+LF, LF+CR but not two CR+CR or LF+LF
    if (cursor > start) {
        const unsigned char before = cursor[-1];
        if (isPdfEOL(before) && before != *cursor) {
            --cursor;
        }
    }

    // Rewind to the beginning of that line.
    for (; cursor > start; --cursor) {
        if (isPdfEOL(cursor[-1])) {
            break;
        }
    }

    return cursor;
}

// Skips the rest of the line starting at |current| and returns the first byte of the next line.
// A two-byte line ending is consumed as a unit only when its two EOL bytes differ (CR+LF or
// LF+CR). The returned pointer never exceeds |end| when |current| <= |end| on entry.
static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) {
    while (current < end && !isPdfEOL(*current)) {
        current++;
    }

    // The buffer ended without an EOL: stay at the end instead of stepping past it.
    if (current >= end) {
        return current;
    }

    // Consume the EOL byte that stopped the scan.
    current++;
    if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
        current++;
    }
    return current;
}

// Global pointer to a document. In this file it is assigned only by the path-based
// SkPdfNativeDoc constructor and is not cleared by the destructor.
// NOTE(review): presumably a debugging aid -- confirm who reads it before relying on it.
SkPdfNativeDoc* gDoc = NULL;

// Builds a document from the full content of |stream|. The bytes are copied into a buffer that
// is handed to init(); it is released with sk_free() in the destructor.
// If the stream is empty or cannot be read completely, the issue is reported and the document is
// left without content.
SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream)
        : fAllocator(new SkPdfAllocator())
        , fFileContent(NULL)
        , fContentLength(0)
        , fRootCatalogRef(NULL)
        , fRootCatalog(NULL) {
    size_t size = stream->getLength();
    void* ptr = sk_malloc_throw(size);

    // A short read would leave uninitialized bytes in the buffer, and an empty buffer cannot be
    // parsed, so both are treated as a read failure.
    if (size == 0 || stream->read(ptr, size) != size) {
        sk_free(ptr);
        SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
                    "could not read stream", NULL, NULL);
        // TODO(edisonn): not nice to return like this from constructor, create a static
        // function that can report NULL for failures.
        return;  // Doc will have 0 pages
    }

    init(ptr, size);
}

// Builds a document from the file at |path|. The whole file is read into a buffer that is handed
// to init(); it is released with sk_free() in the destructor. Also publishes this document
// through the global gDoc.
// If the file cannot be opened, sized or read, the issue is reported and the document is left
// without content.
SkPdfNativeDoc::SkPdfNativeDoc(const char* path)
        : fAllocator(new SkPdfAllocator())
        , fFileContent(NULL)
        , fContentLength(0)
        , fRootCatalogRef(NULL)
        , fRootCatalog(NULL) {
    gDoc = this;

    // Binary mode: the content must be read byte for byte, without EOL translation.
    FILE* file = fopen(path, "rb");
    // TODO(edisonn): put this in a function that can return NULL
    if (!file) {
        SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
                    "could not open file", NULL, NULL);
        return;  // Doc will have 0 pages
    }

    // getFileSize() returns -1 on failure; converting that to size_t would request a huge
    // allocation, so check it before the conversion.
    long fileSize = getFileSize(path);
    if (fileSize <= 0) {
        fclose(file);
        SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
                    "could not read file", NULL, NULL);
        return;  // Doc will have 0 pages
    }

    size_t size = (size_t)fileSize;
    void* content = sk_malloc_throw(size);
    // One item of |size| bytes is requested, so success is exactly one item read.
    bool ok = (1 == fread(content, size, 1, file));
    fclose(file);
    if (!ok) {
        sk_free(content);
        SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
                    "could not read file", NULL, NULL);
        // TODO(edisonn): not nice to return like this from constructor, create a static
        // function that can report NULL for failures.
        return;  // Doc will have 0 pages
    }

    init(content, size);
}

void SkPdfNativeDoc::init(const void* bytes, size_t length) {
    fFileContent = (const unsigned char*)bytes;
    fContentLength = length;
    const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
    const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
    const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);

    if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
        SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
                    "Could not find startxref", NULL, NULL);
    }

    long xrefByteOffset = atol((const char*)xrefByteOffsetLine);

    bool storeCatalog = true;
    while (xrefByteOffset >= 0) {
        const unsigned char* trailerStart = this->readCrossReferenceSection(fFileContent + xrefByteOffset,
                                                                            xrefstartKeywordLine);
        xrefByteOffset = -1;
        if (trailerStart < xrefstartKeywordLine) {
            this->readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false);
            storeCatalog = false;
        }
    }

    // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
    // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper

    if (fRootCatalogRef) {
        fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
        if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
            SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
            if (tree && tree->isDictionary() && tree->valid()) {
                fillPages(tree);
            }
        }
    }

    if (pages() == 0) {
        // TODO(edisonn): probably it would be better to return NULL and make a clean document.
        loadWithoutXRef();
    }

    // TODO(edisonn): corrupted pdf, read it from beginning and rebuild
    // (xref, trailer, or just read all objects)
}

void SkPdfNativeDoc::loadWithoutXRef() {
    const unsigned char* current = fFileContent;
    const unsigned char* end = fFileContent + fContentLength;

    // TODO(edisonn): read pdf version
    current = ignoreLine(current, end);

    current = skipPdfWhiteSpaces(current, end);
    while (current < end) {
        SkPdfNativeObject token;
        current = nextObject(current, end, &token, NULL, NULL);
        if (token.isInteger()) {
            int id = (int)token.intValue();

            token.reset();
            current = nextObject(current, end, &token, NULL, NULL);
            // TODO(edisonn): generation ignored for now (used in pdfs with updates)
            // int generation = (int)token.intValue();

            token.reset();
            current = nextObject(current, end, &token, NULL, NULL);
            // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring.
            if (!token.isKeyword("obj")) {
                SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
                            "Could not find obj", NULL, NULL);
                continue;
            }

            while (fObjects.count() < id + 1) {
                reset(fObjects.append());
            }

            fObjects[id].fOffset = current - fFileContent;

            SkPdfNativeObject* obj = fAllocator->allocObject();
            current = nextObject(current, end, obj, fAllocator, this);

            fObjects[id].fResolvedReference = obj;
            fObjects[id].fObj = obj;
            fObjects[id].fIsReferenceResolved = true;
        } else if (token.isKeyword("trailer")) {
            long dummy;
            current = readTrailer(current, end, true, &dummy, true);
        } else if (token.isKeyword("startxref")) {
            token.reset();
            current = nextObject(current, end, &token, NULL, NULL);  // ignore startxref
        }

        current = skipPdfWhiteSpaces(current, end);
    }

    // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we
    // might not need it.
    if (!fRootCatalogRef) {
        for (unsigned int i = 0 ; i < objects(); i++) {
            SkPdfNativeObject* obj = object(i);
            SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL;
            if (root && root->isReference()) {
                fRootCatalogRef = root;
            }
        }
    }

    if (fRootCatalogRef) {
        fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
        if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
            SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
            if (tree && tree->isDictionary() && tree->valid()) {
                fillPages(tree);
            }
        }
    }


}

// Releases the file content buffer first, then the object allocator.
SkPdfNativeDoc::~SkPdfNativeDoc() {
    // fFileContent is stored as a pointer to const bytes; drop the const to hand it to sk_free().
    sk_free(const_cast<unsigned char*>(fFileContent));
    delete fAllocator;
}

// Parses one cross reference section located in [xrefStart, trailerEnd).
// Expected layout: the keyword "xref", followed by subsections, each made of
// "<first id> <entry count>" and then <entry count> entries of the form
// "<offset> <generation> <f|n>". Every entry is recorded with addCrossSectionInfo().
//
// Returns:
//   - |trailerEnd| when the section does not start with the "xref" keyword,
//   - the position of the first token that does not start a subsection (i.e. where the caller
//     should continue parsing) when the section ends normally,
//   - the position reached so far when an entry is malformed or |trailerEnd| is hit.
const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart,
                                                               const unsigned char* trailerEnd) {
    SkPdfNativeObject xref;
    const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL);

    if (!xref.isKeyword("xref")) {
        SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find xref",
                    NULL, NULL);
        return trailerEnd;
    }

    SkPdfNativeObject token;
    while (current < trailerEnd) {
        token.reset();
        // Remember where this token starts: if it is not an integer it does not belong to the
        // section and must be handed back to the caller unread.
        const unsigned char* previous = current;
        current = nextObject(current, trailerEnd, &token, NULL, NULL);
        if (!token.isInteger()) {
            SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
                        "Done readCrossReferenceSection", NULL, NULL);
            return previous;
        }

        // Subsection header: id of the first object ...
        int startId = (int)token.intValue();
        token.reset();
        current = nextObject(current, trailerEnd, &token, NULL, NULL);

        if (!token.isInteger()) {
            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection",
                                      &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
            return current;
        }

        // ... and the number of entries that follow.
        int entries = (int)token.intValue();

        for (int i = 0; i < entries; i++) {
            token.reset();
            current = nextObject(current, trailerEnd, &token, NULL, NULL);
            if (!token.isInteger()) {
                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
                                          "readCrossReferenceSection",
                                          &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
                return current;
            }
            int offset = (int)token.intValue();

            token.reset();
            current = nextObject(current, trailerEnd, &token, NULL, NULL);
            if (!token.isInteger()) {
                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
                                          "readCrossReferenceSection",
                                          &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
                return current;
            }
            int generation = (int)token.intValue();

            // The entry ends with a one letter keyword: 'f' or 'n'.
            token.reset();
            current = nextObject(current, trailerEnd, &token, NULL, NULL);
            if (!token.isKeyword() || token.lenstr() != 1 ||
                (*token.c_str() != 'f' && *token.c_str() != 'n')) {
                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
                                          "readCrossReferenceSection: f or n expected",
                                          &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
                return current;
            }

            this->addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
        }
    }
    SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
                "Unexpected end of readCrossReferenceSection", NULL, NULL);
    return current;
}

const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart,
                                                 const unsigned char* trailerEnd,
                                                 bool storeCatalog, long* prev, bool skipKeyword) {
    *prev = -1;

    const unsigned char* current = trailerStart;
    if (!skipKeyword) {
        SkPdfNativeObject trailerKeyword;
        // Use null allocator, and let it just fail if memory, it should not crash.
        current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL);

        if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
            strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
                                      "readTrailer: trailer keyword expected",
                                      &trailerKeyword,
                                      SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
            return current;
        }
    }

    SkPdfNativeObject token;
    current = nextObject(current, trailerEnd, &token, fAllocator, NULL);
    if (!token.isDictionary()) {
        return current;
    }
    SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
    if (!trailer->valid()) {
        return current;
    }

    if (storeCatalog) {
        SkPdfNativeObject* ref = trailer->Root(NULL);
        if (ref == NULL || !ref->isReference()) {
            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
                                      "readTrailer: unexpected root reference",
                                      ref, SkPdfNativeObject::kReference_PdfObjectType, NULL);
            return current;
        }
        fRootCatalogRef = ref;
    }

    if (trailer->has_Prev()) {
        *prev = (long)trailer->Prev(NULL);
    }

    return current;
}

void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
    // TODO(edisonn): security here, verify id
    while (fObjects.count() < id + 1) {
        this->reset(fObjects.append());
    }

    fObjects[id].fOffset = offset;
    fObjects[id].fObj = NULL;
    fObjects[id].fResolvedReference = NULL;
    fObjects[id].fIsReferenceResolved = false;
}

// Parses the object registered under |id| from the file content, starting at the offset stored
// in the object table. The expected layout is "<id> <generation> obj <object>".
// Mismatches in the three header tokens are reported but do not stop the parsing.
// Returns the parsed object, or NULL when the stored offset lies outside the file content or the
// content ends before the object body starts.
// |id| must be a valid index in the object table; callers are responsible for checking it.
SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) {
    long startOffset = fObjects[id].fOffset;
    //long endOffset = fObjects[id].fOffsetEnd;
    // TODO(edisonn): use hinted endOffset

    // The offset comes from the file's cross reference table; never read outside the buffer.
    if (startOffset < 0 || (size_t)startOffset >= fContentLength) {
        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
                    "object offset out of bounds", NULL, NULL);
        return NULL;
    }

    const unsigned char* current = fFileContent + startOffset;
    const unsigned char* end = fFileContent + fContentLength;

    // NOTE(review): this tokenizer is not used below -- confirm it can be removed.
    SkPdfNativeTokenizer tokenizer(current, (int) (end - current), fAllocator, this);

    SkPdfNativeObject idObj;
    SkPdfNativeObject generationObj;
    SkPdfNativeObject objKeyword;

    current = nextObject(current, end, &idObj, NULL, NULL);
    if (current >= end) {
        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id",
                    NULL, NULL);
        return NULL;
    }

    current = nextObject(current, end, &generationObj, NULL, NULL);
    if (current >= end) {
        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
                    "reading generation", NULL, NULL);
        return NULL;
    }

    current = nextObject(current, end, &objKeyword, NULL, NULL);
    if (current >= end) {
        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
                    "reading keyword obj", NULL, NULL);
        return NULL;
    }

    if (!idObj.isInteger() || id != idObj.intValue()) {
        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id",
                                  &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
    }

    // TODO(edisonn): verify that the generation is the right one
    if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) {
        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
                                  "readObject: unexpected generation",
                                  &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
    }

    if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
                                  "readObject: unexpected obj keyword",
                                  &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
    }

    // Allocate the result only now, so the early returns above do not consume an object.
    SkPdfNativeObject* dict = fAllocator->allocObject();
    current = nextObject(current, end, dict, fAllocator, this);

    // TODO(edisonn): report warning/error - verify that the last token is endobj

    return dict;
}

// Walks the page tree rooted at |tree| depth first and appends the pages it finds to fPages.
// A node without a Kids array is itself stored as a page. Kids that cannot be resolved are
// reported and skipped.
// NOTE(review): a kid is stored as a page when the mapper does NOT report it as a page object
// dictionary, and is recursed into when it does (it then ends up stored through the "no Kids"
// branch). This looks inverted -- confirm against SkPdfMapper before changing it.
// NOTE(review): fMapper is not set by the constructors or init() in this file -- confirm it is
// initialized elsewhere before this runs.
// NOTE(review): the recursion depth is not bounded; a Kids array that refers back to an ancestor
// would recurse without end.
void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) {
    SkPdfArray* kids = tree->Kids(this);
    if (kids == NULL) {
        *fPages.append() = (SkPdfPageObjectDictionary*)tree;
        return;
    }

    int cnt = (int) kids->size();
    for (int i = 0; i < cnt; i++) {
        SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i));
        // resolveReference() returns NULL for bad references; a NULL must neither be stored as a
        // page nor be recursed into.
        if (obj == NULL) {
            SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kBadReference_SkPdfIssue,
                        "fillPages: could not resolve kid", NULL, NULL);
            continue;
        }

        if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) {
            *fPages.append() = (SkPdfPageObjectDictionary*)obj;
        } else {
            // TODO(edisonn): verify that it is a page tree indeed
            fillPages((SkPdfPageTreeNodeDictionary*)obj);
        }
    }
}

int SkPdfNativeDoc::pages() const {
    return fPages.count();
}

// Returns the dictionary of the page with 0-based index |page|, or NULL when the index is out of
// range (debug builds assert first).
SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) {
    SkASSERT(page >= 0 && page < fPages.count());

    // Same policy as object(): assert in debug builds, fail softly otherwise.
    if (page < 0 || page >= fPages.count()) {
        return NULL;
    }

    return fPages[page];
}


// Returns the Resources dictionary of the page with 0-based index |page|, or NULL when the index
// is out of range or the page entry is NULL (debug builds assert on a bad index first).
SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) {
    SkASSERT(page >= 0 && page < fPages.count());

    // Same policy as object(): assert in debug builds, fail softly otherwise.
    if (page < 0 || page >= fPages.count()) {
        return NULL;
    }

    SkPdfPageObjectDictionary* pageDict = fPages[page];
    if (pageDict == NULL) {
        return NULL;
    }
    return pageDict->Resources(this);
}

// TODO(edisonn): Partial implemented.
// Move the logics directly in the code generator for inheritable and default values?
//
// Returns the media box of the page with 0-based index |page|. When the page has no MediaBox of
// its own, its Parent chain is followed until a node with a MediaBox (or without a Parent) is
// found. Returns an empty rectangle when the index is out of range or the chain leads to NULL.
// NOTE(review): a Parent chain that loops back on itself is not detected.
SkRect SkPdfNativeDoc::MediaBox(int page) {
    if (page < 0 || page >= fPages.count()) {
        return SkRect::MakeEmpty();
    }

    SkPdfPageObjectDictionary* current = fPages[page];
    // Check for NULL before every dereference: both the page entry and Parent() may be NULL.
    while (current && !current->has_MediaBox() && current->has_Parent()) {
        current = (SkPdfPageObjectDictionary*)current->Parent(this);
    }
    if (current) {
        return current->MediaBox(this);
    }
    return SkRect::MakeEmpty();
}

size_t SkPdfNativeDoc::objects() const {
    return fObjects.count();
}

// Returns the object stored under index |i| in the object table, reading it from the file
// content on first access. Returns NULL when |i| is out of range (debug builds assert first) or
// when the object could not be read.
SkPdfNativeObject* SkPdfNativeDoc::object(int i) {
    // Valid indices are [0, count); |i| == count is already out of bounds.
    SkASSERT(!(i < 0 || i >= fObjects.count()));

    if (i < 0 || i >= fObjects.count()) {
        return NULL;
    }

    if (fObjects[i].fObj == NULL) {
        fObjects[i].fObj = readObject(i);
        // TODO(edisonn): For perf, when we read the cross reference sections, we should take
        // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad
        // stream, and fail quickly, in case we default to sequential stream read.
    }

    return fObjects[i].fObj;
}

// Accessor for the mapper held in fMapper; ownership is not transferred.
const SkPdfMapper* SkPdfNativeDoc::mapper() const {
    return this->fMapper;
}

// Creates a real object holding |value|. The storage comes from this document's allocator.
SkPdfReal* SkPdfNativeDoc::createReal(double value) const {
    SkPdfNativeObject* real = fAllocator->allocObject();
    SkPdfNativeObject::makeReal(value, real);
    TRACK_OBJECT_SRC(real);
    return (SkPdfReal*)real;
}

// Creates an integer object holding |value|. The storage comes from this document's allocator.
SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const {
    SkPdfNativeObject* integer = fAllocator->allocObject();
    SkPdfNativeObject::makeInteger(value, integer);
    TRACK_OBJECT_SRC(integer);
    return (SkPdfInteger*)integer;
}

// Creates a string object from the |len| bytes at |sz|. The object storage comes from this
// document's allocator.
SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const {
    SkPdfNativeObject* str = fAllocator->allocObject();
    SkPdfNativeObject::makeString(sz, len, str);
    TRACK_OBJECT_SRC(str);
    return (SkPdfString*)str;
}

// Accessor for the allocator held in fAllocator; it is deleted by this document's destructor.
SkPdfAllocator* SkPdfNativeDoc::allocator() const {
    return this->fAllocator;
}

// Follows |ref| to the object it designates.
//   - Anything that is not a reference (NULL included) is returned unchanged.
//   - A reference whose id is outside the object table is reported and resolves to NULL.
//   - Otherwise the target is loaded on demand, chains of references are followed, and the result
//     is cached in the object table.
SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) {
    if (ref == NULL || !ref->isReference()) {
        return ref;
    }

    const int refId = ref->referenceId();
    // TODO(edisonn): generation/updates not supported now
    //int gen = ref->referenceGeneration();

    // TODO(edisonn): verify id and gen expected
    if (refId < 0 || refId >= fObjects.count()) {
        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
                    "resolve reference id out of bounds", NULL, NULL);
        return NULL;
    }

    // Already resolved, or currently being resolved further up the call stack.
    if (fObjects[refId].fIsReferenceResolved) {
        SkPdfReportIf(!fObjects[refId].fResolvedReference, kIgnoreError_SkPdfIssueSeverity,
                      kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL);
        return fObjects[refId].fResolvedReference;
    }

    // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow
    // here unless we check for resolved reference on next line.
    // Determine if the pdf is corrupted, or we have a bug here.

    // Mark the entry before doing any work, so that a reference leading back to this id hits the
    // branch above instead of recursing again.
    fObjects[refId].fIsReferenceResolved = true;

    if (fObjects[refId].fObj == NULL) {
        fObjects[refId].fObj = readObject(refId);
    }

    SkPdfNativeObject* target = fObjects[refId].fObj;
    if (target != NULL && fObjects[refId].fResolvedReference == NULL) {
        // The target may itself be a reference; keep following it.
        SkPdfNativeObject* resolved = target->isReference() ? resolveReference(target) : target;
        fObjects[refId].fResolvedReference = resolved;
    }

    return fObjects[refId].fResolvedReference;
}

// Memory attributed to this document, in bytes: what the allocator reports, the file content
// buffer, one PublicObjectEntry per object table entry, one pointer per page, and the document
// object itself.
size_t SkPdfNativeDoc::bytesUsed() const {
    size_t total = fAllocator->bytesUsed();
    total += fContentLength;
    total += fObjects.count() * sizeof(PublicObjectEntry);
    total += fPages.count() * sizeof(SkPdfPageObjectDictionary*);
    total += sizeof(*this);
    return total;
}