C++程序  |  607行  |  16.02 KB

// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "core/fpdfapi/page/cpdf_streamparser.h"

#include <limits.h>

#include <memory>
#include <utility>

#include "core/fpdfapi/cpdf_modulemgr.h"
#include "core/fpdfapi/page/cpdf_docpagedata.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_boolean.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_null.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdfapi/parser/fpdf_parser_decode.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcodec/fx_codec.h"
#include "core/fxcrt/fx_ext.h"

namespace {

const uint32_t kMaxNestedParsingLevel = 512;
const uint32_t kMaxWordBuffer = 256;
const FX_STRSIZE kMaxStringLength = 32767;

uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,
                            uint8_t*& dest_buf,
                            uint32_t& dest_size) {
  if (!pDecoder)
    return FX_INVALID_OFFSET;
  int ncomps = pDecoder->CountComps();
  int bpc = pDecoder->GetBPC();
  int width = pDecoder->GetWidth();
  int height = pDecoder->GetHeight();
  int pitch = (width * ncomps * bpc + 7) / 8;
  if (height == 0 || pitch > (1 << 30) / height)
    return FX_INVALID_OFFSET;

  dest_buf = FX_Alloc2D(uint8_t, pitch, height);
  dest_size = pitch * height;  // Safe since checked alloc returned.
  for (int row = 0; row < height; row++) {
    const uint8_t* pLine = pDecoder->GetScanline(row);
    if (!pLine)
      break;

    FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch);
  }
  return pDecoder->GetSrcOffset();
}

uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf,
                                uint32_t limit,
                                int width,
                                int height,
                                CFX_ByteString& decoder,
                                CPDF_Dictionary* pParam,
                                uint8_t*& dest_buf,
                                uint32_t& dest_size) {
  if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
    std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
        FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
    return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
  }
  if (decoder == "ASCII85Decode" || decoder == "A85")
    return A85Decode(src_buf, limit, dest_buf, dest_size);
  if (decoder == "ASCIIHexDecode" || decoder == "AHx")
    return HexDecode(src_buf, limit, dest_buf, dest_size);
  if (decoder == "FlateDecode" || decoder == "Fl") {
    return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size,
                                    dest_buf, dest_size);
  }
  if (decoder == "LZWDecode" || decoder == "LZW") {
    return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf,
                                    dest_size);
  }
  if (decoder == "DCTDecode" || decoder == "DCT") {
    std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
        CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
            src_buf, limit, width, height, 0,
            !pParam || pParam->GetIntegerFor("ColorTransform", 1));
    return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
  }
  if (decoder == "RunLengthDecode" || decoder == "RL")
    return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
  dest_size = 0;
  dest_buf = 0;
  return (uint32_t)-1;
}

}  // namespace

CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
    : m_pBuf(pData),
      m_Size(dwSize),
      m_Pos(0),
      m_pPool(nullptr) {}

CPDF_StreamParser::CPDF_StreamParser(
    const uint8_t* pData,
    uint32_t dwSize,
    const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
    : m_pBuf(pData),
      m_Size(dwSize),
      m_Pos(0),
      m_pPool(pPool) {}

CPDF_StreamParser::~CPDF_StreamParser() {}

std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
    CPDF_Document* pDoc,
    std::unique_ptr<CPDF_Dictionary> pDict,
    CPDF_Object* pCSObj) {
  if (m_Pos == m_Size)
    return nullptr;

  if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
    m_Pos++;

  CFX_ByteString Decoder;
  CPDF_Dictionary* pParam = nullptr;
  CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
  if (pFilter) {
    if (CPDF_Array* pArray = pFilter->AsArray()) {
      Decoder = pArray->GetStringAt(0);
      CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms");
      if (pParams)
        pParam = pParams->GetDictAt(0);
    } else {
      Decoder = pFilter->GetString();
      pParam = pDict->GetDictFor("DecodeParms");
    }
  }
  uint32_t width = pDict->GetIntegerFor("Width");
  uint32_t height = pDict->GetIntegerFor("Height");
  uint32_t OrigSize = 0;
  if (pCSObj) {
    uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent");
    uint32_t nComponents = 1;
    CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
    if (pCS) {
      nComponents = pCS->CountComponents();
      pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
    } else {
      nComponents = 3;
    }
    uint32_t pitch = width;
    if (bpc && pitch > INT_MAX / bpc)
      return nullptr;

    pitch *= bpc;
    if (nComponents && pitch > INT_MAX / nComponents)
      return nullptr;

    pitch *= nComponents;
    if (pitch > INT_MAX - 7)
      return nullptr;

    pitch += 7;
    pitch /= 8;
    OrigSize = pitch;
  } else {
    if (width > INT_MAX - 7)
      return nullptr;

    OrigSize = ((width + 7) / 8);
  }
  if (height && OrigSize > INT_MAX / height)
    return nullptr;

  OrigSize *= height;
  std::unique_ptr<uint8_t, FxFreeDeleter> pData;
  uint32_t dwStreamSize;
  if (Decoder.IsEmpty()) {
    if (OrigSize > m_Size - m_Pos)
      OrigSize = m_Size - m_Pos;
    pData.reset(FX_Alloc(uint8_t, OrigSize));
    FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, OrigSize);
    dwStreamSize = OrigSize;
    m_Pos += OrigSize;
  } else {
    uint8_t* pIgnore = nullptr;
    uint32_t dwDestSize = OrigSize;
    dwStreamSize =
        PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
                               Decoder, pParam, pIgnore, dwDestSize);
    FX_Free(pIgnore);
    if (static_cast<int>(dwStreamSize) < 0)
      return nullptr;

    uint32_t dwSavePos = m_Pos;
    m_Pos += dwStreamSize;
    while (1) {
      uint32_t dwPrevPos = m_Pos;
      CPDF_StreamParser::SyntaxType type = ParseNextElement();
      if (type == CPDF_StreamParser::EndOfData)
        break;

      if (type != CPDF_StreamParser::Keyword) {
        dwStreamSize += m_Pos - dwPrevPos;
        continue;
      }
      if (GetWord() == "EI") {
        m_Pos = dwPrevPos;
        break;
      }
      dwStreamSize += m_Pos - dwPrevPos;
    }
    m_Pos = dwSavePos;
    pData.reset(FX_Alloc(uint8_t, dwStreamSize));
    FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize);
    m_Pos += dwStreamSize;
  }
  pDict->SetNewFor<CPDF_Number>("Length", (int)dwStreamSize);
  return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize,
                                         std::move(pDict));
}

CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
  m_pLastObj.reset();
  m_WordSize = 0;
  if (!PositionIsInBounds())
    return EndOfData;

  int ch = m_pBuf[m_Pos++];
  while (1) {
    while (PDFCharIsWhitespace(ch)) {
      if (!PositionIsInBounds())
        return EndOfData;

      ch = m_pBuf[m_Pos++];
    }

    if (ch != '%')
      break;

    while (1) {
      if (!PositionIsInBounds())
        return EndOfData;

      ch = m_pBuf[m_Pos++];
      if (PDFCharIsLineEnding(ch))
        break;
    }
  }

  if (PDFCharIsDelimiter(ch) && ch != '/') {
    m_Pos--;
    m_pLastObj = ReadNextObject(false, false, 0);
    return Others;
  }

  bool bIsNumber = true;
  while (1) {
    if (m_WordSize < kMaxWordBuffer)
      m_WordBuffer[m_WordSize++] = ch;

    if (!PDFCharIsNumeric(ch))
      bIsNumber = false;

    if (!PositionIsInBounds())
      break;

    ch = m_pBuf[m_Pos++];

    if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
      m_Pos--;
      break;
    }
  }

  m_WordBuffer[m_WordSize] = 0;
  if (bIsNumber)
    return Number;

  if (m_WordBuffer[0] == '/')
    return Name;

  if (m_WordSize == 4) {
    if (memcmp(m_WordBuffer, "true", 4) == 0) {
      m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true);
      return Others;
    }
    if (memcmp(m_WordBuffer, "null", 4) == 0) {
      m_pLastObj = pdfium::MakeUnique<CPDF_Null>();
      return Others;
    }
  } else if (m_WordSize == 5) {
    if (memcmp(m_WordBuffer, "false", 5) == 0) {
      m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false);
      return Others;
    }
  }
  return Keyword;
}

std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
    bool bAllowNestedArray,
    bool bInArray,
    uint32_t dwRecursionLevel) {
  bool bIsNumber;
  // Must get the next word before returning to avoid infinite loops.
  GetNextWord(bIsNumber);
  if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
    return nullptr;

  if (bIsNumber) {
    m_WordBuffer[m_WordSize] = 0;
    return pdfium::MakeUnique<CPDF_Number>(
        CFX_ByteStringC(m_WordBuffer, m_WordSize));
  }

  int first_char = m_WordBuffer[0];
  if (first_char == '/') {
    CFX_ByteString name =
        PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
    return pdfium::MakeUnique<CPDF_Name>(m_pPool, name);
  }

  if (first_char == '(') {
    CFX_ByteString str = ReadString();
    return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
  }

  if (first_char == '<') {
    if (m_WordSize == 1)
      return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true);

    auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
    while (1) {
      GetNextWord(bIsNumber);
      if (m_WordSize == 2 && m_WordBuffer[0] == '>')
        break;

      if (!m_WordSize || m_WordBuffer[0] != '/')
        return nullptr;

      CFX_ByteString key =
          PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
      std::unique_ptr<CPDF_Object> pObj =
          ReadNextObject(true, bInArray, dwRecursionLevel + 1);
      if (!pObj)
        return nullptr;

      if (!key.IsEmpty())
        pDict->SetFor(key, std::move(pObj));
    }
    return std::move(pDict);
  }

  if (first_char == '[') {
    if ((!bAllowNestedArray && bInArray))
      return nullptr;

    auto pArray = pdfium::MakeUnique<CPDF_Array>();
    while (1) {
      std::unique_ptr<CPDF_Object> pObj =
          ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
      if (pObj) {
        pArray->Add(std::move(pObj));
        continue;
      }
      if (!m_WordSize || m_WordBuffer[0] == ']')
        break;
    }
    return std::move(pArray);
  }

  if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5))
    return pdfium::MakeUnique<CPDF_Boolean>(false);

  if (m_WordSize == 4) {
    if (memcmp(m_WordBuffer, "true", 4) == 0)
      return pdfium::MakeUnique<CPDF_Boolean>(true);
    if (memcmp(m_WordBuffer, "null", 4) == 0)
      return pdfium::MakeUnique<CPDF_Null>();
  }

  return nullptr;
}

// TODO(npm): the following methods are almost identical in cpdf_syntaxparser
void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
  m_WordSize = 0;
  bIsNumber = true;
  if (!PositionIsInBounds())
    return;

  int ch = m_pBuf[m_Pos++];
  while (1) {
    while (PDFCharIsWhitespace(ch)) {
      if (!PositionIsInBounds()) {
        return;
      }
      ch = m_pBuf[m_Pos++];
    }

    if (ch != '%')
      break;

    while (1) {
      if (!PositionIsInBounds())
        return;
      ch = m_pBuf[m_Pos++];
      if (PDFCharIsLineEnding(ch))
        break;
    }
  }

  if (PDFCharIsDelimiter(ch)) {
    bIsNumber = false;
    m_WordBuffer[m_WordSize++] = ch;
    if (ch == '/') {
      while (1) {
        if (!PositionIsInBounds())
          return;
        ch = m_pBuf[m_Pos++];
        if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
          m_Pos--;
          return;
        }

        if (m_WordSize < kMaxWordBuffer)
          m_WordBuffer[m_WordSize++] = ch;
      }
    } else if (ch == '<') {
      if (!PositionIsInBounds())
        return;
      ch = m_pBuf[m_Pos++];
      if (ch == '<')
        m_WordBuffer[m_WordSize++] = ch;
      else
        m_Pos--;
    } else if (ch == '>') {
      if (!PositionIsInBounds())
        return;
      ch = m_pBuf[m_Pos++];
      if (ch == '>')
        m_WordBuffer[m_WordSize++] = ch;
      else
        m_Pos--;
    }
    return;
  }

  while (1) {
    if (m_WordSize < kMaxWordBuffer)
      m_WordBuffer[m_WordSize++] = ch;
    if (!PDFCharIsNumeric(ch))
      bIsNumber = false;

    if (!PositionIsInBounds())
      return;
    ch = m_pBuf[m_Pos++];
    if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
      m_Pos--;
      break;
    }
  }
}

CFX_ByteString CPDF_StreamParser::ReadString() {
  if (!PositionIsInBounds())
    return CFX_ByteString();

  uint8_t ch = m_pBuf[m_Pos++];
  CFX_ByteTextBuf buf;
  int parlevel = 0;
  int status = 0;
  int iEscCode = 0;
  while (1) {
    switch (status) {
      case 0:
        if (ch == ')') {
          if (parlevel == 0) {
            if (buf.GetLength() > kMaxStringLength) {
              return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
            }
            return buf.MakeString();
          }
          parlevel--;
          buf.AppendChar(')');
        } else if (ch == '(') {
          parlevel++;
          buf.AppendChar('(');
        } else if (ch == '\\') {
          status = 1;
        } else {
          buf.AppendChar((char)ch);
        }
        break;
      case 1:
        if (ch >= '0' && ch <= '7') {
          iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
          status = 2;
          break;
        }
        if (ch == 'n') {
          buf.AppendChar('\n');
        } else if (ch == 'r') {
          buf.AppendChar('\r');
        } else if (ch == 't') {
          buf.AppendChar('\t');
        } else if (ch == 'b') {
          buf.AppendChar('\b');
        } else if (ch == 'f') {
          buf.AppendChar('\f');
        } else if (ch == '\r') {
          status = 4;
          break;
        } else if (ch == '\n') {
        } else {
          buf.AppendChar(ch);
        }
        status = 0;
        break;
      case 2:
        if (ch >= '0' && ch <= '7') {
          iEscCode =
              iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
          status = 3;
        } else {
          buf.AppendChar(iEscCode);
          status = 0;
          continue;
        }
        break;
      case 3:
        if (ch >= '0' && ch <= '7') {
          iEscCode =
              iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
          buf.AppendChar(iEscCode);
          status = 0;
        } else {
          buf.AppendChar(iEscCode);
          status = 0;
          continue;
        }
        break;
      case 4:
        status = 0;
        if (ch != '\n') {
          continue;
        }
        break;
    }
    if (!PositionIsInBounds())
      break;

    ch = m_pBuf[m_Pos++];
  }
  if (PositionIsInBounds())
    ++m_Pos;

  if (buf.GetLength() > kMaxStringLength) {
    return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
  }
  return buf.MakeString();
}

CFX_ByteString CPDF_StreamParser::ReadHexString() {
  if (!PositionIsInBounds())
    return CFX_ByteString();

  CFX_ByteTextBuf buf;
  bool bFirst = true;
  int code = 0;
  while (PositionIsInBounds()) {
    int ch = m_pBuf[m_Pos++];

    if (ch == '>')
      break;

    if (!std::isxdigit(ch))
      continue;

    int val = FXSYS_toHexDigit(ch);
    if (bFirst) {
      code = val * 16;
    } else {
      code += val;
      buf.AppendByte((uint8_t)code);
    }
    bFirst = !bFirst;
  }
  if (!bFirst)
    buf.AppendChar((char)code);

  if (buf.GetLength() > kMaxStringLength)
    return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);

  return buf.MakeString();
}

bool CPDF_StreamParser::PositionIsInBounds() const {
  return m_Pos < m_Size;
}