/*
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file picokpdf.c
 *
 * knowledge handling for pdf
 *
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 * All rights reserved.
 *
 * History:
 * - 2009-04-20 -- initial version
 *
 */

#include "picoos.h"
#include "picodbg.h"
#include "picoknow.h"
#include "picokpdf.h"

#ifdef __cplusplus
extern "C" {
#endif
#if 0
}
#endif


/* ************************************************************/
/* pdf */
/* ************************************************************/

/*
 * @addtogroup picokpdf
 *
 * overview: format of knowledge base pdf file
 *
 * This is the format for the dur pdf file:
 *  - Numframes:      1             uint16
 *  - Vecsize:        1             uint8
 *  - sampperframe:   1             uint8
 *  - Phonquantlen:   1             uint8
 *  - Phonquant:      Phonquantlen  uint8
 *  - Statequantlen:  1             uint8
 *  - Statequant:     Statequantlen uint8
 *  - And then numframes x vecsize  uint8
 *
 * This is the format for mul (mgc and lfz) pdf files:
 *  - numframes:         1         uint16
 *  - vecsize:           1         uint8
 *  - numstates:         1         uint8
 *  - numframesperstate: numstates uint16
 *  - ceporder:          1         uint8
 *  - numvuv             1         uint8
 *  - numdeltas:         1         uint8
 *  - scmeanpow:         1         uint8
 *  - maxbigpow:         1         uint8
 *  - amplif:            1         uint8
 *  - scmeanpowum        KPDF_NUMSTREAMS * ceporder uint8
 *  - scivarpow          KPDF_NUMSTREAMS * ceporder uint8
 *  And then numframes x vecsize uint8
 */


/* ************************************************************/
/* pdf data defines */
/* may not be changed with current implementation */
/* ************************************************************/

#define KPDF_NUMSTREAMS 3 /* coeff, delta, deltadelta */


/* ************************************************************/
/* pdf loading */
/* ************************************************************/

/**
 * Parses the header of a duration pdf knowledge base into its sub object.
 *
 * Fills the picokpdf_pdfdur_t stored in this->subObj with the header
 * values and with pointers into this->base (no data is copied), then
 * checks that header length + numframes * vecsize equals this->size.
 *
 * NOTE(review): the header bytes are read from this->base before the
 * size comparison at the end; a truncated knowledge base is only
 * detected after those reads.
 *
 * @param this    knowledge base to specialize; this->subObj must be set
 * @param common  provides the exception manager used to raise errors
 * @return PICO_OK on success, otherwise the value returned by
 *         picoos_emRaiseException for PICO_EXC_KB_MISSING (this or
 *         subObj is NULL) or PICO_EXC_FILE_CORRUPT (size mismatch)
 */
static pico_status_t kpdfDURInitialize(register picoknow_KnowledgeBase this,
                                       picoos_Common common)
{
    picokpdf_pdfdur_t *pdfdur;
    picoos_uint16 pos;

    if (NULL == this || NULL == this->subObj) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    pdfdur = (picokpdf_pdfdur_t *)this->subObj;
    pos = 0;

    /* numframes is stored low byte first */
    pdfdur->numframes = ((picoos_uint16)(this->base[pos+1])) << 8 |
        this->base[pos];
    pos += 2;
    pdfdur->vecsize = this->base[pos++];
    pdfdur->sampperframe = this->base[pos++];
    pdfdur->phonquantlen = this->base[pos++];
    pdfdur->phonquant = &(this->base[pos]);
    pos += pdfdur->phonquantlen;
    pdfdur->statequantlen = this->base[pos++];
    pdfdur->statequant = &(this->base[pos]);
    pos += pdfdur->statequantlen;
    pdfdur->content = &(this->base[pos]);
    PICODBG_DEBUG(("numframes %d, vecsize %d, phonquantlen %d, "
                   "statequantlen %d", pdfdur->numframes, pdfdur->vecsize,
                   pdfdur->phonquantlen, pdfdur->statequantlen));
    if ((picoos_uint32)(pos + (pdfdur->numframes * pdfdur->vecsize)) !=
        this->size) {
        PICODBG_DEBUG(("header-spec size %d, kb-size %d",
                       pos + (pdfdur->numframes * pdfdur->vecsize),
                       this->size));
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }
    PICODBG_DEBUG(("dur pdf initialized"));
    return PICO_OK;
}

/**
 * Converts a scale-factor exponent from the pdf file into an exponent
 * relative to bigpow.
 *
 * Values above 0x0F are treated as negative numbers in 8-bit two's
 * complement, so their magnitude is added to bigpow; other values are
 * subtracted from bigpow. The result is truncated to 8 bits.
 *
 * @param pow     exponent byte as read from the pdf file
 * @param bigpow  exponent of the fixed point representation in use
 * @return the converted exponent, or 0 if pow is in 0..0x0F and larger
 *         than bigpow. Note that 0 is also the regular result for
 *         pow == bigpow, so callers cannot tell the two cases apart.
 */
static picoos_uint8 convScaleFactorToBig(picoos_uint8 pow, picoos_uint8 bigpow)
{
    if (pow > 0x0F) {
        pow = bigpow + (0xFF - pow + 1);  /* take 2's complement of
                                             negative pow */
    } else if (bigpow >= pow) {
        pow = bigpow - pow;
    } else {
        /* error: bigpow is smaller than input pow */
        return 0;
    }
    return pow;
}

/**
 * Releases the two scale-factor arrays owned by a mul pdf sub object.
 *
 * @param pdfmul  sub object whose meanpowUm and ivarpow are released
 * @param mm      memory manager the arrays were allocated from
 */
static void kpdfMULFreeScaleArrays(picokpdf_pdfmul_t *pdfmul,
                                   picoos_MemoryManager mm)
{
    picoos_deallocate(mm, (void *) &(pdfmul->meanpowUm));
    picoos_deallocate(mm, (void *) &(pdfmul->ivarpow));
}

/**
 * Parses the header of a mul (mgc/lfz) pdf knowledge base into its sub
 * object.
 *
 * Fills the picokpdf_pdfmul_t stored in this->subObj, allocates and
 * fills the meanpowUm and ivarpow arrays (KPDF_NUMSTREAMS * ceporder
 * bytes each, already converted with convScaleFactorToBig) and points
 * content at the first vector inside this->base.
 *
 * On every error return the two arrays are not owned by the sub object
 * any more: they are either never allocated or released again here, so
 * the caller only has to free this->subObj.
 *
 * NOTE(review): the header bytes are read from this->base before the
 * size comparison at the end; a truncated knowledge base is only
 * detected after those reads.
 *
 * @param this    knowledge base to specialize; this->subObj must be set
 * @param common  provides the exception manager and memory manager
 * @return PICO_OK on success, otherwise the value returned by
 *         picoos_emRaiseException for
 *         - PICO_EXC_KB_MISSING: this or subObj is NULL
 *         - PICO_EXC_MAX_NUM_EXCEED: numstates does not fit the
 *           stateoffset table, maxbigpow is below PICOKPDF_BIG_POW,
 *           meanpow converts to 0, or KPDF_NUMSTREAMS * ceporder does
 *           not fit into 8 bits
 *         - PICO_EXC_OUT_OF_MEM: an array allocation failed
 *         - PICO_EXC_FILE_CORRUPT: inconsistent header or size mismatch
 */
static pico_status_t kpdfMULInitialize(register picoknow_KnowledgeBase this,
                                       picoos_Common common)
{
    picokpdf_pdfmul_t *pdfmul;
    picoos_uint16 pos;
    picoos_uint8 scmeanpow, maxbigpow, nummean;
    picoos_uint8 i;

    if (NULL == this || NULL == this->subObj) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    pdfmul = (picokpdf_pdfmul_t *)this->subObj;
    /* nothing is owned until the allocations below succeed */
    pdfmul->meanpowUm = NULL;
    pdfmul->ivarpow = NULL;
    pos = 0;

    /* numframes is stored low byte first */
    pdfmul->numframes = ((picoos_uint16)(this->base[pos+1])) << 8 |
        this->base[pos];
    pos += 2;
    pdfmul->vecsize = this->base[pos++];
    pdfmul->numstates = this->base[pos++];
    /* numstates comes from the file and indexes the embedded stateoffset
       table below, so it must not exceed the table's capacity */
    if (pdfmul->numstates >
        sizeof(pdfmul->stateoffset) / sizeof(pdfmul->stateoffset[0])) {
        PICODBG_ERROR(("numstates %i exceeds the capacity of the stateoffset table",
                       pdfmul->numstates));
        return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,
                                       NULL, NULL);
    }
    {
        /* accumulate the per-state frame counts into start offsets */
        pdfmul->stateoffset[0] = (picoos_uint16) 0;
        for (i=1; i<pdfmul->numstates; i++) {
            pdfmul->stateoffset[i] = pdfmul->stateoffset[i-1] +
                (this->base[pos] | ((picoos_uint16) this->base[pos+1] << 8));
            pos += 2;
        }
        pos += 2; /* we don't need the last number if we only need the
                     offset (i.e. how to get to the vector start) */
    }

    pdfmul->ceporder = this->base[pos++];
    pdfmul->numvuv = this->base[pos++];
    pdfmul->numdeltas = this->base[pos++];
    scmeanpow = this->base[pos++];
    maxbigpow = this->base[pos++];
    if (maxbigpow < PICOKPDF_BIG_POW) {
        PICODBG_ERROR(("bigpow %i is larger than maxbigpow %i defined in pdf lingware", PICOKPDF_BIG_POW, maxbigpow));
        return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL);
    }
    pdfmul->bigpow = PICOKPDF_BIG_POW; /* what we have to use is the smaller
                                          number! */

    pdfmul->amplif = this->base[pos++];

    /* bigpow corrected by scmeanpow, multiply means by 2^meanpow to
       obtain fixed point representation */
    pdfmul->meanpow = convScaleFactorToBig(scmeanpow, pdfmul->bigpow);
    if (0 == pdfmul->meanpow) {
        PICODBG_ERROR(("error in convScaleFactorToBig"));
        return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL);
    }

    /* nummean and the loop index are 8 bit wide; refuse a ceporder whose
       table length would wrap instead of silently using a short table */
    if (pdfmul->ceporder > 0xFF / KPDF_NUMSTREAMS) {
        PICODBG_ERROR(("ceporder %i is too large", pdfmul->ceporder));
        return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL);
    }
    nummean = KPDF_NUMSTREAMS*pdfmul->ceporder;

    pdfmul->meanpowUm = picoos_allocate(common->mm,nummean*sizeof(picoos_uint8));
    pdfmul->ivarpow = picoos_allocate(common->mm,nummean*sizeof(picoos_uint8));
    if ((NULL == pdfmul->meanpowUm) || (NULL == pdfmul->ivarpow)) {
        kpdfMULFreeScaleArrays(pdfmul, common->mm);
        return picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM,NULL,NULL);
    }

    /* read meanpowUm and convert on the fly */
    /* meaning of meanpowUm becomes: multiply means from pdf stream by
     * 2^meanpowUm to achieve fixed point scaling by big */
    for (i=0; i<nummean; i++) {
        pdfmul->meanpowUm[i] = convScaleFactorToBig(this->base[pos++], pdfmul->bigpow);
    }

    /* read ivarpow and convert on the fly */
    for (i=0; i<nummean; i++) {
        pdfmul->ivarpow[i] = convScaleFactorToBig(this->base[pos++], pdfmul->bigpow);
    }

    /* check numdeltas */
    if ((pdfmul->numdeltas == 0xFF) &&
        (pdfmul->vecsize != (pdfmul->numvuv + pdfmul->ceporder * 3 * (2+1)))) {
        PICODBG_ERROR(("header has inconsistent values for vecsize, ceporder, numvuv, and numdeltas"));
        /* the caller only frees subObj, so release the arrays here */
        kpdfMULFreeScaleArrays(pdfmul, common->mm);
        return picoos_emRaiseException(common->em,PICO_EXC_FILE_CORRUPT,NULL,NULL);
    }

    /* vecsize: 1 uint8 for vuv
         + ceporder short for static means
         + numdeltas uint8 and short for sparse delta means
         + ceporder*3 uint8 for static and delta inverse variances
    */
    if ((pdfmul->numdeltas != 0xFF) &&
        (pdfmul->vecsize != pdfmul->numvuv+pdfmul->ceporder*2+pdfmul->numdeltas*3+pdfmul->ceporder*3)) {
        PICODBG_ERROR(("header has inconsistent values for vecsize, ceporder, numvuv, and numdeltas\n"
                       "vecsize = %i while numvuv+ceporder*2 + numdeltas*3 + ceporder*3 = %i",
                       pdfmul->vecsize,
                       pdfmul->numvuv + pdfmul->ceporder*2 + pdfmul->numdeltas * 3 + pdfmul->ceporder * 3));
        kpdfMULFreeScaleArrays(pdfmul, common->mm);
        return picoos_emRaiseException(common->em,PICO_EXC_FILE_CORRUPT,NULL,NULL);
    }
    pdfmul->content = &(this->base[pos]);
    PICODBG_DEBUG(("numframes %d, vecsize %d, numstates %d, ceporder %d, "
                   "numvuv %d, numdeltas %d, meanpow %d, bigpow %d",
                   pdfmul->numframes, pdfmul->vecsize, pdfmul->numstates,
                   pdfmul->ceporder, pdfmul->numvuv, pdfmul->numdeltas,
                   pdfmul->meanpow, pdfmul->bigpow));
    if ((picoos_uint32)(pos + (pdfmul->numframes * pdfmul->vecsize)) !=
        this->size) {
        PICODBG_DEBUG(("header-spec size %d, kb-size %d",
                       pos + (pdfmul->numframes * pdfmul->vecsize),
                       this->size));
        kpdfMULFreeScaleArrays(pdfmul, common->mm);
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }
    PICODBG_DEBUG(("mul pdf initialized"));
    return PICO_OK;
}

/**
 * Parses the header of a phs pdf knowledge base into its sub object.
 *
 * Reads numvectors (uint16, low byte first) and sets indexBase to the
 * byte following it and contentBase to
 * indexBase + numvectors * sizeof(picoos_uint32). No size check is done.
 *
 * @param this    knowledge base to specialize; this->subObj must be set
 * @param common  provides the exception manager used to raise errors
 * @return PICO_OK on success, otherwise the value returned by
 *         picoos_emRaiseException for PICO_EXC_KB_MISSING
 */
static pico_status_t kpdfPHSInitialize(register picoknow_KnowledgeBase this,
                                       picoos_Common common)
{
    picokpdf_pdfphs_t *pdfphs;
    picoos_uint16 pos;

    if (NULL == this || NULL == this->subObj) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    pdfphs = (picokpdf_pdfphs_t *)this->subObj;
    pos = 0;

    pdfphs->numvectors = ((picoos_uint16)(this->base[pos+1])) << 8 |
        this->base[pos];
    pos += 2;
    pdfphs->indexBase = &(this->base[pos]);
    pdfphs->contentBase = pdfphs->indexBase +
        pdfphs->numvectors * sizeof(picoos_uint32);
    PICODBG_DEBUG(("phs pdf initialized"));
    return PICO_OK;
}

/**
 * Releases a mul pdf sub object together with its two scale arrays.
 *
 * Does nothing if this or this->subObj is NULL.
 *
 * @param this  knowledge base whose sub object is released
 * @param mm    memory manager the sub object was allocated from
 * @return always PICO_OK
 */
static pico_status_t kpdfMULSubObjDeallocate(register picoknow_KnowledgeBase this,
                                             picoos_MemoryManager mm)
{
    picokpdf_pdfmul_t *pdfmul;

    if ((NULL != this) && (NULL != this->subObj)) {
        pdfmul = (picokpdf_pdfmul_t *)this->subObj;
        kpdfMULFreeScaleArrays(pdfmul, mm);
        picoos_deallocate(mm, (void *) &(this->subObj));
    }
    return PICO_OK;
}

/**
 * Releases a dur pdf sub object.
 *
 * @param this  knowledge base whose sub object is released; may be NULL
 * @param mm    memory manager the sub object was allocated from
 * @return always PICO_OK
 */
static pico_status_t kpdfDURSubObjDeallocate(register picoknow_KnowledgeBase this,
                                             picoos_MemoryManager mm)
{
    if (NULL != this) {
        picoos_deallocate(mm, (void *) &this->subObj);
    }
    return PICO_OK;
}

/**
 * Releases a phs pdf sub object.
 *
 * @param this  knowledge base whose sub object is released; may be NULL
 * @param mm    memory manager the sub object was allocated from
 * @return always PICO_OK
 */
static pico_status_t kpdfPHSSubObjDeallocate(register picoknow_KnowledgeBase this,
                                             picoos_MemoryManager mm)
{
    if (NULL != this) {
        picoos_deallocate(mm, (void *) &this->subObj);
    }
    return PICO_OK;
}

/* we don't offer a specialized constructor for a *KnowledgeBase but
 * instead a "specializer" of an already existing generic
 * picoknow_KnowledgeBase */

/**
 * Turns a generic knowledge base into a pdf knowledge base of the given
 * type.
 *
 * Installs the type specific subDeallocate function, allocates the
 * matching sub object and runs the type specific initializer on it. If
 * the initializer fails, the sub object is released again and the
 * initializer's status is raised.
 *
 * @param this      generic knowledge base to specialize
 * @param common    provides the exception manager and memory manager
 * @param kpdftype  PICOKPDF_KPDFTYPE_DUR, _MUL or _PHS
 * @return PICO_OK on success, otherwise the value returned by
 *         picoos_emRaiseException for PICO_EXC_KB_MISSING (this is
 *         NULL), PICO_EXC_OUT_OF_MEM (sub object allocation failed),
 *         PICO_ERR_OTHER (unknown kpdftype) or the initializer's status
 */
pico_status_t picokpdf_specializePdfKnowledgeBase(picoknow_KnowledgeBase this,
                                                  picoos_Common common,
                                                  const picokpdf_kpdftype_t kpdftype)
{
    pico_status_t status;

    if (NULL == this) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    switch (kpdftype) {
        case PICOKPDF_KPDFTYPE_DUR:
            this->subDeallocate = kpdfDURSubObjDeallocate;
            this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfdur_t));
            if (NULL == this->subObj) {
                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
                                               NULL, NULL);
            }
            status = kpdfDURInitialize(this, common);
            break;
        case PICOKPDF_KPDFTYPE_MUL:
            this->subDeallocate = kpdfMULSubObjDeallocate;
            this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfmul_t));
            if (NULL == this->subObj) {
                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
                                               NULL, NULL);
            }
            status = kpdfMULInitialize(this, common);
            break;
        case PICOKPDF_KPDFTYPE_PHS:
            this->subDeallocate = kpdfPHSSubObjDeallocate;
            this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfphs_t));
            if (NULL == this->subObj) {
                return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
                                               NULL, NULL);
            }
            status = kpdfPHSInitialize(this, common);
            break;

        default:
            return picoos_emRaiseException(common->em, PICO_ERR_OTHER,
                                           NULL, NULL);
    }

    if (status != PICO_OK) {
        /* the initializers leave nothing allocated besides subObj when
           they fail, so releasing subObj is sufficient here */
        picoos_deallocate(common->mm, (void *) &this->subObj);
        return picoos_emRaiseException(common->em, status, NULL, NULL);
    }
    return PICO_OK;
}


/* ************************************************************/
/* pdf getPdf* */
/* ************************************************************/

/**
 * Returns the sub object of a knowledge base as a dur pdf.
 *
 * @param this  knowledge base, may be NULL
 * @return this->subObj cast to picokpdf_PdfDUR, or NULL if this is NULL
 */
picokpdf_PdfDUR picokpdf_getPdfDUR(picoknow_KnowledgeBase this)
{
    return ((NULL == this) ? NULL : ((picokpdf_PdfDUR) this->subObj));
}

/**
 * Returns the sub object of a knowledge base as a mul pdf.
 *
 * @param this  knowledge base, may be NULL
 * @return this->subObj cast to picokpdf_PdfMUL, or NULL if this is NULL
 */
picokpdf_PdfMUL picokpdf_getPdfMUL(picoknow_KnowledgeBase this)
{
    return ((NULL == this) ? NULL : ((picokpdf_PdfMUL) this->subObj));
}

/**
 * Returns the sub object of a knowledge base as a phs pdf.
 *
 * @param this  knowledge base, may be NULL
 * @return this->subObj cast to picokpdf_PdfPHS, or NULL if this is NULL
 */
picokpdf_PdfPHS picokpdf_getPdfPHS(picoknow_KnowledgeBase this)
{
    return ((NULL == this) ? NULL : ((picokpdf_PdfPHS) this->subObj));
}


#ifdef __cplusplus
}
#endif


/* end */