/*
**
** Copyright 2008, Google Inc.
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

/*
 * JNI glue between the Java class com/android/ocr/OcrLib and the Tesseract
 * OCR engine.  Each Java OcrLib object owns one native_data_t (stored in its
 * "mNativeData" int field) which wraps a tesseract::TessBaseAPI instance and
 * the image currently handed to it.
 */

#include <nativehelper/jni.h>

#include <assert.h>
#include <dirent.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "baseapi.h"
#include "varable.h"
#include "tessvars.h"

#ifdef HAVE_LIBLEPT
// Include leptonica library only if autoconf (or makefile etc) tell us to.
#include "allheaders.h"
#endif

#define DEBUG 0

#if DEBUG
#include <stdio.h>
BOOL_VAR (tessedit_write_images, TRUE, "Capture the image from the IPE");
#endif

#define LOG_NDEBUG 0
#define LOG_TAG "OcrLib(native)"
#include <utils/Log.h>

// Directory that holds the "tessdata" folder and receives debug output.
#define TESSBASE "/sdcard/"

// Field ID of OcrLib.mNativeData, resolved once in class_init().
static jfieldID field_mNativeData;

/*
 * Per-object native state.
 *
 * image_obj is a JNI global reference to the Java byte array currently in
 * use and image_buffer the element pointer obtained from it; both are NULL
 * when no image is held.  pix (leptonica builds only) is the decoded image.
 */
struct native_data_t {
    native_data_t()
        : image_obj(NULL), image_buffer(NULL)
#ifdef HAVE_LIBLEPT
        , pix(NULL)  // ocr_release_image() may destroy it without it ever being set
#endif
    {}

    tesseract::TessBaseAPI api;
    jbyteArray image_obj;
    jbyte *image_buffer;
#ifdef HAVE_LIBLEPT
    PIX *pix;
#endif
};

/*
 * Fetches the native_data_t pointer stored in the object's mNativeData field.
 * The pointer is stored in a Java int, so this only works where pointers fit
 * in 32 bits.  Returns NULL if initialize_native_data() never stored one.
 */
static inline native_data_t *get_native_data(JNIEnv *env, jobject object)
{
    return (native_data_t *)(env->GetIntField(object, field_mNativeData));
}

/*
 * One node of the singly-linked list of languages discovered in the tessdata
 * directory.  The node owns a private copy of the language name.
 */
struct language_info_t {
    language_info_t(const char *lang, int shards)
        : next(NULL), lang(strdup(lang)), shards(shards) { }
    ~language_info_t() { free(lang); }

    language_info_t *next;
    char *lang;    // strdup()'ed copy, released in the destructor
    int shards;
};

// Head of the language list, its length, and the cursor used by
// language_iter_init()/language_iter_next().
static struct language_info_t *languages;
static int num_languages;
static language_info_t *iter;

/*
 * Returns the list entry whose name equals lang, or NULL if there is none.
 */
static language_info_t *find_language(const char *lang)
{
    LOGV(__FUNCTION__);
    for (language_info_t *trav = languages; trav != NULL; trav = trav->next) {
        if (!strcmp(trav->lang, lang)) {
            return trav;
        }
    }
    return NULL;
}

/*
 * Records a language.  If it is already known, only raises its shard count
 * when the new value is larger; otherwise a new entry is pushed on the front
 * of the list.
 */
static void add_language(const char *lang, int shards)
{
    LOGV(__FUNCTION__);
    language_info_t *trav = find_language(lang);
    if (trav) {
        if (shards > trav->shards) {
            LOGI("UPDATE LANG %s SHARDS %d", lang, shards);
            trav->shards = shards;
        }
        return;
    }

    LOGI("ADD NEW LANG %s SHARDS %d", lang, shards);
    trav = new language_info_t(lang, shards);
    if (trav->lang == NULL) {
        // strdup() failed; a node without a name would crash find_language().
        LOGE("%s: out of memory!", __FUNCTION__);
        delete trav;
        return;
    }
    trav->next = languages;
    languages = trav;
    num_languages++;
}

/*
 * Deletes every entry of the language list and resets the list head, the
 * count and the iteration cursor so that nothing keeps pointing at freed
 * nodes.
 */
static void free_languages()
{
    LOGV(__FUNCTION__);
    language_info_t *trav = languages;
    while (trav) {
        language_info_t *old = trav;
        LOGI("FREE LANG %s\n", trav->lang);
        trav = trav->next;
        delete old;
    }
    languages = NULL;
    iter = NULL;
    num_languages = 0;
}

/* Number of entries currently in the language list. */
static int get_num_languages()
{
    return num_languages;
}

/* Rewinds the iteration cursor to the list head and returns it. */
static language_info_t *language_iter_init()
{
    iter = languages;
    return iter;
}

/* Advances the iteration cursor; returns NULL once the list is exhausted. */
static language_info_t *language_iter_next()
{
    if (iter)
        iter = iter->next;
    return iter;
}

#if DEBUG

#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>

// Logs the location plus a formatted message and returns from the enclosing
// (void) function when cond holds.
#define FAILIF(cond, msg...)                            \
    do {                                                \
        if (cond) {                                     \
            LOGE("%s(%d): ", __FILE__, __LINE__);       \
            LOGE(msg);                                  \
            return;                                     \
        }                                               \
    } while(0)

/*
 * Runs one recognition pass over an already-mapped raw image and writes the
 * text to outfile.  Owns no resources besides the local TessBaseAPI, so the
 * early returns of FAILIF are safe here.
 */
static void run_test_ocr(const void *buffer,
                         int x, int y, int bpp,
                         const char *outfile,
                         const char *lang,
                         const char *ratings,
                         const char *tessdata)
{
    FAILIF(!tessdata, "You must specify a path for tessdata.\n");

    tesseract::TessBaseAPI api;

    LOGI("tessdata %s\n", tessdata);
    LOGI("lang %s\n", lang);
    FAILIF(api.Init(tessdata, lang), "could not initialize tesseract\n");
    if (ratings) {
        LOGI("ratings %s\n", ratings);
        api.ReadConfigFile(ratings, false);
    }

    LOGI("set image x=%d, y=%d bpp=%d\n", x, y, bpp);
    FAILIF(!bpp || bpp == 2 || bpp > 4,
           "Invalid value %d of bpp\n", bpp);
    api.SetImage((const unsigned char *)buffer, x, y, bpp, bpp*x);

    LOGI("set rectangle to cover entire image\n");
    api.SetRectangle(0, 0, x, y);

    LOGI("set page seg mode to single character\n");
    api.SetPageSegMode(tesseract::PSM_SINGLE_CHAR);
    LOGI("recognize\n");
    char *text = api.GetUTF8Text();
    if (tessedit_write_images) {
        page_image.write("tessinput.tif");
    }
    FAILIF(text == NULL, "didn't recognize\n");

    FILE *fp = fopen(outfile, "w");
    if (fp != NULL) {
        LOGI("write to output %s\n", outfile);
        fwrite(text, strlen(text), 1, fp);
        fclose(fp);
    } else {
        LOGI("could not write to output %s\n", outfile);
    }

    int mean_confidence = api.MeanTextConf();
    LOGI("mean confidence: %d\n", mean_confidence);

    // The array is terminated by -1 and released with delete [], as in
    // ocr_word_confidences().
    int *confs = api.AllWordConfidences();
    if (confs != NULL) {
        for (int len = 0; confs[len] != -1; len++)
            LOGI("confidence %d: %d\n", len, confs[len]);
        delete [] confs;
    }

    LOGI("clearing api\n");
    api.Clear();
    LOGI("clearing adaptive classifier\n");
    api.ClearAdaptiveClassifier();

    LOGI("clearing text\n");
    delete [] text;
}

/*
 * Debug helper: maps the raw image file infile (x by y pixels, bpp bytes per
 * pixel), recognizes it and writes the text to outfile.  The mapping and the
 * file descriptor are released on every path.
 */
void test_ocr(const char *infile, int x, int y, int bpp,
              const char *outfile, const char *lang,
              const char *ratings, const char *tessdata)
{
    LOGI("input file %s\n", infile);
    int ifd = open(infile, O_RDONLY);
    FAILIF(ifd < 0, "open(%s): %s\n", infile, strerror(errno));

    struct stat s;
    if (fstat(ifd, &s) < 0) {
        LOGE("%s(%d): ", __FILE__, __LINE__);
        LOGE("fstat(%d): %s\n", ifd, strerror(errno));
        close(ifd);
        return;
    }
    LOGI("file size %lld\n", (long long)s.st_size);

    void *buffer = mmap(NULL, s.st_size, PROT_READ, MAP_PRIVATE, ifd, 0);
    if (buffer == MAP_FAILED) {
        LOGE("%s(%d): ", __FILE__, __LINE__);
        LOGE("mmap(): %s\n", strerror(errno));
        close(ifd);
        return;
    }
    LOGI("infile mmapped at %p\n", buffer);

    run_test_ocr(buffer, x, y, bpp, outfile, lang, ratings, tessdata);

    munmap(buffer, s.st_size);
    close(ifd);
}
#endif

/*
 * openNative: initializes Tesseract for the given language using the data
 * under TESSBASE.  Returns JNI_TRUE on success, JNI_FALSE if lang is null,
 * cannot be converted, or Init() reports failure.
 */
jboolean ocr_open(JNIEnv *env, jobject thiz, jstring lang)
{
    LOGV(__FUNCTION__);

    native_data_t *nat = get_native_data(env, thiz);

    if (lang == NULL) {
        LOGE("lang string is null!");
        return JNI_FALSE;
    }

    const char *c_lang = env->GetStringUTFChars(lang, NULL);
    if (c_lang == NULL) {
        LOGE("could not extract lang string!");
        return JNI_FALSE;
    }

    jboolean res = JNI_TRUE;

    LOGI("lang %s\n", c_lang);
    if (nat->api.Init(TESSBASE, c_lang)) {
        LOGE("could not initialize tesseract!");
        res = JNI_FALSE;
    } else {
        LOGI("lang %s initialization complete\n", c_lang);
        // Only claim success when Init() actually succeeded.
        LOGI("successfully initialized tesseract!");
    }

    env->ReleaseStringUTFChars(lang, c_lang);
    return res;
}

/*
 * In DEBUG builds, writes the input image and the recognized text (if any)
 * under TESSBASE.  Does nothing otherwise.
 */
static void dump_debug_data(char *text)
{
#if DEBUG
    if (tessedit_write_images) {
        page_image.write(TESSBASE "tessinput.tif");
    }

    if (text) {
        const char *outfile = TESSBASE "out.txt";
        LOGI("write to output %s\n", outfile);
        FILE *fp = fopen(outfile, "w");
        if (fp != NULL) {
            fwrite(text, strlen(text), 1, fp);
            fclose(fp);
        }
    }
#endif
}

/*
 * Takes a global reference to image and obtains its element pointer, storing
 * both in nat.  Returns false (leaving both fields NULL) if image is null or
 * either JNI call fails.  The pair is released by ocr_release_image().
 */
static bool acquire_image(JNIEnv *env, native_data_t *nat, jbyteArray image)
{
    if (image == NULL) {
        LOGE("image array is null!");
        return false;
    }

    nat->image_obj = (jbyteArray)env->NewGlobalRef(image);
    if (nat->image_obj == NULL) {
        LOGE("could not create a global reference to the image!");
        return false;
    }

    nat->image_buffer = env->GetByteArrayElements(nat->image_obj, NULL);
    LOG_ASSERT(nat->image_buffer != NULL, "image buffer is NULL!");
    if (nat->image_buffer == NULL) {
        // LOG_ASSERT may be compiled out; do not keep a half-acquired image.
        env->DeleteGlobalRef(nat->image_obj);
        nat->image_obj = NULL;
        return false;
    }
    return true;
}

#ifdef HAVE_LIBLEPT
/*
 * setEncodedNative: decodes an encoded image held in a Java byte array with
 * leptonica and hands the resulting pix to Tesseract.  No image may currently
 * be held (see ocr_release_image()).
 */
void ocr_set_encoded(JNIEnv *env, jobject thiz, jbyteArray image)
{
    LOGV(__FUNCTION__);
    LOGI("set encoded\n");

    native_data_t *nat = get_native_data(env, thiz);

    LOG_ASSERT(nat->image_obj == NULL && nat->image_buffer == NULL,
               "image %p and/or image_buffer %p are not NULL!",
               nat->image_obj,
               nat->image_buffer);

    if (!acquire_image(env, nat, image)) {
        return;
    }

    int length = env->GetArrayLength(image);
    nat->pix = pixReadMem((const l_uint8 *)nat->image_buffer, length);
    if (nat->pix == NULL) {
        // The buffer stays acquired; ocr_release_image() cleans it up.
        LOGE("could not decode image!");
        return;
    }
    nat->api.SetImage(nat->pix);
}

/*
 * normalizeBgNative: replaces the stored pix with an 8-bit,
 * background-normalized version of itself.  On any leptonica failure the
 * stored pix is left untouched.
 *
 * NOTE(review): the new pix is not passed to api.SetImage() here; confirm
 * whether the engine is expected to pick it up some other way.
 */
void ocr_normalize_bg(JNIEnv *env, jobject thiz,
                      jint reduction, jint size, jint bgval)
{
    LOGV(__FUNCTION__);
    LOGI("norm bg\n");

    native_data_t *nat = get_native_data(env, thiz);

    LOG_ASSERT(nat->pix != NULL, "pix %p is NULL!", nat->pix);
    if (nat->pix == NULL) {
        LOGE("no image to normalize!");
        return;
    }

    PIX *pixg = pixConvertTo8(nat->pix, 0);
    if (pixg == NULL) {
        LOGE("could not convert image to 8 bpp!");
        return;
    }

    PIX *normalized = pixBackgroundNormMorph(pixg, NULL, reduction, size, bgval);
    pixDestroy(&pixg);
    if (normalized == NULL) {
        LOGE("could not normalize image background!");
        return;
    }

    pixDestroy(&nat->pix);
    nat->pix = normalized;
}
#endif

/*
 * setImageNative: hands a raw image (width x height pixels, bpp bytes per
 * pixel, rows of bpp*width bytes) to Tesseract.  The Java array stays
 * referenced until ocr_release_image() is called.  No image may currently be
 * held.
 */
void ocr_set_image(JNIEnv *env, jobject thiz, jbyteArray image,
                   jint width, jint height, jint bpp)
{
    LOGV(__FUNCTION__);
    LOGI("set image x=%d, y=%d, bpp=%d\n", width, height, bpp);

    native_data_t *nat = get_native_data(env, thiz);

    LOG_ASSERT(nat->image_obj == NULL && nat->image_buffer == NULL,
               "image %p and/or image_buffer %p are not NULL!",
               nat->image_obj,
               nat->image_buffer);

    if (!acquire_image(env, nat, image)) {
        return;
    }

    nat->api.SetImage((const unsigned char *)nat->image_buffer,
                      width, height, bpp, bpp*width);
}

/*
 * releaseImageNative: releases the array elements and global reference taken
 * by ocr_set_image()/ocr_set_encoded() (and the decoded pix, if any).  A
 * no-op when no image is held.
 */
void ocr_release_image(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);

    native_data_t *nat = get_native_data(env, thiz);
    if (nat->image_buffer != NULL) {
        LOGI("releasing image buffer");
        // JNI_ABORT: the buffer was only read, nothing to copy back.
        env->ReleaseByteArrayElements(nat->image_obj,
                                      nat->image_buffer, JNI_ABORT);
        env->DeleteGlobalRef(nat->image_obj);
        nat->image_obj = NULL;
        nat->image_buffer = NULL;
#ifdef HAVE_LIBLEPT
        pixDestroy(&nat->pix);
        nat->pix = NULL;
#endif
    }
}

/*
 * setRectangleNative: restricts recognition to a sub-rectangle of the image.
 */
void ocr_set_rectangle(JNIEnv *env, jobject thiz,
                       jint left, jint top,
                       jint width, jint height)
{
    LOGV(__FUNCTION__);
    // Restrict recognition to a sub-rectangle of the image. Call after SetImage.
    // Each SetRectangle clears the recognition results so multiple rectangles
    // can be recognized with the same image.
    native_data_t *nat = get_native_data(env, thiz);

    LOGI("set rectangle left=%d, top=%d, width=%d, height=%d\n",
         left, top, width, height);

    LOG_ASSERT(nat->image_obj != NULL && nat->image_buffer != NULL,
               "image and/or image_buffer are NULL!");
    nat->api.SetRectangle(left, top, width, height);
}

/*
 * recognizeNative: runs recognition on the current image and returns the
 * text as a Java string, or NULL if no text was produced.
 */
jstring ocr_recognize(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);

    native_data_t *nat = get_native_data(env, thiz);

    LOG_ASSERT(nat->image_obj != NULL && nat->image_buffer != NULL,
               "image and/or image_buffer are NULL!");

    LOGI("BEFORE RECOGNIZE");
    char *text = nat->api.GetUTF8Text();
    LOGI("AFTER RECOGNIZE");

    dump_debug_data(text);

    if (text == NULL) {
        // Do not hand a NULL pointer to NewStringUTF().
        LOGE("recognition returned no text!");
        return NULL;
    }

    jstring result = env->NewStringUTF(text);
    // The text buffer is owned by the caller and released with delete [].
    delete [] text;
    return result;
}

/*
 * meanConfidenceNative: returns the (average) confidence value between 0
 * and 100.
 */
static jint ocr_mean_confidence(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    native_data_t *nat = get_native_data(env, thiz);
    return nat->api.MeanTextConf();
}

/*
 * wordConfidencesNative: returns all word confidences (between 0 and 100) as
 * a Java int array, without the native -1 terminator.  Returns NULL if the
 * confidences are unavailable or the Java array cannot be created.
 */
static jintArray ocr_word_confidences(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    // Returns all word confidences (between 0 and 100) in an array, terminated
    // by -1.  The calling function must delete [] after use.
    // The number of confidences should correspond to the number of space-
    // delimited words in GetUTF8Text.
    native_data_t *nat = get_native_data(env, thiz);
    int *confs = nat->api.AllWordConfidences();
    if (confs == NULL) {
        LOGE("Could not get word-confidence values!");
        return NULL;
    }

    int len = 0;
    while (confs[len] != -1)
        len++;
    LOGV("Confidence array has %d elements", len);

    jintArray ret = env->NewIntArray(len);
    LOG_ASSERT(ret != NULL,
               "Could not create Java confidence array!");
    if (ret != NULL) {
        env->SetIntArrayRegion(ret, 0, len, confs);
    } else {
        LOGE("Could not create Java confidence array!");
    }

    delete [] confs;
    return ret;
}

/*
 * setVariableNative: sets a Tesseract variable.  Returns JNI_FALSE if either
 * string is null or cannot be converted, or if the name lookup failed.
 */
static jboolean ocr_set_variable(JNIEnv *env, jobject thiz,
                                 jstring var, jstring value)
{
    LOGV(__FUNCTION__);
    // Set the value of an internal "variable" (of either old or new types).
    // Supply the name of the variable and the value as a string, just as
    // you would in a config file.
    // Returns false if the name lookup failed.
    // Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
    // Or SetVariable("bln_numericmode", "1"); to set numeric-only mode.
    // SetVariable may be used before Init, but settings will revert to
    // defaults on End().
    native_data_t *nat = get_native_data(env, thiz);

    if (var == NULL || value == NULL) {
        LOGE("variable name and/or value string is null!");
        return JNI_FALSE;
    }

    const char *c_var = env->GetStringUTFChars(var, NULL);
    if (c_var == NULL) {
        LOGE("could not extract variable name string!");
        return JNI_FALSE;
    }
    const char *c_value = env->GetStringUTFChars(value, NULL);
    if (c_value == NULL) {
        LOGE("could not extract variable value string!");
        env->ReleaseStringUTFChars(var, c_var);
        return JNI_FALSE;
    }

    jboolean set = nat->api.SetVariable(c_var, c_value) ? JNI_TRUE : JNI_FALSE;

    env->ReleaseStringUTFChars(var, c_var);
    env->ReleaseStringUTFChars(value, c_value);
    return set;
}

/*
 * clearResultsNative: clears recognition results and the adaptive classifier.
 */
static void ocr_clear_results(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    // Free up recognition results and any stored image data, without actually
    // freeing any recognition data that would be time-consuming to reload.
    // Afterwards, you must call SetImage or TesseractRect before doing
    // any Recognize or Get* operation.
    LOGI("releasing all memory");
    native_data_t *nat = get_native_data(env, thiz);
    nat->api.Clear();

    // Call between pages or documents etc to free up memory and forget
    // adaptive data.
    LOGI("clearing adaptive classifier");
    nat->api.ClearAdaptiveClassifier();
}

/*
 * closeNative: shuts the engine down via End().
 */
static void ocr_close(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    // Close down tesseract and free up all memory. End() is equivalent to
    // destructing and reconstructing your TessBaseAPI.  Once End() has been
    // used, none of the other API functions may be used other than Init and
    // anything declared above it in the class definition.
    native_data_t *nat = get_native_data(env, thiz);
    nat->api.End();
}

/*
 * setPageSegModeNative: forwards mode, cast to tesseract::PageSegMode, to the
 * engine.
 */
static void ocr_set_page_seg_mode(JNIEnv *env, jobject thiz, jint mode)
{
    LOGV(__FUNCTION__);
    native_data_t *nat = get_native_data(env, thiz);
    nat->api.SetPageSegMode((tesseract::PageSegMode)mode);
}

/*
 * Derives a language name and shard count from a tessdata file name and
 * records it with add_language().  name is modified in place.
 *
 * "<lang>.traineddata" yields (lang, 0 shards); "<lang><N>.traineddata"
 * yields (lang, N + 1 shards).  Names without ".traineddata", or with
 * nothing in front of it, are ignored.
 *
 * The backward digit scan never inspects the first character, so a stem made
 * only of digits (e.g. "12") is split after its first character.
 */
static void add_language_from_filename(char *name)
{
    char *where = strstr(name, ".traineddata");
    if (where == NULL) {
        return;
    }

    *where = 0;  // cut the extension off
    if (where == name) {
        return;
    }

    where--;  // skip the dot
    while (where != name) {
        if (!isdigit((unsigned char)*where))
            break;
        where--;  // it's a digit, backtrack
    }
    // we backtracked one too much
    char *end = ++where;

    // if there was a number, it will be written in
    // shard, otherwise shard will remain -1.
    int shard = -1;
    sscanf(end, "%d", &shard);
    *end = 0;  // strip the shard number from the language name

    add_language(name, shard + 1);
}

/*
 * getLanguagesNative: scans TESSBASE "tessdata" for regular files named
 * *.traineddata, records them in the language list and returns the language
 * names as a Java String[].  Returns NULL if the directory cannot be opened
 * or the array cannot be created.
 */
static jobjectArray ocr_get_languages(JNIEnv *env, jclass clazz)
{
    LOGV(__FUNCTION__);

    DIR *tessdata = opendir(TESSBASE "tessdata");
    if (tessdata == NULL) {
        LOGE("Could not open tessdata directory %s", TESSBASE "tessdata");
        return NULL;
    }

    dirent *ent;
    LOGI("readdir");
    while ((ent = readdir(tessdata))) {
        if (ent->d_type == DT_REG) {
            add_language_from_filename(ent->d_name);
        }
    }

    closedir(tessdata);

    jclass stringClass = env->FindClass("java/lang/String");
    if (stringClass == NULL) {
        LOGE("Could not find java/lang/String!");
        return NULL;
    }

    jobjectArray langsArray =
        env->NewObjectArray(get_num_languages(), stringClass, NULL);
    LOG_ASSERT(langsArray != NULL,
               "Could not create Java object array!");
    env->DeleteLocalRef(stringClass);
    if (langsArray == NULL) {
        LOGE("Could not create Java object array!");
        return NULL;
    }

    int i = 0;
    for (language_info_t *it = language_iter_init();
         it != NULL;
         i++, it = language_iter_next()) {
        jstring name = env->NewStringUTF(it->lang);
        env->SetObjectArrayElement(langsArray, i, name);
        // Drop the per-element local reference so that a long list cannot
        // exhaust the local reference table.
        env->DeleteLocalRef(name);
    }
    return langsArray;
}

/*
 * getShardsNative: returns the shard count recorded for lang, or -1 if lang
 * is null, cannot be converted, or is not in the language list.
 */
static jint ocr_get_shards(JNIEnv *env, jclass clazz, jstring lang)
{
    int ret = -1;

    if (lang == NULL) {
        LOGE("lang string is null!");
        return ret;
    }

    const char *c_lang = env->GetStringUTFChars(lang, NULL);
    if (c_lang == NULL) {
        LOGE("could not extract lang string!");
        return ret;
    }

    language_info_t *lang_entry = find_language(c_lang);
    if (lang_entry)
        ret = lang_entry->shards;

    LOGI("shards for lang %s: %d\n", c_lang, ret);

    env->ReleaseStringUTFChars(lang, c_lang);
    return ret;
}

/*
 * classInitNative: resolves the field ID of the int field "mNativeData".
 */
static void class_init(JNIEnv *env, jclass clazz)
{
    LOGV(__FUNCTION__);
    field_mNativeData = env->GetFieldID(clazz, "mNativeData", "I");
}

/*
 * initializeNativeDataNative: allocates the native state for object and
 * stores its address in mNativeData.
 */
static void initialize_native_data(JNIEnv *env, jobject object)
{
    LOGV(__FUNCTION__);
    native_data_t *nat = new native_data_t;
    if (nat == NULL) {
        LOGE("%s: out of memory!", __FUNCTION__);
        return;
    }

    env->SetIntField(object, field_mNativeData, (jint)nat);
}

/*
 * cleanupNativeDataNative: releases any image still held, destroys the
 * native state, clears mNativeData so no stale pointer remains, and frees
 * the (file-global) language list.
 */
static void cleanup_native_data(JNIEnv *env, jobject object)
{
    LOGV(__FUNCTION__);
    native_data_t *nat = get_native_data(env, object);
    if (nat) {
        // Drops the global reference if the caller never released the image.
        ocr_release_image(env, object);
        delete nat;
        env->SetIntField(object, field_mNativeData, 0);
    }
    free_languages();
}

static JNINativeMethod methods[] = {
     /* name, signature, funcPtr */
    {"classInitNative", "()V", (void*)class_init},
    {"initializeNativeDataNative", "()V", (void *)initialize_native_data},
    {"cleanupNativeDataNative", "()V", (void *)cleanup_native_data},
    {"openNative", "(Ljava/lang/String;)Z", (void*)ocr_open},
#ifdef HAVE_LIBLEPT
    {"setEncodedNative", "([B)V", (void*)ocr_set_encoded},
    {"normalizeBgNative", "(III)V", (void*)ocr_normalize_bg},
#endif
    {"setImageNative", "([BIII)V", (void*)ocr_set_image},
    {"releaseImageNative", "()V", (void*)ocr_release_image},
    {"setRectangleNative", "(IIII)V", (void*)ocr_set_rectangle},
    {"recognizeNative", "()Ljava/lang/String;", (void*)ocr_recognize},
    {"clearResultsNative", "()V", (void*)ocr_clear_results},
    {"closeNative", "()V", (void*)ocr_close},
    {"meanConfidenceNative", "()I", (void*)ocr_mean_confidence},
    {"wordConfidencesNative", "()[I", (void*)ocr_word_confidences},
    {"setVariableNative", "(Ljava/lang/String;Ljava/lang/String;)Z", (void*)ocr_set_variable},
    {"setPageSegModeNative", "(I)V", (void*)ocr_set_page_seg_mode},
    {"getLanguagesNative", "()[Ljava/lang/String;", (void*)ocr_get_languages},
    {"getShardsNative", "(Ljava/lang/String;)I", (void*)ocr_get_shards},
};

/*
 * Register several native methods for one class.
 * Returns JNI_TRUE on success, JNI_FALSE if the class cannot be found or the
 * registration fails.
 */
static int registerNativeMethods(JNIEnv* env, const char* className,
    JNINativeMethod* gMethods, int numMethods)
{
    jclass clazz = env->FindClass(className);
    if (clazz == NULL) {
        LOGE("Native registration unable to find class %s", className);
        return JNI_FALSE;
    }

    if (env->RegisterNatives(clazz, gMethods, numMethods) < 0) {
        LOGE("RegisterNatives failed for %s", className);
        return JNI_FALSE;
    }

    return JNI_TRUE;
}

/*
 * Set some test stuff up.
 *
 * Returns the JNI version on success, -1 on failure.
 */

// Lets GetEnv() write through a void* without a pointer cast.
typedef union {
    JNIEnv* env;
    void* venv;
} UnionJNIEnvToVoid;

jint JNI_OnLoad(JavaVM* vm, void* reserved)
{
    UnionJNIEnvToVoid uenv;
    uenv.venv = NULL;
    JNIEnv* env = NULL;

    if (vm->GetEnv(&uenv.venv, JNI_VERSION_1_4) != JNI_OK) {
        LOGE("GetEnv failed\n");
        return (jint)-1;
    }
    env = uenv.env;

    assert(env != NULL);

    LOGI("In OcrLib JNI_OnLoad\n");

    if (JNI_FALSE ==
        registerNativeMethods(env,
                              "com/android/ocr/OcrLib",
                              methods,
                              sizeof(methods) / sizeof(methods[0]))) {
        LOGE("OcrLib native registration failed\n");
        return (jint)-1;
    }

    /* success -- return valid version number */
    LOGI("OcrLib native registration succeeded!\n");
    return (jint)JNI_VERSION_1_4;
}