19 #define _USE_MATH_DEFINES
32 #ifndef DISABLED_LEGACY_ENGINE
39 #ifndef DISABLED_LEGACY_ENGINE
44 #if defined(USE_OPENCL)
78 #include <allheaders.h>
80 # include <curl/curl.h>
93 # include <sys/stat.h>
94 # include <sys/types.h>
100 static BOOL_VAR(stream_filelist,
false,
"Stream a filelist from stdin");
101 static STRING_VAR(document_title,
"",
"Title of output document (used for hOCR and PDF output)");
114 static const char *kOldVarsFile =
"failed_vars.txt";
116 #ifndef DISABLED_LEGACY_ENGINE
121 static const char *kInputFile =
"noname.tif";
122 static const char kUnknownFontName[] =
"UnknownFont";
124 static STRING_VAR(classify_font_name, kUnknownFontName,
125 "Default font name to be used in training");
132 static void ExtractFontName(
const char* filename, std::string* fontname) {
133 *fontname = classify_font_name;
134 if (*fontname == kUnknownFontName) {
137 const char *basename = strrchr(filename,
'/');
138 const char *firstdot = strchr(basename ? basename : filename,
'.');
139 const char *lastdot = strrchr(filename,
'.');
140 if (firstdot != lastdot && firstdot !=
nullptr && lastdot !=
nullptr) {
142 *fontname = firstdot;
143 fontname->resize(lastdot - firstdot);
151 static void addAvailableLanguages(
const std::string &datadir,
const std::string &base,
152 std::vector<std::string> *langs) {
154 if (!base2.empty()) {
157 const size_t extlen =
sizeof(kTrainedDataSuffix);
159 WIN32_FIND_DATA data;
160 HANDLE handle = FindFirstFile((datadir + base2 +
"*").c_str(), &data);
161 if (handle != INVALID_HANDLE_VALUE) {
164 char *name = data.cFileName;
166 if (name[0] !=
'.') {
167 if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY) {
168 addAvailableLanguages(datadir, base2 + name, langs);
170 size_t len = strlen(name);
171 if (len > extlen && name[len - extlen] ==
'.' &&
172 strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
173 name[len - extlen] =
'\0';
174 langs->push_back(base2 + name);
178 result = FindNextFile(handle, &data);
183 DIR *dir = opendir((datadir + base).c_str());
184 if (dir !=
nullptr) {
186 while ((de = readdir(dir))) {
187 char *name = de->d_name;
189 if (name[0] !=
'.') {
191 if (stat((datadir + base2 + name).c_str(), &st) == 0 && (st.st_mode & S_IFDIR) == S_IFDIR) {
192 addAvailableLanguages(datadir, base2 + name, langs);
194 size_t len = strlen(name);
195 if (len > extlen && name[len - extlen] ==
'.' &&
196 strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
197 name[len - extlen] =
'\0';
198 langs->push_back(base2 + name);
209 : tesseract_(nullptr)
210 , osd_tesseract_(nullptr)
211 , equ_detect_(nullptr)
217 thresholder_(nullptr)
218 , paragraph_models_(nullptr)
219 , block_list_(nullptr)
222 , recognition_done_(false)
251 ds_device device = OpenclDevice::getDeviceSelection();
252 if (device.type == DS_DEVICE_OPENCL_DEVICE) {
253 *data =
new cl_device_id;
254 memcpy(*data, &device.oclDeviceID,
sizeof(cl_device_id));
255 return sizeof(cl_device_id);
292 auto *p = ParamUtils::FindParam<IntParam>(name,
GlobalParams()->int_params,
297 *value = (int32_t)(*p);
302 auto *p = ParamUtils::FindParam<BoolParam>(name,
GlobalParams()->bool_params,
312 auto *p = ParamUtils::FindParam<StringParam>(name,
GlobalParams()->string_params,
314 return (p !=
nullptr) ? p->c_str() :
nullptr;
318 auto *p = ParamUtils::FindParam<DoubleParam>(name,
GlobalParams()->double_params,
323 *value = (double)(*p);
332 #ifndef DISABLED_LEGACY_ENGINE
337 for (
int font_index = 1; font_index < fontinfo_size; ++font_index) {
339 fprintf(fp,
"ID=%3d: %s is_italic=%s is_bold=%s"
340 " is_fixed_pitch=%s is_serif=%s is_fraktur=%s\n",
341 font_index, font.
name,
343 font.
is_bold() ?
"true" :
"false",
366 int configs_size,
const std::vector<std::string> *vars_vec,
367 const std::vector<std::string> *vars_values,
bool set_only_non_debug_params) {
368 return Init(datapath, 0, language,
oem, configs, configs_size, vars_vec, vars_values,
369 set_only_non_debug_params,
nullptr);
376 char **configs,
int configs_size,
const std::vector<std::string> *vars_vec,
377 const std::vector<std::string> *vars_values,
bool set_only_non_debug_params,
379 if (language ==
nullptr) {
382 if (data ==
nullptr) {
385 std::string datapath = data_size == 0 ? data : language;
401 bool reset_classifier =
true;
403 reset_classifier =
false;
405 if (reader !=
nullptr) {
409 if (data_size != 0) {
413 configs_size, vars_vec, vars_values, set_only_non_debug_params,
428 #ifndef DISABLED_LEGACY_ENGINE
430 if (reset_classifier) {
459 for (
int i = 0; i < num_subs; ++i) {
472 std::sort(langs->begin(), langs->end());
483 #ifndef DISABLED_LEGACY_ENGINE
512 tesseract_->tessedit_pageseg_mode.set_value(mode);
537 int bytes_per_line,
int left,
int top,
int width,
int height) {
544 int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
545 SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, bytes_per_pixel,
552 #ifndef DISABLED_LEGACY_ENGINE
574 int bytes_per_pixel,
int bytes_per_line) {
585 tprintf(
"Please call SetImage before SetSourceResolution.\n");
599 if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
601 Pix *p1 = pixRemoveAlpha(pix);
603 (void)pixCopy(pix, p1);
656 int **blockids,
int **paraids) {
700 const int raw_padding, Pixa **pixa,
int **blockids,
703 if (page_it ==
nullptr) {
706 if (page_it ==
nullptr) {
711 int component_count = 0;
712 int left, top, right, bottom;
717 if (page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom) &&
721 }
while (page_it->Next(level));
726 if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
730 }
while (page_it->Next(level));
733 Boxa *boxa = boxaCreate(component_count);
734 if (pixa !=
nullptr) {
735 *pixa = pixaCreate(component_count);
737 if (blockids !=
nullptr) {
738 *blockids =
new int[component_count];
740 if (paraids !=
nullptr) {
741 *paraids =
new int[component_count];
746 int component_index = 0;
749 bool got_bounding_box;
751 got_bounding_box = page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom);
753 got_bounding_box = page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom);
755 if (got_bounding_box && (!text_only ||
PTIsTextType(page_it->BlockType()))) {
756 Box *lbox = boxCreate(left, top, right - left, bottom - top);
757 boxaAddBox(boxa, lbox, L_INSERT);
758 if (pixa !=
nullptr) {
761 pix = page_it->GetImage(level, raw_padding,
GetInputImage(), &left, &top);
763 pix = page_it->GetBinaryImage(level);
765 pixaAddPix(*pixa, pix, L_INSERT);
766 pixaAddBox(*pixa, lbox, L_CLONE);
768 if (paraids !=
nullptr) {
769 (*paraids)[component_index] = paraid;
770 if (page_it->IsAtFinalElement(
RIL_PARA, level)) {
774 if (blockids !=
nullptr) {
775 (*blockids)[component_index] = blockid;
776 if (page_it->IsAtFinalElement(
RIL_BLOCK, level)) {
783 }
while (page_it->Next(level));
846 #ifndef DISABLED_LEGACY_ENGINE
847 if (
tesseract_->tessedit_resegment_from_line_boxes) {
849 }
else if (
tesseract_->tessedit_resegment_from_boxes) {
862 if (
tesseract_->tessedit_train_line_recognizer) {
869 #ifndef DISABLED_LEGACY_ENGINE
870 if (
tesseract_->tessedit_make_boxes_from_boxes) {
878 #ifndef GRAPHICS_DISABLED
886 #ifndef DISABLED_LEGACY_ENGINE
887 }
else if (
tesseract_->tessedit_train_from_boxes) {
888 std::string fontname;
891 }
else if (
tesseract_->tessedit_ambigs_training) {
895 training_output_file);
896 fclose(training_output_file);
900 bool wait_for_text =
true;
902 if (!wait_for_text) {
947 bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf,
const char *retry_config,
949 int tessedit_page_number) {
950 if (!flist && !buf) {
953 unsigned page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
956 std::vector<std::string> lines;
959 for (
const auto ch : *buf) {
961 lines.push_back(line);
969 lines.push_back(line);
977 for (
unsigned i = 0; i < page; i++) {
979 if (fgets(pagename,
sizeof(pagename), flist) ==
nullptr) {
986 if (renderer && !renderer->
BeginDocument(document_title.c_str())) {
993 if (fgets(pagename,
sizeof(pagename), flist) ==
nullptr) {
997 if (page >= lines.size()) {
1000 snprintf(pagename,
sizeof(pagename),
"%s", lines[page].c_str());
1003 Pix *pix = pixRead(pagename);
1004 if (pix ==
nullptr) {
1005 tprintf(
"Image file %s cannot be read!\n", pagename);
1008 tprintf(
"Page %u : %s\n", page, pagename);
1009 bool r =
ProcessPage(pix, page, pagename, retry_config, timeout_millisec, renderer);
1014 if (tessedit_page_number >= 0) {
1027 bool TessBaseAPI::ProcessPagesMultipageTiff(
const l_uint8 *data,
size_t size,
const char *filename,
1028 const char *retry_config,
int timeout_millisec,
1030 int tessedit_page_number) {
1032 int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
1035 if (tessedit_page_number >= 0) {
1036 page = tessedit_page_number;
1037 pix = (data) ? pixReadMemTiff(data, size, page) : pixReadTiff(filename, page);
1039 pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
1040 : pixReadFromMultipageTiff(filename, &offset);
1042 if (pix ==
nullptr) {
1045 if (offset || page > 0) {
1047 tprintf(
"Page %d\n", page + 1);
1049 auto page_string = std::to_string(page);
1050 SetVariable(
"applybox_page", page_string.c_str());
1051 bool r =
ProcessPage(pix, page, filename, retry_config, timeout_millisec, renderer);
1056 if (tessedit_page_number >= 0) {
1071 #ifndef DISABLED_LEGACY_ENGINE
1083 static size_t WriteMemoryCallback(
void *contents,
size_t size,
size_t nmemb,
void *userp) {
1084 size = size * nmemb;
1085 auto *buf =
reinterpret_cast<std::string *
>(userp);
1086 buf->append(
reinterpret_cast<const char *
>(contents), size);
1104 bool stdInput = !strcmp(filename,
"stdin") || !strcmp(filename,
"-");
1107 if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1108 tprintf(
"ERROR: cin to binary: %s", strerror(errno));
1112 if (stream_filelist) {
1113 return ProcessPagesFileList(stdin,
nullptr, retry_config, timeout_millisec, renderer,
1121 const l_uint8 *data =
nullptr;
1123 buf.assign((std::istreambuf_iterator<char>(std::cin)), (std::istreambuf_iterator<char>()));
1124 data =
reinterpret_cast<const l_uint8 *
>(buf.data());
1125 }
else if (strstr(filename,
"://") !=
nullptr) {
1128 CURL *curl = curl_easy_init();
1129 if (curl ==
nullptr) {
1130 fprintf(stderr,
"Error, curl_easy_init failed\n");
1134 auto error = [curl, &curlcode](
const char *
function) {
1135 fprintf(stderr,
"Error, %s failed with error %s\n",
function, curl_easy_strerror(curlcode));
1136 curl_easy_cleanup(curl);
1139 curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename);
1140 if (curlcode != CURLE_OK) {
1141 return error(
"curl_easy_setopt");
1143 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
1144 if (curlcode != CURLE_OK) {
1145 return error(
"curl_easy_setopt");
1147 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf);
1148 if (curlcode != CURLE_OK) {
1149 return error(
"curl_easy_setopt");
1151 curlcode = curl_easy_perform(curl);
1152 if (curlcode != CURLE_OK) {
1153 return error(
"curl_easy_perform");
1155 curl_easy_cleanup(curl);
1156 data =
reinterpret_cast<const l_uint8 *
>(buf.data());
1159 fprintf(stderr,
"Error, this tesseract has no URL support\n");
1164 if (FILE *
file = fopen(filename,
"rb")) {
1167 fprintf(stderr,
"Error, cannot read input file %s: %s\n", filename, strerror(errno));
1175 (data !=
nullptr) ? findFileFormatBuffer(data, &format) : findFileFormat(filename, &format);
1178 if (r != 0 || format == IFF_UNKNOWN) {
1180 if (data !=
nullptr) {
1183 std::ifstream t(filename);
1184 std::string u((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
1187 return ProcessPagesFileList(
nullptr, &s, retry_config, timeout_millisec, renderer,
1192 bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || format == IFF_TIFF_RLE ||
1193 format == IFF_TIFF_G3 || format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1194 #if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76
1195 format == IFF_TIFF_JPEG ||
1197 format == IFF_TIFF_ZIP);
1202 pix = (data !=
nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename);
1203 if (pix ==
nullptr) {
1209 if (renderer && !renderer->
BeginDocument(document_title.c_str())) {
1215 r = (tiff) ? ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, timeout_millisec,
1217 :
ProcessPage(pix, 0, filename, retry_config, timeout_millisec, renderer);
1223 if (!r || (renderer && !renderer->
EndDocument())) {
1230 const char *retry_config,
int timeout_millisec,
1234 bool failed =
false;
1238 if (! std::unique_ptr<const PageIterator>(
AnalyseLayout())) {
1243 }
else if (timeout_millisec > 0) {
1246 monitor.
cancel =
nullptr;
1259 std::string output_filename =
output_file_ +
".processed";
1260 if (page_index > 0) {
1261 output_filename += std::to_string(page_index);
1263 output_filename +=
".tif";
1264 pixWrite(output_filename.c_str(), page_pix, IFF_TIFF_G4);
1265 pixDestroy(&page_pix);
1268 if (failed && retry_config !=
nullptr && retry_config[0] !=
'\0') {
1270 FILE *fp = fopen(kOldVarsFile,
"wb");
1271 if (fp ==
nullptr) {
1272 tprintf(
"Error, failed to open file \"%s\"\n", kOldVarsFile);
1285 if (renderer && !failed) {
1286 failed = !renderer->
AddImage(
this);
1344 std::string text(
"");
1350 const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(
RIL_PARA));
1351 text += para_text.get();
1353 char *result =
new char[text.length() + 1];
1354 strncpy(result, text.c_str(), text.length() + 1);
1359 int left, top, right, bottom;
1360 it->
BoundingBox(level, &left, &top, &right, &bottom);
1361 text +=
"\t" + std::to_string(left);
1362 text +=
"\t" + std::to_string(top);
1363 text +=
"\t" + std::to_string(right - left);
1364 text +=
"\t" + std::to_string(bottom - top);
1377 int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1378 int page_id = page_number + 1;
1380 int page_num = page_id;
1386 std::string tsv_str;
1387 tsv_str +=
"1\t" + std::to_string(page_num);
1388 tsv_str +=
"\t" + std::to_string(block_num);
1389 tsv_str +=
"\t" + std::to_string(par_num);
1390 tsv_str +=
"\t" + std::to_string(line_num);
1391 tsv_str +=
"\t" + std::to_string(word_num);
1392 tsv_str +=
"\t" + std::to_string(
rect_left_);
1393 tsv_str +=
"\t" + std::to_string(
rect_top_);
1396 tsv_str +=
"\t-1\t\n";
1406 if (res_it->IsAtBeginningOf(
RIL_BLOCK)) {
1411 tsv_str +=
"2\t" + std::to_string(page_num);
1412 tsv_str +=
"\t" + std::to_string(block_num);
1413 tsv_str +=
"\t" + std::to_string(par_num);
1414 tsv_str +=
"\t" + std::to_string(line_num);
1415 tsv_str +=
"\t" + std::to_string(word_num);
1416 AddBoxToTSV(res_it.get(),
RIL_BLOCK, tsv_str);
1417 tsv_str +=
"\t-1\t\n";
1419 if (res_it->IsAtBeginningOf(
RIL_PARA)) {
1423 tsv_str +=
"3\t" + std::to_string(page_num);
1424 tsv_str +=
"\t" + std::to_string(block_num);
1425 tsv_str +=
"\t" + std::to_string(par_num);
1426 tsv_str +=
"\t" + std::to_string(line_num);
1427 tsv_str +=
"\t" + std::to_string(word_num);
1428 AddBoxToTSV(res_it.get(),
RIL_PARA, tsv_str);
1429 tsv_str +=
"\t-1\t\n";
1434 tsv_str +=
"4\t" + std::to_string(page_num);
1435 tsv_str +=
"\t" + std::to_string(block_num);
1436 tsv_str +=
"\t" + std::to_string(par_num);
1437 tsv_str +=
"\t" + std::to_string(line_num);
1438 tsv_str +=
"\t" + std::to_string(word_num);
1440 tsv_str +=
"\t-1\t\n";
1444 int left, top, right, bottom;
1445 res_it->BoundingBox(
RIL_WORD, &left, &top, &right, &bottom);
1447 tsv_str +=
"5\t" + std::to_string(page_num);
1448 tsv_str +=
"\t" + std::to_string(block_num);
1449 tsv_str +=
"\t" + std::to_string(par_num);
1450 tsv_str +=
"\t" + std::to_string(line_num);
1451 tsv_str +=
"\t" + std::to_string(word_num);
1452 tsv_str +=
"\t" + std::to_string(left);
1453 tsv_str +=
"\t" + std::to_string(top);
1454 tsv_str +=
"\t" + std::to_string(right - left);
1455 tsv_str +=
"\t" + std::to_string(bottom - top);
1456 tsv_str +=
"\t" + std::to_string(res_it->Confidence(
RIL_WORD));
1471 tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(
RIL_SYMBOL)).get();
1478 char *ret =
new char[tsv_str.length() + 1];
1479 strcpy(ret, tsv_str.c_str());
1519 char *result =
new char[total_length];
1521 int output_length = 0;
1524 int left, top, right, bottom;
1529 for (
int i = 0; text[i] !=
'\0'; ++i) {
1530 if (text[i] ==
' ') {
1534 snprintf(result + output_length, total_length - output_length,
"%s %d %d %d %d %d\n",
1536 output_length += strlen(result + output_length);
1552 const int kUniChs[] = {0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0};
1554 const int kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0};
1565 bool tilde_crunch_written =
false;
1566 bool last_char_was_newline =
true;
1567 bool last_char_was_tilde =
false;
1571 char *result =
new char[total_length];
1578 (!tilde_crunch_written ||
1585 last_char_was_tilde =
false;
1587 if (!last_char_was_tilde) {
1589 last_char_was_tilde =
true;
1591 tilde_crunch_written =
true;
1592 last_char_was_newline =
false;
1597 tilde_crunch_written =
false;
1601 int length = lengths.length();
1605 if (last_char_was_tilde && word->
word->
space() == 0 && wordstr[offset] ==
' ') {
1609 offset = lengths[i++];
1611 if (i < length && wordstr[offset] != 0) {
1612 if (!last_char_was_newline) {
1615 last_char_was_newline =
false;
1617 for (; i < length; offset += lengths[i++]) {
1620 last_char_was_tilde =
true;
1625 UNICHAR ch(wordstr + offset, lengths[i]);
1627 for (
int j = 0;
kUniChs[j] != 0; ++j) {
1633 if (uni_ch <= 0xff) {
1634 *ptr++ =
static_cast<char>(uni_ch);
1635 last_char_was_tilde =
false;
1638 last_char_was_tilde =
true;
1647 tilde_crunch_written =
false;
1648 last_char_was_newline =
true;
1649 last_char_was_tilde =
false;
1657 #ifndef DISABLED_LEGACY_ENGINE
1669 const char **script_name,
float *script_conf) {
1683 *orient_deg = orient_id * 90;
1689 *script_name = script;
1707 const char *script_name;
1717 std::stringstream stream;
1719 stream.imbue(std::locale::classic());
1721 stream.precision(2);
1722 stream << std::fixed <<
"Page number: " << page_number <<
"\n"
1723 <<
"Orientation in degrees: " << orient_deg <<
"\n"
1724 <<
"Rotate: " << rotate <<
"\n"
1725 <<
"Orientation confidence: " << orient_conf <<
"\n"
1726 <<
"Script: " << script_name <<
"\n"
1727 <<
"Script confidence: " << script_conf <<
"\n";
1728 const std::string &text = stream.str();
1729 char *result =
new char[text.length() + 1];
1730 strcpy(result, text.c_str());
1765 int *conf =
new int[n_word + 1];
1770 int w_conf =
static_cast<int>(100 + 5 * choice->
certainty());
1778 conf[n_word++] = w_conf;
1784 #ifndef DISABLED_LEGACY_ENGINE
1798 bool success =
true;
1802 const std::unique_ptr<const char[]> text(
GetUTF8Text());
1804 tprintf(
"Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1806 if (text !=
nullptr) {
1809 if (word_res !=
nullptr) {
1814 for (t = 0; text[t] !=
'\0'; ++t) {
1815 if (text[t] ==
'\n' || text[t] ==
' ') {
1818 while (wordstr[w] ==
' ') {
1821 if (text[t] != wordstr[w]) {
1826 if (text[t] !=
'\0' || wordstr[w] !=
'\0') {
1829 std::vector<TBOX> boxes;
1834 if (pr_it.
word() ==
nullptr) {
1837 word_res = pr_it.
word();
1894 #ifndef DISABLED_LEGACY_ENGINE
1935 const std::unique_ptr<const PageIterator> it(
AnalyseLayout());
1936 if (it ==
nullptr) {
1946 *out_slope =
static_cast<float>(y2 - y1) / (x2 - x1);
1947 *out_offset =
static_cast<int>(y1 - *out_slope * x1);
1950 int left, top, right, bottom;
1951 if (!it->BoundingBox(
RIL_TEXTLINE, &left, &top, &right, &bottom)) {
1959 *out_offset += bottom - std::max(left_y, right_y);
1962 *out_slope = -*out_slope;
1988 for (
int i = 0; i < num_subs; ++i) {
1997 tprintf(
"Please call Init before attempting to set an image.\n");
2015 if (*pix !=
nullptr) {
2024 "Warning: User defined image dpi is outside of expected range "
2034 tprintf(
"Warning: Invalid resolution %d dpi. Using %d instead.\n",
2043 Image pix_binary(*pix);
2057 auto [ok, pix_grey, pix_binary, pix_thresholds] =
thresholder_->
Threshold(
this, thresholding_method);
2079 "Estimated internal resolution %d out of range! "
2080 "Corrected to %d.\n",
2090 tprintf(
"Please call SetImage before attempting recognition.\n");
2101 #ifndef DISABLED_LEGACY_ENGINE
2111 #ifndef DISABLED_LEGACY_ENGINE
2117 tprintf(
"Warning: Could not set equation detector\n");
2126 #ifndef DISABLED_LEGACY_ENGINE
2128 if (strcmp(
language_.c_str(),
"osd") == 0) {
2135 "Warning: Auto orientation and script detection requested,"
2136 " but data path is undefined\n");
2140 nullptr, 0,
nullptr,
nullptr,
false, &mgr) == 0) {
2145 "Warning: Auto orientation and script detection requested,"
2146 " but osd language failed to load\n");
2199 int total_length = 2;
2200 int total_blobs = 0;
2205 if (choice !=
nullptr) {
2206 total_blobs += choice->
length() + 2;
2215 if (blob_count !=
nullptr) {
2216 *blob_count = total_blobs;
2218 return total_length;
2221 #ifndef DISABLED_LEGACY_ENGINE
2243 tesseract_->min_orientation_margin.set_value(margin);
2261 delete[] * block_orientation;
2262 *block_orientation =
nullptr;
2263 delete[] * vertical_writing;
2264 *vertical_writing =
nullptr;
2267 block_it.move_to_first();
2269 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2270 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2276 tprintf(
"WARNING: Found no blocks\n");
2279 *block_orientation =
new int[num_blocks];
2280 *vertical_writing =
new bool[num_blocks];
2281 block_it.move_to_first();
2283 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2284 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2287 FCOORD re_rotation = block_it.data()->re_rotation();
2288 float re_theta = re_rotation.
angle();
2289 FCOORD classify_rotation = block_it.data()->classify_rotation();
2290 float classify_theta = classify_rotation.
angle();
2291 double rot_theta = -(re_theta - classify_theta) * 2.0 / M_PI;
2292 if (rot_theta < 0) {
2295 int num_rotations =
static_cast<int>(rot_theta + 0.5);
2296 (*block_orientation)[i] = num_rotations;
2299 (*vertical_writing)[i] = classify_rotation.
y() != 0.0f;
2305 int debug_level = 0;
2312 std::vector<ParagraphModel *> models;
2341 for (ptr = text; *ptr; ptr++) {
struct TessResultRenderer TessResultRenderer
#define TESSERACT_VERSION_STR
#define BOOL_VAR(name, val, comment)
#define STRING_VAR(name, val, comment)
@ W_FUZZY_NON
fuzzy nonspace
@ SET_PARAM_CONSTRAINT_NON_INIT_ONLY
@ SET_PARAM_CONSTRAINT_DEBUG_ONLY
const char kTesseractReject
const int kBytesPerBoxFileLine
TESS_API int OrientationIdToValue(const int &id)
bool PSM_OSD_ENABLED(int pageseg_mode)
@ PSM_OSD_ONLY
Orientation and script detection only.
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
void tprintf(const char *format,...)
int IntCastRounded(double x)
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
void chomp_string(char *str)
const int kBytesPer64BitNumber
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
const int kMaxBytesPerLine
int orientation_and_script_detection(const char *filename, OSResults *, tesseract::Tesseract *)
constexpr int kMaxCredibleResolution
std::string HOcrEscape(const char *text)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
const int kBytesPerNumber
tesseract::ParamsVectors * GlobalParams()
const int kNumbersPerBlob
bool(*)(const char *filename, std::vector< char > *data) FileReader
constexpr int kMinCredibleResolution
bool PTIsTextType(PolyBlockType type)
void DetectParagraphs(int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)
EquationDetect * equ_detect_
The equation detector.
const char * GetInitLanguagesAsString() const
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
const char * GetInputName()
std::string input_file_
Name used by training code.
virtual bool Threshold(Pix **pix)
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
int Recognize(ETEXT_DESC *monitor)
PAGE_RES * page_res_
The page-level data.
void SetPageSegMode(PageSegMode mode)
void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing)
bool SetDebugVariable(const char *name, const char *value)
const char * GetDatapath()
bool GetVariableAsString(const char *name, std::string *val) const
void InitForAnalysePage()
Tesseract * tesseract_
The underlying data object.
bool GetIntVariable(const char *name, int *value) const
Boxa * GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
void SetRectangle(int left, int top, int width, int height)
MutableIterator * GetMutableIterator()
int IsValidWord(const char *word) const
bool SetVariable(const char *name, const char *value)
bool IsValidCharacter(const char *utf8_character) const
void DetectParagraphs(bool after_text_recognition)
static const char * Version()
Boxa * GetWords(Pixa **pixa)
std::string language_
Last initialized language.
int * AllWordConfidences()
int GetSourceYResolution()
void GetAvailableLanguagesAsVector(std::vector< std::string > *langs) const
void SetSourceResolution(int ppi)
void ReadDebugConfigFile(const char *filename)
ResultIterator * GetIterator()
bool GetTextDirection(int *out_offset, float *out_slope)
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
int TextLength(int *blob_count) const
std::string datapath_
Current location of tessdata.
int GetThresholdedImageScaleFactor() const
bool DetectOS(OSResults *)
PageSegMode GetPageSegMode() const
static void ClearPersistentCache()
std::vector< ParagraphModel * > * paragraph_models_
void SetDictFunc(DictFunc f)
bool recognition_done_
page_res_ contains recognition data.
const Dawg * GetDawg(int i) const
FileReader reader_
Reads files from any filesystem.
char * GetTSVText(int page_number)
void SetInputName(const char *name)
char * GetOsdText(int page_number)
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
OcrEngineMode oem() const
void PrintVariables(FILE *fp) const
void GetLoadedLanguagesAsVector(std::vector< std::string > *langs) const
ImageThresholder * thresholder_
Image thresholding module.
static size_t getOpenCLDevice(void **device)
std::string output_file_
Name used by debug code.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
PageIterator * AnalyseLayout()
char * GetBoxText(int page_number)
const char * GetStringVariable(const char *name) const
void ReadConfigFile(const char *filename)
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
BLOCK_LIST * block_list_
The page layout.
void set_min_orientation_margin(double margin)
Boxa * GetStrips(Pixa **pixa, int **blockids)
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
void PrintFontsTable(FILE *fp) const
char * TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
void SetProbabilityInContextFunc(ProbabilityInContextFunc f)
LTRResultIterator * GetLTRIterator()
Tesseract * osd_tesseract_
For orientation & script detection.
bool GetBoolVariable(const char *name, bool *value) const
void ClearAdaptiveClassifier()
bool GetDoubleVariable(const char *name, double *value) const
Pix * GetThresholdedImage()
const char * GetUnichar(int unichar_id) const
Boxa * GetConnectedComponents(Pixa **cc)
void SetInputImage(Pix *pix)
void SetOutputName(const char *name)
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Boxa * GetRegions(Pixa **pixa)
char * GetUTF8Text(PageIteratorLevel level) const
void * cancel_this
monitor-aware progress callback
void set_deadline_msecs(int32_t deadline_msecs)
CANCEL_FUNC cancel
for errcode use
TESS_API int get_best_script(int orientation_id) const
virtual bool Next(PageIteratorLevel level)
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
bool AddImage(TessBaseAPI *api)
bool BeginDocument(const char *title)
bool Next(PageIteratorLevel level) override
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
Image * mutable_pix_binary()
void SetEquationDetect(EquationDetect *detector)
int init_tesseract(const std::string &arg0, const std::string &textbase, const std::string &language, OcrEngineMode oem, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params, TessdataManager *mgr)
void set_unlv_suspects(WERD_RES *word)
void set_pix_grey(Image grey_pix)
void SetBlackAndWhitelist()
bool TrainLineRecognizer(const char *input_imagename, const std::string &output_basename, BLOCK_LIST *block_list)
PAGE_RES * ApplyBoxes(const char *filename, bool find_segmentation, BLOCK_LIST *block_list)
int num_sub_langs() const
void TidyUp(PAGE_RES *page_res)
void read_config_file(const char *filename, SetParamConstraint constraint)
void ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res)
void ReSegmentByClassification(PAGE_RES *page_res)
void set_pix_thresholds(Image thresholds)
Dict & getDict() override
Image pix_original() const
void recog_training_segmented(const char *filename, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void set_pix_original(Image original_pix)
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
void ResetAdaptiveClassifier()
int SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
void set_source_resolution(int ppi)
void CorrectClassifyWords(PAGE_RES *page_res)
void pgeditor_main(int width, int height, PAGE_RES *page_res)
void ResetDocumentDictionary()
FILE * init_recog_training(const char *filename)
PAGE_RES * SetupApplyBoxes(const std::vector< TBOX > &boxes, BLOCK_LIST *block_list)
Tesseract * get_sub_lang(int index) const
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
int GetScaledEstimatedResolution() const
virtual Image GetPixRectThresholds()
int GetSourceYResolution() const
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
bool IsEmpty() const
Return true if no image has been set.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
int GetScaledYResolution() const
virtual std::tuple< bool, Image, Image, Image > Threshold(TessBaseAPI *api, ThresholdMethod method)
void SetRectangle(int left, int top, int width, int height)
virtual Image GetPixRectGrey()
int GetScaleFactor() const
virtual bool ThresholdToPix(Image *pix)
Returns false on error.
bool IsBinary() const
Returns true if the source image is binary.
void SetSourceYResolution(int ppi)
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
bool is_fixed_pitch() const
WERD_CHOICE * best_choice
CRUNCH_MODE unlv_crunch_mode
void BestChoiceToCorrectText()
WERD_RES * restart_page()
float angle() const
find angle
const std::string & unichar_lengths() const
std::string & unichar_string()
bool flag(WERD_FLAGS mask) const
void set_text(const char *new_text)
std::vector< BoolParam * > bool_params
std::vector< StringParam * > string_params
std::vector< IntParam * > int_params
std::vector< DoubleParam * > double_params
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, std::string *value)
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
bool LoadMemBuffer(const char *name, const char *data, int size)
const char * get_script_from_script_id(int id) const
const char * id_to_unichar(UNICHAR_ID id) const
bool contains_unichar(const char *const unichar_repr) const
void LearnWord(const char *fontname, WERD_RES *word)
UnicityTable< FontInfo > & get_fontinfo_table()
bool WriteTRFile(const char *filename)
void InitAdaptiveClassifier(TessdataManager *mgr)
static DawgCache * GlobalDawgCache()
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
WERD_CHOICE * prev_word_best_choice_