tesseract  5.0.0
tesseract.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: tesseract.cpp
3  * Description: Main program for merge of tess and editor.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1992, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 // Include automatically generated configuration file if running autoconf
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23 
24 #include <cerrno> // for errno
25 #if defined(__USE_GNU)
26 # include <cfenv> // for feenableexcept
27 #endif
28 #include <climits> // for INT_MIN, INT_MAX
29 #include <cstdlib> // for std::getenv
30 #include <iostream>
31 #include <map> // for std::map
32 #include <memory> // std::unique_ptr
33 
34 #include <allheaders.h>
35 #include <tesseract/baseapi.h>
36 #include "dict.h"
37 #if defined(USE_OPENCL)
38 # include "openclwrapper.h" // for OpenclDevice
39 #endif
40 #include <tesseract/renderer.h>
41 #include "simddetect.h"
42 #include "tprintf.h" // for tprintf
43 
44 #ifdef _OPENMP
45 # include <omp.h>
46 #endif
47 
48 #if defined(HAVE_LIBARCHIVE)
49 # include <archive.h>
50 #endif
51 #if defined(HAVE_LIBCURL)
52 # include <curl/curl.h>
53 #endif
54 
55 #if defined(_WIN32)
56 # include <fcntl.h>
57 # include <io.h>
58 # if defined(HAVE_TIFFIO_H)
59 
60 # include <tiffio.h>
61 
// libtiff error callback: writes "<module>: <formatted message>.\n" to stderr.
// The "<module>: " prefix is omitted when module is null.
static void Win32ErrorHandler(const char *module, const char *fmt, va_list ap) {
  if (module) {
    fputs(module, stderr);
    fputs(": ", stderr);
  }
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
69 
// libtiff warning callback: writes "<module>: Warning, <formatted message>.\n"
// to stderr. The "<module>: " prefix is omitted when module is null.
static void Win32WarningHandler(const char *module, const char *fmt, va_list ap) {
  if (module) {
    fputs(module, stderr);
    fputs(": ", stderr);
  }
  fputs("Warning, ", stderr);
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
78 
79 # endif /* HAVE_TIFFIO_H */
80 
81 class AutoWin32ConsoleOutputCP {
82 public:
83  explicit AutoWin32ConsoleOutputCP(UINT codeCP) {
84  oldCP_ = GetConsoleOutputCP();
85  SetConsoleOutputCP(codeCP);
86  }
87  ~AutoWin32ConsoleOutputCP() {
88  SetConsoleOutputCP(oldCP_);
89  }
90 
91 private:
92  UINT oldCP_;
93 };
94 
95 static AutoWin32ConsoleOutputCP autoWin32ConsoleOutputCP(CP_UTF8);
96 
97 #endif // _WIN32
98 
99 using namespace tesseract;
100 
101 static void PrintVersionInfo() {
102  char *versionStrP;
103 
104  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
105 
106  versionStrP = getLeptonicaVersion();
107  printf(" %s\n", versionStrP);
108  lept_free(versionStrP);
109 
110  versionStrP = getImagelibVersions();
111  printf(" %s\n", versionStrP);
112  lept_free(versionStrP);
113 
114 #ifdef USE_OPENCL
115  cl_platform_id platform[4];
116  cl_uint num_platforms;
117 
118  printf(" OpenCL info:\n");
119  if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
120  printf(" Found %u platform(s).\n", num_platforms);
121  for (unsigned n = 0; n < num_platforms; n++) {
122  char info[256];
123  if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == CL_SUCCESS) {
124  printf(" Platform %u name: %s.\n", n + 1, info);
125  }
126  if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) == CL_SUCCESS) {
127  printf(" Version: %s.\n", info);
128  }
129  cl_device_id devices[2];
130  cl_uint num_devices;
131  if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, &num_devices) == CL_SUCCESS) {
132  printf(" Found %u device(s).\n", num_devices);
133  for (unsigned i = 0; i < num_devices; ++i) {
134  if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) == CL_SUCCESS) {
135  printf(" Device %u name: %s.\n", i + 1, info);
136  }
137  }
138  }
139  }
140  }
141 #endif
142 #if defined(HAVE_NEON) || defined(__aarch64__)
144  printf(" Found NEON\n");
145 #else
147  printf(" Found AVX512BW\n");
148  }
150  printf(" Found AVX512F\n");
151  }
153  printf(" Found AVX2\n");
154  }
156  printf(" Found AVX\n");
157  }
159  printf(" Found FMA\n");
160  }
162  printf(" Found SSE4.1\n");
163  }
164 #endif
165 #ifdef _OPENMP
166  printf(" Found OpenMP %d\n", _OPENMP);
167 #endif
168 #if defined(HAVE_LIBARCHIVE)
169 # if ARCHIVE_VERSION_NUMBER >= 3002000
170  printf(" Found %s\n", archive_version_details());
171 # else
172  printf(" Found %s\n", archive_version_string());
173 # endif // ARCHIVE_VERSION_NUMBER
174 #endif // HAVE_LIBARCHIVE
175 #if defined(HAVE_LIBCURL)
176  printf(" Found %s\n", curl_version());
177 #endif
178 }
179 
// Writes the list of page segmentation modes to stdout. Builds without the
// legacy engine append a note that the OSD modes are disabled.
static void PrintHelpForPSM() {
  static const char kPsmHelp[] =
      "Page segmentation modes:\n"
      " 0 Orientation and script detection (OSD) only.\n"
      " 1 Automatic page segmentation with OSD.\n"
      " 2 Automatic page segmentation, but no OSD, or OCR. (not "
      "implemented)\n"
      " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
      " 4 Assume a single column of text of variable sizes.\n"
      " 5 Assume a single uniform block of vertically aligned text.\n"
      " 6 Assume a single uniform block of text.\n"
      " 7 Treat the image as a single text line.\n"
      " 8 Treat the image as a single word.\n"
      " 9 Treat the image as a single word in a circle.\n"
      " 10 Treat the image as a single character.\n"
      " 11 Sparse text. Find as much text as possible in no"
      " particular order.\n"
      " 12 Sparse text with OSD.\n"
      " 13 Raw line. Treat the image as a single text line,\n"
      " bypassing hacks that are Tesseract-specific.\n";

  fputs(kPsmHelp, stdout);
#ifdef DISABLED_LEGACY_ENGINE
  fputs("\nNOTE: The OSD modes are currently disabled.\n", stdout);
#endif
}
208 
209 #ifndef DISABLED_LEGACY_ENGINE
// Writes the list of OCR engine modes to stdout.
static void PrintHelpForOEM() {
  static const char kOemHelp[] =
      "OCR Engine modes:\n"
      " 0 Legacy engine only.\n"
      " 1 Neural nets LSTM engine only.\n"
      " 2 Legacy + LSTM engines.\n"
      " 3 Default, based on what is available.\n";

  fputs(kOemHelp, stdout);
}
220 #endif // ndef DISABLED_LEGACY_ENGINE
221 
222 static void PrintHelpExtra(const char *program) {
223  printf(
224  "Usage:\n"
225  " %s --help | --help-extra | --help-psm | "
226 #ifndef DISABLED_LEGACY_ENGINE
227  "--help-oem | "
228 #endif
229  "--version\n"
230  " %s --list-langs [--tessdata-dir PATH]\n"
231 #ifndef DISABLED_LEGACY_ENGINE
232  " %s --print-fonts-table [options...] [configfile...]\n"
233 #endif // ndef DISABLED_LEGACY_ENGINE
234  " %s --print-parameters [options...] [configfile...]\n"
235  " %s imagename|imagelist|stdin outputbase|stdout [options...] "
236  "[configfile...]\n"
237  "\n"
238  "OCR options:\n"
239  " --tessdata-dir PATH Specify the location of tessdata path.\n"
240  " --user-words PATH Specify the location of user words file.\n"
241  " --user-patterns PATH Specify the location of user patterns file.\n"
242  " --dpi VALUE Specify DPI for input image.\n"
243  " --loglevel LEVEL Specify logging level. LEVEL can be\n"
244  " ALL, TRACE, DEBUG, INFO, WARN, ERROR, FATAL or OFF.\n"
245  " -l LANG[+LANG] Specify language(s) used for OCR.\n"
246  " -c VAR=VALUE Set value for config variables.\n"
247  " Multiple -c arguments are allowed.\n"
248  " --psm NUM Specify page segmentation mode.\n"
249 #ifndef DISABLED_LEGACY_ENGINE
250  " --oem NUM Specify OCR Engine mode.\n"
251 #endif
252  "NOTE: These options must occur before any configfile.\n"
253  "\n",
254  program, program, program, program
255 #ifndef DISABLED_LEGACY_ENGINE
256  , program
257 #endif // ndef DISABLED_LEGACY_ENGINE
258  );
259 
260  PrintHelpForPSM();
261 #ifndef DISABLED_LEGACY_ENGINE
262  printf("\n");
263  PrintHelpForOEM();
264 #endif
265 
266  printf(
267  "\n"
268  "Single options:\n"
269  " -h, --help Show minimal help message.\n"
270  " --help-extra Show extra help for advanced users.\n"
271  " --help-psm Show page segmentation modes.\n"
272 #ifndef DISABLED_LEGACY_ENGINE
273  " --help-oem Show OCR Engine modes.\n"
274 #endif
275  " -v, --version Show version information.\n"
276  " --list-langs List available languages for tesseract engine.\n"
277 #ifndef DISABLED_LEGACY_ENGINE
278  " --print-fonts-table Print tesseract fonts table.\n"
279 #endif // ndef DISABLED_LEGACY_ENGINE
280  " --print-parameters Print tesseract parameters.\n");
281 }
282 
// Writes the minimal help text to stdout, using `program` as the command name
// in the usage lines.
static void PrintHelpMessage(const char *program) {
  // Only the usage lines need formatting.
  printf(
      "Usage:\n"
      " %s --help | --help-extra | --version\n"
      " %s --list-langs\n"
      " %s imagename outputbase [options...] [configfile...]\n"
      "\n",
      program, program, program);

  // The remainder is fixed text.
  fputs(
      "OCR options:\n"
      " -l LANG[+LANG] Specify language(s) used for OCR.\n"
      "NOTE: These options must occur before any configfile.\n"
      "\n"
      "Single options:\n"
      " --help Show this help message.\n"
      " --help-extra Show extra help for advanced users.\n"
      " --version Show version information.\n"
      " --list-langs List available languages for tesseract "
      "engine.\n",
      stdout);
}
302 
303 static bool SetVariablesFromCLArgs(tesseract::TessBaseAPI &api, int argc, char **argv) {
304  bool success = true;
305  char opt1[256], opt2[255];
306  for (int i = 0; i < argc; i++) {
307  if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
308  strncpy(opt1, argv[i + 1], 255);
309  opt1[255] = '\0';
310  char *p = strchr(opt1, '=');
311  if (!p) {
312  fprintf(stderr, "Missing = in configvar assignment\n");
313  success = false;
314  break;
315  }
316  *p = 0;
317  strncpy(opt2, strchr(argv[i + 1], '=') + 1, sizeof(opt2) - 1);
318  opt2[254] = 0;
319  ++i;
320 
321  if (!api.SetVariable(opt1, opt2)) {
322  fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
323  }
324  }
325  }
326  return success;
327 }
328 
329 static void PrintLangsList(tesseract::TessBaseAPI &api) {
330  std::vector<std::string> languages;
331  api.GetAvailableLanguagesAsVector(&languages);
332  printf("List of available languages in \"%s\" (%zu):\n",
333  api.GetDatapath(), languages.size());
334  for (const auto &language : languages) {
335  printf("%s\n", language.c_str());
336  }
337  api.End();
338 }
339 
354 static void FixPageSegMode(tesseract::TessBaseAPI &api, tesseract::PageSegMode pagesegmode) {
356  api.SetPageSegMode(pagesegmode);
357  }
358 }
359 
// Returns true when 0 <= arg < count. Otherwise prints a message naming
// `mode` and the accepted range 0..count-1 to stdout and returns false.
static bool checkArgValues(int arg, const char *mode, int count) {
  const bool in_range = (0 <= arg) && (arg < count);
  if (!in_range) {
    printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
  }
  return in_range;
}
367 
368 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
369 static bool ParseArgs(int argc, char **argv, const char **lang, const char **image,
370  const char **outputbase, const char **datapath, l_int32 *dpi,
371  bool *list_langs, bool *print_parameters, bool* print_fonts_table, std::vector<std::string> *vars_vec,
372  std::vector<std::string> *vars_values, l_int32 *arg_i,
373  tesseract::PageSegMode *pagesegmode, tesseract::OcrEngineMode *enginemode) {
374  bool noocr = false;
375  int i;
376  for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
377  if (*image != nullptr && *outputbase == nullptr) {
378  // outputbase follows image, don't allow options at that position.
379  *outputbase = argv[i];
380  } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
381  PrintHelpMessage(argv[0]);
382  noocr = true;
383  } else if (strcmp(argv[i], "--help-extra") == 0) {
384  PrintHelpExtra(argv[0]);
385  noocr = true;
386  } else if ((strcmp(argv[i], "--help-psm") == 0)) {
387  PrintHelpForPSM();
388  noocr = true;
389 #ifndef DISABLED_LEGACY_ENGINE
390  } else if ((strcmp(argv[i], "--help-oem") == 0)) {
391  PrintHelpForOEM();
392  noocr = true;
393 #endif
394  } else if ((strcmp(argv[i], "-v") == 0) || (strcmp(argv[i], "--version") == 0)) {
395  PrintVersionInfo();
396  noocr = true;
397  } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
398  *lang = argv[i + 1];
399  ++i;
400  } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
401  *datapath = argv[i + 1];
402  ++i;
403  } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
404  *dpi = atoi(argv[i + 1]);
405  ++i;
406  } else if (strcmp(argv[i], "--loglevel") == 0 && i + 1 < argc) {
407  // Allow the log levels which are used by log4cxx.
408  const std::string loglevel_string = argv[++i];
409  static const std::map<const std::string, int> loglevels {
410  {"ALL", INT_MIN},
411  {"TRACE", 5000},
412  {"DEBUG", 10000},
413  {"INFO", 20000},
414  {"WARN", 30000},
415  {"ERROR", 40000},
416  {"FATAL", 50000},
417  {"OFF", INT_MAX},
418  };
419  try {
420  auto loglevel = loglevels.at(loglevel_string);
421  log_level = loglevel;
422  } catch(const std::out_of_range& e) {
423  // TODO: Allow numeric argument?
424  tprintf("Error, unsupported --loglevel %s\n", loglevel_string.c_str());
425  return false;
426  }
427  } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
428  vars_vec->push_back("user_words_file");
429  vars_values->push_back(argv[i + 1]);
430  ++i;
431  } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
432  vars_vec->push_back("user_patterns_file");
433  vars_values->push_back(argv[i + 1]);
434  ++i;
435  } else if (strcmp(argv[i], "--list-langs") == 0) {
436  noocr = true;
437  *list_langs = true;
438  } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
439  if (!checkArgValues(atoi(argv[i + 1]), "PSM", tesseract::PSM_COUNT)) {
440  return false;
441  }
442  *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
443  ++i;
444  } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
445 #ifndef DISABLED_LEGACY_ENGINE
446  int oem = atoi(argv[i + 1]);
447  if (!checkArgValues(oem, "OEM", tesseract::OEM_COUNT)) {
448  return false;
449  }
450  *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
451 #endif
452  ++i;
453  } else if (strcmp(argv[i], "--print-parameters") == 0) {
454  noocr = true;
455  *print_parameters = true;
456 #ifndef DISABLED_LEGACY_ENGINE
457  } else if (strcmp(argv[i], "--print-fonts-table") == 0) {
458  noocr = true;
459  *print_fonts_table = true;
460 #endif // ndef DISABLED_LEGACY_ENGINE
461  } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
462  // handled properly after api init
463  ++i;
464  } else if (*image == nullptr) {
465  *image = argv[i];
466  } else {
467  // Unexpected argument.
468  fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
469  return false;
470  }
471  }
472 
473  *arg_i = i;
474 
475  if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
476  // OSD = orientation and script detection.
477  if (*lang != nullptr && strcmp(*lang, "osd")) {
478  // If the user explicitly specifies a language (other than osd)
479  // or a script, only orientation can be detected.
480  fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
481  } else {
482  // That mode requires osd.traineddata to detect orientation and script.
483  *lang = "osd";
484  }
485  }
486 
487  if (*outputbase == nullptr && noocr == false) {
488  PrintHelpMessage(argv[0]);
489  return false;
490  }
491 
492  return true;
493 }
494 
495 static void PreloadRenderers(tesseract::TessBaseAPI &api,
496  std::vector<std::unique_ptr<TessResultRenderer>> &renderers,
497  tesseract::PageSegMode pagesegmode, const char *outputbase) {
498  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
499 #ifndef DISABLED_LEGACY_ENGINE
500  renderers.push_back(std::make_unique<tesseract::TessOsdRenderer>(outputbase));
501 #endif // ndef DISABLED_LEGACY_ENGINE
502  } else {
503  bool error = false;
504  bool b;
505  api.GetBoolVariable("tessedit_create_hocr", &b);
506  if (b) {
507  bool font_info;
508  api.GetBoolVariable("hocr_font_info", &font_info);
509  auto renderer = std::make_unique<tesseract::TessHOcrRenderer>(outputbase, font_info);
510  if (renderer->happy()) {
511  renderers.push_back(std::move(renderer));
512  } else {
513  tprintf("Error, could not create hOCR output file: %s\n", strerror(errno));
514  error = true;
515  }
516  }
517 
518  api.GetBoolVariable("tessedit_create_alto", &b);
519  if (b) {
520  auto renderer = std::make_unique<tesseract::TessAltoRenderer>(outputbase);
521  if (renderer->happy()) {
522  renderers.push_back(std::move(renderer));
523  } else {
524  tprintf("Error, could not create ALTO output file: %s\n", strerror(errno));
525  error = true;
526  }
527  }
528 
529  api.GetBoolVariable("tessedit_create_tsv", &b);
530  if (b) {
531  bool font_info;
532  api.GetBoolVariable("hocr_font_info", &font_info);
533  auto renderer = std::make_unique<tesseract::TessTsvRenderer>(outputbase, font_info);
534  if (renderer->happy()) {
535  renderers.push_back(std::move(renderer));
536  } else {
537  tprintf("Error, could not create TSV output file: %s\n", strerror(errno));
538  error = true;
539  }
540  }
541 
542  api.GetBoolVariable("tessedit_create_pdf", &b);
543  if (b) {
544 #ifdef WIN32
545  if (_setmode(_fileno(stdout), _O_BINARY) == -1)
546  tprintf("ERROR: cin to binary: %s", strerror(errno));
547 #endif // WIN32
548  bool textonly;
549  api.GetBoolVariable("textonly_pdf", &textonly);
550  auto renderer = std::make_unique<tesseract::TessPDFRenderer>(outputbase, api.GetDatapath(), textonly);
551  if (renderer->happy()) {
552  renderers.push_back(std::move(renderer));
553  } else {
554  tprintf("Error, could not create PDF output file: %s\n", strerror(errno));
555  error = true;
556  }
557  }
558 
559  api.GetBoolVariable("tessedit_write_unlv", &b);
560  if (b) {
561  api.SetVariable("unlv_tilde_crunching", "true");
562  auto renderer = std::make_unique<tesseract::TessUnlvRenderer>(outputbase);
563  if (renderer->happy()) {
564  renderers.push_back(std::move(renderer));
565  } else {
566  tprintf("Error, could not create UNLV output file: %s\n", strerror(errno));
567  error = true;
568  }
569  }
570 
571  api.GetBoolVariable("tessedit_create_lstmbox", &b);
572  if (b) {
573  auto renderer = std::make_unique<tesseract::TessLSTMBoxRenderer>(outputbase);
574  if (renderer->happy()) {
575  renderers.push_back(std::move(renderer));
576  } else {
577  tprintf("Error, could not create LSTM BOX output file: %s\n", strerror(errno));
578  error = true;
579  }
580  }
581 
582  api.GetBoolVariable("tessedit_create_boxfile", &b);
583  if (b) {
584  auto renderer = std::make_unique<tesseract::TessBoxTextRenderer>(outputbase);
585  if (renderer->happy()) {
586  renderers.push_back(std::move(renderer));
587  } else {
588  tprintf("Error, could not create BOX output file: %s\n", strerror(errno));
589  error = true;
590  }
591  }
592 
593  api.GetBoolVariable("tessedit_create_wordstrbox", &b);
594  if (b) {
595  auto renderer = std::make_unique<tesseract::TessWordStrBoxRenderer>(outputbase);
596  if (renderer->happy()) {
597  renderers.push_back(std::move(renderer));
598  } else {
599  tprintf("Error, could not create WordStr BOX output file: %s\n", strerror(errno));
600  error = true;
601  }
602  }
603 
604  api.GetBoolVariable("tessedit_create_txt", &b);
605  if (b || (!error && renderers.empty())) {
606  // Create text output if no other output was requested
607  // even if text output was not explicitly requested unless
608  // there was an error.
609  auto renderer = std::make_unique<tesseract::TessTextRenderer>(outputbase);
610  if (renderer->happy()) {
611  renderers.push_back(std::move(renderer));
612  } else {
613  tprintf("Error, could not create TXT output file: %s\n", strerror(errno));
614  }
615  }
616  }
617 
618  // Null-out the renderers that are
619  // added to the root, and leave the root in the vector.
620  for (size_t r = 1; r < renderers.size(); ++r) {
621  renderers[0]->insert(renderers[r].get());
622  renderers[r].release(); // at the moment insert() is owning
623  }
624 }
625 
626 /**********************************************************************
627  * main()
628  *
629  **********************************************************************/
630 
631 int main(int argc, char **argv) {
632 #if defined(__USE_GNU)
633  // Raise SIGFPE.
634 # if defined(__clang__)
635  // clang creates code which causes some FP exceptions, so don't enable those.
636  feenableexcept(FE_DIVBYZERO);
637 # else
638  feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
639 # endif
640 #endif
641  const char *lang = nullptr;
642  const char *image = nullptr;
643  const char *outputbase = nullptr;
644  const char *datapath = nullptr;
645  bool list_langs = false;
646  bool print_parameters = false;
647  bool print_fonts_table = false;
648  l_int32 dpi = 0;
649  int arg_i = 1;
651 #ifdef DISABLED_LEGACY_ENGINE
652  auto enginemode = tesseract::OEM_LSTM_ONLY;
653 #else
655 #endif
656  std::vector<std::string> vars_vec;
657  std::vector<std::string> vars_values;
658 
659  if (std::getenv("LEPT_MSG_SEVERITY")) {
660  // Get Leptonica message level from environment variable.
661  setMsgSeverity(L_SEVERITY_EXTERNAL);
662  } else {
663  // Disable debugging and informational messages from Leptonica.
664  setMsgSeverity(L_SEVERITY_ERROR);
665  }
666 
667 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
668  /* Show libtiff errors and warnings on console (not in GUI). */
669  TIFFSetErrorHandler(Win32ErrorHandler);
670  TIFFSetWarningHandler(Win32WarningHandler);
671 #endif // HAVE_TIFFIO_H && _WIN32
672 
673  if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
674  &print_parameters, &print_fonts_table, &vars_vec, &vars_values, &arg_i, &pagesegmode, &enginemode)) {
675  return EXIT_FAILURE;
676  }
677 
678  bool in_recognition_mode = !list_langs && !print_parameters && !print_fonts_table;
679 
680  if (lang == nullptr && in_recognition_mode) {
681  // Set default language model if none was given and a model file is needed.
682  lang = "eng";
683  }
684 
685  if (image == nullptr && in_recognition_mode) {
686  return EXIT_SUCCESS;
687  }
688 
689  // Call GlobalDawgCache here to create the global DawgCache object before
690  // the TessBaseAPI object. This fixes the order of destructor calls:
691  // first TessBaseAPI must be destructed, DawgCache must be the last object.
693 
694  TessBaseAPI api;
695 
696  api.SetOutputName(outputbase);
697 
698  const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i,
699  &vars_vec, &vars_values, false);
700 
701  if (!SetVariablesFromCLArgs(api, argc, argv)) {
702  return EXIT_FAILURE;
703  }
704 
705  // SIMD settings might be overridden by config variable.
707 
708  if (list_langs) {
709  PrintLangsList(api);
710  return EXIT_SUCCESS;
711  }
712 
713  if (init_failed) {
714  fprintf(stderr, "Could not initialize tesseract.\n");
715  return EXIT_FAILURE;
716  }
717 
718  if (print_parameters) {
719  FILE *fout = stdout;
720  fprintf(stdout, "Tesseract parameters:\n");
721  api.PrintVariables(fout);
722  api.End();
723  return EXIT_SUCCESS;
724  }
725 
726 #ifndef DISABLED_LEGACY_ENGINE
727  if (print_fonts_table) {
728  FILE* fout = stdout;
729  fprintf(stdout, "Tesseract fonts table:\n");
730  api.PrintFontsTable(fout);
731  api.End();
732  return EXIT_SUCCESS;
733  }
734 #endif // ndef DISABLED_LEGACY_ENGINE
735 
736  FixPageSegMode(api, pagesegmode);
737 
738  if (dpi) {
739  auto dpi_string = std::to_string(dpi);
740  api.SetVariable("user_defined_dpi", dpi_string.c_str());
741  }
742 
743  int ret_val = EXIT_SUCCESS;
744 
745  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
746  Pix *pixs = pixRead(image);
747  if (!pixs) {
748  fprintf(stderr, "Leptonica can't process input file: %s\n", image);
749  return 2;
750  }
751 
752  api.SetImage(pixs);
753 
754  tesseract::Orientation orientation;
755  tesseract::WritingDirection direction;
757  float deskew_angle;
758 
759  const std::unique_ptr<const tesseract::PageIterator> it(api.AnalyseLayout());
760  if (it) {
761  // TODO: Implement output of page segmentation, see documentation
762  // ("Automatic page segmentation, but no OSD, or OCR").
763  it->Orientation(&orientation, &direction, &order, &deskew_angle);
764  tprintf(
765  "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
766  "Deskew angle: %.4f\n",
767  orientation, direction, order, deskew_angle);
768  } else {
769  ret_val = EXIT_FAILURE;
770  }
771 
772  pixDestroy(&pixs);
773  return ret_val;
774  }
775 
776  // Set in_training_mode to true when using one of these configs:
777  // ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train.
778  // In this mode no other OCR result files are written.
779  bool b = false;
780  bool in_training_mode = (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
781  (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
782  (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
783  (api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
784 
785 #ifdef DISABLED_LEGACY_ENGINE
786  auto cur_psm = api.GetPageSegMode();
787  auto osd_warning = std::string("");
788  if (cur_psm == tesseract::PSM_OSD_ONLY) {
789  const char *disabled_osd_msg =
790  "\nERROR: The page segmentation mode 0 (OSD Only) is currently "
791  "disabled.\n\n";
792  fprintf(stderr, "%s", disabled_osd_msg);
793  return EXIT_FAILURE;
794  } else if (cur_psm == tesseract::PSM_AUTO_OSD) {
796  osd_warning +=
797  "\nWarning: The page segmentation mode 1 (Auto+OSD) is currently "
798  "disabled. "
799  "Using PSM 3 (Auto) instead.\n\n";
800  } else if (cur_psm == tesseract::PSM_SPARSE_TEXT_OSD) {
802  osd_warning +=
803  "\nWarning: The page segmentation mode 12 (Sparse text + OSD) is "
804  "currently disabled. "
805  "Using PSM 11 (Sparse text) instead.\n\n";
806  }
807 #endif // def DISABLED_LEGACY_ENGINE
808 
809  std::vector<std::unique_ptr<TessResultRenderer>> renderers;
810 
811  if (in_training_mode) {
812  renderers.push_back(nullptr);
813  } else if (outputbase != nullptr) {
814  PreloadRenderers(api, renderers, pagesegmode, outputbase);
815  }
816 
817  if (!renderers.empty()) {
818 #ifdef DISABLED_LEGACY_ENGINE
819  if (!osd_warning.empty()) {
820  fprintf(stderr, "%s", osd_warning.c_str());
821  }
822 #endif
823  bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0].get());
824  if (!succeed) {
825  fprintf(stderr, "Error during processing.\n");
826  ret_val = EXIT_FAILURE;
827  }
828  }
829 
830  return ret_val;
831 }
int main(int argc, char **argv)
Definition: tesseract.cpp:631
@ PSM_OSD_ONLY
Orientation and script detection only.
Definition: publictypes.h:160
@ PSM_COUNT
Number of enum entries.
Definition: publictypes.h:179
@ PSM_SPARSE_TEXT
Find as much text as possible in no particular order.
Definition: publictypes.h:173
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:163
@ PSM_AUTO
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:164
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:168
@ PSM_SPARSE_TEXT_OSD
Sparse text with orientation and script det.
Definition: publictypes.h:175
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int log_level
Definition: tprintf.cpp:36
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:508
const char * GetDatapath()
Definition: baseapi.cpp:932
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:276
static const char * Version()
Definition: baseapi.cpp:238
void GetAvailableLanguagesAsVector(std::vector< std::string > *langs) const
Definition: baseapi.cpp:468
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1068
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:516
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:365
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:353
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:573
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:809
void PrintFontsTable(FILE *fp) const
Definition: baseapi.cpp:335
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:301
void SetOutputName(const char *name)
Definition: baseapi.cpp:272
static bool IsNEONAvailable()
Definition: simddetect.h:59
static bool IsAVX512BWAvailable()
Definition: simddetect.h:47
static bool IsFMAAvailable()
Definition: simddetect.h:51
static bool IsAVXAvailable()
Definition: simddetect.h:35
static bool IsAVX512FAvailable()
Definition: simddetect.h:43
static bool IsSSEAvailable()
Definition: simddetect.h:55
static bool IsAVX2Available()
Definition: simddetect.h:39
static TESS_API void Update()
Definition: simddetect.cpp:264
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:172