tesseract  5.0.0
baseapi.h
Go to the documentation of this file.
1 // File: baseapi.h
3 // Description: Simple API for calling tesseract.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_API_BASEAPI_H_
20 #define TESSERACT_API_BASEAPI_H_
21 
22 #ifdef HAVE_CONFIG_H
23 # include "config_auto.h" // DISABLED_LEGACY_ENGINE
24 #endif
25 
26 #include "export.h"
27 #include "pageiterator.h"
28 #include "publictypes.h"
29 #include "resultiterator.h"
30 #include "unichar.h"
31 
32 #include <tesseract/version.h>
33 
34 #include <cstdio>
35 #include <vector> // for std::vector
36 
37 struct Pix;
38 struct Pixa;
39 struct Boxa;
40 
41 namespace tesseract {
42 
43 class PAGE_RES;
44 class ParagraphModel;
45 class BLOCK_LIST;
46 class ETEXT_DESC;
47 struct OSResults;
48 class UNICHARSET;
49 
50 class Dawg;
51 class Dict;
52 class EquationDetect;
53 class PageIterator;
54 class ImageThresholder;
55 class LTRResultIterator;
56 class ResultIterator;
57 class MutableIterator;
58 class TessResultRenderer;
59 class Tesseract;
60 
61 // Function to read a std::vector<char> from a whole file.
62 // Returns false on failure.
63 using FileReader = bool (*)(const char *filename, std::vector<char> *data);
64 
65 using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
66  bool) const; // Pointer to a const member function of Dict returning int; the type taken by TessBaseAPI::SetDictFunc().
67 using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
68  int, const char *, int); // Pointer to a non-const member function of Dict returning double; the type taken by TessBaseAPI::SetProbabilityInContextFunc().
69 
78 class TESS_API TessBaseAPI {
79 public:
80  TessBaseAPI();
81  virtual ~TessBaseAPI();
82  // Copy constructor and assignment operator are currently unsupported.
83  TessBaseAPI(TessBaseAPI const &) = delete;
84  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
85 
89  static const char *Version();
90 
98  static size_t getOpenCLDevice(void **device);
99 
104  void SetInputName(const char *name);
112  const char *GetInputName();
113  // Takes ownership of the input pix.
114  void SetInputImage(Pix *pix);
115  Pix *GetInputImage();
116  int GetSourceYResolution();
117  const char *GetDatapath();
118 
120  void SetOutputName(const char *name);
121 
135  bool SetVariable(const char *name, const char *value);
136  bool SetDebugVariable(const char *name, const char *value);
137 
142  bool GetIntVariable(const char *name, int *value) const;
143  bool GetBoolVariable(const char *name, bool *value) const;
144  bool GetDoubleVariable(const char *name, double *value) const;
145 
150  const char *GetStringVariable(const char *name) const;
151 
152 #ifndef DISABLED_LEGACY_ENGINE
153 
157  void PrintFontsTable(FILE *fp) const;
158 
159 #endif
160 
164  void PrintVariables(FILE *fp) const;
165 
169  bool GetVariableAsString(const char *name, std::string *val) const;
170 
208  int Init(const char *datapath, const char *language, OcrEngineMode mode,
209  char **configs, int configs_size,
210  const std::vector<std::string> *vars_vec,
211  const std::vector<std::string> *vars_values,
212  bool set_only_non_debug_params);
213  int Init(const char *datapath, const char *language, OcrEngineMode oem) { // Convenience overload: forwards to the 8-argument Init()
214  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); // with configs = nullptr, configs_size = 0, vars_vec = vars_values = nullptr, set_only_non_debug_params = false.
215  }
216  int Init(const char *datapath, const char *language) { // Shortest overload: forwards to the 8-argument Init()
217  return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, // with mode = OEM_DEFAULT, configs = nullptr, configs_size = 0, vars_vec = vars_values = nullptr,
218  false); // and set_only_non_debug_params = false.
219  }
220  // In-memory version reads the traineddata file directly from the given
221  // data[data_size] array, and/or reads data via a FileReader.
222  int Init(const char *data, int data_size, const char *language,
223  OcrEngineMode mode, char **configs, int configs_size,
224  const std::vector<std::string> *vars_vec,
225  const std::vector<std::string> *vars_values,
226  bool set_only_non_debug_params, FileReader reader);
227 
236  const char *GetInitLanguagesAsString() const;
237 
243  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
244 
248  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
249 
254  void InitForAnalysePage();
255 
262  void ReadConfigFile(const char *filename);
264  void ReadDebugConfigFile(const char *filename);
265 
271  void SetPageSegMode(PageSegMode mode);
272 
274  PageSegMode GetPageSegMode() const;
275 
293  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
294  int bytes_per_line, int left, int top, int width,
295  int height);
296 
301  void ClearAdaptiveClassifier();
302 
309  /* @{ */
310 
318  void SetImage(const unsigned char *imagedata, int width, int height,
319  int bytes_per_pixel, int bytes_per_line);
320 
329  void SetImage(Pix *pix);
330 
335  void SetSourceResolution(int ppi);
336 
342  void SetRectangle(int left, int top, int width, int height);
343 
349  Pix *GetThresholdedImage();
350 
356  Boxa *GetRegions(Pixa **pixa);
357 
369  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
370  int **blockids, int **paraids);
371  /*
372  Helper overload for the most common usage: extracts from the thresholded image by forwarding raw_image = false, raw_padding = 0 and paraids = nullptr.
373 */
374  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
375  return GetTextlines(false, 0, pixa, blockids, nullptr); // Returns whatever the 5-argument overload returns.
376  }
377 
386  Boxa *GetStrips(Pixa **pixa, int **blockids);
387 
393  Boxa *GetWords(Pixa **pixa);
394 
403  Boxa *GetConnectedComponents(Pixa **cc);
404 
417  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
418  bool raw_image, int raw_padding, Pixa **pixa,
419  int **blockids, int **paraids);
420  // Helper overload to get binary images with no padding (most common usage).
421  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
422  Pixa **pixa, int **blockids) {
423  return GetComponentImages(level, text_only, false, 0, pixa, blockids, // Forwards raw_image = false, raw_padding = 0,
424  nullptr); // and paraids = nullptr to the 7-argument overload.
425  }
426 
433  int GetThresholdedImageScaleFactor() const;
434 
450  PageIterator *AnalyseLayout();
451  PageIterator *AnalyseLayout(bool merge_similar_words);
452 
459  int Recognize(ETEXT_DESC *monitor);
460 
488  bool ProcessPages(const char *filename, const char *retry_config,
489  int timeout_millisec, TessResultRenderer *renderer);
490  // Does the real work of ProcessPages.
491  bool ProcessPagesInternal(const char *filename, const char *retry_config,
492  int timeout_millisec, TessResultRenderer *renderer);
493 
503  bool ProcessPage(Pix *pix, int page_index, const char *filename,
504  const char *retry_config, int timeout_millisec,
505  TessResultRenderer *renderer);
506 
515  ResultIterator *GetIterator();
516 
525  MutableIterator *GetMutableIterator();
526 
531  char *GetUTF8Text();
532 
542  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
543 
550  char *GetHOCRText(int page_number);
551 
556  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
557 
562  char *GetAltoText(int page_number);
563 
569  char *GetTSVText(int page_number);
570 
577  char *GetLSTMBoxText(int page_number);
578 
586  char *GetBoxText(int page_number);
587 
594  char *GetWordStrBoxText(int page_number);
595 
601  char *GetUNLVText();
602 
612  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
613  const char **script_name, float *script_conf);
614 
620  char *GetOsdText(int page_number);
621 
623  int MeanTextConf();
630  int *AllWordConfidences();
631 
632 #ifndef DISABLED_LEGACY_ENGINE
643  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
644 #endif // ndef DISABLED_LEGACY_ENGINE
645 
652  void Clear();
653 
660  void End();
661 
669  static void ClearPersistentCache();
670 
677  int IsValidWord(const char *word) const;
678  // Returns true if utf8_character is defined in the UniCharset.
679  bool IsValidCharacter(const char *utf8_character) const;
680 
681  bool GetTextDirection(int *out_offset, float *out_slope);
682 
684  void SetDictFunc(DictFunc f);
685 
689  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
690 
695  bool DetectOS(OSResults *);
696 
701  void GetBlockTextOrientations(int **block_orientation,
702  bool **vertical_writing);
703 
705  const char *GetUnichar(int unichar_id) const;
706 
708  const Dawg *GetDawg(int i) const;
709 
711  int NumDawgs() const;
712 
713  Tesseract *tesseract() const { // Accessor for the tesseract_ member (the underlying data object).
714  return tesseract_; // Hands out the stored pointer as-is.
715  }
716 
717  OcrEngineMode oem() const { // Accessor for last_oem_requested_, i.e. the OcrEngineMode last requested.
718  return last_oem_requested_;
719  }
720 
721  void set_min_orientation_margin(double margin);
722  /* @} */
723 
724 protected:
727  bool InternalSetImage();
728 
733  virtual bool Threshold(Pix **pix);
734 
739  int FindLines();
740 
742  void ClearResults();
743 
749  LTRResultIterator *GetLTRIterator();
750 
757  int TextLength(int *blob_count) const;
758 
760  void DetectParagraphs(bool after_text_recognition);
761 
762  const PAGE_RES *GetPageRes() const { // Read-only accessor for page_res_ (the page-level data).
763  return page_res_; // Returned as pointer-to-const, so callers cannot modify the PAGE_RES through it.
764  }
765 
766 protected:
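767  Tesseract *tesseract_; ///< The underlying data object.
768  Tesseract *osd_tesseract_; ///< For orientation & script detection.
769  EquationDetect *equ_detect_; ///< The equation detector.
770  FileReader reader_; ///< Reads files from any filesystem.
771  ImageThresholder *thresholder_; ///< Image thresholding module.
772  std::vector<ParagraphModel *> *paragraph_models_;
773  BLOCK_LIST *block_list_; ///< The page layout.
774  PAGE_RES *page_res_; ///< The page-level data.
775  std::string input_file_; ///< Name used by training code.
776  std::string output_file_; ///< Name used by debug code.
777  std::string datapath_; ///< Current location of tessdata.
778  std::string language_; ///< Last initialized language.
779  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
780  bool recognition_done_; ///< page_res_ contains recognition data.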
781 
786  /* @{ */
793  /* @} */
794 
795 private:
796  // A list of image filenames gets special consideration
797  bool ProcessPagesFileList(FILE *fp, std::string *buf,
798  const char *retry_config, int timeout_millisec,
799  TessResultRenderer *renderer,
800  int tessedit_page_number);
801  // TIFF supports multipage so gets special consideration.
802  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
803  const char *filename, const char *retry_config,
804  int timeout_millisec,
805  TessResultRenderer *renderer,
806  int tessedit_page_number);
807 }; // class TessBaseAPI.
808 
810 std::string HOcrEscape(const char *text);
811 
812 } // namespace tesseract
813 
814 #endif // TESSERACT_API_BASEAPI_H_
struct TessBaseAPI TessBaseAPI
Definition: capi.h:62
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
Definition: baseapi.h:66
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
Definition: baseapi.h:68
std::string HOcrEscape(const char *text)
Definition: baseapi.cpp:2338
int UNICHAR_ID
Definition: unichar.h:36
bool(*)(const char *filename, std::vector< char > *data) FileReader
Definition: baseapi.h:63
void DetectParagraphs(int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:769
std::string input_file_
Name used by training code.
Definition: baseapi.h:775
const PAGE_RES * GetPageRes() const
Definition: baseapi.h:762
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:774
TessBaseAPI(TessBaseAPI const &)=delete
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:767
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:374
std::string language_
Last initialized language.
Definition: baseapi.h:778
std::string datapath_
Current location of tessdata.
Definition: baseapi.h:777
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:213
std::vector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:772
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:780
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:770
int Init(const char *datapath, const char *language)
Definition: baseapi.h:216
OcrEngineMode oem() const
Definition: baseapi.h:717
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:771
std::string output_file_
Name used by debug code.
Definition: baseapi.h:776
Tesseract * tesseract() const
Definition: baseapi.h:713
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:773
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:421
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:768
TessBaseAPI & operator=(TessBaseAPI const &)=delete
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:779
#define TESS_API
Definition: export.h:34