tesseract  5.0.0
tesseract::TessBaseAPI Class Reference

#include <baseapi.h>

Inheritance diagram for tesseract::TessBaseAPI:
tesseract::FriendlyTessBaseAPI

Public Member Functions

 TessBaseAPI ()
 
virtual ~TessBaseAPI ()
 
 TessBaseAPI (TessBaseAPI const &)=delete
 
TessBaseAPI & operator= (TessBaseAPI const &)=delete
 
void SetInputName (const char *name)
 
const char * GetInputName ()
 
void SetInputImage (Pix *pix)
 
Pix * GetInputImage ()
 
int GetSourceYResolution ()
 
const char * GetDatapath ()
 
void SetOutputName (const char *name)
 
bool SetVariable (const char *name, const char *value)
 
bool SetDebugVariable (const char *name, const char *value)
 
bool GetIntVariable (const char *name, int *value) const
 
bool GetBoolVariable (const char *name, bool *value) const
 
bool GetDoubleVariable (const char *name, double *value) const
 
const char * GetStringVariable (const char *name) const
 
void PrintFontsTable (FILE *fp) const
 
void PrintVariables (FILE *fp) const
 
bool GetVariableAsString (const char *name, std::string *val) const
 
int Init (const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
 
int Init (const char *datapath, const char *language, OcrEngineMode oem)
 
int Init (const char *datapath, const char *language)
 
int Init (const char *data, int data_size, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params, FileReader reader)
 
const char * GetInitLanguagesAsString () const
 
void GetLoadedLanguagesAsVector (std::vector< std::string > *langs) const
 
void GetAvailableLanguagesAsVector (std::vector< std::string > *langs) const
 
void InitForAnalysePage ()
 
void ReadConfigFile (const char *filename)
 
void ReadDebugConfigFile (const char *filename)
 
void SetPageSegMode (PageSegMode mode)
 
PageSegMode GetPageSegMode () const
 
char * TesseractRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
 
void ClearAdaptiveClassifier ()
 
void SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void SetImage (Pix *pix)
 
void SetSourceResolution (int ppi)
 
void SetRectangle (int left, int top, int width, int height)
 
Pix * GetThresholdedImage ()
 
Boxa * GetRegions (Pixa **pixa)
 
Boxa * GetTextlines (bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * GetStrips (Pixa **pixa, int **blockids)
 
Boxa * GetWords (Pixa **pixa)
 
Boxa * GetConnectedComponents (Pixa **cc)
 
Boxa * GetComponentImages (PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int GetThresholdedImageScaleFactor () const
 
PageIterator * AnalyseLayout ()
 
PageIterator * AnalyseLayout (bool merge_similar_words)
 
int Recognize (ETEXT_DESC *monitor)
 
bool ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * GetIterator ()
 
MutableIterator * GetMutableIterator ()
 
char * GetUTF8Text ()
 
char * GetHOCRText (ETEXT_DESC *monitor, int page_number)
 
char * GetHOCRText (int page_number)
 
char * GetAltoText (ETEXT_DESC *monitor, int page_number)
 
char * GetAltoText (int page_number)
 
char * GetTSVText (int page_number)
 
char * GetLSTMBoxText (int page_number)
 
char * GetBoxText (int page_number)
 
char * GetWordStrBoxText (int page_number)
 
char * GetUNLVText ()
 
bool DetectOrientationScript (int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
 
char * GetOsdText (int page_number)
 
int MeanTextConf ()
 
int * AllWordConfidences ()
 
bool AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void Clear ()
 
void End ()
 
int IsValidWord (const char *word) const
 
bool IsValidCharacter (const char *utf8_character) const
 
bool GetTextDirection (int *out_offset, float *out_slope)
 
void SetDictFunc (DictFunc f)
 
void SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
bool DetectOS (OSResults *)
 
void GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
const char * GetUnichar (int unichar_id) const
 
const Dawg * GetDawg (int i) const
 
int NumDawgs () const
 
Tesseract * tesseract () const
 
OcrEngineMode oem () const
 
void set_min_orientation_margin (double margin)
 

Static Public Member Functions

static const char * Version ()
 
static size_t getOpenCLDevice (void **device)
 
static void ClearPersistentCache ()
 

Protected Member Functions

bool InternalSetImage ()
 
virtual bool Threshold (Pix **pix)
 
int FindLines ()
 
void ClearResults ()
 
LTRResultIterator * GetLTRIterator ()
 
int TextLength (int *blob_count) const
 
void DetectParagraphs (bool after_text_recognition)
 
const PAGE_RES * GetPageRes () const
 

Protected Attributes

Tesseract * tesseract_
 The underlying data object. More...
 
Tesseract * osd_tesseract_
 For orientation & script detection. More...
 
EquationDetect * equ_detect_
 The equation detector. More...
 
FileReader reader_
 Reads files from any filesystem. More...
 
ImageThresholder * thresholder_
 Image thresholding module. More...
 
std::vector< ParagraphModel * > * paragraph_models_
 
BLOCK_LIST * block_list_
 The page layout. More...
 
PAGE_RES * page_res_
 The page-level data. More...
 
std::string input_file_
 Name used by training code. More...
 
std::string output_file_
 Name used by debug code. More...
 
std::string datapath_
 Current location of tessdata. More...
 
std::string language_
 Last initialized language. More...
 
OcrEngineMode last_oem_requested_
 Last OCR engine mode requested. More...
 
bool recognition_done_
 page_res_ contains recognition data. More...
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 
int image_width_
 
int image_height_
 

Detailed Description

Base class for all tesseract APIs. Specific classes can add ability to work on different inputs or produce different outputs. This class is mostly an interface layer on top of the Tesseract instance class to hide the data types so that users of this class don't have to include any other Tesseract headers.

Definition at line 78 of file baseapi.h.

Constructor & Destructor Documentation

◆ TessBaseAPI() [1/2]

tesseract::TessBaseAPI::TessBaseAPI ( )

Definition at line 208 of file baseapi.cpp.

209  : tesseract_(nullptr)
210  , osd_tesseract_(nullptr)
211  , equ_detect_(nullptr)
212  , reader_(nullptr)
213  ,
214  // thresholder_ is initialized to nullptr here, but will be set before use
215  // by: A constructor of a derived API or created
216  // implicitly when used in InternalSetImage.
217  thresholder_(nullptr)
218  , paragraph_models_(nullptr)
219  , block_list_(nullptr)
220  , page_res_(nullptr)
222  , recognition_done_(false)
223  , rect_left_(0)
224  , rect_top_(0)
225  , rect_width_(0)
226  , rect_height_(0)
227  , image_width_(0)
228  , image_height_(0) {
229 }
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:769
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:774
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:767
std::vector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:772
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:780
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:770
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:771
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:773
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:768
OcrEngineMode last_oem_requested_
Last OCR engine mode requested.
Definition: baseapi.h:779

◆ ~TessBaseAPI()

tesseract::TessBaseAPI::~TessBaseAPI ( )
virtual

Definition at line 231 of file baseapi.cpp.

231  {
232  End();
233 }

◆ TessBaseAPI() [2/2]

tesseract::TessBaseAPI::TessBaseAPI ( TessBaseAPI const &  )
delete

Member Function Documentation

◆ AdaptToWordStr()

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaption was not possible for some reason.

Definition at line 1795 of file baseapi.cpp.

1795  {
1796  int debug = 0;
1797  GetIntVariable("applybox_debug", &debug);
1798  bool success = true;
1799  PageSegMode current_psm = GetPageSegMode();
1800  SetPageSegMode(mode);
1801  SetVariable("classify_enable_learning", "0");
1802  const std::unique_ptr<const char[]> text(GetUTF8Text());
1803  if (debug) {
1804  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1805  }
1806  if (text != nullptr) {
1807  PAGE_RES_IT it(page_res_);
1808  WERD_RES *word_res = it.word();
1809  if (word_res != nullptr) {
1810  word_res->word->set_text(wordstr);
1811  // Check to see if text matches wordstr.
1812  int w = 0;
1813  int t;
1814  for (t = 0; text[t] != '\0'; ++t) {
1815  if (text[t] == '\n' || text[t] == ' ') {
1816  continue;
1817  }
1818  while (wordstr[w] == ' ') {
1819  ++w;
1820  }
1821  if (text[t] != wordstr[w]) {
1822  break;
1823  }
1824  ++w;
1825  }
1826  if (text[t] != '\0' || wordstr[w] != '\0') {
1827  // No match.
1828  delete page_res_;
1829  std::vector<TBOX> boxes;
1833  PAGE_RES_IT pr_it(page_res_);
1834  if (pr_it.word() == nullptr) {
1835  success = false;
1836  } else {
1837  word_res = pr_it.word();
1838  }
1839  } else {
1840  word_res->BestChoiceToCorrectText();
1841  }
1842  if (success) {
1843  tesseract_->EnableLearning = true;
1844  tesseract_->LearnWord(nullptr, word_res);
1845  }
1846  } else {
1847  success = false;
1848  }
1849  } else {
1850  success = false;
1851  }
1852  SetPageSegMode(current_psm);
1853  return success;
1854 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:508
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:291
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:276
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:516
void TidyUp(PAGE_RES *page_res)
void ReSegmentByClassification(PAGE_RES *page_res)
PAGE_RES * SetupApplyBoxes(const std::vector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:197
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:262

◆ AllWordConfidences()

int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 1755 of file baseapi.cpp.

1755  {
1756  if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
1757  return nullptr;
1758  }
1759  int n_word = 0;
1760  PAGE_RES_IT res_it(page_res_);
1761  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
1762  n_word++;
1763  }
1764 
1765  int *conf = new int[n_word + 1];
1766  n_word = 0;
1767  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
1768  WERD_RES *word = res_it.word();
1769  WERD_CHOICE *choice = word->best_choice;
1770  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1771  // This is the eq for converting Tesseract confidence to 1..100
1772  if (w_conf < 0) {
1773  w_conf = 0;
1774  }
1775  if (w_conf > 100) {
1776  w_conf = 100;
1777  }
1778  conf[n_word++] = w_conf;
1779  }
1780  conf[n_word] = -1;
1781  return conf;
1782 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:831

◆ AnalyseLayout() [1/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( )

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns nullptr on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 809 of file baseapi.cpp.

809  {
810  return AnalyseLayout(false);
811 }
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:809

◆ AnalyseLayout() [2/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Definition at line 813 of file baseapi.cpp.

813  {
814  if (FindLines() == 0) {
815  if (block_list_->empty()) {
816  return nullptr; // The page was empty.
817  }
818  page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
819  DetectParagraphs(false);
820  return new PageIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
823  }
824  return nullptr;
825 }
void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2304
int GetScaledYResolution() const
Definition: thresholder.h:102

◆ Clear()

void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 1863 of file baseapi.cpp.

1863  {
1864  if (thresholder_ != nullptr) {
1865  thresholder_->Clear();
1866  }
1867  ClearResults();
1868  if (tesseract_ != nullptr) {
1869  SetInputImage(nullptr);
1870  }
1871 }
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:917
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:53

◆ ClearAdaptiveClassifier()

void tesseract::TessBaseAPI::ClearAdaptiveClassifier ( )

Call between pages or documents etc to free up memory and forget adaptive data.

Definition at line 557 of file baseapi.cpp.

557  {
558  if (tesseract_ == nullptr) {
559  return;
560  }
563 }

◆ ClearPersistentCache()

void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI instances. This function allows the clearing of these caches.

Definition at line 1916 of file baseapi.cpp.

1916  {
1918 }
void DeleteUnusedDawgs()
Definition: dawg_cache.h:42
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:172

◆ ClearResults()

void tesseract::TessBaseAPI::ClearResults ( )
protected

Delete the pageres and block list ready for a new page.

Delete the pageres and clear the block list ready for a new page.

Definition at line 2165 of file baseapi.cpp.

2165  {
2166  if (tesseract_ != nullptr) {
2167  tesseract_->Clear();
2168  }
2169  delete page_res_;
2170  page_res_ = nullptr;
2171  recognition_done_ = false;
2172  if (block_list_ == nullptr) {
2173  block_list_ = new BLOCK_LIST;
2174  } else {
2175  block_list_->clear();
2176  }
2177  if (paragraph_models_ != nullptr) {
2178  for (auto model : *paragraph_models_) {
2179  delete model;
2180  }
2181  delete paragraph_models_;
2182  paragraph_models_ = nullptr;
2183  }
2184 }

◆ DetectOrientationScript()

bool tesseract::TessBaseAPI::DetectOrientationScript ( int *  orient_deg,
float *  orient_conf,
const char **  script_name,
float *  script_conf 
)

Detect the orientation of the input image and apparent script (alphabet). orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270). orient_conf is the confidence (15.0 is reasonably confident). script_name is an ASCII string, the name of the script, e.g. "Latin". script_conf is the confidence level in the script. Returns true on success and writes a value to each non-null output parameter.

Definition at line 1668 of file baseapi.cpp.

1669  {
1670  OSResults osr;
1671 
1672  bool osd = DetectOS(&osr);
1673  if (!osd) {
1674  return false;
1675  }
1676 
1677  int orient_id = osr.best_result.orientation_id;
1678  int script_id = osr.get_best_script(orient_id);
1679  if (orient_conf) {
1680  *orient_conf = osr.best_result.oconfidence;
1681  }
1682  if (orient_deg) {
1683  *orient_deg = orient_id * 90; // convert quadrant to degrees
1684  }
1685 
1686  if (script_name) {
1687  const char *script = osr.unicharset->get_script_from_script_id(script_id);
1688 
1689  *script_name = script;
1690  }
1691 
1692  if (script_conf) {
1693  *script_conf = osr.best_result.sconfidence;
1694  }
1695 
1696  return true;
1697 }
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2226

◆ DetectOS()

bool tesseract::TessBaseAPI::DetectOS ( OSResults *  osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2226 of file baseapi.cpp.

2226  {
2227  if (tesseract_ == nullptr) {
2228  return false;
2229  }
2230  ClearResults();
2231  if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
2232  return false;
2233  }
2234 
2235  if (input_file_.empty()) {
2236  input_file_ = kInputFile;
2237  }
2238  return orientation_and_script_detection(input_file_.c_str(), osr, tesseract_) > 0;
2239 }
int orientation_and_script_detection(const char *filename, OSResults *, tesseract::Tesseract *)
Definition: osdetect.cpp:188
std::string input_file_
Name used by training code.
Definition: baseapi.h:775
virtual bool Threshold(Pix **pix)
Definition: baseapi.cpp:2013
Image * mutable_pix_binary()
Image pix_binary() const
Pix * pix_
Definition: image.h:27

◆ DetectParagraphs()

void tesseract::TessBaseAPI::DetectParagraphs ( bool  after_text_recognition)
protected

Definition at line 2304 of file baseapi.cpp.

2304  {
2305  int debug_level = 0;
2306  GetIntVariable("paragraph_debug_level", &debug_level);
2307  if (paragraph_models_ == nullptr) {
2308  paragraph_models_ = new std::vector<ParagraphModel *>;
2309  }
2310  MutableIterator *result_it = GetMutableIterator();
2311  do { // Detect paragraphs for this block
2312  std::vector<ParagraphModel *> models;
2313  ::tesseract::DetectParagraphs(debug_level, after_text_recognition, result_it, &models);
2314  paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end());
2315  } while (result_it->Next(RIL_BLOCK));
2316  delete result_it;
2317 }
void DetectParagraphs(int debug_level, std::vector< RowInfo > *row_infos, std::vector< PARA * > *row_owners, PARA_LIST *paragraphs, std::vector< ParagraphModel * > *models)
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1330

◆ End()

void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 1879 of file baseapi.cpp.

1879  {
1880  Clear();
1881  delete thresholder_;
1882  thresholder_ = nullptr;
1883  delete page_res_;
1884  page_res_ = nullptr;
1885  delete block_list_;
1886  block_list_ = nullptr;
1887  if (paragraph_models_ != nullptr) {
1888  for (auto model : *paragraph_models_) {
1889  delete model;
1890  }
1891  delete paragraph_models_;
1892  paragraph_models_ = nullptr;
1893  }
1894 #ifndef DISABLED_LEGACY_ENGINE
1895  if (osd_tesseract_ == tesseract_) {
1896  osd_tesseract_ = nullptr;
1897  }
1898  delete osd_tesseract_;
1899  osd_tesseract_ = nullptr;
1900  delete equ_detect_;
1901  equ_detect_ = nullptr;
1902 #endif // ndef DISABLED_LEGACY_ENGINE
1903  delete tesseract_;
1904  tesseract_ = nullptr;
1905  input_file_.clear();
1906  output_file_.clear();
1907  datapath_.clear();
1908  language_.clear();
1909 }
std::string language_
Last initialized language.
Definition: baseapi.h:778
std::string datapath_
Current location of tessdata.
Definition: baseapi.h:777
std::string output_file_
Name used by debug code.
Definition: baseapi.h:776

◆ FindLines()

int tesseract::TessBaseAPI::FindLines ( )
protected

Find lines from the image making the BLOCK_LIST.

Returns
0 on success.

Find lines from the image making the BLOCK_LIST.

Definition at line 2088 of file baseapi.cpp.

2088  {
2089  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
2090  tprintf("Please call SetImage before attempting recognition.\n");
2091  return -1;
2092  }
2093  if (recognition_done_) {
2094  ClearResults();
2095  }
2096  if (!block_list_->empty()) {
2097  return 0;
2098  }
2099  if (tesseract_ == nullptr) {
2100  tesseract_ = new Tesseract;
2101 #ifndef DISABLED_LEGACY_ENGINE
2103 #endif
2104  }
2105  if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
2106  return -1;
2107  }
2108 
2110 
2111 #ifndef DISABLED_LEGACY_ENGINE
2112  if (tesseract_->textord_equation_detect) {
2113  if (equ_detect_ == nullptr && !datapath_.empty()) {
2114  equ_detect_ = new EquationDetect(datapath_.c_str(), nullptr);
2115  }
2116  if (equ_detect_ == nullptr) {
2117  tprintf("Warning: Could not set equation detector\n");
2118  } else {
2120  }
2121  }
2122 #endif // ndef DISABLED_LEGACY_ENGINE
2123 
2124  Tesseract *osd_tess = osd_tesseract_;
2125  OSResults osr;
2126 #ifndef DISABLED_LEGACY_ENGINE
2127  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == nullptr) {
2128  if (strcmp(language_.c_str(), "osd") == 0) {
2129  osd_tess = tesseract_;
2130  } else {
2131  osd_tesseract_ = new Tesseract;
2132  TessdataManager mgr(reader_);
2133  if (datapath_.empty()) {
2134  tprintf(
2135  "Warning: Auto orientation and script detection requested,"
2136  " but data path is undefined\n");
2137  delete osd_tesseract_;
2138  osd_tesseract_ = nullptr;
2139  } else if (osd_tesseract_->init_tesseract(datapath_.c_str(), "", "osd", OEM_TESSERACT_ONLY,
2140  nullptr, 0, nullptr, nullptr, false, &mgr) == 0) {
2141  osd_tess = osd_tesseract_;
2143  } else {
2144  tprintf(
2145  "Warning: Auto orientation and script detection requested,"
2146  " but osd language failed to load\n");
2147  delete osd_tesseract_;
2148  osd_tesseract_ = nullptr;
2149  }
2150  }
2151  }
2152 #endif // ndef DISABLED_LEGACY_ENGINE
2153 
2154  if (tesseract_->SegmentPage(input_file_.c_str(), block_list_, osd_tess, &osr) < 0) {
2155  return -1;
2156  }
2157 
2158  // If Devanagari is being recognized, we use different images for page seg
2159  // and for OCR.
2160  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
2161  return 0;
2162 }
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:266
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:188
void SetEquationDetect(EquationDetect *detector)
int init_tesseract(const std::string &arg0, const std::string &textbase, const std::string &language, OcrEngineMode oem, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params, TessdataManager *mgr)
Definition: tessedit.cpp:291
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
int SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
void set_source_resolution(int ppi)
int GetSourceYResolution() const
Definition: thresholder.h:99
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:58
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527

◆ GetAltoText() [1/2]

char * tesseract::TessBaseAPI::GetAltoText ( ETEXT_DESC *  monitor,
int  page_number 
)

Make an XML-formatted string with Alto markup from the internal data structures.

Make an XML-formatted string with ALTO markup from the internal data structures.

Definition at line 135 of file altorenderer.cpp.

135  {
136  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
137  return nullptr;
138  }
139 
140  int lcnt = 0, tcnt = 0, bcnt = 0, wcnt = 0;
141 
142  if (input_file_.empty()) {
143  SetInputName(nullptr);
144  }
145 
146 #ifdef _WIN32
147  // convert input name from ANSI encoding to utf-8
148  int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
149  wchar_t *uni16_str = new WCHAR[str16_len];
150  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len);
151  int utf8_len =
152  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr);
153  char *utf8_str = new char[utf8_len];
154  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr);
155  input_file_ = utf8_str;
156  delete[] uni16_str;
157  delete[] utf8_str;
158 #endif
159 
160  std::stringstream alto_str;
161  // Use "C" locale (needed for int values larger than 999).
162  alto_str.imbue(std::locale::classic());
163  alto_str << "\t\t<Page WIDTH=\"" << rect_width_ << "\" HEIGHT=\"" << rect_height_
164  << "\" PHYSICAL_IMG_NR=\"" << page_number << "\""
165  << " ID=\"page_" << page_number << "\">\n"
166  << "\t\t\t<PrintSpace HPOS=\"0\" VPOS=\"0\""
167  << " WIDTH=\"" << rect_width_ << "\""
168  << " HEIGHT=\"" << rect_height_ << "\">\n";
169 
170  ResultIterator *res_it = GetIterator();
171  while (!res_it->Empty(RIL_BLOCK)) {
172  if (res_it->Empty(RIL_WORD)) {
173  res_it->Next(RIL_WORD);
174  continue;
175  }
176 
177  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
178  alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << bcnt << "\"";
179  AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
180  alto_str << "\n";
181  }
182 
183  if (res_it->IsAtBeginningOf(RIL_PARA)) {
184  alto_str << "\t\t\t\t\t<TextBlock ID=\"block_" << tcnt << "\"";
185  AddBoxToAlto(res_it, RIL_PARA, alto_str);
186  alto_str << "\n";
187  }
188 
189  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
190  alto_str << "\t\t\t\t\t\t<TextLine ID=\"line_" << lcnt << "\"";
191  AddBoxToAlto(res_it, RIL_TEXTLINE, alto_str);
192  alto_str << "\n";
193  }
194 
195  alto_str << "\t\t\t\t\t\t\t<String ID=\"string_" << wcnt << "\"";
196  AddBoxToAlto(res_it, RIL_WORD, alto_str);
197  alto_str << " CONTENT=\"";
198 
199  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
200  bool last_word_in_tblock = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
201  bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
202 
203  int left, top, right, bottom;
204  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
205 
206  do {
207  const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
208  if (grapheme && grapheme[0] != 0) {
209  alto_str << HOcrEscape(grapheme.get()).c_str();
210  }
211  res_it->Next(RIL_SYMBOL);
212  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
213 
214  alto_str << "\"/>";
215 
216  wcnt++;
217 
218  if (last_word_in_line) {
219  alto_str << "\n\t\t\t\t\t\t</TextLine>\n";
220  lcnt++;
221  } else {
222  int hpos = right;
223  int vpos = top;
224  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
225  int width = left - hpos;
226  alto_str << "<SP WIDTH=\"" << width << "\" VPOS=\"" << vpos << "\" HPOS=\"" << hpos
227  << "\"/>\n";
228  }
229 
230  if (last_word_in_tblock) {
231  alto_str << "\t\t\t\t\t</TextBlock>\n";
232  tcnt++;
233  }
234 
235  if (last_word_in_cblock) {
236  alto_str << "\t\t\t\t</ComposedBlock>\n";
237  bcnt++;
238  }
239  }
240 
241  alto_str << "\t\t\t</PrintSpace>\n"
242  << "\t\t</Page>\n";
243  const std::string &text = alto_str.str();
244 
245  char *result = new char[text.length() + 1];
246  strcpy(result, text.c_str());
247  delete res_it;
248  return result;
249 }
std::string HOcrEscape(const char *text)
Definition: baseapi.cpp:2338
ResultIterator * GetIterator()
Definition: baseapi.cpp:1313
void SetInputName(const char *name)
Definition: baseapi.cpp:267

◆ GetAltoText() [2/2]

char * tesseract::TessBaseAPI::GetAltoText ( int  page_number)

Make an XML-formatted string with Alto markup from the internal data structures.

Make an XML-formatted string with ALTO markup from the internal data structures.

Definition at line 127 of file altorenderer.cpp.

127  {
128  return GetAltoText(nullptr, page_number);
129 }
char * GetAltoText(ETEXT_DESC *monitor, int page_number)

◆ GetAvailableLanguagesAsVector()

void tesseract::TessBaseAPI::GetAvailableLanguagesAsVector ( std::vector< std::string > *  langs) const

Returns the available languages in the sorted vector of std::string.

Definition at line 468 of file baseapi.cpp.

468  {
469  langs->clear();
470  if (tesseract_ != nullptr) {
471  addAvailableLanguages(tesseract_->datadir, "", langs);
472  std::sort(langs->begin(), langs->end());
473  }
474 }
std::string datadir
Definition: ccutil.h:57

◆ GetBlockTextOrientations()

void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextlines().

Definition at line 2260 of file baseapi.cpp.

2260  {
2261  delete[] * block_orientation;
2262  *block_orientation = nullptr;
2263  delete[] * vertical_writing;
2264  *vertical_writing = nullptr;
2265  BLOCK_IT block_it(block_list_);
2266 
2267  block_it.move_to_first();
2268  int num_blocks = 0;
2269  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2270  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2271  continue;
2272  }
2273  ++num_blocks;
2274  }
2275  if (!num_blocks) {
2276  tprintf("WARNING: Found no blocks\n");
2277  return;
2278  }
2279  *block_orientation = new int[num_blocks];
2280  *vertical_writing = new bool[num_blocks];
2281  block_it.move_to_first();
2282  int i = 0;
2283  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2284  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2285  continue;
2286  }
2287  FCOORD re_rotation = block_it.data()->re_rotation();
2288  float re_theta = re_rotation.angle();
2289  FCOORD classify_rotation = block_it.data()->classify_rotation();
2290  float classify_theta = classify_rotation.angle();
2291  double rot_theta = -(re_theta - classify_theta) * 2.0 / M_PI;
2292  if (rot_theta < 0) {
2293  rot_theta += 4;
2294  }
2295  int num_rotations = static_cast<int>(rot_theta + 0.5);
2296  (*block_orientation)[i] = num_rotations;
2297  // The classify_rotation is non-zero only if the text has vertical
2298  // writing direction.
2299  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2300  ++i;
2301  }
2302 }

◆ GetBoolVariable()

bool tesseract::TessBaseAPI::GetBoolVariable ( const char *  name,
bool *  value 
) const

Definition at line 301 of file baseapi.cpp.

301  {
302  auto *p = ParamUtils::FindParam<BoolParam>(name, GlobalParams()->bool_params,
304  if (p == nullptr) {
305  return false;
306  }
307  *value = bool(*p);
308  return true;
309 }
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:36
ParamsVectors * params()
Definition: ccutil.h:53
std::vector< BoolParam * > bool_params
Definition: params.h:47

◆ GetBoxText()

char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

The recognized text is returned as a char* which is coded as a UTF8 box file. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 1512 of file baseapi.cpp.

1512  {
1513  if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
1514  return nullptr;
1515  }
1516  int blob_count;
1517  int utf8_length = TextLength(&blob_count);
1518  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + kMaxBytesPerLine;
1519  char *result = new char[total_length];
1520  result[0] = '\0';
1521  int output_length = 0;
1522  LTRResultIterator *it = GetLTRIterator();
1523  do {
1524  int left, top, right, bottom;
1525  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1526  const std::unique_ptr</*non-const*/ char[]> text(it->GetUTF8Text(RIL_SYMBOL));
1527  // Tesseract uses space for recognition failure. Fix to a reject
1528  // character, kTesseractReject so we don't create illegal box files.
1529  for (int i = 0; text[i] != '\0'; ++i) {
1530  if (text[i] == ' ') {
1531  text[i] = kTesseractReject;
1532  }
1533  }
1534  snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n",
1535  text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number);
1536  output_length += strlen(result + output_length);
1537  // Just in case...
1538  if (output_length + kMaxBytesPerLine > total_length) {
1539  break;
1540  }
1541  }
1542  } while (it->Next(RIL_SYMBOL));
1543  delete it;
1544  return result;
1545 }
const char kTesseractReject
Definition: baseapi.cpp:106
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1495
const int kMaxBytesPerLine
Definition: baseapi.cpp:1504
int TextLength(int *blob_count) const
Definition: baseapi.cpp:2193
LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1296

◆ GetComponentImages() [1/2]

Boxa* tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 421 of file baseapi.h.

422  {
423  return GetComponentImages(level, text_only, false, 0, pixa, blockids,
424  nullptr);
425  }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:699

◆ GetComponentImages() [2/2]

Boxa * tesseract::TessBaseAPI::GetComponentImages ( PageIteratorLevel  level,
bool  text_only,
bool  raw_image,
int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not nullptr, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 699 of file baseapi.cpp.

701  {
702  /*non-const*/ std::unique_ptr</*non-const*/ PageIterator> page_it(GetIterator());
703  if (page_it == nullptr) {
704  page_it.reset(AnalyseLayout());
705  }
706  if (page_it == nullptr) {
707  return nullptr; // Failed.
708  }
709 
710  // Count the components to get a size for the arrays.
711  int component_count = 0;
712  int left, top, right, bottom;
713 
714  if (raw_image) {
715  // Get bounding box in original raw image with padding.
716  do {
717  if (page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom) &&
718  (!text_only || PTIsTextType(page_it->BlockType()))) {
719  ++component_count;
720  }
721  } while (page_it->Next(level));
722  } else {
723  // Get bounding box from binarized imaged. Note that this could be
724  // differently scaled from the original image.
725  do {
726  if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
727  (!text_only || PTIsTextType(page_it->BlockType()))) {
728  ++component_count;
729  }
730  } while (page_it->Next(level));
731  }
732 
733  Boxa *boxa = boxaCreate(component_count);
734  if (pixa != nullptr) {
735  *pixa = pixaCreate(component_count);
736  }
737  if (blockids != nullptr) {
738  *blockids = new int[component_count];
739  }
740  if (paraids != nullptr) {
741  *paraids = new int[component_count];
742  }
743 
744  int blockid = 0;
745  int paraid = 0;
746  int component_index = 0;
747  page_it->Begin();
748  do {
749  bool got_bounding_box;
750  if (raw_image) {
751  got_bounding_box = page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom);
752  } else {
753  got_bounding_box = page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom);
754  }
755  if (got_bounding_box && (!text_only || PTIsTextType(page_it->BlockType()))) {
756  Box *lbox = boxCreate(left, top, right - left, bottom - top);
757  boxaAddBox(boxa, lbox, L_INSERT);
758  if (pixa != nullptr) {
759  Pix *pix = nullptr;
760  if (raw_image) {
761  pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, &top);
762  } else {
763  pix = page_it->GetBinaryImage(level);
764  }
765  pixaAddPix(*pixa, pix, L_INSERT);
766  pixaAddBox(*pixa, lbox, L_CLONE);
767  }
768  if (paraids != nullptr) {
769  (*paraids)[component_index] = paraid;
770  if (page_it->IsAtFinalElement(RIL_PARA, level)) {
771  ++paraid;
772  }
773  }
774  if (blockids != nullptr) {
775  (*blockids)[component_index] = blockid;
776  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
777  ++blockid;
778  paraid = 0;
779  }
780  }
781  ++component_index;
782  }
783  } while (page_it->Next(level));
784  return boxa;
785 }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82

◆ GetConnectedComponents()

Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on the Pixa array of connected components.

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 687 of file baseapi.cpp.

687  {
688  return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
689 }

◆ GetDatapath()

const char * tesseract::TessBaseAPI::GetDatapath ( )

Definition at line 932 of file baseapi.cpp.

932  {
933  return tesseract_->datadir.c_str();
934 }

◆ GetDawg()

const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2325 of file baseapi.cpp.

2325  {
2326  if (tesseract_ == nullptr || i >= NumDawgs()) {
2327  return nullptr;
2328  }
2329  return tesseract_->getDict().GetDawg(i);
2330 }
int NumDawgs() const
Definition: baseapi.cpp:2333
Dict & getDict() override
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:385

◆ GetDoubleVariable()

bool tesseract::TessBaseAPI::GetDoubleVariable ( const char *  name,
double *  value 
) const

Definition at line 317 of file baseapi.cpp.

317  {
318  auto *p = ParamUtils::FindParam<DoubleParam>(name, GlobalParams()->double_params,
320  if (p == nullptr) {
321  return false;
322  }
323  *value = (double)(*p);
324  return true;
325 }
std::vector< DoubleParam * > double_params
Definition: params.h:49

◆ GetHOCRText() [1/2]

char * tesseract::TessBaseAPI::GetHOCRText ( ETEXT_DESC monitor,
int  page_number 
)

Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. monitor can be used to cancel the recognition and receive progress callbacks. Returned string must be freed with the delete [] operator.

Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 134 of file hocrrenderer.cpp.

134  {
135  if (tesseract_ == nullptr ||
136  (page_res_ == nullptr && Recognize(monitor) < 0)) {
137  return nullptr;
138  }
139 
140  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, ccnt = 1;
141  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
142  bool para_is_ltr = true; // Default direction is LTR
143  const char *paragraph_lang = nullptr;
144  bool font_info = false;
145  bool hocr_boxes = false;
146  GetBoolVariable("hocr_font_info", &font_info);
147  GetBoolVariable("hocr_char_boxes", &hocr_boxes);
148 
149  if (input_file_.empty()) {
150  SetInputName(nullptr);
151  }
152 
153 #ifdef _WIN32
154  // convert input name from ANSI encoding to utf-8
155  int str16_len =
156  MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
157  wchar_t *uni16_str = new WCHAR[str16_len];
158  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str,
159  str16_len);
160  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
161  0, nullptr, nullptr);
162  char *utf8_str = new char[utf8_len];
163  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
164  nullptr, nullptr);
165  input_file_ = utf8_str;
166  delete[] uni16_str;
167  delete[] utf8_str;
168 #endif
169 
170  std::stringstream hocr_str;
171  // Use "C" locale (needed for double values x_size and x_descenders).
172  hocr_str.imbue(std::locale::classic());
173  // Use 8 digits for double values.
174  hocr_str.precision(8);
175  hocr_str << " <div class='ocr_page'"
176  << " id='"
177  << "page_" << page_id << "'"
178  << " title='image \"";
179  if (!input_file_.empty()) {
180  hocr_str << HOcrEscape(input_file_.c_str());
181  } else {
182  hocr_str << "unknown";
183  }
184 
185  hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " "
186  << rect_width_ << " " << rect_height_ << "; ppageno " << page_number
187  << "; scan_res " << GetSourceYResolution() << " "
188  << GetSourceYResolution() << "'>\n";
189 
190  std::unique_ptr<ResultIterator> res_it(GetIterator());
191  while (!res_it->Empty(RIL_BLOCK)) {
192  if (res_it->Empty(RIL_WORD)) {
193  res_it->Next(RIL_WORD);
194  continue;
195  }
196 
197  // Open any new block/paragraph/textline.
198  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
199  para_is_ltr = true; // reset to default direction
200  hocr_str << " <div class='ocr_carea'"
201  << " id='"
202  << "block_" << page_id << "_" << bcnt << "'";
203  AddBoxTohOCR(res_it.get(), RIL_BLOCK, hocr_str);
204  }
205  if (res_it->IsAtBeginningOf(RIL_PARA)) {
206  hocr_str << "\n <p class='ocr_par'";
207  para_is_ltr = res_it->ParagraphIsLtr();
208  if (!para_is_ltr) {
209  hocr_str << " dir='rtl'";
210  }
211  hocr_str << " id='"
212  << "par_" << page_id << "_" << pcnt << "'";
213  paragraph_lang = res_it->WordRecognitionLanguage();
214  if (paragraph_lang) {
215  hocr_str << " lang='" << paragraph_lang << "'";
216  }
217  AddBoxTohOCR(res_it.get(), RIL_PARA, hocr_str);
218  }
219  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
220  hocr_str << "\n <span class='";
221  switch (res_it->BlockType()) {
222  case PT_HEADING_TEXT:
223  hocr_str << "ocr_header";
224  break;
225  case PT_PULLOUT_TEXT:
226  hocr_str << "ocr_textfloat";
227  break;
228  case PT_CAPTION_TEXT:
229  hocr_str << "ocr_caption";
230  break;
231  default:
232  hocr_str << "ocr_line";
233  }
234  hocr_str << "' id='"
235  << "line_" << page_id << "_" << lcnt << "'";
236  AddBoxTohOCR(res_it.get(), RIL_TEXTLINE, hocr_str);
237  }
238 
239  // Now, process the word...
240  int32_t lstm_choice_mode = tesseract_->lstm_choice_mode;
241  std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
242  *rawTimestepMap = nullptr;
243  std::vector<std::vector<std::pair<const char *, float>>> *CTCMap = nullptr;
244  if (lstm_choice_mode) {
245  CTCMap = res_it->GetBestLSTMSymbolChoices();
246  rawTimestepMap = res_it->GetRawLSTMTimesteps();
247  }
248  hocr_str << "\n <span class='ocrx_word'"
249  << " id='"
250  << "word_" << page_id << "_" << wcnt << "'";
251  int left, top, right, bottom;
252  bool bold, italic, underlined, monospace, serif, smallcaps;
253  int pointsize, font_id;
254  const char *font_name;
255  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
256  font_name =
257  res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
258  &serif, &smallcaps, &pointsize, &font_id);
259  hocr_str << " title='bbox " << left << " " << top << " " << right << " "
260  << bottom << "; x_wconf "
261  << static_cast<int>(res_it->Confidence(RIL_WORD));
262  if (font_info) {
263  if (font_name) {
264  hocr_str << "; x_font " << HOcrEscape(font_name).c_str();
265  }
266  hocr_str << "; x_fsize " << pointsize;
267  }
268  hocr_str << "'";
269  const char *lang = res_it->WordRecognitionLanguage();
270  if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
271  hocr_str << " lang='" << lang << "'";
272  }
273  switch (res_it->WordDirection()) {
274  // Only emit direction if different from current paragraph direction
275  case DIR_LEFT_TO_RIGHT:
276  if (!para_is_ltr) {
277  hocr_str << " dir='ltr'";
278  }
279  break;
280  case DIR_RIGHT_TO_LEFT:
281  if (para_is_ltr) {
282  hocr_str << " dir='rtl'";
283  }
284  break;
285  case DIR_MIX:
286  case DIR_NEUTRAL:
287  default: // Do nothing.
288  break;
289  }
290  hocr_str << ">";
291  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
292  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
293  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
294  if (bold) {
295  hocr_str << "<strong>";
296  }
297  if (italic) {
298  hocr_str << "<em>";
299  }
300  do {
301  const std::unique_ptr<const char[]> grapheme(
302  res_it->GetUTF8Text(RIL_SYMBOL));
303  if (grapheme && grapheme[0] != 0) {
304  if (hocr_boxes) {
305  res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
306  hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes "
307  << left << " " << top << " " << right << " " << bottom
308  << "; x_conf " << res_it->Confidence(RIL_SYMBOL) << "'>";
309  }
310  hocr_str << HOcrEscape(grapheme.get()).c_str();
311  if (hocr_boxes) {
312  hocr_str << "</span>";
313  tesseract::ChoiceIterator ci(*res_it);
314  if (lstm_choice_mode == 1 && ci.Timesteps() != nullptr) {
315  std::vector<std::vector<std::pair<const char *, float>>> *symbol =
316  ci.Timesteps();
317  hocr_str << "\n <span class='ocr_symbol'"
318  << " id='"
319  << "symbol_" << page_id << "_" << wcnt << "_" << scnt
320  << "'>";
321  for (const auto &timestep : *symbol) {
322  hocr_str << "\n <span class='ocrx_cinfo'"
323  << " id='"
324  << "timestep" << page_id << "_" << wcnt << "_" << tcnt
325  << "'>";
326  for (auto conf : timestep) {
327  hocr_str << "\n <span class='ocrx_cinfo'"
328  << " id='"
329  << "choice_" << page_id << "_" << wcnt << "_" << ccnt
330  << "'"
331  << " title='x_confs " << int(conf.second * 100) << "'>"
332  << HOcrEscape(conf.first).c_str() << "</span>";
333  ++ccnt;
334  }
335  hocr_str << "</span>";
336  ++tcnt;
337  }
338  hocr_str << "\n </span>";
339  ++scnt;
340  } else if (lstm_choice_mode == 2) {
341  hocr_str << "\n <span class='ocrx_cinfo'"
342  << " id='"
343  << "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
344  << "'>";
345  do {
346  const char *choice = ci.GetUTF8Text();
347  float choiceconf = ci.Confidence();
348  if (choice != nullptr) {
349  hocr_str << "\n <span class='ocrx_cinfo'"
350  << " id='"
351  << "choice_" << page_id << "_" << wcnt << "_" << ccnt
352  << "'"
353  << " title='x_confs " << choiceconf << "'>"
354  << HOcrEscape(choice).c_str() << "</span>";
355  ccnt++;
356  }
357  } while (ci.Next());
358  hocr_str << "\n </span>";
359  tcnt++;
360  }
361  }
362  }
363  res_it->Next(RIL_SYMBOL);
364  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
365  if (italic) {
366  hocr_str << "</em>";
367  }
368  if (bold) {
369  hocr_str << "</strong>";
370  }
371  // If the lstm choice mode is required it is added here
372  if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) {
373  for (const auto &symbol : *rawTimestepMap) {
374  hocr_str << "\n <span class='ocr_symbol'"
375  << " id='"
376  << "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
377  for (const auto &timestep : symbol) {
378  hocr_str << "\n <span class='ocrx_cinfo'"
379  << " id='"
380  << "timestep" << page_id << "_" << wcnt << "_" << tcnt
381  << "'>";
382  for (auto conf : timestep) {
383  hocr_str << "\n <span class='ocrx_cinfo'"
384  << " id='"
385  << "choice_" << page_id << "_" << wcnt << "_" << ccnt
386  << "'"
387  << " title='x_confs " << int(conf.second * 100) << "'>"
388  << HOcrEscape(conf.first).c_str() << "</span>";
389  ++ccnt;
390  }
391  hocr_str << "</span>";
392  ++tcnt;
393  }
394  hocr_str << "</span>";
395  ++scnt;
396  }
397  } else if (lstm_choice_mode == 2 && !hocr_boxes && CTCMap != nullptr) {
398  for (const auto &timestep : *CTCMap) {
399  if (timestep.size() > 0) {
400  hocr_str << "\n <span class='ocrx_cinfo'"
401  << " id='"
402  << "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
403  << "'>";
404  for (auto &j : timestep) {
405  float conf = 100 - tesseract_->lstm_rating_coefficient * j.second;
406  if (conf < 0.0f) {
407  conf = 0.0f;
408  }
409  if (conf > 100.0f) {
410  conf = 100.0f;
411  }
412  hocr_str << "\n <span class='ocrx_cinfo'"
413  << " id='"
414  << "choice_" << page_id << "_" << wcnt << "_" << ccnt
415  << "'"
416  << " title='x_confs " << conf << "'>"
417  << HOcrEscape(j.first).c_str() << "</span>";
418  ccnt++;
419  }
420  hocr_str << "</span>";
421  tcnt++;
422  }
423  }
424  }
425  // Close ocrx_word.
426  if (hocr_boxes || lstm_choice_mode > 0) {
427  hocr_str << "\n ";
428  }
429  hocr_str << "</span>";
430  tcnt = 1;
431  ccnt = 1;
432  wcnt++;
433  // Close any ending block/paragraph/textline.
434  if (last_word_in_line) {
435  hocr_str << "\n </span>";
436  lcnt++;
437  }
438  if (last_word_in_para) {
439  hocr_str << "\n </p>\n";
440  pcnt++;
441  para_is_ltr = true; // back to default direction
442  }
443  if (last_word_in_block) {
444  hocr_str << " </div>\n";
445  bcnt++;
446  }
447  }
448  hocr_str << " </div>\n";
449 
450  const std::string &text = hocr_str.str();
451  char *result = new char[text.length() + 1];
452  strcpy(result, text.c_str());
453  return result;
454 }
@ DIR_MIX
Definition: unichar.h:47
@ DIR_LEFT_TO_RIGHT
Definition: unichar.h:45
@ DIR_RIGHT_TO_LEFT
Definition: unichar.h:46
@ DIR_NEUTRAL
Definition: unichar.h:44
@ PT_CAPTION_TEXT
Definition: publictypes.h:62
@ PT_PULLOUT_TEXT
Definition: publictypes.h:57
@ PT_HEADING_TEXT
Definition: publictypes.h:56
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:301

◆ GetHOCRText() [2/2]

char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 121 of file hocrrenderer.cpp.

121  {
122  return GetHOCRText(nullptr, page_number);
123 }
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)

◆ GetInitLanguagesAsString()

const char * tesseract::TessBaseAPI::GetInitLanguagesAsString ( ) const

Returns the languages string used in the last valid initialization. If the last initialization specified "deu+hin" then that will be returned. If hin loaded eng automatically as well, then that will not be included in this list. To find the languages actually loaded use GetLoadedLanguagesAsVector. The returned string should NOT be deleted.

Definition at line 445 of file baseapi.cpp.

445  {
446  return language_.c_str();
447 }

◆ GetInputImage()

Pix * tesseract::TessBaseAPI::GetInputImage ( )

Definition at line 921 of file baseapi.cpp.

921  {
922  return tesseract_->pix_original();
923 }
Image pix_original() const

◆ GetInputName()

const char * tesseract::TessBaseAPI::GetInputName ( )

These functions are required for searchable PDF output. We need our hands on the input file so that we can include it in the PDF without transcoding. If that is not possible, we need the original image. Finally, resolution metadata is stored in the PDF so we need that as well.

Definition at line 925 of file baseapi.cpp.

925  {
926  if (!input_file_.empty()) {
927  return input_file_.c_str();
928  }
929  return nullptr;
930 }

◆ GetIntVariable()

bool tesseract::TessBaseAPI::GetIntVariable ( const char *  name,
int *  value 
) const

Returns true if the parameter was found among Tesseract parameters. Fills in value with the value of the parameter.

Definition at line 291 of file baseapi.cpp.

291  {
292  auto *p = ParamUtils::FindParam<IntParam>(name, GlobalParams()->int_params,
294  if (p == nullptr) {
295  return false;
296  }
297  *value = (int32_t)(*p);
298  return true;
299 }
std::vector< IntParam * > int_params
Definition: params.h:46

◆ GetIterator()

ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1313 of file baseapi.cpp.

1313  {
1314  if (tesseract_ == nullptr || page_res_ == nullptr) {
1315  return nullptr;
1316  }
1317  return ResultIterator::StartOfParagraph(LTRResultIterator(
1320 }
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)

◆ GetLoadedLanguagesAsVector()

void tesseract::TessBaseAPI::GetLoadedLanguagesAsVector ( std::vector< std::string > *  langs) const

Returns the loaded languages in the vector of std::string. Includes all languages loaded by the last Init, including those loaded as dependencies of other loaded languages.

Definition at line 454 of file baseapi.cpp.

454  {
455  langs->clear();
456  if (tesseract_ != nullptr) {
457  langs->push_back(tesseract_->lang);
458  int num_subs = tesseract_->num_sub_langs();
459  for (int i = 0; i < num_subs; ++i) {
460  langs->push_back(tesseract_->get_sub_lang(i)->lang);
461  }
462  }
463 }
int num_sub_langs() const
Tesseract * get_sub_lang(int index) const
std::string lang
Definition: ccutil.h:59

◆ GetLSTMBoxText()

char * tesseract::TessBaseAPI::GetLSTMBoxText ( int  page_number = 0)

Make a box file for LSTM training from the internal data structures. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 38 of file lstmboxrenderer.cpp.

38  {
39  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
40  return nullptr;
41  }
42 
43  std::string lstm_box_str;
44  bool first_word = true;
45  int left = 0, top = 0, right = 0, bottom = 0;
46 
47  LTRResultIterator *res_it = GetLTRIterator();
48  while (!res_it->Empty(RIL_BLOCK)) {
49  if (res_it->Empty(RIL_SYMBOL)) {
50  res_it->Next(RIL_SYMBOL);
51  continue;
52  }
53  if (!first_word) {
54  if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
55  if (res_it->IsAtBeginningOf(RIL_WORD)) {
56  lstm_box_str += " " + std::to_string(left);
57  AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
58  lstm_box_str += "\n"; // end of row for word
59  } // word
60  } else {
61  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
62  lstm_box_str += "\t " + std::to_string(left);
63  AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
64  lstm_box_str += "\n"; // end of row for line
65  } // line
66  }
67  } // not first word
68  first_word = false;
69  // Use bounding box for whole line for everything
70  res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
71  do {
72  lstm_box_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
73  res_it->Next(RIL_SYMBOL);
74  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
75  lstm_box_str += " " + std::to_string(left);
76  AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
77  lstm_box_str += "\n"; // end of row for symbol
78  }
79  if (!first_word) { // if first_word is true => empty page
80  lstm_box_str += "\t " + std::to_string(left);
81  AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
82  lstm_box_str += "\n"; // end of PAGE
83  }
84  char *ret = new char[lstm_box_str.length() + 1];
85  strcpy(ret, lstm_box_str.c_str());
86  delete res_it;
87  return ret;
88 }

◆ GetLTRIterator()

LTRResultIterator * tesseract::TessBaseAPI::GetLTRIterator ( )
protected

Return an LTR Result Iterator – used only for training, as we really want to ignore all BiDi smarts at that point. delete once you're done with it.

Get a left-to-right iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use.

Definition at line 1296 of file baseapi.cpp.

1296  {
1297  if (tesseract_ == nullptr || page_res_ == nullptr) {
1298  return nullptr;
1299  }
1300  return new LTRResultIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
1303 }

◆ GetMutableIterator()

MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1330 of file baseapi.cpp.

1330  {
1331  if (tesseract_ == nullptr || page_res_ == nullptr) {
1332  return nullptr;
1333  }
1334  return new MutableIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
1337 }

◆ getOpenCLDevice()

size_t tesseract::TessBaseAPI::getOpenCLDevice ( void **  data)
static

If compiled with OpenCL AND an available OpenCL device is deemed faster than serial code, then *data is set to point to a newly allocated copy of the cl_device_id and sizeof(cl_device_id) is returned; otherwise *data is set to nullptr and 0 is returned.

Definition at line 249 of file baseapi.cpp.

249  {
250 #ifdef USE_OPENCL
251  ds_device device = OpenclDevice::getDeviceSelection();
252  if (device.type == DS_DEVICE_OPENCL_DEVICE) {
253  *data = new cl_device_id;
254  memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
255  return sizeof(cl_device_id);
256  }
257 #endif
258 
259  *data = nullptr;
260  return 0;
261 }

◆ GetOsdText()

char * tesseract::TessBaseAPI::GetOsdText ( int  page_number)

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the osd file.

Definition at line 1704 of file baseapi.cpp.

1704  {
1705  int orient_deg;
1706  float orient_conf;
1707  const char *script_name;
1708  float script_conf;
1709 
1710  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) {
1711  return nullptr;
1712  }
1713 
1714  // clockwise rotation needed to make the page upright
1715  int rotate = OrientationIdToValue(orient_deg / 90);
1716 
1717  std::stringstream stream;
1718  // Use "C" locale (needed for float values orient_conf and script_conf).
1719  stream.imbue(std::locale::classic());
1720  // Use fixed notation with 2 digits after the decimal point for float values.
1721  stream.precision(2);
1722  stream << std::fixed << "Page number: " << page_number << "\n"
1723  << "Orientation in degrees: " << orient_deg << "\n"
1724  << "Rotate: " << rotate << "\n"
1725  << "Orientation confidence: " << orient_conf << "\n"
1726  << "Script: " << script_name << "\n"
1727  << "Script confidence: " << script_conf << "\n";
1728  const std::string &text = stream.str();
1729  char *result = new char[text.length() + 1];
1730  strcpy(result, text.c_str());
1731  return result;
1732 }
TESS_API int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:566
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:1668

◆ GetPageRes()

const PAGE_RES* tesseract::TessBaseAPI::GetPageRes ( ) const
inlineprotected

Definition at line 762 of file baseapi.h.

762  {
763  return page_res_;
764  }

◆ GetPageSegMode()

PageSegMode tesseract::TessBaseAPI::GetPageSegMode ( ) const

Return the current page segmentation mode.

Definition at line 516 of file baseapi.cpp.

516  {
517  if (tesseract_ == nullptr) {
518  return PSM_SINGLE_BLOCK;
519  }
520  return static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
521 }
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:168

◆ GetRegions()

Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 643 of file baseapi.cpp.

643  {
644  return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
645 }

◆ GetSourceYResolution()

int tesseract::TessBaseAPI::GetSourceYResolution ( )

Definition at line 936 of file baseapi.cpp.

936  {
937  if (thresholder_ == nullptr)
938  return -1;
940 }

◆ GetStringVariable()

const char * tesseract::TessBaseAPI::GetStringVariable ( const char *  name) const

Returns the pointer to the string that represents the value of the parameter if it was found among Tesseract parameters.

Definition at line 311 of file baseapi.cpp.

311  {
312  auto *p = ParamUtils::FindParam<StringParam>(name, GlobalParams()->string_params,
314  return (p != nullptr) ? p->c_str() : nullptr;
315 }
std::vector< StringParam * > string_params
Definition: params.h:48

◆ GetStrips()

Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use.

Definition at line 668 of file baseapi.cpp.

668  {
669  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
670 }

◆ GetTextDirection()

bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 1934 of file baseapi.cpp.

1934  {
1935  const std::unique_ptr<const PageIterator> it(AnalyseLayout());
1936  if (it == nullptr) {
1937  return false;
1938  }
1939  int x1, x2, y1, y2;
1940  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
1941  // Calculate offset and slope (NOTE: Kind of ugly)
1942  if (x2 <= x1) {
1943  x2 = x1 + 1;
1944  }
1945  // Convert the point pair to slope/offset of the baseline (in image coords.)
1946  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
1947  *out_offset = static_cast<int>(y1 - *out_slope * x1);
1948  // Get the y-coord of the baseline at the left and right edges of the
1949  // textline's bounding box.
1950  int left, top, right, bottom;
1951  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
1952  return false;
1953  }
1954  int left_y = IntCastRounded(*out_slope * left + *out_offset);
1955  int right_y = IntCastRounded(*out_slope * right + *out_offset);
1956  // Shift the baseline down so it passes through the nearest bottom-corner
1957  // of the textline's bounding box. This is the difference between the y
1958  // at the lowest (max) edge of the box and the actual box bottom.
1959  *out_offset += bottom - std::max(left_y, right_y);
1960  // Switch back to bottom-up tesseract coordinates. Requires negation of
1961  // the slope and height - offset for the offset.
1962  *out_slope = -*out_slope;
1963  *out_offset = rect_height_ - *out_offset;
1964 
1965  return true;
1966 }
int IntCastRounded(double x)
Definition: helpers.h:175

◆ GetTextlines() [1/2]

Boxa * tesseract::TessBaseAPI::GetTextlines ( bool  raw_image,
int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 655 of file baseapi.cpp.

656  {
657  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, pixa, blockids, paraids);
658 }

◆ GetTextlines() [2/2]

Boxa* tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 374 of file baseapi.h.

374  {
375  return GetTextlines(false, 0, pixa, blockids, nullptr);
376  }
Boxa * GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:655

◆ GetThresholdedImage()

Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 628 of file baseapi.cpp.

628  {
629  if (tesseract_ == nullptr || thresholder_ == nullptr) {
630  return nullptr;
631  }
632  if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
633  return nullptr;
634  }
635  return tesseract_->pix_binary().clone();
636 }
Image clone() const
Definition: image.cpp:24

◆ GetThresholdedImageScaleFactor()

int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 787 of file baseapi.cpp.

787  {
788  if (thresholder_ == nullptr) {
789  return 0;
790  }
791  return thresholder_->GetScaleFactor();
792 }

◆ GetTSVText()

char * tesseract::TessBaseAPI::GetTSVText ( int  page_number)

Make a TSV-formatted string from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Definition at line 1372 of file baseapi.cpp.

1372  {
1373  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
1374  return nullptr;
1375  }
1376 
1377  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1378  int page_id = page_number + 1; // we use 1-based page numbers.
1379 
1380  int page_num = page_id;
1381  int block_num = 0;
1382  int par_num = 0;
1383  int line_num = 0;
1384  int word_num = 0;
1385 
1386  std::string tsv_str;
1387  tsv_str += "1\t" + std::to_string(page_num); // level 1 - page
1388  tsv_str += "\t" + std::to_string(block_num);
1389  tsv_str += "\t" + std::to_string(par_num);
1390  tsv_str += "\t" + std::to_string(line_num);
1391  tsv_str += "\t" + std::to_string(word_num);
1392  tsv_str += "\t" + std::to_string(rect_left_);
1393  tsv_str += "\t" + std::to_string(rect_top_);
1394  tsv_str += "\t" + std::to_string(rect_width_);
1395  tsv_str += "\t" + std::to_string(rect_height_);
1396  tsv_str += "\t-1\t\n";
1397 
1398  const std::unique_ptr</*non-const*/ ResultIterator> res_it(GetIterator());
1399  while (!res_it->Empty(RIL_BLOCK)) {
1400  if (res_it->Empty(RIL_WORD)) {
1401  res_it->Next(RIL_WORD);
1402  continue;
1403  }
1404 
1405  // Add rows for any new block/paragraph/textline.
1406  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1407  block_num++;
1408  par_num = 0;
1409  line_num = 0;
1410  word_num = 0;
1411  tsv_str += "2\t" + std::to_string(page_num); // level 2 - block
1412  tsv_str += "\t" + std::to_string(block_num);
1413  tsv_str += "\t" + std::to_string(par_num);
1414  tsv_str += "\t" + std::to_string(line_num);
1415  tsv_str += "\t" + std::to_string(word_num);
1416  AddBoxToTSV(res_it.get(), RIL_BLOCK, tsv_str);
1417  tsv_str += "\t-1\t\n"; // end of row for block
1418  }
1419  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1420  par_num++;
1421  line_num = 0;
1422  word_num = 0;
1423  tsv_str += "3\t" + std::to_string(page_num); // level 3 - paragraph
1424  tsv_str += "\t" + std::to_string(block_num);
1425  tsv_str += "\t" + std::to_string(par_num);
1426  tsv_str += "\t" + std::to_string(line_num);
1427  tsv_str += "\t" + std::to_string(word_num);
1428  AddBoxToTSV(res_it.get(), RIL_PARA, tsv_str);
1429  tsv_str += "\t-1\t\n"; // end of row for para
1430  }
1431  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1432  line_num++;
1433  word_num = 0;
1434  tsv_str += "4\t" + std::to_string(page_num); // level 4 - line
1435  tsv_str += "\t" + std::to_string(block_num);
1436  tsv_str += "\t" + std::to_string(par_num);
1437  tsv_str += "\t" + std::to_string(line_num);
1438  tsv_str += "\t" + std::to_string(word_num);
1439  AddBoxToTSV(res_it.get(), RIL_TEXTLINE, tsv_str);
1440  tsv_str += "\t-1\t\n"; // end of row for line
1441  }
1442 
1443  // Now, process the word...
1444  int left, top, right, bottom;
1445  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1446  word_num++;
1447  tsv_str += "5\t" + std::to_string(page_num); // level 5 - word
1448  tsv_str += "\t" + std::to_string(block_num);
1449  tsv_str += "\t" + std::to_string(par_num);
1450  tsv_str += "\t" + std::to_string(line_num);
1451  tsv_str += "\t" + std::to_string(word_num);
1452  tsv_str += "\t" + std::to_string(left);
1453  tsv_str += "\t" + std::to_string(top);
1454  tsv_str += "\t" + std::to_string(right - left);
1455  tsv_str += "\t" + std::to_string(bottom - top);
1456  tsv_str += "\t" + std::to_string(res_it->Confidence(RIL_WORD));
1457  tsv_str += "\t";
1458 
1459  // Increment counts if at end of block/paragraph/textline.
1460  if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) {
1461  lcnt++;
1462  }
1463  if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) {
1464  pcnt++;
1465  }
1466  if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) {
1467  bcnt++;
1468  }
1469 
1470  do {
1471  tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
1472  res_it->Next(RIL_SYMBOL);
1473  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1474  tsv_str += "\n"; // end of row
1475  wcnt++;
1476  }
1477 
1478  char *ret = new char[tsv_str.length() + 1];
1479  strcpy(ret, tsv_str.c_str());
1480  return ret;
1481 }

◆ GetUnichar()

const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id) const

This method returns the string form of the specified unichar.

Definition at line 2320 of file baseapi.cpp.

2320  {
2321  return tesseract_->unicharset.id_to_unichar(unichar_id);
2322 }
UNICHARSET unicharset
Definition: ccutil.h:61
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279

◆ GetUNLVText()

char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes. Returned string must be freed with the delete [] operator.

Definition at line 1561 of file baseapi.cpp.

1561  {
1562  if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
1563  return nullptr;
1564  }
1565  bool tilde_crunch_written = false;
1566  bool last_char_was_newline = true;
1567  bool last_char_was_tilde = false;
1568 
1569  int total_length = TextLength(nullptr);
1570  PAGE_RES_IT page_res_it(page_res_);
1571  char *result = new char[total_length];
1572  char *ptr = result;
1573  for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
1574  WERD_RES *word = page_res_it.word();
1575  // Process the current word.
1576  if (word->unlv_crunch_mode != CR_NONE) {
1577  if (word->unlv_crunch_mode != CR_DELETE &&
1578  (!tilde_crunch_written ||
1579  (word->unlv_crunch_mode == CR_KEEP_SPACE && word->word->space() > 0 &&
1580  !word->word->flag(W_FUZZY_NON) && !word->word->flag(W_FUZZY_SP)))) {
1581  if (!word->word->flag(W_BOL) && word->word->space() > 0 && !word->word->flag(W_FUZZY_NON) &&
1582  !word->word->flag(W_FUZZY_SP)) {
1583  /* Write a space to separate from preceding good text */
1584  *ptr++ = ' ';
1585  last_char_was_tilde = false;
1586  }
1587  if (!last_char_was_tilde) {
1588  // Write a reject char.
1589  last_char_was_tilde = true;
1590  *ptr++ = kUNLVReject;
1591  tilde_crunch_written = true;
1592  last_char_was_newline = false;
1593  }
1594  }
1595  } else {
1596  // NORMAL PROCESSING of non tilde crunched words.
1597  tilde_crunch_written = false;
1599  const char *wordstr = word->best_choice->unichar_string().c_str();
1600  const auto &lengths = word->best_choice->unichar_lengths();
1601  int length = lengths.length();
1602  int i = 0;
1603  int offset = 0;
1604 
1605  if (last_char_was_tilde && word->word->space() == 0 && wordstr[offset] == ' ') {
1606  // Prevent adjacent tilde across words - we know that adjacent tildes
1607  // within words have been removed.
1608  // Skip the first character.
1609  offset = lengths[i++];
1610  }
1611  if (i < length && wordstr[offset] != 0) {
1612  if (!last_char_was_newline) {
1613  *ptr++ = ' ';
1614  } else {
1615  last_char_was_newline = false;
1616  }
1617  for (; i < length; offset += lengths[i++]) {
1618  if (wordstr[offset] == ' ' || wordstr[offset] == kTesseractReject) {
1619  *ptr++ = kUNLVReject;
1620  last_char_was_tilde = true;
1621  } else {
1622  if (word->reject_map[i].rejected()) {
1623  *ptr++ = kUNLVSuspect;
1624  }
1625  UNICHAR ch(wordstr + offset, lengths[i]);
1626  int uni_ch = ch.first_uni();
1627  for (int j = 0; kUniChs[j] != 0; ++j) {
1628  if (kUniChs[j] == uni_ch) {
1629  uni_ch = kLatinChs[j];
1630  break;
1631  }
1632  }
1633  if (uni_ch <= 0xff) {
1634  *ptr++ = static_cast<char>(uni_ch);
1635  last_char_was_tilde = false;
1636  } else {
1637  *ptr++ = kUNLVReject;
1638  last_char_was_tilde = true;
1639  }
1640  }
1641  }
1642  }
1643  }
1644  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1645  /* Add a new line output */
1646  *ptr++ = '\n';
1647  tilde_crunch_written = false;
1648  last_char_was_newline = true;
1649  last_char_was_tilde = false;
1650  }
1651  }
1652  *ptr++ = '\n';
1653  *ptr = '\0';
1654  return result;
1655 }
@ W_BOL
start of line
Definition: werd.h:34
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:41
@ W_EOL
end of line
Definition: werd.h:35
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:42
@ CR_NONE
Definition: pageres.h:160
@ CR_KEEP_SPACE
Definition: pageres.h:160
@ CR_DELETE
Definition: pageres.h:160
const int kLatinChs[]
Definition: baseapi.cpp:1554
const char kUNLVReject
Definition: baseapi.cpp:108
const char kUNLVSuspect
Definition: baseapi.cpp:110
const int kUniChs[]
Definition: baseapi.cpp:1552
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:270

◆ GetUTF8Text()

char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1340 of file baseapi.cpp.

1340  {
1341  if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
1342  return nullptr;
1343  }
1344  std::string text("");
1345  const std::unique_ptr</*non-const*/ ResultIterator> it(GetIterator());
1346  do {
1347  if (it->Empty(RIL_PARA)) {
1348  continue;
1349  }
1350  const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
1351  text += para_text.get();
1352  } while (it->Next(RIL_PARA));
1353  char *result = new char[text.length() + 1];
1354  strncpy(result, text.c_str(), text.length() + 1);
1355  return result;
1356 }

◆ GetVariableAsString()

bool tesseract::TessBaseAPI::GetVariableAsString ( const char *  name,
std::string *  val 
) const

Get value of named variable as a string, if it exists.

Definition at line 328 of file baseapi.cpp.

328  {
329  return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
330 }
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, std::string *value)
Definition: params.cpp:130

◆ GetWords()

Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 677 of file baseapi.cpp.

677  {
678  return GetComponentImages(RIL_WORD, true, pixa, nullptr);
679 }

◆ GetWordStrBoxText()

char * tesseract::TessBaseAPI::GetWordStrBoxText ( int  page_number = 0)

The recognized text is returned as a char* which is coded in the same format as a WordStr box file used in training. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Create a UTF8 box file with WordStr strings from the internal data structures. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 31 of file wordstrboxrenderer.cpp.

31  {
32  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
33  return nullptr;
34  }
35 
36  std::string wordstr_box_str;
37  int left = 0, top = 0, right = 0, bottom = 0;
38 
39  bool first_line = true;
40 
41  LTRResultIterator *res_it = GetLTRIterator();
42  while (!res_it->Empty(RIL_BLOCK)) {
43  if (res_it->Empty(RIL_WORD)) {
44  res_it->Next(RIL_WORD);
45  continue;
46  }
47 
48  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
49  if (!first_line) {
50  wordstr_box_str += "\n\t " + std::to_string(right + 1);
51  wordstr_box_str += " " + std::to_string(image_height_ - bottom);
52  wordstr_box_str += " " + std::to_string(right + 5);
53  wordstr_box_str += " " + std::to_string(image_height_ - top);
54  wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
55  wordstr_box_str += "\n";
56  } else {
57  first_line = false;
58  }
59  // Use bounding box for whole line for WordStr
60  res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
61  wordstr_box_str += "WordStr " + std::to_string(left);
62  wordstr_box_str += " " + std::to_string(image_height_ - bottom);
63  wordstr_box_str += " " + std::to_string(right);
64  wordstr_box_str += " " + std::to_string(image_height_ - top);
65  wordstr_box_str += " " + std::to_string(page_number); // word
66  wordstr_box_str += " #";
67  }
68  do {
69  wordstr_box_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
70  wordstr_box_str += " ";
71  res_it->Next(RIL_WORD);
72  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
73  }
74 
75  if (left != 0 && top != 0 && right != 0 && bottom != 0) {
76  wordstr_box_str += "\n\t " + std::to_string(right + 1);
77  wordstr_box_str += " " + std::to_string(image_height_ - bottom);
78  wordstr_box_str += " " + std::to_string(right + 5);
79  wordstr_box_str += " " + std::to_string(image_height_ - top);
80  wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
81  wordstr_box_str += "\n";
82  }
83  char *ret = new char[wordstr_box_str.length() + 1];
84  strcpy(ret, wordstr_box_str.c_str());
85  delete res_it;
86  return ret;
87 }

◆ Init() [1/4]

int tesseract::TessBaseAPI::Init ( const char *  data,
int  data_size,
const char *  language,
OcrEngineMode  mode,
char **  configs,
int  configs_size,
const std::vector< std::string > *  vars_vec,
const std::vector< std::string > *  vars_values,
bool  set_only_non_debug_params,
FileReader  reader 
)

Definition at line 375 of file baseapi.cpp.

378  {
379  if (language == nullptr) {
380  language = "";
381  }
382  if (data == nullptr) {
383  data = "";
384  }
385  std::string datapath = data_size == 0 ? data : language;
386  // If the datapath, OcrEngineMode or the language have changed - start again.
387  // Note that the language_ field stores the last requested language that was
388  // initialized successfully, while tesseract_->lang stores the language
389  // actually used. They differ only if the requested language was nullptr, in
390  // which case tesseract_->lang is set to the Tesseract default ("eng").
391  if (tesseract_ != nullptr &&
392  (datapath_.empty() || language_.empty() || datapath_ != datapath ||
393  last_oem_requested_ != oem || (language_ != language && tesseract_->lang != language))) {
394  delete tesseract_;
395  tesseract_ = nullptr;
396  }
397 #ifdef USE_OPENCL
398  OpenclDevice od;
399  od.InitEnv();
400 #endif
401  bool reset_classifier = true;
402  if (tesseract_ == nullptr) {
403  reset_classifier = false;
404  tesseract_ = new Tesseract;
405  if (reader != nullptr) {
406  reader_ = reader;
407  }
408  TessdataManager mgr(reader_);
409  if (data_size != 0) {
410  mgr.LoadMemBuffer(language, data, data_size);
411  }
412  if (tesseract_->init_tesseract(datapath.c_str(), output_file_.c_str(), language, oem, configs,
413  configs_size, vars_vec, vars_values, set_only_non_debug_params,
414  &mgr) != 0) {
415  return -1;
416  }
417  }
418 
419  // Update datapath and language requested for the last valid initialization.
420  datapath_ = datapath;
421  if (datapath_.empty() && !tesseract_->datadir.empty()) {
423  }
424 
425  language_ = language;
427 
428 #ifndef DISABLED_LEGACY_ENGINE
429  // For same language and datapath, just reset the adaptive classifier.
430  if (reset_classifier) {
432  }
433 #endif // ndef DISABLED_LEGACY_ENGINE
434  return 0;
435 }
OcrEngineMode oem() const
Definition: baseapi.h:717

◆ Init() [2/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language 
)
inline

Definition at line 216 of file baseapi.h.

216  {
217  return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
218  false);
219  }
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:365

◆ Init() [3/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem,
char **  configs,
int  configs_size,
const std::vector< std::string > *  vars_vec,
const std::vector< std::string > *  vars_values,
bool  set_only_non_debug_params 
)

Instances are now mostly thread-safe and totally independent, but some global parameters remain. Basically it is safe to use multiple TessBaseAPIs in different threads in parallel, UNLESS: you use SetVariable on some of the Params in classify and textord. If you do, then the effect will be to change it for all your instances.

Start tesseract. Returns zero on success and -1 on failure. NOTE that the only members that may be called before Init are those listed above here in the class definition.

The datapath must be the name of the tessdata directory. The language is (usually) an ISO 639-3 string; nullptr will default to eng. It is entirely safe (and eventually will be efficient too) to call Init multiple times on the same instance to change language, or just to reset the classifier. The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating that multiple languages are to be loaded. E.g., hin+eng will load Hindi and English. Languages may specify internally that they want to be loaded with one or more other languages, so the ~ sign is available to override that. E.g., if hin were set to load eng by default, then hin+~eng would force loading only hin. The number of loaded languages is limited only by memory, with the caveat that loading additional languages will impact both speed and accuracy, as there is more work to do to decide on the applicable language, and there is more chance of hallucinating incorrect words. WARNING: On changing languages, all Tesseract parameters are reset back to their default values. (Which may vary between languages.) If you have a rare need to set a Variable that controls initialization for a second call to Init you should explicitly call End() and then use SetVariable before Init. This is only a very rare use case, since there are very few uses that require any parameters to be set before Init.

If set_only_non_debug_params is true, only params that do not contain "debug" in the name will be set.

The datapath must be the name of the data directory or some other file in which the data directory resides (for instance argv[0].) The language is (usually) an ISO 639-3 string or nullptr will default to eng. If numeric_mode is true, then only digits and Roman numerals will be returned.

Returns
0 on success and -1 on initialization failure.

Definition at line 365 of file baseapi.cpp.

367  {
368  return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, vars_values,
369  set_only_non_debug_params, nullptr);
370 }

◆ Init() [4/4]

int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem 
)
inline

Definition at line 213 of file baseapi.h.

213  {
214  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
215  }

◆ InitForAnalysePage()

void tesseract::TessBaseAPI::InitForAnalysePage ( )

Init only for page layout analysis. Use only for calls to SetImage and AnalysePage. Calls that attempt recognition will generate an error.

Definition at line 480 of file baseapi.cpp.

480  {
481  if (tesseract_ == nullptr) {
482  tesseract_ = new Tesseract;
483 #ifndef DISABLED_LEGACY_ENGINE
485 #endif
486  }
487 }

◆ InternalSetImage()

bool tesseract::TessBaseAPI::InternalSetImage ( )
protected

Common code for setting the image. Returns true if Init has been called.

Common code for setting the image.

Definition at line 1995 of file baseapi.cpp.

1995  {
1996  if (tesseract_ == nullptr) {
1997  tprintf("Please call Init before attempting to set an image.\n");
1998  return false;
1999  }
2000  if (thresholder_ == nullptr) {
2001  thresholder_ = new ImageThresholder;
2002  }
2003  ClearResults();
2004  return true;
2005 }

◆ IsValidCharacter()

bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character) const

Definition at line 1928 of file baseapi.cpp.

1928  {
1929  return tesseract_->unicharset.contains_unichar(utf8_character);
1930 }
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:695

◆ IsValidWord()

int tesseract::TessBaseAPI::IsValidWord ( const char *  word) const

Check whether a word is valid according to Tesseract's language model.

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 1924 of file baseapi.cpp.

1924  {
1925  return tesseract_->getDict().valid_word(word);
1926 }
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:801

◆ MeanTextConf()

int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 1737 of file baseapi.cpp.

1737  {
1738  int *conf = AllWordConfidences();
1739  if (!conf) {
1740  return 0;
1741  }
1742  int sum = 0;
1743  int *pt = conf;
1744  while (*pt >= 0) {
1745  sum += *pt++;
1746  }
1747  if (pt != conf) {
1748  sum /= pt - conf;
1749  }
1750  delete[] conf;
1751  return sum;
1752 }

◆ NumDawgs()

int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2333 of file baseapi.cpp.

2333  {
2334  return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
2335 }
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:381

◆ oem()

OcrEngineMode tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 717 of file baseapi.h.

717  {
718  return last_oem_requested_;
719  }

◆ operator=()

TessBaseAPI& tesseract::TessBaseAPI::operator= ( TessBaseAPI const &  )
delete

◆ PrintFontsTable()

void tesseract::TessBaseAPI::PrintFontsTable ( FILE *  fp) const

Print Tesseract fonts table to the given file.

Definition at line 335 of file baseapi.cpp.

335  {
336  const int fontinfo_size = tesseract_->get_fontinfo_table().size();
337  for (int font_index = 1; font_index < fontinfo_size; ++font_index) {
338  FontInfo font = tesseract_->get_fontinfo_table().at(font_index);
339  fprintf(fp, "ID=%3d: %s is_italic=%s is_bold=%s"
340  " is_fixed_pitch=%s is_serif=%s is_fraktur=%s\n",
341  font_index, font.name,
342  font.is_italic() ? "true" : "false",
343  font.is_bold() ? "true" : "false",
344  font.is_fixed_pitch() ? "true" : "false",
345  font.is_serif() ? "true" : "false",
346  font.is_fraktur() ? "true" : "false");
347  }
348 }
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:324

◆ PrintVariables()

void tesseract::TessBaseAPI::PrintVariables ( FILE *  fp) const

Print Tesseract parameters to the given file.

Definition at line 353 of file baseapi.cpp.

353  {
355 }
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:164

◆ ProcessPage()

bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1229 of file baseapi.cpp.

1231  {
1232  SetInputName(filename);
1233  SetImage(pix);
1234  bool failed = false;
1235 
1236  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
1237  // Disabled character recognition
1238  if (! std::unique_ptr<const PageIterator>(AnalyseLayout())) {
1239  failed = true;
1240  }
1241  } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
1242  failed = FindLines() != 0;
1243  } else if (timeout_millisec > 0) {
1244  // Running with a timeout.
1245  ETEXT_DESC monitor;
1246  monitor.cancel = nullptr;
1247  monitor.cancel_this = nullptr;
1248  monitor.set_deadline_msecs(timeout_millisec);
1249 
1250  // Now run the main recognition.
1251  failed = Recognize(&monitor) < 0;
1252  } else {
1253  // Normal layout and character recognition with no timeout.
1254  failed = Recognize(nullptr) < 0;
1255  }
1256 
1257  if (tesseract_->tessedit_write_images) {
1258  Pix *page_pix = GetThresholdedImage();
1259  std::string output_filename = output_file_ + ".processed";
1260  if (page_index > 0) {
1261  output_filename += std::to_string(page_index);
1262  }
1263  output_filename += ".tif";
1264  pixWrite(output_filename.c_str(), page_pix, IFF_TIFF_G4);
1265  pixDestroy(&page_pix);
1266  }
1267 
1268  if (failed && retry_config != nullptr && retry_config[0] != '\0') {
1269  // Save current config variables before switching modes.
1270  FILE *fp = fopen(kOldVarsFile, "wb");
1271  if (fp == nullptr) {
1272  tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
1273  } else {
1274  PrintVariables(fp);
1275  fclose(fp);
1276  }
1277  // Switch to alternate mode for retry.
1278  ReadConfigFile(retry_config);
1279  SetImage(pix);
1280  Recognize(nullptr);
1281  // Restore saved config variables.
1282  ReadConfigFile(kOldVarsFile);
1283  }
1284 
1285  if (renderer && !failed) {
1286  failed = !renderer->AddImage(this);
1287  }
1288 
1289  return !failed;
1290 }
struct ETEXT_DESC ETEXT_DESC
Definition: capi.h:137
@ PSM_OSD_ONLY
Orientation and script detection only.
Definition: publictypes.h:160
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:163
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:353
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:573
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:494
Pix * GetThresholdedImage()
Definition: baseapi.cpp:628

◆ ProcessPages()

bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not nullptr, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, will only process that single page. Works for multi-page tiff file, or filelist.

Returns true if successful, false on error.

Definition at line 1068 of file baseapi.cpp.

1069  {
1070  bool result = ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1071 #ifndef DISABLED_LEGACY_ENGINE
1072  if (result) {
1073  if (tesseract_->tessedit_train_from_boxes && !tesseract_->WriteTRFile(output_file_.c_str())) {
1074  tprintf("Write of TR file failed: %s\n", output_file_.c_str());
1075  return false;
1076  }
1077  }
1078 #endif // ndef DISABLED_LEGACY_ENGINE
1079  return result;
1080 }
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1102
bool WriteTRFile(const char *filename)
Definition: blobclass.cpp:60

◆ ProcessPagesInternal()

bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Definition at line 1102 of file baseapi.cpp.

1103  {
1104  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1105  if (stdInput) {
1106 #ifdef WIN32
1107  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1108  tprintf("ERROR: cin to binary: %s", strerror(errno));
1109 #endif // WIN32
1110  }
1111 
1112  if (stream_filelist) {
1113  return ProcessPagesFileList(stdin, nullptr, retry_config, timeout_millisec, renderer,
1114  tesseract_->tessedit_page_number);
1115  }
1116 
1117  // At this point we are officially in autodetection territory.
1118  // That means any data in stdin must be buffered, to make it
1119  // seekable.
1120  std::string buf;
1121  const l_uint8 *data = nullptr;
1122  if (stdInput) {
1123  buf.assign((std::istreambuf_iterator<char>(std::cin)), (std::istreambuf_iterator<char>()));
1124  data = reinterpret_cast<const l_uint8 *>(buf.data());
1125  } else if (strstr(filename, "://") != nullptr) {
1126  // Get image or image list by URL.
1127 #ifdef HAVE_LIBCURL
1128  CURL *curl = curl_easy_init();
1129  if (curl == nullptr) {
1130  fprintf(stderr, "Error, curl_easy_init failed\n");
1131  return false;
1132  } else {
1133  CURLcode curlcode;
1134  auto error = [curl, &curlcode](const char *function) {
1135  fprintf(stderr, "Error, %s failed with error %s\n", function, curl_easy_strerror(curlcode));
1136  curl_easy_cleanup(curl);
1137  return false;
1138  };
1139  curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename);
1140  if (curlcode != CURLE_OK) {
1141  return error("curl_easy_setopt");
1142  }
1143  curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
1144  if (curlcode != CURLE_OK) {
1145  return error("curl_easy_setopt");
1146  }
1147  curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf);
1148  if (curlcode != CURLE_OK) {
1149  return error("curl_easy_setopt");
1150  }
1151  curlcode = curl_easy_perform(curl);
1152  if (curlcode != CURLE_OK) {
1153  return error("curl_easy_perform");
1154  }
1155  curl_easy_cleanup(curl);
1156  data = reinterpret_cast<const l_uint8 *>(buf.data());
1157  }
1158 #else
1159  fprintf(stderr, "Error, this tesseract has no URL support\n");
1160  return false;
1161 #endif
1162  } else {
1163  // Check whether the input file can be read.
1164  if (FILE *file = fopen(filename, "rb")) {
1165  fclose(file);
1166  } else {
1167  fprintf(stderr, "Error, cannot read input file %s: %s\n", filename, strerror(errno));
1168  return false;
1169  }
1170  }
1171 
1172  // Here is our autodetection
1173  int format;
1174  int r =
1175  (data != nullptr) ? findFileFormatBuffer(data, &format) : findFileFormat(filename, &format);
1176 
1177  // Maybe we have a filelist
1178  if (r != 0 || format == IFF_UNKNOWN) {
1179  std::string s;
1180  if (data != nullptr) {
1181  s = buf.c_str();
1182  } else {
1183  std::ifstream t(filename);
1184  std::string u((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
1185  s = u.c_str();
1186  }
1187  return ProcessPagesFileList(nullptr, &s, retry_config, timeout_millisec, renderer,
1188  tesseract_->tessedit_page_number);
1189  }
1190 
1191  // Maybe we have a TIFF which is potentially multipage
1192  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || format == IFF_TIFF_RLE ||
1193  format == IFF_TIFF_G3 || format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1194 #if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76
1195  format == IFF_TIFF_JPEG ||
1196 #endif
1197  format == IFF_TIFF_ZIP);
1198 
1199  // Fail early if we can, before producing any output
1200  Pix *pix = nullptr;
1201  if (!tiff) {
1202  pix = (data != nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename);
1203  if (pix == nullptr) {
1204  return false;
1205  }
1206  }
1207 
1208  // Begin the output
1209  if (renderer && !renderer->BeginDocument(document_title.c_str())) {
1210  pixDestroy(&pix);
1211  return false;
1212  }
1213 
1214  // Produce output
1215  r = (tiff) ? ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, timeout_millisec,
1216  renderer, tesseract_->tessedit_page_number)
1217  : ProcessPage(pix, 0, filename, retry_config, timeout_millisec, renderer);
1218 
1219  // Clean up memory as needed
1220  pixDestroy(&pix);
1221 
1222  // End the output
1223  if (!r || (renderer && !renderer->EndDocument())) {
1224  return false;
1225  }
1226  return true;
1227 }
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1229

◆ ReadConfigFile()

void tesseract::TessBaseAPI::ReadConfigFile ( const char *  filename)

Read a "config" file containing a set of param, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name. Note: only non-init params will be set (init params are set by Init()).

Read a "config" file containing a set of parameter name, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name.

Definition at line 494 of file baseapi.cpp.

494  {
496 }
@ SET_PARAM_CONSTRAINT_NON_INIT_ONLY
Definition: params.h:42
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:48

◆ ReadDebugConfigFile()

void tesseract::TessBaseAPI::ReadDebugConfigFile ( const char *  filename)

Same as above, but only set debug params from the given config file.

Definition at line 499 of file baseapi.cpp.

499  {
501 }
@ SET_PARAM_CONSTRAINT_DEBUG_ONLY
Definition: params.h:40

◆ Recognize()

int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC *  monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 831 of file baseapi.cpp.

831  {
832  if (tesseract_ == nullptr) {
833  return -1;
834  }
835  if (FindLines() != 0) {
836  return -1;
837  }
838  delete page_res_;
839  if (block_list_->empty()) {
840  page_res_ = new PAGE_RES(false, block_list_, &tesseract_->prev_word_best_choice_);
841  return 0; // Empty page.
842  }
843 
845  recognition_done_ = true;
846 #ifndef DISABLED_LEGACY_ENGINE
847  if (tesseract_->tessedit_resegment_from_line_boxes) {
849  } else if (tesseract_->tessedit_resegment_from_boxes) {
851  } else
852 #endif // ndef DISABLED_LEGACY_ENGINE
853  {
854  page_res_ =
856  }
857 
858  if (page_res_ == nullptr) {
859  return -1;
860  }
861 
862  if (tesseract_->tessedit_train_line_recognizer) {
864  return -1;
865  }
867  return 0;
868  }
869 #ifndef DISABLED_LEGACY_ENGINE
870  if (tesseract_->tessedit_make_boxes_from_boxes) {
872  return 0;
873  }
874 #endif // ndef DISABLED_LEGACY_ENGINE
875 
876  int result = 0;
877  if (tesseract_->interactive_display_mode) {
878 #ifndef GRAPHICS_DISABLED
880 #endif // !GRAPHICS_DISABLED
881  // The page_res is invalid after an interactive session, so cleanup
882  // in a way that lets us continue to the next page without crashing.
883  delete page_res_;
884  page_res_ = nullptr;
885  return -1;
886 #ifndef DISABLED_LEGACY_ENGINE
887  } else if (tesseract_->tessedit_train_from_boxes) {
888  std::string fontname;
889  ExtractFontName(output_file_.c_str(), &fontname);
891  } else if (tesseract_->tessedit_ambigs_training) {
892  FILE *training_output_file = tesseract_->init_recog_training(input_file_.c_str());
893  // OCR the page segmented into words by tesseract.
895  training_output_file);
896  fclose(training_output_file);
897 #endif // ndef DISABLED_LEGACY_ENGINE
898  } else {
899  // Now run the main recognition.
900  bool wait_for_text = true;
901  GetBoolVariable("paragraph_text_based", &wait_for_text);
902  if (!wait_for_text) {
903  DetectParagraphs(false);
904  }
905  if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
906  if (wait_for_text) {
907  DetectParagraphs(true);
908  }
909  } else {
910  result = -1;
911  }
912  }
913  return result;
914 }
bool TrainLineRecognizer(const char *input_imagename, const std::string &output_basename, BLOCK_LIST *block_list)
Definition: linerec.cpp:41
PAGE_RES * ApplyBoxes(const char *filename, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:110
void ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res)
void recog_training_segmented(const char *filename, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void CorrectClassifyWords(PAGE_RES *page_res)
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:354
FILE * init_recog_training(const char *filename)
bool AnyLSTMLang() const
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:287
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:387

◆ set_min_orientation_margin()

void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2242 of file baseapi.cpp.

2242  {
2243  tesseract_->min_orientation_margin.set_value(margin);
2244 }

◆ SetDebugVariable()

bool tesseract::TessBaseAPI::SetDebugVariable ( const char *  name,
const char *  value 
)

Definition at line 284 of file baseapi.cpp.

284  {
285  if (tesseract_ == nullptr) {
286  tesseract_ = new Tesseract;
287  }
289 }
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:81

◆ SetDictFunc()

void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 1969 of file baseapi.cpp.

1969  {
1970  if (tesseract_ != nullptr) {
1972  }
1973 }
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:345

◆ SetImage() [1/2]

void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Copies the image buffer and converts to Pix. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 573 of file baseapi.cpp.

574  {
575  if (InternalSetImage()) {
576  thresholder_->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line);
578  }
579 }
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:70

◆ SetImage() [2/2]

void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract takes its own copy of the image, so it need not persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. Tesseract uses Pix as its internal representation and it is therefore more efficient to provide a Pix directly.

Definition at line 597 of file baseapi.cpp.

597  {
598  if (InternalSetImage()) {
599  if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
600  // remove alpha channel from png
601  Pix *p1 = pixRemoveAlpha(pix);
602  pixSetSpp(p1, 3);
603  (void)pixCopy(pix, p1);
604  pixDestroy(&p1);
605  }
606  thresholder_->SetImage(pix);
608  }
609 }

◆ SetInputImage()

void tesseract::TessBaseAPI::SetInputImage ( Pix *  pix)

Definition at line 917 of file baseapi.cpp.

917  {
919 }
void set_pix_original(Image original_pix)

◆ SetInputName()

void tesseract::TessBaseAPI::SetInputName ( const char *  name)

Set the name of the input file. Needed for training and reading a UNLV zone file, and for searchable PDF output.

Set the name of the input file. Needed only for training and loading a UNLV zone file.

Definition at line 267 of file baseapi.cpp.

267  {
268  input_file_ = name ? name : "";
269 }

◆ SetOutputName()

void tesseract::TessBaseAPI::SetOutputName ( const char *  name)

Set the name of the bonus output files. Needed only for debugging.

Set the name of the output files. Needed only for debugging.

Definition at line 272 of file baseapi.cpp.

272  {
273  output_file_ = name ? name : "";
274 }

◆ SetPageSegMode()

void tesseract::TessBaseAPI::SetPageSegMode ( PageSegMode  mode)

Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Set the current page segmentation mode. Defaults to PSM_AUTO. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Definition at line 508 of file baseapi.cpp.

508  {
509  if (tesseract_ == nullptr) {
510  tesseract_ = new Tesseract;
511  }
512  tesseract_->tessedit_pageseg_mode.set_value(mode);
513 }

◆ SetProbabilityInContextFunc()

void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 1983 of file baseapi.cpp.

1983  {
1984  if (tesseract_ != nullptr) {
1986  // Set it for the sublangs too.
1987  int num_subs = tesseract_->num_sub_langs();
1988  for (int i = 0; i < num_subs; ++i) {
1990  }
1991  }
1992 }
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:354

◆ SetRectangle()

void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Definition at line 616 of file baseapi.cpp.

616  {
617  if (thresholder_ == nullptr) {
618  return;
619  }
620  thresholder_->SetRectangle(left, top, width, height);
621  ClearResults();
622 }
void SetRectangle(int left, int top, int width, int height)

◆ SetSourceResolution()

void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 581 of file baseapi.cpp.

581  {
582  if (thresholder_) {
584  } else {
585  tprintf("Please call SetImage before SetSourceResolution.\n");
586  }
587 }
void SetSourceYResolution(int ppi)
Definition: thresholder.h:95

◆ SetVariable()

bool tesseract::TessBaseAPI::SetVariable ( const char *  name,
const char *  value 
)

Set the value of an internal "parameter." Supply the name of the parameter and the value as a string, just as you would in a config file. Returns false if the name lookup failed. Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. SetVariable may be used before Init, but settings will revert to defaults on End().

Note: Must be called after Init(). Only works for non-init variables (init variables should be passed to Init()).

Definition at line 276 of file baseapi.cpp.

276  {
277  if (tesseract_ == nullptr) {
278  tesseract_ = new Tesseract;
279  }
281  tesseract_->params());
282 }

◆ tesseract()

Tesseract* tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 713 of file baseapi.h.

713  {
714  return tesseract_;
715  }

◆ TesseractRect()

char * tesseract::TessBaseAPI::TesseractRect ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height 
)

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a 1 represents WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Note that TesseractRect is the simplified convenience interface. For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, and one or more of the Get*Text functions below.

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a one pixel is WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Definition at line 536 of file baseapi.cpp.

537  {
538  if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) {
539  return nullptr; // Nothing worth doing.
540  }
541 
542  // Since this original api didn't give the exact size of the image,
543  // we have to invent a reasonable value.
544  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
545  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, bytes_per_pixel,
546  bytes_per_line);
547  SetRectangle(left, top, width, height);
548 
549  return GetUTF8Text();
550 }
const int kMinRectSize
Definition: baseapi.cpp:104
void SetRectangle(int left, int top, int width, int height)
Definition: baseapi.cpp:616

◆ TextLength()

int tesseract::TessBaseAPI::TextLength ( int *  blob_count) const
protected

Return the length of the output text string, as UTF8, assuming one newline per line and one per block, with a terminator, and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Return the length of the output text string, as UTF8, assuming liberally two spacing marks after each word (as paragraphs end with two newlines), and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Definition at line 2193 of file baseapi.cpp.

2193  {
2194  if (tesseract_ == nullptr || page_res_ == nullptr) {
2195  return 0;
2196  }
2197 
2198  PAGE_RES_IT page_res_it(page_res_);
2199  int total_length = 2;
2200  int total_blobs = 0;
2201  // Iterate over the data structures to extract the recognition result.
2202  for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
2203  WERD_RES *word = page_res_it.word();
2204  WERD_CHOICE *choice = word->best_choice;
2205  if (choice != nullptr) {
2206  total_blobs += choice->length() + 2;
2207  total_length += choice->unichar_string().length() + 2;
2208  for (int i = 0; i < word->reject_map.length(); ++i) {
2209  if (word->reject_map[i].rejected()) {
2210  ++total_length;
2211  }
2212  }
2213  }
2214  }
2215  if (blob_count != nullptr) {
2216  *blob_count = total_blobs;
2217  }
2218  return total_length;
2219 }

◆ Threshold()

bool tesseract::TessBaseAPI::Threshold ( Pix **  pix)
protected virtual

Run the thresholder to make the thresholded image. If pix is not nullptr, the source is thresholded to pix instead of the internal IMAGE.

Run the thresholder to make the thresholded image, returned in pix, which must not be nullptr. *pix must be initialized to nullptr, or point to an existing pixDestroyable Pix. The usual argument to Threshold is Tesseract::mutable_pix_binary().

Definition at line 2013 of file baseapi.cpp.

2013  {
2014  ASSERT_HOST(pix != nullptr);
2015  if (*pix != nullptr) {
2016  pixDestroy(pix);
2017  }
2018  // Zero resolution messes up the algorithms, so make sure it is credible.
2019  int user_dpi = 0;
2020  GetIntVariable("user_defined_dpi", &user_dpi);
2021  int y_res = thresholder_->GetScaledYResolution();
2022  if (user_dpi && (user_dpi < kMinCredibleResolution || user_dpi > kMaxCredibleResolution)) {
2023  tprintf(
2024  "Warning: User defined image dpi is outside of expected range "
2025  "(%d - %d)!\n",
2027  }
2028  // Always use user defined dpi
2029  if (user_dpi) {
2031  } else if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
2032  if (y_res != 0) {
2033  // Show warning only if a resolution was given.
2034  tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
2035  y_res, kMinCredibleResolution);
2036  }
2038  }
2039 
2040  auto thresholding_method = static_cast<ThresholdMethod>(static_cast<int>(tesseract_->thresholding_method));
2041 
2042  if (thresholding_method == ThresholdMethod::Otsu) {
2043  Image pix_binary(*pix);
2044  if (!thresholder_->ThresholdToPix(&pix_binary)) {
2045  return false;
2046  }
2047  *pix = pix_binary;
2048 
2049  if (!thresholder_->IsBinary()) {
2052  } else {
2053  tesseract_->set_pix_thresholds(nullptr);
2054  tesseract_->set_pix_grey(nullptr);
2055  }
2056  } else {
2057  auto [ok, pix_grey, pix_binary, pix_thresholds] = thresholder_->Threshold(this, thresholding_method);
2058 
2059  if (!ok) {
2060  return false;
2061  }
2062  *pix = pix_binary;
2063 
2064  tesseract_->set_pix_thresholds(pix_thresholds);
2065  tesseract_->set_pix_grey(pix_grey);
2066  }
2067 
2069  &image_height_);
2070 
2071  // Set the internal resolution that is used for layout parameters from the
2072  // estimated resolution, rather than the image resolution, which may be
2073  // fabricated, but we will use the image resolution, if there is one, to
2074  // report output point sizes.
2075  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
2077  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
2078  tprintf(
2079  "Estimated internal resolution %d out of range! "
2080  "Corrected to %d.\n",
2081  thresholder_->GetScaledEstimatedResolution(), estimated_res);
2082  }
2083  tesseract_->set_source_resolution(estimated_res);
2084  return true;
2085 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
constexpr int kMaxCredibleResolution
Definition: publictypes.h:40
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:110
constexpr int kMinCredibleResolution
Definition: publictypes.h:38
void set_pix_grey(Image grey_pix)
void set_pix_thresholds(Image thresholds)
int GetScaledEstimatedResolution() const
Definition: thresholder.h:115
virtual Image GetPixRectThresholds()
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
virtual std::tuple< bool, Image, Image, Image > Threshold(TessBaseAPI *api, ThresholdMethod method)
virtual Image GetPixRectGrey()
virtual bool ThresholdToPix(Image *pix)
Returns false on error.
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:84

◆ Version()

const char * tesseract::TessBaseAPI::Version ( )
static

Returns the version identifier as a static string. Do not delete.

Definition at line 238 of file baseapi.cpp.

238  {
239  return TESSERACT_VERSION_STR;
240 }
#define TESSERACT_VERSION_STR
Definition: version.h:32

Member Data Documentation

◆ block_list_

BLOCK_LIST* tesseract::TessBaseAPI::block_list_
protected

The page layout.

Definition at line 773 of file baseapi.h.

◆ datapath_

std::string tesseract::TessBaseAPI::datapath_
protected

Current location of tessdata.

Definition at line 777 of file baseapi.h.

◆ equ_detect_

EquationDetect* tesseract::TessBaseAPI::equ_detect_
protected

The equation detector.

Definition at line 769 of file baseapi.h.

◆ image_height_

int tesseract::TessBaseAPI::image_height_
protected

Definition at line 792 of file baseapi.h.

◆ image_width_

int tesseract::TessBaseAPI::image_width_
protected

Definition at line 791 of file baseapi.h.

◆ input_file_

std::string tesseract::TessBaseAPI::input_file_
protected

Name used by training code.

Definition at line 775 of file baseapi.h.

◆ language_

std::string tesseract::TessBaseAPI::language_
protected

Last initialized language.

Definition at line 778 of file baseapi.h.

◆ last_oem_requested_

OcrEngineMode tesseract::TessBaseAPI::last_oem_requested_
protected

Last ocr language mode requested.

Definition at line 779 of file baseapi.h.

◆ osd_tesseract_

Tesseract* tesseract::TessBaseAPI::osd_tesseract_
protected

For orientation & script detection.

Definition at line 768 of file baseapi.h.

◆ output_file_

std::string tesseract::TessBaseAPI::output_file_
protected

Name used by debug code.

Definition at line 776 of file baseapi.h.

◆ page_res_

PAGE_RES* tesseract::TessBaseAPI::page_res_
protected

The page-level data.

Definition at line 774 of file baseapi.h.

◆ paragraph_models_

std::vector<ParagraphModel *>* tesseract::TessBaseAPI::paragraph_models_
protected

Definition at line 772 of file baseapi.h.

◆ reader_

FileReader tesseract::TessBaseAPI::reader_
protected

Reads files from any filesystem.

Definition at line 770 of file baseapi.h.

◆ recognition_done_

bool tesseract::TessBaseAPI::recognition_done_
protected

page_res_ contains recognition data.

Definition at line 780 of file baseapi.h.

◆ rect_height_

int tesseract::TessBaseAPI::rect_height_
protected

Definition at line 790 of file baseapi.h.

◆ rect_left_

int tesseract::TessBaseAPI::rect_left_
protected

Definition at line 787 of file baseapi.h.

◆ rect_top_

int tesseract::TessBaseAPI::rect_top_
protected

Definition at line 788 of file baseapi.h.

◆ rect_width_

int tesseract::TessBaseAPI::rect_width_
protected

Definition at line 789 of file baseapi.h.

◆ tesseract_

Tesseract* tesseract::TessBaseAPI::tesseract_
protected

The underlying data object.

Definition at line 767 of file baseapi.h.

◆ thresholder_

ImageThresholder* tesseract::TessBaseAPI::thresholder_
protected

Image thresholding module.

Definition at line 771 of file baseapi.h.


The documentation for this class was generated from the following files: