tesseract  5.0.0
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
 ~LTRResultIterator () override
 
char * GetUTF8Text (PageIteratorLevel level) const
 
void SetLineSeparator (const char *new_line)
 
void SetParagraphSeparator (const char *new_para)
 
float Confidence (PageIteratorLevel level) const
 
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
 
const char * WordRecognitionLanguage () const
 
StrongScriptDirection WordDirection () const
 
bool WordIsFromDictionary () const
 
int BlanksBeforeWord () const
 
bool WordIsNumeric () const
 
bool HasBlamerInfo () const
 
const void * GetParamsTrainingBundle () const
 
const char * GetBlamerDebug () const
 
const char * GetBlamerMisadaptionDebug () const
 
bool HasTruthString () const
 
bool EquivalentToTruth (const char *str) const
 
char * WordTruthUTF8Text () const
 
char * WordNormedUTF8Text () const
 
const char * WordLattice (int *lattice_size) const
 
bool SymbolIsSuperscript () const
 
bool SymbolIsSubscript () const
 
bool SymbolIsDropcap () const
 
- Public Member Functions inherited from tesseract::PageIterator
 PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~PageIterator ()
 
 PageIterator (const PageIterator &src)
 
const PageIterator & operator= (const PageIterator &src)
 
bool PositionedAtSameWord (const PAGE_RES_IT *other) const
 
virtual void Begin ()
 
virtual void RestartParagraph ()
 
bool IsWithinFirstTextlineOfParagraph () const
 
virtual void RestartRow ()
 
virtual bool Next (PageIteratorLevel level)
 
virtual bool IsAtBeginningOf (PageIteratorLevel level) const
 
virtual bool IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const
 
int Cmp (const PageIterator &other) const
 
void SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots)
 
bool BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBox (PageIteratorLevel level, int padding, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool Empty (PageIteratorLevel level) const
 
PolyBlockType BlockType () const
 
Pta * BlockPolygon () const
 
Pix * GetBinaryImage (PageIteratorLevel level) const
 
Pix * GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
 
bool Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
 
void RowAttributes (float *row_height, float *descenders, float *ascenders) const
 
void Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
 
void ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
 
bool SetWordBlamerBundle (BlamerBundle *blamer_bundle)
 

Protected Attributes

const char * line_separator_
 
const char * paragraph_separator_
 
- Protected Attributes inherited from tesseract::PageIterator
PAGE_RES * page_res_
 
Tesseract * tesseract_
 
PAGE_RES_IT * it_
 
WERD * word_
 
int word_length_
 
int blob_index_
 
C_BLOB_IT * cblob_it_
 
bool include_upper_dots_
 
bool include_lower_dots_
 
int scale_
 
int scaled_yres_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 

Friends

class ChoiceIterator
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::PageIterator
void BeginWord (int offset)
 

Detailed Description

Definition at line 47 of file ltrresultiterator.h.

Constructor & Destructor Documentation

◆ LTRResultIterator()

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES *  page_res,
Tesseract *  tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 29 of file ltrresultiterator.cpp.

32  : PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top, rect_width,
33  rect_height)
34  , line_separator_("\n")
35  , paragraph_separator_("\n") {}
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)

◆ ~LTRResultIterator()

tesseract::LTRResultIterator::~LTRResultIterator ( )
override default

Member Function Documentation

◆ BlanksBeforeWord()

int tesseract::LTRResultIterator::BlanksBeforeWord ( ) const

Definition at line 241 of file ltrresultiterator.cpp.

241  {
242  if (it_->word() == nullptr) {
243  return 1;
244  }
245  return it_->word()->word->space();
246 }
WERD_RES * word() const
Definition: pageres.h:763
uint8_t space() const
Definition: werd.h:100

◆ Confidence()

float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 97 of file ltrresultiterator.cpp.

97  {
98  if (it_->word() == nullptr) {
99  return 0.0f; // Already at the end!
100  }
101  float mean_certainty = 0.0f;
102  int certainty_count = 0;
103  PAGE_RES_IT res_it(*it_);
104  WERD_CHOICE *best_choice = res_it.word()->best_choice;
105  ASSERT_HOST(best_choice != nullptr);
106  switch (level) {
107  case RIL_BLOCK:
108  do {
109  best_choice = res_it.word()->best_choice;
110  ASSERT_HOST(best_choice != nullptr);
111  mean_certainty += best_choice->certainty();
112  ++certainty_count;
113  res_it.forward();
114  } while (res_it.block() == res_it.prev_block());
115  break;
116  case RIL_PARA:
117  do {
118  best_choice = res_it.word()->best_choice;
119  ASSERT_HOST(best_choice != nullptr);
120  mean_certainty += best_choice->certainty();
121  ++certainty_count;
122  res_it.forward();
123  } while (res_it.block() == res_it.prev_block() &&
124  res_it.row()->row->para() == res_it.prev_row()->row->para());
125  break;
126  case RIL_TEXTLINE:
127  do {
128  best_choice = res_it.word()->best_choice;
129  ASSERT_HOST(best_choice != nullptr);
130  mean_certainty += best_choice->certainty();
131  ++certainty_count;
132  res_it.forward();
133  } while (res_it.row() == res_it.prev_row());
134  break;
135  case RIL_WORD:
136  mean_certainty += best_choice->certainty();
137  ++certainty_count;
138  break;
139  case RIL_SYMBOL:
140  mean_certainty += best_choice->certainty(blob_index_);
141  ++certainty_count;
142  }
143  if (certainty_count > 0) {
144  mean_certainty /= certainty_count;
145  return ClipToRange(100 + 5 * mean_certainty, 0.0f, 100.0f);
146  }
147  return 0.0f;
148 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:110

◆ EquivalentToTruth()

bool tesseract::LTRResultIterator::EquivalentToTruth ( const char *  str) const

Definition at line 298 of file ltrresultiterator.cpp.

298  {
299  if (!HasTruthString()) {
300  return false;
301  }
302  ASSERT_HOST(it_->word()->uch_set != nullptr);
303  WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
304  return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
305 }
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
BlamerBundle * blamer_bundle
Definition: pageres.h:250
const UNICHARSET * uch_set
Definition: pageres.h:201

◆ GetBlamerDebug()

const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 275 of file ltrresultiterator.cpp.

275  {
276  return it_->word()->blamer_bundle->debug().c_str();
277 }
const std::string & debug() const
Definition: blamer.h:140

◆ GetBlamerMisadaptionDebug()

const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug ( ) const

Definition at line 281 of file ltrresultiterator.cpp.

281  {
282  return it_->word()->blamer_bundle->misadaption_debug().c_str();
283 }
const std::string & misadaption_debug() const
Definition: blamer.h:143

◆ GetParamsTrainingBundle()

const void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 266 of file ltrresultiterator.cpp.

266  {
267  return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr)
268  ? &(it_->word()->blamer_bundle->params_training_bundle())
269  : nullptr;
270 }
const tesseract::ParamsTrainingBundle & params_training_bundle() const
Definition: blamer.h:176

◆ GetUTF8Text()

char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Definition at line 44 of file ltrresultiterator.cpp.

44  {
45  if (it_->word() == nullptr) {
46  return nullptr; // Already at the end!
47  }
48  std::string text;
49  PAGE_RES_IT res_it(*it_);
50  WERD_CHOICE *best_choice = res_it.word()->best_choice;
51  ASSERT_HOST(best_choice != nullptr);
52  if (level == RIL_SYMBOL) {
53  text = res_it.word()->BestUTF8(blob_index_, false);
54  } else if (level == RIL_WORD) {
55  text = best_choice->unichar_string();
56  } else {
57  bool eol = false; // end of line?
58  bool eop = false; // end of paragraph?
59  do { // for each paragraph in a block
60  do { // for each text line in a paragraph
61  do { // for each word in a text line
62  best_choice = res_it.word()->best_choice;
63  ASSERT_HOST(best_choice != nullptr);
64  text += best_choice->unichar_string();
65  text += " ";
66  res_it.forward();
67  eol = res_it.row() != res_it.prev_row();
68  } while (!eol);
69  text.resize(text.length() - 1);
70  text += line_separator_;
71  eop = res_it.block() != res_it.prev_block() ||
72  res_it.row()->row->para() != res_it.prev_row()->row->para();
73  } while (level != RIL_TEXTLINE && !eop);
74  if (eop) {
75  text += paragraph_separator_;
76  }
77  } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
78  }
79  int length = text.length() + 1;
80  char *result = new char[length];
81  strncpy(result, text.c_str(), length);
82  return result;
83 }

◆ HasBlamerInfo()

bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 258 of file ltrresultiterator.cpp.

258  {
259  return it_->word() != nullptr && it_->word()->blamer_bundle != nullptr &&
260  it_->word()->blamer_bundle->HasDebugInfo();
261 }
bool HasDebugInfo() const
Definition: blamer.h:137

◆ HasTruthString()

bool tesseract::LTRResultIterator::HasTruthString ( ) const

Definition at line 286 of file ltrresultiterator.cpp.

286  {
287  if (it_->word() == nullptr) {
288  return false; // Already at the end!
289  }
290  if (it_->word()->blamer_bundle == nullptr || it_->word()->blamer_bundle->NoTruth()) {
291  return false; // no truth information for this word
292  }
293  return true;
294 }
bool NoTruth() const
Definition: blamer.h:134

◆ SetLineSeparator()

void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 86 of file ltrresultiterator.cpp.

86  {
87  line_separator_ = new_line;
88 }

◆ SetParagraphSeparator()

void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 91 of file ltrresultiterator.cpp.

91  {
92  paragraph_separator_ = new_para;
93 }

◆ SymbolIsDropcap()

bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 375 of file ltrresultiterator.cpp.

375  {
376  if (cblob_it_ == nullptr && it_->word() != nullptr) {
377  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
378  }
379  return false;
380 }
@ SP_DROPCAP
Definition: ratngs.h:250
WERD_CHOICE * best_choice
Definition: pageres.h:239
ScriptPos BlobPosition(unsigned index) const
Definition: ratngs.h:302

◆ SymbolIsSubscript()

bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 365 of file ltrresultiterator.cpp.

365  {
366  if (cblob_it_ == nullptr && it_->word() != nullptr) {
367  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
368  }
369  return false;
370 }
@ SP_SUBSCRIPT
Definition: ratngs.h:250

◆ SymbolIsSuperscript()

bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 355 of file ltrresultiterator.cpp.

355  {
356  if (cblob_it_ == nullptr && it_->word() != nullptr) {
357  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
358  }
359  return false;
360 }
@ SP_SUPERSCRIPT
Definition: ratngs.h:250

◆ WordDirection()

StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 213 of file ltrresultiterator.cpp.

213  {
214  if (it_->word() == nullptr) {
215  return DIR_NEUTRAL;
216  }
217  bool has_rtl = it_->word()->AnyRtlCharsInWord();
218  bool has_ltr = it_->word()->AnyLtrCharsInWord();
219  if (has_rtl && !has_ltr) {
220  return DIR_RIGHT_TO_LEFT;
221  }
222  if (has_ltr && !has_rtl) {
223  return DIR_LEFT_TO_RIGHT;
224  }
225  if (!has_ltr && !has_rtl) {
226  return DIR_NEUTRAL;
227  }
228  return DIR_MIX;
229 }
@ DIR_MIX
Definition: unichar.h:47
@ DIR_LEFT_TO_RIGHT
Definition: unichar.h:45
@ DIR_RIGHT_TO_LEFT
Definition: unichar.h:46
@ DIR_NEUTRAL
Definition: unichar.h:44
bool AnyRtlCharsInWord() const
Definition: pageres.h:394
bool AnyLtrCharsInWord() const
Definition: pageres.h:413

◆ WordFontAttributes()

const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 158 of file ltrresultiterator.cpp.

161  {
162  const char *result = nullptr;
163 
164  if (it_->word() == nullptr) {
165  // Already at the end!
166  *pointsize = 0;
167  } else {
168  float row_height =
169  it_->row()->row->x_height() + it_->row()->row->ascenders() - it_->row()->row->descenders();
170  // Convert from pixels to printers points.
171  *pointsize =
172  scaled_yres_ > 0 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5) : 0;
173 
174 #ifndef DISABLED_LEGACY_ENGINE
175  const FontInfo *font_info = it_->word()->fontinfo;
176  if (font_info) {
177  // Font information available.
178  *font_id = font_info->universal_id;
179  *is_bold = font_info->is_bold();
180  *is_italic = font_info->is_italic();
181  *is_underlined = false; // TODO(rays) fix this!
182  *is_monospace = font_info->is_fixed_pitch();
183  *is_serif = font_info->is_serif();
184  result = font_info->name;
185  }
186 #endif // ndef DISABLED_LEGACY_ENGINE
187 
188  *is_smallcaps = it_->word()->small_caps;
189  }
190 
191  if (!result) {
192  *is_bold = false;
193  *is_italic = false;
194  *is_underlined = false;
195  *is_monospace = false;
196  *is_serif = false;
197  *is_smallcaps = false;
198  *font_id = -1;
199  }
200 
201  return result;
202 }
constexpr int kPointsPerInch
Definition: publictypes.h:33
int32_t universal_id
Definition: fontinfo.h:140
float x_height() const
Definition: ocrrow.h:66
float ascenders() const
Definition: ocrrow.h:84
float descenders() const
Definition: ocrrow.h:87
const FontInfo * fontinfo
Definition: pageres.h:307
ROW_RES * row() const
Definition: pageres.h:766

◆ WordIsFromDictionary()

bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 232 of file ltrresultiterator.cpp.

232  {
233  if (it_->word() == nullptr) {
234  return false; // Already at the end!
235  }
236  int permuter = it_->word()->best_choice->permuter();
237  return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || permuter == USER_DAWG_PERM;
238 }
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:240
@ USER_DAWG_PERM
Definition: ratngs.h:242
@ FREQ_DAWG_PERM
Definition: ratngs.h:243
uint8_t permuter() const
Definition: ratngs.h:327

◆ WordIsNumeric()

bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 249 of file ltrresultiterator.cpp.

249  {
250  if (it_->word() == nullptr) {
251  return false; // Already at the end!
252  }
253  int permuter = it_->word()->best_choice->permuter();
254  return permuter == NUMBER_PERM;
255 }
@ NUMBER_PERM
Definition: ratngs.h:238

◆ WordLattice()

const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 341 of file ltrresultiterator.cpp.

341  {
342  if (it_->word() == nullptr) {
343  return nullptr; // Already at the end!
344  }
345  if (it_->word()->blamer_bundle == nullptr) {
346  return nullptr;
347  }
348  *lattice_size = it_->word()->blamer_bundle->lattice_size();
349  return it_->word()->blamer_bundle->lattice_data();
350 }
int lattice_size() const
Definition: blamer.h:166
const char * lattice_data() const
Definition: blamer.h:163

◆ WordNormedUTF8Text()

char * tesseract::LTRResultIterator::WordNormedUTF8Text ( ) const

Definition at line 322 of file ltrresultiterator.cpp.

322  {
323  if (it_->word() == nullptr) {
324  return nullptr; // Already at the end!
325  }
326  std::string ocr_text;
327  WERD_CHOICE *best_choice = it_->word()->best_choice;
328  const UNICHARSET *unicharset = it_->word()->uch_set;
329  ASSERT_HOST(best_choice != nullptr);
330  for (unsigned i = 0; i < best_choice->length(); ++i) {
331  ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
332  }
333  auto length = ocr_text.length() + 1;
334  char *result = new char[length];
335  strncpy(result, ocr_text.c_str(), length);
336  return result;
337 }

◆ WordRecognitionLanguage()

const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 205 of file ltrresultiterator.cpp.

205  {
206  if (it_->word() == nullptr || it_->word()->tesseract == nullptr) {
207  return nullptr;
208  }
209  return it_->word()->tesseract->lang.c_str();
210 }
tesseract::Tesseract * tesseract
Definition: pageres.h:278
std::string lang
Definition: ccutil.h:59

◆ WordTruthUTF8Text()

char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 309 of file ltrresultiterator.cpp.

309  {
310  if (!HasTruthString()) {
311  return nullptr;
312  }
313  std::string truth_text = it_->word()->blamer_bundle->TruthString();
314  int length = truth_text.length() + 1;
315  char *result = new char[length];
316  strncpy(result, truth_text.c_str(), length);
317  return result;
318 }
std::string TruthString() const
Definition: blamer.h:124

Friends And Related Function Documentation

◆ ChoiceIterator

friend class ChoiceIterator
friend

Definition at line 48 of file ltrresultiterator.h.

Member Data Documentation

◆ line_separator_

const char* tesseract::LTRResultIterator::line_separator_
protected

Definition at line 177 of file ltrresultiterator.h.

◆ paragraph_separator_

const char* tesseract::LTRResultIterator::paragraph_separator_
protected

Definition at line 178 of file ltrresultiterator.h.


The documentation for this class was generated from the following files: