tesseract  5.0.0
chopper.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: chopper.cpp (Formerly chopper.c)
4  * Author: Mark Seaman, OCR Technology
5  *
6  * (c) Copyright 1987, Hewlett-Packard Company.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  *****************************************************************************/
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23 
24 #include "blamer.h" // for BlamerBundle, IRR_CORRECT
25 #include "blobs.h" // for TPOINT, TBLOB, EDGEPT, TESSLINE, divisible_blob
26 #include "dict.h" // for Dict
27 #include "lm_pain_points.h" // for LMPainPoints
28 #include "lm_state.h" // for BestChoiceBundle
29 #include "matrix.h" // for MATRIX
30 #include "normalis.h" // for DENORM
31 #include "pageres.h" // for WERD_RES
32 #include "params.h" // for IntParam, BoolParam
33 #include "ratngs.h" // for BLOB_CHOICE (ptr only), BLOB_CHOICE_LIST (ptr ...
34 #include "rect.h" // for TBOX
35 #include "render.h" // for display_blob
36 #include "seam.h" // for SEAM
37 #include "split.h" // for remove_edgept
38 #include "stopper.h" // for DANGERR
39 #include "tprintf.h" // for tprintf
40 #include "wordrec.h" // for Wordrec, SegSearchPending (ptr only)
41 
42 namespace tesseract {
43 
// The cap on the number of chunks may no longer be strictly necessary, but it
// is retained so that behavior stays repeatable; it may also help speed.
static constexpr int kMaxNumChunks = 64;
47 
48 /*----------------------------------------------------------------------
49  F u n c t i o n s
50 ----------------------------------------------------------------------*/
51 
57 static int check_blob(TBLOB *blob) {
58  TESSLINE *outline;
59  EDGEPT *edgept;
60 
61  for (outline = blob->outlines; outline != nullptr; outline = outline->next) {
62  edgept = outline->loop;
63  do {
64  if (edgept == nullptr) {
65  break;
66  }
67  edgept = edgept->next;
68  } while (edgept != outline->loop);
69  if (edgept == nullptr) {
70  return 1;
71  }
72  }
73  return 0;
74 }
75 
81 static int any_shared_split_points(const std::vector<SEAM *> &seams, SEAM *seam) {
82  int length;
83  int index;
84 
85  length = seams.size();
86  for (index = 0; index < length; index++) {
87  if (seam->SharesPosition(*seams[index])) {
88  return true;
89  }
90  }
91  return false;
92 }
93 
99 static void preserve_outline(EDGEPT *start) {
100  EDGEPT *srcpt;
101 
102  if (start == nullptr) {
103  return;
104  }
105  srcpt = start;
106  do {
107  srcpt->runlength = 1;
108  srcpt = srcpt->next;
109  } while (srcpt != start);
110  srcpt->runlength = 2;
111 }
112 
113 static void preserve_outline_tree(TESSLINE *srcline) {
114  TESSLINE *outline;
115 
116  for (outline = srcline; outline != nullptr; outline = outline->next) {
117  preserve_outline(outline->loop);
118  }
119 }
120 
126 static EDGEPT *restore_outline(EDGEPT *start) {
127  EDGEPT *srcpt;
128  EDGEPT *real_start;
129 
130  if (start == nullptr) {
131  return nullptr;
132  }
133  srcpt = start;
134  do {
135  if (srcpt->runlength == 2) {
136  break;
137  }
138  srcpt = srcpt->next;
139  } while (srcpt != start);
140  real_start = srcpt;
141  do {
142  srcpt = srcpt->next;
143  if (srcpt->prev->runlength == 0) {
144  remove_edgept(srcpt->prev);
145  }
146  } while (srcpt != real_start);
147  return real_start;
148 }
149 
150 static void restore_outline_tree(TESSLINE *srcline) {
151  TESSLINE *outline;
152 
153  for (outline = srcline; outline != nullptr; outline = outline->next) {
154  outline->loop = restore_outline(outline->loop);
155  outline->start = outline->loop->pos;
156  }
157 }
158 
159 /**********************************************************************
160  * total_containment
161  *
162  * Check to see if one of these outlines is totally contained within
163  * the bounding box of the other.
164  **********************************************************************/
165 static int16_t total_containment(TBLOB *blob1, TBLOB *blob2) {
166  TBOX box1 = blob1->bounding_box();
167  TBOX box2 = blob2->bounding_box();
168  return box1.contains(box2) || box2.contains(box1);
169 }
170 
171 // Helper runs all the checks on a seam to make sure it is valid.
172 // Returns the seam if OK, otherwise deletes the seam and returns nullptr.
173 static SEAM *CheckSeam(int debug_level, int32_t blob_number, TWERD *word, TBLOB *blob,
174  TBLOB *other_blob, const std::vector<SEAM *> &seams, SEAM *seam) {
175  if (seam == nullptr || blob->outlines == nullptr || other_blob->outlines == nullptr ||
176  total_containment(blob, other_blob) || check_blob(other_blob) ||
177  !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) ||
178  any_shared_split_points(seams, seam) ||
179  !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) {
180  word->blobs.erase(word->blobs.begin() + blob_number + 1);
181  if (seam) {
182  seam->UndoSeam(blob, other_blob);
183  delete seam;
184  seam = nullptr;
185 #ifndef GRAPHICS_DISABLED
186  if (debug_level) {
187  if (debug_level > 2) {
189  }
190  tprintf("\n** seam being removed ** \n");
191  }
192 #endif
193  } else {
194  delete other_blob;
195  }
196  return nullptr;
197  }
198  return seam;
199 }
200 
207 SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob,
208  const std::vector<SEAM *> &seams) {
209  if (repair_unchopped_blobs) {
210  preserve_outline_tree(blob->outlines);
211  }
212  TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
213  // Insert it into the word.
214  word->blobs.insert(word->blobs.begin() + blob_number + 1, other_blob);
215 
216  SEAM *seam = nullptr;
217  if (prioritize_division) {
218  TPOINT location;
219  if (divisible_blob(blob, italic_blob, &location)) {
220  seam = new SEAM(0.0f, location);
221  }
222  }
223  if (seam == nullptr) {
224  seam = pick_good_seam(blob);
225  }
226  if (chop_debug) {
227  if (seam != nullptr) {
228  seam->Print("Good seam picked=");
229  } else {
230  tprintf("\n** no seam picked *** \n");
231  }
232  }
233  if (seam) {
234  seam->ApplySeam(italic_blob, blob, other_blob);
235  }
236 
237  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
238  if (seam == nullptr) {
239  if (repair_unchopped_blobs) {
240  restore_outline_tree(blob->outlines);
241  }
242  if (allow_blob_division && !prioritize_division) {
243  // If the blob can simply be divided into outlines, then do that.
244  TPOINT location;
245  if (divisible_blob(blob, italic_blob, &location)) {
246  other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
247  word->blobs.insert(word->blobs.begin() + blob_number + 1, other_blob);
248  seam = new SEAM(0.0f, location);
249  seam->ApplySeam(italic_blob, blob, other_blob);
250  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
251  }
252  }
253  }
254  if (seam != nullptr) {
255  // Make sure this seam doesn't get chopped again.
256  seam->Finalize();
257  }
258  return seam;
259 }
260 
261 SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob,
262  const std::vector<SEAM *> &seams) {
263  return attempt_blob_chop(word, word->blobs[blob_number], blob_number, italic_blob, seams);
264 }
265 
266 SEAM *Wordrec::chop_overlapping_blob(const std::vector<TBOX> &boxes, bool italic_blob,
267  WERD_RES *word_res, unsigned *blob_number) {
268  TWERD *word = word_res->chopped_word;
269  for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
270  TBLOB *blob = word->blobs[*blob_number];
271  TPOINT topleft, botright;
272  topleft.x = blob->bounding_box().left();
273  topleft.y = blob->bounding_box().top();
274  botright.x = blob->bounding_box().right();
275  botright.y = blob->bounding_box().bottom();
276 
277  TPOINT original_topleft, original_botright;
278  word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft);
279  word_res->denorm.DenormTransform(nullptr, botright, &original_botright);
280 
281  TBOX original_box =
282  TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y);
283 
284  bool almost_equal_box = false;
285  int num_overlap = 0;
286  for (auto boxe : boxes) {
287  if (original_box.overlap_fraction(boxe) > 0.125) {
288  num_overlap++;
289  }
290  if (original_box.almost_equal(boxe, 3)) {
291  almost_equal_box = true;
292  }
293  }
294 
295  TPOINT location;
296  if (divisible_blob(blob, italic_blob, &location) || (!almost_equal_box && num_overlap > 1)) {
297  SEAM *seam = attempt_blob_chop(word, blob, *blob_number, italic_blob, word_res->seam_array);
298  if (seam != nullptr) {
299  return seam;
300  }
301  }
302  }
303 
304  *blob_number = UINT_MAX;
305  return nullptr;
306 }
307 
320 SEAM *Wordrec::improve_one_blob(const std::vector<BLOB_CHOICE *> &blob_choices, DANGERR *fixpt,
321  bool split_next_to_fragment, bool italic_blob, WERD_RES *word,
322  unsigned *blob_number) {
323  float rating_ceiling = FLT_MAX;
324  SEAM *seam = nullptr;
325  do {
326  auto blob = select_blob_to_split_from_fixpt(fixpt);
327  if (chop_debug) {
328  tprintf("blob_number from fixpt = %d\n", blob);
329  }
330  bool split_point_from_dict = (blob != -1);
331  if (split_point_from_dict) {
332  fixpt->clear();
333  } else {
334  blob = select_blob_to_split(blob_choices, rating_ceiling, split_next_to_fragment);
335  }
336  if (chop_debug) {
337  tprintf("blob_number = %d\n", blob);
338  }
339  *blob_number = blob;
340  if (blob == -1) {
341  return nullptr;
342  }
343 
344  // TODO(rays) it may eventually help to allow italic_blob to be true,
345  seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob, word->seam_array);
346  if (seam != nullptr) {
347  return seam; // Success!
348  }
349  if (blob_choices[*blob_number] == nullptr) {
350  return nullptr;
351  }
352  if (!split_point_from_dict) {
353  // We chopped the worst rated blob, try something else next time.
354  rating_ceiling = blob_choices[*blob_number]->rating();
355  }
356  } while (true);
357  return seam;
358 }
359 
367 SEAM *Wordrec::chop_one_blob(const std::vector<TBOX> &boxes,
368  const std::vector<BLOB_CHOICE *> &blob_choices, WERD_RES *word_res,
369  unsigned *blob_number) {
370  if (prioritize_division) {
371  return chop_overlapping_blob(boxes, true, word_res, blob_number);
372  } else {
373  return improve_one_blob(blob_choices, nullptr, false, true, word_res, blob_number);
374  }
375 }
376 
386  int num_blobs = word->chopped_word->NumBlobs();
387  if (word->ratings == nullptr) {
388  word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
389  }
390  if (word->ratings->get(0, 0) == nullptr) {
391  // Run initial classification.
392  for (int b = 0; b < num_blobs; ++b) {
393  BLOB_CHOICE_LIST *choices = classify_piece(
394  word->seam_array, b, b, "Initial:", word->chopped_word, word->blamer_bundle);
395  word->ratings->put(b, b, choices);
396  }
397  } else {
398  // Blobs have been pre-classified. Set matrix cell for all blob choices
399  for (int col = 0; col < word->ratings->dimension(); ++col) {
400  for (int row = col;
401  row < word->ratings->dimension() && row < col + word->ratings->bandwidth(); ++row) {
402  BLOB_CHOICE_LIST *choices = word->ratings->get(col, row);
403  if (choices != nullptr) {
404  BLOB_CHOICE_IT bc_it(choices);
405  for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
406  bc_it.data()->set_matrix_cell(col, row);
407  }
408  }
409  }
410  }
411  }
412 
413  // Run Segmentation Search.
414  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
415  SegSearch(word, &best_choice_bundle, word->blamer_bundle);
416 
417  if (word->best_choice == nullptr) {
418  // SegSearch found no valid paths, so just use the leading diagonal.
420  }
421  word->RebuildBestState();
422  // If we finished without a hyphen at the end of the word, let the next word
423  // be found in the dictionary.
424  if (word->word->flag(W_EOL) && !getDict().has_hyphen_end(*word->best_choice)) {
425  getDict().reset_hyphen_vars(true);
426  }
427 
428  if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) {
429  CallFillLattice(*word->ratings, word->best_choices, *word->uch_set, word->blamer_bundle);
430  }
431  if (wordrec_debug_level > 0) {
432  tprintf("Final Ratings Matrix:\n");
433  word->ratings->print(getDict().getUnicharset());
434  }
435  word->FilterWordChoices(getDict().stopper_debug_level);
436 }
437 
445 void Wordrec::improve_by_chopping(float rating_cert_scale, WERD_RES *word,
446  BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle,
447  LMPainPoints *pain_points,
448  std::vector<SegSearchPending> *pending) {
449  unsigned blob_number;
450  do { // improvement loop.
451  // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
452  // one to chop.
453  std::vector<BLOB_CHOICE *> blob_choices;
454  int num_blobs = word->ratings->dimension();
455  for (int i = 0; i < num_blobs; ++i) {
456  BLOB_CHOICE_LIST *choices = word->ratings->get(i, i);
457  if (choices == nullptr || choices->empty()) {
458  blob_choices.push_back(nullptr);
459  } else {
460  BLOB_CHOICE_IT bc_it(choices);
461  blob_choices.push_back(bc_it.data());
462  }
463  }
464  SEAM *seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt, false, false, word,
465  &blob_number);
466  if (seam == nullptr) {
467  break;
468  }
469  // A chop has been made. We have to correct all the data structures to
470  // take into account the extra bottom-level blob.
471  // Put the seam into the seam_array and correct everything else on the
472  // word: ratings matrix (including matrix location in the BLOB_CHOICES),
473  // states in WERD_CHOICEs, and blob widths.
474  word->InsertSeam(blob_number, seam);
475  // Insert a new entry in the beam array.
476  best_choice_bundle->beam.insert(best_choice_bundle->beam.begin() + blob_number, new LanguageModelState);
477  // Fixpts are outdated, but will get recalculated.
478  best_choice_bundle->fixpt.clear();
479  // Remap existing pain points.
480  pain_points->RemapForSplit(blob_number);
481  // Insert a new pending at the chop point.
482  pending->insert(pending->begin() + blob_number, SegSearchPending());
483 
484  // Classify the two newly created blobs using ProcessSegSearchPainPoint,
485  // as that updates the pending correctly and adds new pain points.
486  MATRIX_COORD pain_point(blob_number, blob_number);
487  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word, pain_points, blamer_bundle);
488  pain_point.col = blob_number + 1;
489  pain_point.row = blob_number + 1;
490  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word, pain_points, blamer_bundle);
491  if (language_model_->language_model_ngram_on) {
492  // N-gram evaluation depends on the number of blobs in a chunk, so we
493  // have to re-evaluate everything in the word.
494  ResetNGramSearch(word, best_choice_bundle, *pending);
495  blob_number = 0;
496  }
497  // Run language model incrementally. (Except with the n-gram model on.)
498  UpdateSegSearchNodes(rating_cert_scale, blob_number, pending, word, pain_points,
499  best_choice_bundle, blamer_bundle);
500  } while (!language_model_->AcceptableChoiceFound() && word->ratings->dimension() < kMaxNumChunks);
501 
502  // If after running only the chopper best_choice is incorrect and no blame
503  // has been yet set, blame the classifier if best_choice is classifier's
504  // top choice and is a dictionary word (i.e. language model could not have
505  // helped). Otherwise blame the tradeoff between the classifier and
506  // the old language model (permuters).
507  if (word->blamer_bundle != nullptr &&
509  !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
510  bool valid_permuter = word->best_choice != nullptr &&
512  word->blamer_bundle->BlameClassifierOrLangModel(word, getDict().getUnicharset(), valid_permuter,
513  wordrec_debug_blamer);
514  }
515 }
516 
517 /**********************************************************************
518  * select_blob_to_split
519  *
520  * These are the results of the last classification. Find a likely
521  * place to apply splits. If none, return -1.
522  **********************************************************************/
523 int Wordrec::select_blob_to_split(const std::vector<BLOB_CHOICE *> &blob_choices,
524  float rating_ceiling, bool split_next_to_fragment) {
525  BLOB_CHOICE *blob_choice;
526  float worst = -FLT_MAX;
527  int worst_index = -1;
528  float worst_near_fragment = -FLT_MAX;
529  int worst_index_near_fragment = -1;
530  std::vector<const CHAR_FRAGMENT *> fragments;
531 
532  if (chop_debug) {
533  if (rating_ceiling < FLT_MAX) {
534  tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
535  } else {
536  tprintf("rating_ceiling = No Limit\n");
537  }
538  }
539 
540  if (split_next_to_fragment && blob_choices.size() > 0) {
541  fragments.resize(blob_choices.size());
542  if (blob_choices[0] != nullptr) {
543  fragments[0] = getDict().getUnicharset().get_fragment(blob_choices[0]->unichar_id());
544  } else {
545  fragments[0] = nullptr;
546  }
547  }
548 
549  for (unsigned x = 0; x < blob_choices.size(); ++x) {
550  if (blob_choices[x] == nullptr) {
551  return x;
552  } else {
553  blob_choice = blob_choices[x];
554  // Populate fragments for the following position.
555  if (split_next_to_fragment && x + 1 < blob_choices.size()) {
556  if (blob_choices[x + 1] != nullptr) {
557  fragments[x + 1] =
558  getDict().getUnicharset().get_fragment(blob_choices[x + 1]->unichar_id());
559  } else {
560  fragments[x + 1] = nullptr;
561  }
562  }
563  if (blob_choice->rating() < rating_ceiling &&
564  blob_choice->certainty() < tessedit_certainty_threshold) {
565  // Update worst and worst_index.
566  if (blob_choice->rating() > worst) {
567  worst_index = x;
568  worst = blob_choice->rating();
569  }
570  if (split_next_to_fragment) {
571  // Update worst_near_fragment and worst_index_near_fragment.
572  bool expand_following_fragment =
573  (x + 1 < blob_choices.size() && fragments[x + 1] != nullptr &&
574  !fragments[x + 1]->is_beginning());
575  bool expand_preceding_fragment =
576  (x > 0 && fragments[x - 1] != nullptr && !fragments[x - 1]->is_ending());
577  if ((expand_following_fragment || expand_preceding_fragment) &&
578  blob_choice->rating() > worst_near_fragment) {
579  worst_index_near_fragment = x;
580  worst_near_fragment = blob_choice->rating();
581  if (chop_debug) {
582  tprintf(
583  "worst_index_near_fragment=%d"
584  " expand_following_fragment=%d"
585  " expand_preceding_fragment=%d\n",
586  worst_index_near_fragment, expand_following_fragment, expand_preceding_fragment);
587  }
588  }
589  }
590  }
591  }
592  }
593  // TODO(daria): maybe a threshold of badness for
594  // worst_near_fragment would be useful.
595  return worst_index_near_fragment != -1 ? worst_index_near_fragment : worst_index;
596 }
597 
598 /**********************************************************************
599  * select_blob_to_split_from_fixpt
600  *
601  * Given the fix point from a dictionary search, if there is a single
602  * dangerous blob that maps to multiple characters, return that blob
603  * index as a place we need to split. If none, return -1.
604  **********************************************************************/
606  if (!fixpt) {
607  return -1;
608  }
609  for (auto &i : *fixpt) {
610  if (i.begin + 1 == i.end && i.dangerous && i.correct_is_ngram) {
611  return i.begin;
612  }
613  }
614  return -1;
615 }
616 
617 } // namespace tesseract
@ TBOX
@ W_EOL
end of line
Definition: werd.h:35
void remove_edgept(EDGEPT *point)
Definition: split.cpp:199
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ IRR_CORRECT
Definition: blamer.h:58
std::vector< DANGERR_INFO > DANGERR
Definition: stopper.h:47
@ TOP_CHOICE_PERM
Definition: ratngs.h:234
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:923
void display_blob(TBLOB *blob, ScrollView::Color color)
Definition: render.cpp:54
T get(ICOORD pos) const
Definition: matrix.h:268
void put(ICOORD pos, const T &thing)
Definition: matrix.h:260
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:363
IncorrectResultReason incorrect_result_reason() const
Definition: blamer.h:131
TDimension x
Definition: blobs.h:89
TDimension y
Definition: blobs.h:90
EDGEPT * next
Definition: blobs.h:200
TBOX bounding_box() const
Definition: blobs.cpp:466
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:342
TESSLINE * outlines
Definition: blobs.h:404
std::vector< TBLOB * > blobs
Definition: blobs.h:462
unsigned NumBlobs() const
Definition: blobs.h:449
int dimension() const
Definition: matrix.h:612
void print(const UNICHARSET &unicharset) const
Definition: matrix.cpp:115
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:399
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:930
WERD_CHOICE * best_choice
Definition: pageres.h:239
void FilterWordChoices(int debug_level)
Definition: pageres.cpp:518
TWERD * chopped_word
Definition: pageres.h:210
void InsertSeam(int blob_number, SEAM *seam)
Definition: pageres.cpp:419
BlamerBundle * blamer_bundle
Definition: pageres.h:250
void RebuildBestState()
Definition: pageres.cpp:837
const UNICHARSET * uch_set
Definition: pageres.h:201
WERD_CHOICE_LIST best_choices
Definition: pageres.h:247
MATRIX * ratings
Definition: pageres.h:235
std::vector< SEAM * > seam_array
Definition: pageres.h:212
float certainty() const
Definition: ratngs.h:87
float rating() const
Definition: ratngs.h:84
uint8_t permuter() const
Definition: ratngs.h:327
TDimension left() const
Definition: rect.h:82
bool almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:272
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
double overlap_fraction(const TBOX &box) const
Definition: rect.h:396
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:106
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:122
void Finalize()
Definition: seam.h:126
void Print(const char *label) const
Definition: seam.cpp:144
bool flag(WERD_FLAGS mask) const
Definition: werd.h:128
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:769
virtual Dict & getDict()
Definition: classify.h:98
const UNICHARSET & getUnicharset() const
Definition: dict.h:104
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:437
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:27
void RemapForSplit(int index)
Struct to store information maintained by various language model components.
Definition: lm_state.h:204
Bundle together all the things pertaining to the best choice/state.
Definition: lm_state.h:226
std::vector< LanguageModelState * > beam
Definition: lm_state.h:246
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
Definition: lm_state.h:242
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending)
Definition: chopper.cpp:445
SEAM * improve_one_blob(const std::vector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, unsigned *blob_number)
Definition: chopper.cpp:320
SEAM * chop_one_blob(const std::vector< TBOX > &boxes, const std::vector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, unsigned *blob_number)
Definition: chopper.cpp:367
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:240
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:162
int select_blob_to_split(const std::vector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:523
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
Definition: chopper.cpp:261
SEAM * chop_overlapping_blob(const std::vector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, unsigned *blob_number)
Definition: chopper.cpp:266
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:385
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:222
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:605
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
Definition: chopper.cpp:207
virtual BLOB_CHOICE_LIST * classify_piece(const std::vector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:49
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, std::vector< SegSearchPending > &pending)
Definition: segsearch.cpp:279
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:214
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:33
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:382