tesseract  5.0.0
stopper.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: stopper.cpp
3  ** Purpose: Stopping criteria for word classifier.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 #include <cctype>
19 #include <cmath>
20 #include <cstdio>
21 #include <cstring>
22 
23 #include "stopper.h"
24 #ifndef DISABLED_LEGACY_ENGINE
25 # include "ambigs.h"
26 #endif
27 #include <tesseract/unichar.h>
28 #include "ccutil.h"
29 #include "dict.h"
30 #include "helpers.h"
31 #include "matchdefs.h"
32 #include "pageres.h"
33 #include "params.h"
34 #include "ratngs.h"
35 
36 /*----------------------------------------------------------------------------
37  Private Code
38 ----------------------------------------------------------------------------*/
39 
40 namespace tesseract {
41 
42 bool Dict::AcceptableChoice(const WERD_CHOICE &best_choice,
43  XHeightConsistencyEnum xheight_consistency) {
44  float CertaintyThreshold = stopper_nondict_certainty_base;
45  int WordSize;
46 
47  if (stopper_no_acceptable_choices) {
48  return false;
49  }
50 
51  if (best_choice.empty()) {
52  return false;
53  }
54 
55  bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
56  bool is_valid_word = valid_word_permuter(best_choice.permuter(), false);
57  bool is_case_ok = case_ok(best_choice);
58 
59  if (stopper_debug_level >= 1) {
60  const char *xht = "UNKNOWN";
61  switch (xheight_consistency) {
62  case XH_GOOD:
63  xht = "NORMAL";
64  break;
65  case XH_SUBNORMAL:
66  xht = "SUBNORMAL";
67  break;
68  case XH_INCONSISTENT:
69  xht = "INCONSISTENT";
70  break;
71  default:
72  xht = "UNKNOWN";
73  }
74  tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
75  best_choice.unichar_string().c_str(), (is_valid_word ? 'y' : 'n'),
76  (is_case_ok ? 'y' : 'n'), xht, best_choice.min_x_height(), best_choice.max_x_height());
77  }
78  // Do not accept invalid words in PASS1.
79  if (reject_offset_ <= 0.0f && !is_valid_word) {
80  return false;
81  }
82  if (is_valid_word && is_case_ok) {
83  WordSize = LengthOfShortestAlphaRun(best_choice);
84  WordSize -= stopper_smallword_size;
85  if (WordSize < 0) {
86  WordSize = 0;
87  }
88  CertaintyThreshold += WordSize * stopper_certainty_per_char;
89  }
90 
91  if (stopper_debug_level >= 1) {
92  tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
93  best_choice.rating(), best_choice.certainty(), CertaintyThreshold);
94  }
95 
96  if (no_dang_ambigs && best_choice.certainty() > CertaintyThreshold &&
97  xheight_consistency < XH_INCONSISTENT && UniformCertainties(best_choice)) {
98  return true;
99  } else {
100  if (stopper_debug_level >= 1) {
101  tprintf(
102  "AcceptableChoice() returned false"
103  " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
104  no_dang_ambigs, best_choice.certainty(), CertaintyThreshold,
105  UniformCertainties(best_choice));
106  }
107  return false;
108  }
109 }
110 
111 bool Dict::AcceptableResult(WERD_RES *word) const {
112  if (word->best_choice == nullptr) {
113  return false;
114  }
115  float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
116  int WordSize;
117 
118  if (stopper_debug_level >= 1) {
119  tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n",
120  word->best_choice->debug_string().c_str(), (valid_word(*word->best_choice) ? 'y' : 'n'),
121  (case_ok(*word->best_choice) ? 'y' : 'n'),
122  word->best_choice->dangerous_ambig_found() ? 'n' : 'y',
123  word->best_choices.singleton() ? 'n' : 'y');
124  }
125 
126  if (word->best_choice->empty() || !word->best_choices.singleton()) {
127  return false;
128  }
129  if (valid_word(*word->best_choice) && case_ok(*word->best_choice)) {
130  WordSize = LengthOfShortestAlphaRun(*word->best_choice);
131  WordSize -= stopper_smallword_size;
132  if (WordSize < 0) {
133  WordSize = 0;
134  }
135  CertaintyThreshold += WordSize * stopper_certainty_per_char;
136  }
137 
138  if (stopper_debug_level >= 1) {
139  tprintf("Rejecter: Certainty = %4.1f, Threshold = %4.1f ", word->best_choice->certainty(),
140  CertaintyThreshold);
141  }
142 
143  if (word->best_choice->certainty() > CertaintyThreshold && !stopper_no_acceptable_choices) {
144  if (stopper_debug_level >= 1) {
145  tprintf("ACCEPTED\n");
146  }
147  return true;
148  } else {
149  if (stopper_debug_level >= 1) {
150  tprintf("REJECTED\n");
151  }
152  return false;
153  }
154 }
155 
156 #if !defined(DISABLED_LEGACY_ENGINE)
157 
158 bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_replaceable,
159  MATRIX *ratings) {
160  if (stopper_debug_level > 2) {
161  tprintf("\nRunning NoDangerousAmbig() for %s\n", best_choice->debug_string().c_str());
162  }
163 
164  // Construct BLOB_CHOICE_LIST_VECTOR with ambiguities
165  // for each unichar id in BestChoice.
166  BLOB_CHOICE_LIST_VECTOR ambig_blob_choices;
167  bool ambigs_found = false;
168  // For each position in best_choice:
169  // -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i]
170  // -- initialize wrong_ngram with a single unichar_id at best_choice[i]
171  // -- look for ambiguities corresponding to wrong_ngram in the list while
172  // adding the following unichar_ids from best_choice to wrong_ngram
173  //
174  // Repeat the above procedure twice: first time look through
175  // ambigs to be replaced and replace all the ambiguities found;
176  // second time look through dangerous ambiguities and construct
177  // ambig_blob_choices with fake a blob choice for each ambiguity
178  // and pass them to dawg_permute_and_select() to search for
179  // ambiguous words in the dictionaries.
180  //
181  // Note that during the execution of the for loop (on the first pass)
182  // if replacements are made the length of best_choice might change.
183  for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) {
184  bool replace = (fix_replaceable && pass == 0);
185  const UnicharAmbigsVector &table =
187  if (!replace) {
188  // Initialize ambig_blob_choices with lists containing a single
189  // unichar id for the corresponding position in best_choice.
190  // best_choice consisting from only the original letters will
191  // have a rating of 0.0.
192  for (unsigned i = 0; i < best_choice->length(); ++i) {
193  auto *lst = new BLOB_CHOICE_LIST();
194  BLOB_CHOICE_IT lst_it(lst);
195  // TODO(rays/antonova) Put real xheights and y shifts here.
196  lst_it.add_to_end(
197  new BLOB_CHOICE(best_choice->unichar_id(i), 0.0, 0.0, -1, 0, 1, 0, BCC_AMBIG));
198  ambig_blob_choices.push_back(lst);
199  }
200  }
201  UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
202  int wrong_ngram_index;
203  int blob_index = 0;
204  for (unsigned i = 0; i < best_choice->length(); blob_index += best_choice->state(i), ++i) {
205  auto curr_unichar_id = best_choice->unichar_id(i);
206  if (stopper_debug_level > 2) {
207  tprintf("Looking for %s ngrams starting with %s:\n", replace ? "replaceable" : "ambiguous",
208  getUnicharset().debug_str(curr_unichar_id).c_str());
209  }
210  int num_wrong_blobs = best_choice->state(i);
211  wrong_ngram_index = 0;
212  wrong_ngram[wrong_ngram_index] = curr_unichar_id;
213  if (curr_unichar_id == INVALID_UNICHAR_ID || static_cast<size_t>(curr_unichar_id) >= table.size() ||
214  table[curr_unichar_id] == nullptr) {
215  continue; // there is no ambig spec for this unichar id
216  }
217  AmbigSpec_IT spec_it(table[curr_unichar_id]);
218  for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) {
219  const AmbigSpec *ambig_spec = spec_it.data();
220  wrong_ngram[wrong_ngram_index + 1] = INVALID_UNICHAR_ID;
221  int compare = UnicharIdArrayUtils::compare(wrong_ngram, ambig_spec->wrong_ngram);
222  if (stopper_debug_level > 2) {
223  tprintf("candidate ngram: ");
225  tprintf("current ngram from spec: ");
227  tprintf("comparison result: %d\n", compare);
228  }
229  if (compare == 0) {
230  // Record the place where we found an ambiguity.
231  if (fixpt != nullptr) {
232  UNICHAR_ID leftmost_id = ambig_spec->correct_fragments[0];
233  fixpt->push_back(DANGERR_INFO(blob_index, blob_index + num_wrong_blobs, replace,
234  getUnicharset().get_isngram(ambig_spec->correct_ngram_id),
235  leftmost_id));
236  if (stopper_debug_level > 1) {
237  tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index, blob_index + num_wrong_blobs, false,
238  getUnicharset().get_isngram(ambig_spec->correct_ngram_id),
239  getUnicharset().id_to_unichar(leftmost_id));
240  }
241  }
242 
243  if (replace) {
244  if (stopper_debug_level > 2) {
245  tprintf("replace ambiguity with %s : ",
246  getUnicharset().id_to_unichar(ambig_spec->correct_ngram_id));
248  }
249  ReplaceAmbig(i, ambig_spec->wrong_ngram_size, ambig_spec->correct_ngram_id, best_choice,
250  ratings);
251  } else if (i > 0 || ambig_spec->type != CASE_AMBIG) {
252  // We found dang ambig - update ambig_blob_choices.
253  if (stopper_debug_level > 2) {
254  tprintf("found ambiguity: ");
256  }
257  ambigs_found = true;
258  for (int tmp_index = 0; tmp_index <= wrong_ngram_index; ++tmp_index) {
259  // Add a blob choice for the corresponding fragment of the
260  // ambiguity. These fake blob choices are initialized with
261  // negative ratings (which are not possible for real blob
262  // choices), so that dawg_permute_and_select() considers any
263  // word not consisting of only the original letters a better
264  // choice and stops searching for alternatives once such a
265  // choice is found.
266  BLOB_CHOICE_IT bc_it(ambig_blob_choices[i + tmp_index]);
267  bc_it.add_to_end(new BLOB_CHOICE(ambig_spec->correct_fragments[tmp_index], -1.0, 0.0,
268  -1, 0, 1, 0, BCC_AMBIG));
269  }
270  }
271  spec_it.forward();
272  } else if (compare == -1) {
273  unsigned next_index;
274  if (wrong_ngram_index + 1 < ambig_spec->wrong_ngram_size &&
275  ((next_index = wrong_ngram_index + 1 + i) < best_choice->length())) {
276  // Add the next unichar id to wrong_ngram and keep looking for
277  // more ambigs starting with curr_unichar_id in AMBIG_SPEC_LIST.
278  wrong_ngram[++wrong_ngram_index] = best_choice->unichar_id(next_index);
279  num_wrong_blobs += best_choice->state(next_index);
280  } else {
281  break; // no more matching ambigs in this AMBIG_SPEC_LIST
282  }
283  } else {
284  spec_it.forward();
285  }
286  } // end searching AmbigSpec_LIST
287  } // end searching best_choice
288  } // end searching replace and dangerous ambigs
289 
290  // If any ambiguities were found permute the constructed ambig_blob_choices
291  // to see if an alternative dictionary word can be found.
292  if (ambigs_found) {
293  if (stopper_debug_level > 2) {
294  tprintf("\nResulting ambig_blob_choices:\n");
295  for (unsigned i = 0; i < ambig_blob_choices.size(); ++i) {
296  print_ratings_list("", ambig_blob_choices.at(i), getUnicharset());
297  tprintf("\n");
298  }
299  }
300  WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0);
301  ambigs_found = (alt_word->rating() < 0.0);
302  if (ambigs_found) {
303  if (stopper_debug_level >= 1) {
304  tprintf("Stopper: Possible ambiguous word = %s\n", alt_word->debug_string().c_str());
305  }
306  if (fixpt != nullptr) {
307  // Note: Currently character choices combined from fragments can only
308  // be generated by NoDangrousAmbigs(). This code should be updated if
309  // the capability to produce classifications combined from character
310  // fragments is added to other functions.
311  int orig_i = 0;
312  for (unsigned i = 0; i < alt_word->length(); ++i) {
313  const UNICHARSET &uchset = getUnicharset();
314  bool replacement_is_ngram = uchset.get_isngram(alt_word->unichar_id(i));
315  UNICHAR_ID leftmost_id = alt_word->unichar_id(i);
316  if (replacement_is_ngram) {
317  // we have to extract the leftmost unichar from the ngram.
318  const char *str = uchset.id_to_unichar(leftmost_id);
319  int step = uchset.step(str);
320  if (step) {
321  leftmost_id = uchset.unichar_to_id(str, step);
322  }
323  }
324  int end_i = orig_i + alt_word->state(i);
325  if (alt_word->state(i) > 1 || (orig_i + 1 == end_i && replacement_is_ngram)) {
326  // Compute proper blob indices.
327  int blob_start = 0;
328  for (int j = 0; j < orig_i; ++j) {
329  blob_start += best_choice->state(j);
330  }
331  int blob_end = blob_start;
332  for (int j = orig_i; j < end_i; ++j) {
333  blob_end += best_choice->state(j);
334  }
335  fixpt->push_back(
336  DANGERR_INFO(blob_start, blob_end, true, replacement_is_ngram, leftmost_id));
337  if (stopper_debug_level > 1) {
338  tprintf("fixpt->dangerous+=(%d %d %d %d %s)\n", orig_i, end_i, true,
339  replacement_is_ngram, uchset.id_to_unichar(leftmost_id));
340  }
341  }
342  orig_i += alt_word->state(i);
343  }
344  }
345  }
346  delete alt_word;
347  }
348  if (output_ambig_words_file_ != nullptr) {
349  fprintf(output_ambig_words_file_, "\n");
350  }
351 
352  for (auto data : ambig_blob_choices) {
353  delete data;
354  }
355  return !ambigs_found;
356 }
357 
359 
360 #endif // !defined(DISABLED_LEGACY_ENGINE)
361 
363  reject_offset_ = 0.0;
364 }
365 
367  reject_offset_ = stopper_phase2_certainty_rejection_offset;
368 }
369 
370 void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
371  UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings) {
372  int num_blobs_to_replace = 0;
373  int begin_blob_index = 0;
374  int i;
375  // Rating and certainty for the new BLOB_CHOICE are derived from the
376  // replaced choices.
377  float new_rating = 0.0f;
378  float new_certainty = 0.0f;
379  BLOB_CHOICE *old_choice = nullptr;
380  for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
381  if (i >= wrong_ngram_begin_index) {
382  int num_blobs = werd_choice->state(i);
383  int col = begin_blob_index + num_blobs_to_replace;
384  int row = col + num_blobs - 1;
385  BLOB_CHOICE_LIST *choices = ratings->get(col, row);
386  ASSERT_HOST(choices != nullptr);
387  old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);
388  ASSERT_HOST(old_choice != nullptr);
389  new_rating += old_choice->rating();
390  new_certainty += old_choice->certainty();
391  num_blobs_to_replace += num_blobs;
392  } else {
393  begin_blob_index += werd_choice->state(i);
394  }
395  }
396  new_certainty /= wrong_ngram_size;
397  // If there is no entry in the ratings matrix, add it.
398  MATRIX_COORD coord(begin_blob_index, begin_blob_index + num_blobs_to_replace - 1);
399  if (!coord.Valid(*ratings)) {
400  ratings->IncreaseBandSize(coord.row - coord.col + 1);
401  }
402  if (ratings->get(coord.col, coord.row) == nullptr) {
403  ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);
404  }
405  BLOB_CHOICE_LIST *new_choices = ratings->get(coord.col, coord.row);
406  BLOB_CHOICE *choice = FindMatchingChoice(correct_ngram_id, new_choices);
407  if (choice != nullptr) {
408  // Already there. Upgrade if new rating better.
409  if (new_rating < choice->rating()) {
410  choice->set_rating(new_rating);
411  }
412  if (new_certainty < choice->certainty()) {
413  choice->set_certainty(new_certainty);
414  }
415  // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.
416  } else {
417  // Need a new choice with the correct_ngram_id.
418  choice = new BLOB_CHOICE(*old_choice);
419  choice->set_unichar_id(correct_ngram_id);
420  choice->set_rating(new_rating);
421  choice->set_certainty(new_certainty);
422  choice->set_classifier(BCC_AMBIG);
423  choice->set_matrix_cell(coord.col, coord.row);
424  BLOB_CHOICE_IT it(new_choices);
425  it.add_to_end(choice);
426  }
427  // Remove current unichar from werd_choice. On the last iteration
428  // set the correct replacement unichar instead of removing a unichar.
429  for (int replaced_count = 0; replaced_count < wrong_ngram_size; ++replaced_count) {
430  if (replaced_count + 1 == wrong_ngram_size) {
431  werd_choice->set_blob_choice(wrong_ngram_begin_index, num_blobs_to_replace, choice);
432  } else {
433  werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);
434  }
435  }
436  if (stopper_debug_level >= 1) {
437  werd_choice->print("ReplaceAmbig() ");
438  tprintf("Modified blob_choices: ");
439  print_ratings_list("\n", new_choices, getUnicharset());
440  }
441 }
442 
443 int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
444  int shortest = INT32_MAX;
445  int curr_len = 0;
446  for (unsigned w = 0; w < WordChoice.length(); ++w) {
447  if (WordChoice.unicharset()->get_isalpha(WordChoice.unichar_id(w))) {
448  curr_len++;
449  } else if (curr_len > 0) {
450  if (curr_len < shortest) {
451  shortest = curr_len;
452  }
453  curr_len = 0;
454  }
455  }
456  if (curr_len > 0 && curr_len < shortest) {
457  shortest = curr_len;
458  } else if (shortest == INT32_MAX) {
459  shortest = 0;
460  }
461  return shortest;
462 }
463 
465  float Certainty;
466  float WorstCertainty = FLT_MAX;
467  float CertaintyThreshold;
468  double TotalCertainty;
469  double TotalCertaintySquared;
470  double Variance;
471  float Mean, StdDev;
472  int word_length = word.length();
473 
474  if (word_length < 3) {
475  return true;
476  }
477 
478  TotalCertainty = TotalCertaintySquared = 0.0;
479  for (int i = 0; i < word_length; ++i) {
480  Certainty = word.certainty(i);
481  TotalCertainty += Certainty;
482  TotalCertaintySquared += static_cast<double>(Certainty) * Certainty;
483  if (Certainty < WorstCertainty) {
484  WorstCertainty = Certainty;
485  }
486  }
487 
488  // Subtract off worst certainty from statistics.
489  word_length--;
490  TotalCertainty -= WorstCertainty;
491  TotalCertaintySquared -= static_cast<double>(WorstCertainty) * WorstCertainty;
492 
493  Mean = TotalCertainty / word_length;
494  Variance = ((word_length * TotalCertaintySquared - TotalCertainty * TotalCertainty) /
495  (word_length * (word_length - 1)));
496  if (Variance < 0.0) {
497  Variance = 0.0;
498  }
499  StdDev = sqrt(Variance);
500 
501  CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev;
502  if (CertaintyThreshold > stopper_nondict_certainty_base) {
503  CertaintyThreshold = stopper_nondict_certainty_base;
504  }
505 
506  if (word.certainty() < CertaintyThreshold) {
507  if (stopper_debug_level >= 1) {
508  tprintf(
509  "Stopper: Non-uniform certainty = %4.1f"
510  " (m=%4.1f, s=%4.1f, t=%4.1f)\n",
511  word.certainty(), Mean, StdDev, CertaintyThreshold);
512  }
513  return false;
514  } else {
515  return true;
516  }
517 }
518 
519 } // namespace tesseract
#define MAX_AMBIG_SIZE
Definition: ambigs.h:34
#define ASSERT_HOST(x)
Definition: errcode.h:59
std::vector< AmbigSpec_LIST * > UnicharAmbigsVector
Definition: ambigs.h:140
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:804
XHeightConsistencyEnum
Definition: dict.h:81
@ XH_GOOD
Definition: dict.h:81
@ XH_SUBNORMAL
Definition: dict.h:81
@ XH_INCONSISTENT
Definition: dict.h:81
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
float Mean(PROTOTYPE *Proto, uint16_t Dimension)
Definition: cluster.cpp:1663
int UNICHAR_ID
Definition: unichar.h:36
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:177
@ CASE_AMBIG
Definition: ambigs.h:45
@ BCC_AMBIG
Definition: ratngs.h:52
std::vector< DANGERR_INFO > DANGERR
Definition: stopper.h:47
std::vector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
Definition: ratngs.h:623
T get(ICOORD pos) const
Definition: matrix.h:268
void put(ICOORD pos, const T &thing)
Definition: matrix.h:260
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:52
bool Valid(const MATRIX &m) const
Definition: matrix.h:697
WERD_CHOICE * best_choice
Definition: pageres.h:239
WERD_CHOICE_LIST best_choices
Definition: pageres.h:247
void set_certainty(float newrat)
Definition: ratngs.h:150
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
float certainty() const
Definition: ratngs.h:87
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:160
void set_matrix_cell(int col, int row)
Definition: ratngs.h:156
void set_rating(float newrat)
Definition: ratngs.h:147
float rating() const
Definition: ratngs.h:84
float max_x_height() const
Definition: ratngs.h:320
std::string debug_string() const
Definition: ratngs.h:475
float certainty() const
Definition: ratngs.h:311
void remove_unichar_id(unsigned index)
Definition: ratngs.h:454
void set_blob_choice(unsigned index, int blob_count, const BLOB_CHOICE *blob_choice)
Definition: ratngs.cpp:297
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:295
bool empty() const
Definition: ratngs.h:280
uint8_t permuter() const
Definition: ratngs.h:327
bool dangerous_ambig_found() const
Definition: ratngs.h:344
unsigned state(unsigned index) const
Definition: ratngs.h:299
float min_x_height() const
Definition: ratngs.h:317
const UNICHARSET * unicharset() const
Definition: ratngs.h:277
unsigned length() const
Definition: ratngs.h:283
void print() const
Definition: ratngs.h:557
float rating() const
Definition: ratngs.h:308
std::string & unichar_string()
Definition: ratngs.h:515
static void print(const UNICHAR_ID array[], const UNICHARSET &unicharset)
Definition: ambigs.h:93
static int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2)
Definition: ambigs.h:58
UNICHAR_ID correct_ngram_id
Definition: ambigs.h:132
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:130
UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:131
AmbigType type
Definition: ambigs.h:133
const UnicharAmbigsVector & dang_ambigs() const
Definition: ambigs.h:157
const UnicharAmbigsVector & replace_ambigs() const
Definition: ambigs.h:160
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:497
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
bool get_isngram(UNICHAR_ID unichar_id) const
Definition: unicharset.h:542
int step(const char *str) const
Definition: unicharset.cpp:211
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:186
bool AcceptableResult(WERD_RES *word) const
Definition: stopper.cpp:111
int UniformCertainties(const WERD_CHOICE &word)
Definition: stopper.cpp:464
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:366
const UNICHARSET & getUnicharset() const
Definition: dict.h:104
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const
Returns the length of the shortest alpha run in WordChoice.
Definition: stopper.cpp:443
void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings)
Definition: stopper.cpp:370
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:437
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:801
bool AcceptableChoice(const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency)
Returns true if the given best_choice is good enough to stop.
Definition: stopper.cpp:42
WERD_CHOICE * dawg_permute_and_select(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit)
Definition: permdawg.cpp:159
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:111
bool NoDangerousAmbig(WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, MATRIX *ratings)
Definition: stopper.cpp:158
void EndDangerousAmbigs()
Definition: stopper.cpp:358
int case_ok(const WERD_CHOICE &word) const
Check a string to see if it matches a set of lexical rules.
Definition: context.cpp:45
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:362