tesseract  5.0.0
adaptions.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: adaptions.cpp (Formerly adaptions.c)
3  * Description: Functions used to adapt to blobs already confidently
4  * identified
5  * Author: Chris Newton
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <cctype>
21 #include <cstring>
22 #include "control.h"
23 #include "reject.h"
24 #include "stopper.h"
25 #include "tesseractclass.h"
26 #include "tessvars.h"
27 
28 // Include automatically generated configuration file if running autoconf.
29 #ifdef HAVE_CONFIG_H
30 # include "config_auto.h"
31 #endif
32 
33 namespace tesseract {
34 bool Tesseract::word_adaptable( // should we adapt?
35  WERD_RES *word, uint16_t mode) {
36  if (tessedit_adaption_debug) {
37  tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
38  word->best_choice->unichar_string().c_str(), word->best_choice->rating(),
39  word->best_choice->certainty());
40  }
41 
42  bool status = false;
43  std::bitset<16> flags(mode);
44 
45  enum MODES {
46  ADAPTABLE_WERD,
47  ACCEPTABLE_WERD,
48  CHECK_DAWGS,
49  CHECK_SPACES,
50  CHECK_ONE_ELL_CONFLICT,
51  CHECK_AMBIG_WERD
52  };
53 
54  /*
55 0: NO adaption
56 */
57  if (mode == 0) {
58  if (tessedit_adaption_debug) {
59  tprintf("adaption disabled\n");
60  }
61  return false;
62  }
63 
64  if (flags[ADAPTABLE_WERD]) {
65  status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
66  if (tessedit_adaption_debug && !status) {
67  tprintf("tess_would_adapt bit is false\n");
68  }
69  }
70 
71  if (flags[ACCEPTABLE_WERD]) {
72  status |= word->tess_accepted;
73  if (tessedit_adaption_debug && !status) {
74  tprintf("tess_accepted bit is false\n");
75  }
76  }
77 
78  if (!status) { // If not set then
79  return false; // ignore other checks
80  }
81 
82  if (flags[CHECK_DAWGS] && (word->best_choice->permuter() != SYSTEM_DAWG_PERM) &&
83  (word->best_choice->permuter() != FREQ_DAWG_PERM) &&
84  (word->best_choice->permuter() != USER_DAWG_PERM) &&
85  (word->best_choice->permuter() != NUMBER_PERM)) {
86  if (tessedit_adaption_debug) {
87  tprintf("word not in dawgs\n");
88  }
89  return false;
90  }
91 
92  if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) {
93  if (tessedit_adaption_debug) {
94  tprintf("word has ell conflict\n");
95  }
96  return false;
97  }
98 
99  if (flags[CHECK_SPACES] &&
100  (strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
101  if (tessedit_adaption_debug) {
102  tprintf("word contains spaces\n");
103  }
104  return false;
105  }
106 
107  if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) {
108  if (tessedit_adaption_debug) {
109  tprintf("word is ambiguous\n");
110  }
111  return false;
112  }
113 
114  if (tessedit_adaption_debug) {
115  tprintf("returning status %d\n", status);
116  }
117  return status;
118 }
119 
120 } // namespace tesseract
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:240
@ NUMBER_PERM
Definition: ratngs.h:238
@ USER_DAWG_PERM
Definition: ratngs.h:242
@ FREQ_DAWG_PERM
Definition: ratngs.h:243
bool one_ell_conflict(WERD_RES *word_res, bool update_map)
Definition: reject.cpp:287
bool word_adaptable(WERD_RES *word, uint16_t mode)
Definition: adaptions.cpp:34
WERD_CHOICE * best_choice
Definition: pageres.h:239
float certainty() const
Definition: ratngs.h:311
uint8_t permuter() const
Definition: ratngs.h:327
bool dangerous_ambig_found() const
Definition: ratngs.h:344
float rating() const
Definition: ratngs.h:308
std::string & unichar_string()
Definition: ratngs.h:515