tesseract  5.0.0
rejctmap.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: rejctmap.cpp (Formerly rejmap.c)
3  * Description: REJ and REJMAP class functions.
4  * Author: Phil Cheatle
5  *
6  * (C) Copyright 1994, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "rejctmap.h"
20 
21 #include <memory>
22 
23 #include "params.h"
24 
25 namespace tesseract {
26 
27 void REJ::full_print(FILE *fp) const {
28  fprintf(fp, "R_TESS_FAILURE: %s\n", flag(R_TESS_FAILURE) ? "T" : "F");
29  fprintf(fp, "R_SMALL_XHT: %s\n", flag(R_SMALL_XHT) ? "T" : "F");
30  fprintf(fp, "R_EDGE_CHAR: %s\n", flag(R_EDGE_CHAR) ? "T" : "F");
31  fprintf(fp, "R_1IL_CONFLICT: %s\n", flag(R_1IL_CONFLICT) ? "T" : "F");
32  fprintf(fp, "R_POSTNN_1IL: %s\n", flag(R_POSTNN_1IL) ? "T" : "F");
33  fprintf(fp, "R_REJ_CBLOB: %s\n", flag(R_REJ_CBLOB) ? "T" : "F");
34  fprintf(fp, "R_MM_REJECT: %s\n", flag(R_MM_REJECT) ? "T" : "F");
35  fprintf(fp, "R_BAD_REPETITION: %s\n", flag(R_BAD_REPETITION) ? "T" : "F");
36  fprintf(fp, "R_POOR_MATCH: %s\n", flag(R_POOR_MATCH) ? "T" : "F");
37  fprintf(fp, "R_NOT_TESS_ACCEPTED: %s\n",
38  flag(R_NOT_TESS_ACCEPTED) ? "T" : "F");
39  fprintf(fp, "R_CONTAINS_BLANKS: %s\n", flag(R_CONTAINS_BLANKS) ? "T" : "F");
40  fprintf(fp, "R_BAD_PERMUTER: %s\n", flag(R_BAD_PERMUTER) ? "T" : "F");
41  fprintf(fp, "R_HYPHEN: %s\n", flag(R_HYPHEN) ? "T" : "F");
42  fprintf(fp, "R_DUBIOUS: %s\n", flag(R_DUBIOUS) ? "T" : "F");
43  fprintf(fp, "R_NO_ALPHANUMS: %s\n", flag(R_NO_ALPHANUMS) ? "T" : "F");
44  fprintf(fp, "R_MOSTLY_REJ: %s\n", flag(R_MOSTLY_REJ) ? "T" : "F");
45  fprintf(fp, "R_XHT_FIXUP: %s\n", flag(R_XHT_FIXUP) ? "T" : "F");
46  fprintf(fp, "R_BAD_QUALITY: %s\n", flag(R_BAD_QUALITY) ? "T" : "F");
47  fprintf(fp, "R_DOC_REJ: %s\n", flag(R_DOC_REJ) ? "T" : "F");
48  fprintf(fp, "R_BLOCK_REJ: %s\n", flag(R_BLOCK_REJ) ? "T" : "F");
49  fprintf(fp, "R_ROW_REJ: %s\n", flag(R_ROW_REJ) ? "T" : "F");
50  fprintf(fp, "R_UNLV_REJ: %s\n", flag(R_UNLV_REJ) ? "T" : "F");
51  fprintf(fp, "R_HYPHEN_ACCEPT: %s\n", flag(R_HYPHEN_ACCEPT) ? "T" : "F");
52  fprintf(fp, "R_NN_ACCEPT: %s\n", flag(R_NN_ACCEPT) ? "T" : "F");
53  fprintf(fp, "R_MM_ACCEPT: %s\n", flag(R_MM_ACCEPT) ? "T" : "F");
54  fprintf(fp, "R_QUALITY_ACCEPT: %s\n", flag(R_QUALITY_ACCEPT) ? "T" : "F");
55  fprintf(fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
56  flag(R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
57 }
58 
59 REJMAP &REJMAP::operator=(const REJMAP &source) {
60  initialise(source.len);
61  for (unsigned i = 0; i < len; i++) {
62  ptr[i] = source.ptr[i];
63  }
64  return *this;
65 }
66 
67 void REJMAP::initialise(uint16_t length) {
68  ptr = std::make_unique<REJ[]>(length);
69  len = length;
70 }
71 
72 int16_t REJMAP::accept_count() const { // How many accepted?
73  int16_t count = 0;
74  for (unsigned i = 0; i < len; i++) {
75  if (ptr[i].accepted()) {
76  count++;
77  }
78  }
79  return count;
80 }
81 
82 bool REJMAP::recoverable_rejects() const { // Any non perm rejs?
83  for (unsigned i = 0; i < len; i++) {
84  if (ptr[i].recoverable()) {
85  return true;
86  }
87  }
88  return false;
89 }
90 
91 bool REJMAP::quality_recoverable_rejects() const { // Any potential rejs?
92  for (unsigned i = 0; i < len; i++) {
93  if (ptr[i].accept_if_good_quality()) {
94  return true;
95  }
96  }
97  return false;
98 }
99 
100 void REJMAP::remove_pos( // Cut out an element
101  uint16_t pos // element to remove
102 ) {
103  ASSERT_HOST(pos < len);
104  ASSERT_HOST(len > 0);
105 
106  len--;
107  for (; pos < len; pos++) {
108  ptr[pos] = ptr[pos + 1];
109  }
110 }
111 
112 void REJMAP::print(FILE *fp) const {
113  fputc('"', fp);
114  for (unsigned i = 0; i < len; i++) {
115  fputc( ptr[i].display_char(), fp);
116  }
117  fputc('"', fp);
118 }
119 
120 void REJMAP::full_print(FILE *fp) const {
121  for (unsigned i = 0; i < len; i++) {
122  ptr[i].full_print(fp);
123  fprintf(fp, "\n");
124  }
125 }
126 
127 void REJMAP::rej_word_small_xht() { // Reject whole word
128  for (unsigned i = 0; i < len; i++) {
129  ptr[i].setrej_small_xht();
130  }
131 }
132 
133 void REJMAP::rej_word_tess_failure() { // Reject whole word
134  for (unsigned i = 0; i < len; i++) {
135  ptr[i].setrej_tess_failure();
136  }
137 }
138 
139 void REJMAP::rej_word_not_tess_accepted() { // Reject whole word
140  for (unsigned i = 0; i < len; i++) {
141  if (ptr[i].accepted()) {
142  ptr[i].setrej_not_tess_accepted();
143  }
144  }
145 }
146 
147 void REJMAP::rej_word_contains_blanks() { // Reject whole word
148  for (unsigned i = 0; i < len; i++) {
149  if (ptr[i].accepted()) {
150  ptr[i].setrej_contains_blanks();
151  }
152  }
153 }
154 
155 void REJMAP::rej_word_bad_permuter() { // Reject whole word
156  for (unsigned i = 0; i < len; i++) {
157  if (ptr[i].accepted()) {
158  ptr[i].setrej_bad_permuter();
159  }
160  }
161 }
162 
163 void REJMAP::rej_word_xht_fixup() { // Reject whole word
164  for (unsigned i = 0; i < len; i++) {
165  if (ptr[i].accepted()) {
166  ptr[i].setrej_xht_fixup();
167  }
168  }
169 }
170 
171 void REJMAP::rej_word_no_alphanums() { // Reject whole word
172  for (unsigned i = 0; i < len; i++) {
173  if (ptr[i].accepted()) {
174  ptr[i].setrej_no_alphanums();
175  }
176  }
177 }
178 
179 void REJMAP::rej_word_mostly_rej() { // Reject whole word
180  for (unsigned i = 0; i < len; i++) {
181  if (ptr[i].accepted()) {
182  ptr[i].setrej_mostly_rej();
183  }
184  }
185 }
186 
187 void REJMAP::rej_word_bad_quality() { // Reject whole word
188  for (unsigned i = 0; i < len; i++) {
189  if (ptr[i].accepted()) {
190  ptr[i].setrej_bad_quality();
191  }
192  }
193 }
194 
195 void REJMAP::rej_word_doc_rej() { // Reject whole word
196  for (unsigned i = 0; i < len; i++) {
197  if (ptr[i].accepted()) {
198  ptr[i].setrej_doc_rej();
199  }
200  }
201 }
202 
203 void REJMAP::rej_word_block_rej() { // Reject whole word
204  for (unsigned i = 0; i < len; i++) {
205  if (ptr[i].accepted()) {
206  ptr[i].setrej_block_rej();
207  }
208  }
209 }
210 
211 void REJMAP::rej_word_row_rej() { // Reject whole word
212  for (unsigned i = 0; i < len; i++) {
213  if (ptr[i].accepted()) {
214  ptr[i].setrej_row_rej();
215  }
216  }
217 }
218 
219 } // namespace tesseract
#define ASSERT_HOST(x)
Definition: errcode.h:59
@ R_MINIMAL_REJ_ACCEPT
Definition: rejctmap.h:89
@ R_ROW_REJ
Definition: rejctmap.h:81
@ R_NO_ALPHANUMS
Definition: rejctmap.h:71
@ R_TESS_FAILURE
Definition: rejctmap.h:53
@ R_QUALITY_ACCEPT
Definition: rejctmap.h:88
@ R_DOC_REJ
Definition: rejctmap.h:79
@ R_MM_ACCEPT
Definition: rejctmap.h:87
@ R_MOSTLY_REJ
Definition: rejctmap.h:72
@ R_XHT_FIXUP
Definition: rejctmap.h:73
@ R_POOR_MATCH
Definition: rejctmap.h:63
@ R_SMALL_XHT
Definition: rejctmap.h:54
@ R_BAD_PERMUTER
Definition: rejctmap.h:66
@ R_BAD_REPETITION
Definition: rejctmap.h:60
@ R_BLOCK_REJ
Definition: rejctmap.h:80
@ R_HYPHEN_ACCEPT
Definition: rejctmap.h:86
@ R_HYPHEN
Definition: rejctmap.h:69
@ R_CONTAINS_BLANKS
Definition: rejctmap.h:65
@ R_POSTNN_1IL
Definition: rejctmap.h:57
@ R_REJ_CBLOB
Definition: rejctmap.h:58
@ R_NOT_TESS_ACCEPTED
Definition: rejctmap.h:64
@ R_BAD_QUALITY
Definition: rejctmap.h:76
@ R_UNLV_REJ
Definition: rejctmap.h:82
@ R_NN_ACCEPT
Definition: rejctmap.h:85
@ R_DUBIOUS
Definition: rejctmap.h:70
@ R_MM_REJECT
Definition: rejctmap.h:59
@ R_1IL_CONFLICT
Definition: rejctmap.h:56
@ R_EDGE_CHAR
Definition: rejctmap.h:55
bool flag(REJ_FLAGS rej_flag) const
Definition: rejctmap.h:117
void full_print(FILE *fp) const
Definition: rejctmap.cpp:27
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:139
void print(FILE *fp) const
Definition: rejctmap.cpp:112
void rej_word_tess_failure()
Definition: rejctmap.cpp:133
void rej_word_contains_blanks()
Definition: rejctmap.cpp:147
void rej_word_small_xht()
Definition: rejctmap.cpp:127
void rej_word_bad_quality()
Definition: rejctmap.cpp:187
void rej_word_xht_fixup()
Definition: rejctmap.cpp:163
void rej_word_row_rej()
Definition: rejctmap.cpp:211
void remove_pos(uint16_t pos)
Definition: rejctmap.cpp:100
int16_t accept_count() const
Definition: rejctmap.cpp:72
uint16_t length() const
Definition: rejctmap.h:333
void rej_word_block_rej()
Definition: rejctmap.cpp:203
bool quality_recoverable_rejects() const
Definition: rejctmap.cpp:91
void initialise(uint16_t length)
Definition: rejctmap.cpp:67
void rej_word_bad_permuter()
Definition: rejctmap.cpp:155
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:59
void rej_word_no_alphanums()
Definition: rejctmap.cpp:171
void rej_word_doc_rej()
Definition: rejctmap.cpp:195
bool recoverable_rejects() const
Definition: rejctmap.cpp:82
void full_print(FILE *fp) const
Definition: rejctmap.cpp:120
void rej_word_mostly_rej()
Definition: rejctmap.cpp:179