tesseract  5.0.0
lm_consistency.h
Go to the documentation of this file.
1 // File: lm_consistency.h
3 // Description: Struct for recording consistency of the paths representing
4 // OCR hypotheses.
5 // Author: Rika Antonova
6 //
7 // (C) Copyright 2012, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_WORDREC_LM_CONSISTENCY_H_
21 #define TESSERACT_WORDREC_LM_CONSISTENCY_H_
22 
23 #include <cstdint> // for INT16_MAX
24 #include "dawg.h" // for EDGE_REF, NO_EDGE
25 #include "dict.h" // for XH_GOOD, XH_INCONSISTENT, XHeightConsi...
26 
27 class BLOB_CHOICE;
28 
29 namespace tesseract {
30 
31 static const char *const XHeightConsistencyEnumName[] = {
   // Printable names for the x-height consistency states.
   // Presumably indexed by XHeightConsistencyEnum (declared in dict.h), so
   // the order here would have to match that enum -- TODO confirm.
32  "XH_GOOD",
33  "XH_SUBNORMAL",
34  "XH_INCONSISTENT",
35 };
36 
37 // Struct for keeping track of the consistency of the path.
40 
41  // How much do characters have to be shifted away from normal parameters
42  // before we say they're not normal?
43  static const int kShiftThresh = 1;
44 
45  // How much shifting from subscript to superscript and back
46  // before we declare shenanigans?
47  static const int kMaxEntropy = 1;
48 
49  // Script positions - order important for entropy calculation.
50  static const int kSUB = 0, kNORM = 1, kSUP = 2;
    // Number of script positions listed above; used as the length of the
    // per-position xht_* arrays and as the bound of the constructor's
    // initialisation loop.
51  static const int kNumPos = 3;
52 
53  explicit LMConsistencyInfo(const LMConsistencyInfo *parent_info) {
    // Creates the consistency record for a path.
    //   parent_info == nullptr : start from scratch with the default values
    //                            assigned below.
    //   parent_info != nullptr : copy every member from the parent record.
    // NOTE(review): listing lines 63, 67 and 78 are absent from this
    // rendering, so further member initialisations may exist in the real
    // header that are not shown here.
54  if (parent_info == nullptr) {
55  // Initialize from scratch.
56  num_alphas = 0;
57  num_digits = 0;
58  num_punc = 0;
59  num_other = 0;
60  chartype = CT_NONE;
61  punc_ref = NO_EDGE;
62  invalid_punc = false;
64  num_lower = 0;
65  script_id = 0;
66  inconsistent_script = false;
68  inconsistent_font = false;
69  // Initialize XHeight stats.
    // Every script position starts with zero counts and the range [0, 256].
70  for (int i = 0; i < kNumPos; i++) {
71  xht_count[i] = 0;
72  xht_count_punc[i] = 0;
73  xht_lo[i] = 0;
74  xht_hi[i] = 256; // kBlnCellHeight
75  }
76  xht_sp = -1; // This invalid value indicates that there was no parent.
77  xpos_entropy = 0;
79  } else {
80  // Copy parent info
    // Member-wise copy through the implicitly generated assignment operator.
81  *this = *parent_info;
82  }
83  }
84  inline int NumInconsistentPunc() const {
    // Returns the number of punctuation characters counted as inconsistent:
    // all of them (num_punc) when invalid_punc is set, otherwise 0.
85  return invalid_punc ? num_punc : 0;
86  }
87  inline int NumInconsistentCase() const {
    // Returns an int count of case inconsistencies.
    // NOTE(review): the return statement (listing line 88) is missing from
    // this rendering; consult the real header for the actual computation.
89  }
90  inline int NumInconsistentChartype() const {
    // Sums NumInconsistentPunc() and num_other with at least one further
    // term.
    // NOTE(review): the rest of the expression (listing line 92) is missing
    // from this rendering, so the full sum cannot be stated from here.
91  return (NumInconsistentPunc() + num_other +
93  }
94  inline bool Consistent() const {
    // True only when NumInconsistentPunc() and NumInconsistentCase() are both
    // zero AND the remaining conjuncts hold.
    // NOTE(review): those remaining conditions (listing lines 96-97) are
    // missing from this rendering.
95  return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 &&
98  }
99  inline int NumInconsistentSpaces() const {
    // Returns an int count of inconsistent spaces.
    // NOTE(review): the return statement (listing line 100) is missing from
    // this rendering; consult the real header for the actual value.
101  }
102  inline int InconsistentXHeight() const {
     // Returns 1 when xht_decision equals XH_INCONSISTENT and 0 otherwise.
     // The comparison result is converted to the declared int return type.
103  return xht_decision == XH_INCONSISTENT;
104  }
     // Declaration only; the definition is not part of this header listing.
     //   b       : the blob choice to take into account (not modified
     //             through this pointer, as it is pointer-to-const).
     //   is_punc : whether that choice is punctuation.
     // NOTE(review): presumably updates the xht_* members and xht_decision --
     // confirm against the implementation file.
105  void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc);
106  float BodyMinXHeight() const {
     // Lower x-height bound for the normal (kNORM) script position.
     // When the x-height is already inconsistent, returns 0.0f instead of the
     // recorded bound.
107  if (InconsistentXHeight()) {
108  return 0.0f;
109  }
110  return xht_lo[kNORM];
111  }
112  float BodyMaxXHeight() const {
     // Upper x-height bound for the normal (kNORM) script position.
     // When the x-height is already inconsistent, returns INT16_MAX (as a
     // float) instead of the recorded bound.
113  if (InconsistentXHeight()) {
114  return static_cast<float>(INT16_MAX);
115  }
116  return xht_hi[kNORM];
117  }
118 
122  int num_punc; // Punctuation count; returned by NumInconsistentPunc() when invalid_punc is set.
     // NOTE(review): the listing numbers jump here (122 -> 130, 133 -> 135),
     // so other members declared in the real header are not shown.
130  // Metrics clumped by position.
     // Each array has one slot per script position (kSUB, kNORM, kSUP).
     // xht_lo / xht_hi are set to 0 / 256 by the constructor when there is
     // no parent.
131  float xht_lo[kNumPos];
132  float xht_hi[kNumPos];
133  int16_t xht_count[kNumPos];
     // Set to -1 by the constructor to mark "no parent".
135  int16_t xht_sp;
     // Set to 0 by the constructor when there is no parent.
136  int16_t xpos_entropy;
140 };
141 
142 } // namespace tesseract
143 
144 #endif // TESSERACT_WORDREC_LM_CONSISTENCY_H_
int64_t EDGE_REF
Definition: dawg.h:49
XHeightConsistencyEnum
Definition: dict.h:81
@ XH_GOOD
Definition: dict.h:81
@ XH_INCONSISTENT
Definition: dict.h:81
LMConsistencyInfo(const LMConsistencyInfo *parent_info)
void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc)
static const int kShiftThresh
int16_t xht_count_punc[kNumPos]
XHeightConsistencyEnum xht_decision
static const int kMaxEntropy