tesseract  5.0.0
classify.cpp
Go to the documentation of this file.
1 // File: classify.cpp
3 // Description: classify class.
4 // Author: Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "classify.h"
20 
21 #ifdef DISABLED_LEGACY_ENGINE
22 
23 # include <string.h>
24 
25 namespace tesseract {
26 
28  : INT_MEMBER(classify_debug_level, 0, "Classify debug level", this->params())
29  ,
30 
31  BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].", this->params())
32  ,
33 
34  double_MEMBER(classify_max_rating_ratio, 1.5, "Veto ratio between classifier ratings",
35  this->params())
36  ,
37 
38  double_MEMBER(classify_max_certainty_margin, 5.5,
39  "Veto difference between classifier certainties", this->params())
40  ,
41 
42  dict_(this) {}
43 
44 Classify::~Classify() {}
45 
46 } // namespace tesseract
47 
48 #else // DISABLED_LEGACY_ENGINE not defined
49 
50 # include <cstring>
51 # include "fontinfo.h"
52 # include "intproto.h"
53 # include "mfoutline.h"
54 # include "scrollview.h"
55 # include "shapeclassifier.h"
56 # include "shapetable.h"
57 # include "unicity_table.h"
58 
59 namespace tesseract {
61  : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping", this->params())
62  , BOOL_MEMBER(prioritize_division, false, "Prioritize blob division over chopping",
63  this->params())
64  , BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier", this->params())
65  , INT_MEMBER(classify_debug_level, 0, "Classify debug level", this->params())
66  , INT_MEMBER(classify_norm_method, character, "Normalization Method ...", this->params())
67  , double_MEMBER(classify_char_norm_range, 0.2, "Character Normalization Range ...",
68  this->params())
69  , double_MEMBER(classify_max_rating_ratio, 1.5, "Veto ratio between classifier ratings",
70  this->params())
71  , double_MEMBER(classify_max_certainty_margin, 5.5,
72  "Veto difference between classifier certainties", this->params())
73  , BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching", this->params())
74  , BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching", this->params())
75  , BOOL_MEMBER(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier", this->params())
76  , BOOL_MEMBER(classify_use_pre_adapted_templates, 0, "Use pre-adapted classifier templates",
77  this->params())
78  , BOOL_MEMBER(classify_save_adapted_templates, 0, "Save adapted templates to a file",
79  this->params())
80  , BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", this->params())
81  , BOOL_MEMBER(classify_nonlinear_norm, 0, "Non-linear stroke-density normalization",
82  this->params())
83  , INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params())
84  , INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params())
85  , INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", this->params())
86  , double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", this->params())
87  , double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)", this->params())
88  , double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", this->params())
89  , double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", this->params())
90  , double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", this->params())
91  , double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length", this->params())
92  , INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", this->params())
93  , INT_MEMBER(matcher_min_examples_for_prototyping, 3, "Reliable Config Threshold",
94  this->params())
95  , INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5,
96  "Enable adaption even if the ambiguities have not been seen", this->params())
97  , double_MEMBER(matcher_clustering_max_angle_delta, 0.015,
98  "Maximum angle delta for prototype clustering", this->params())
99  , double_MEMBER(classify_misfit_junk_penalty, 0.0,
100  "Penalty to apply when a non-alnum is vertically out of "
101  "its expected textline position",
102  this->params())
103  , double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params())
104  , double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", this->params())
105  , double_MEMBER(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used",
106  this->params())
107  , double_MEMBER(classify_adapted_pruning_factor, 2.5,
108  "Prune poor adapted results this much worse than best result", this->params())
109  , double_MEMBER(classify_adapted_pruning_threshold, -1.0,
110  "Threshold at which classify_adapted_pruning_factor starts", this->params())
111  , INT_MEMBER(classify_adapt_proto_threshold, 230,
112  "Threshold for good protos during adaptive 0-255", this->params())
113  , INT_MEMBER(classify_adapt_feature_threshold, 230,
114  "Threshold for good features during adaptive 0-255", this->params())
115  , BOOL_MEMBER(disable_character_fragments, true,
116  "Do not include character fragments in the"
117  " results of the classifier",
118  this->params())
119  , double_MEMBER(classify_character_fragments_garbage_certainty_threshold, -3.0,
120  "Exclude fragments that do not look like whole"
121  " characters from training and adaption",
122  this->params())
123  , BOOL_MEMBER(classify_debug_character_fragments, false,
124  "Bring up graphical debugging windows for fragments training", this->params())
125  , BOOL_MEMBER(matcher_debug_separate_windows, false,
126  "Use two different windows for debugging the matching: "
127  "One for the protos and one for the features.",
128  this->params())
129  , STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", this->params())
130  , INT_MEMBER(classify_class_pruner_threshold, 229, "Class Pruner Threshold 0-255",
131  this->params())
132  , INT_MEMBER(classify_class_pruner_multiplier, 15,
133  "Class Pruner Multiplier 0-255: ", this->params())
134  , INT_MEMBER(classify_cp_cutoff_strength, 7,
135  "Class Pruner CutoffStrength: ", this->params())
136  , INT_MEMBER(classify_integer_matcher_multiplier, 10,
137  "Integer Matcher Multiplier 0-255: ", this->params())
138  , BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].",
139  this->params())
140  , double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", this->params())
141  , double_MEMBER(speckle_rating_penalty, 10.0, "Penalty to add to worst rating for noise",
142  this->params())
143  , im_(&classify_debug_level)
144  , dict_(this) {
145  using namespace std::placeholders; // for _1, _2
146  fontinfo_table_.set_clear_callback(std::bind(FontInfoDeleteCallback, _1));
147 
149 }
150 
153 #ifndef GRAPHICS_DISABLED
154  delete learn_debug_win_;
155  delete learn_fragmented_word_debug_win_;
156  delete learn_fragments_debug_win_;
157 #endif
158 }
159 
160 // Takes ownership of the given classifier, and uses it for future calls
161 // to CharNormClassifier.
163  delete static_classifier_;
164  static_classifier_ = static_classifier;
165 }
166 
167 // Moved from speckle.cpp
168 // Adds a noise classification result that is a bit worse than the worst
169 // current result, or the worst possible result if no current results.
170 void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) {
171  BLOB_CHOICE_IT bc_it(choices);
172  // If there is no classifier result, we will use the worst possible certainty
173  // and corresponding rating.
174  float certainty = -getDict().certainty_scale;
175  float rating = rating_scale * blob_length;
176  if (!choices->empty() && blob_length > 0) {
177  bc_it.move_to_last();
178  BLOB_CHOICE *worst_choice = bc_it.data();
179  // Add speckle_rating_penalty to worst rating, matching old value.
180  rating = worst_choice->rating() + speckle_rating_penalty;
181  // Compute the rating to correspond to the certainty. (Used to be kept
182  // the same, but that messes up the language model search.)
183  certainty = -rating * getDict().certainty_scale / (rating_scale * blob_length);
184  }
185  auto *blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, -1, 0.0f, FLT_MAX, 0,
187  bc_it.add_to_end(blob_choice);
188 }
189 
190 // Returns true if the blob is small enough to be a large speckle.
191 bool Classify::LargeSpeckle(const TBLOB &blob) {
192  double speckle_size = kBlnXHeight * speckle_large_max_size;
193  TBOX bbox = blob.bounding_box();
194  return bbox.width() < speckle_size && bbox.height() < speckle_size;
195 }
196 
197 } // namespace tesseract
198 
199 #endif // def DISABLED_LEGACY_ENGINE
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:368
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:374
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:372
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:370
#define classify_enable_adaptive_matcher
Definition: adaptmatch.cpp:78
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:129
@ character
Definition: mfoutline.h:53
const int kBlnXHeight
Definition: normalis.h:33
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:87
@ UNICHAR_SPACE
Definition: unicharset.h:36
@ BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:51
TBOX bounding_box() const
Definition: blobs.cpp:466
float rating() const
Definition: ratngs.h:84
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
~Classify() override
Definition: classify.cpp:151
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:191
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:447
virtual Dict & getDict()
Definition: classify.h:98
void SetStaticClassifier(ShapeClassifier *static_classifier)
Definition: classify.cpp:162
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:464
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:435
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:170