tesseract  5.0.0
wordrec.cpp
Go to the documentation of this file.
1 // File: wordrec.cpp
3 // Description: wordrec class.
4 // Author: Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "wordrec.h"
20 
21 #include <memory>
22 
23 #ifdef DISABLED_LEGACY_ENGINE
24 
25 # include "params.h"
26 
27 namespace tesseract {
29  : // control parameters
30 
31  BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages", params())
32  ,
33 
34  BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors", params()) {
35  prev_word_best_choice_ = nullptr;
36 }
37 
38 } // namespace tesseract
39 
40 #else // DISABLED_LEGACY_ENGINE not defined
41 
42 # include "language_model.h"
43 # include "params.h"
44 
45 namespace tesseract {
47  : // control parameters
48  BOOL_MEMBER(merge_fragments_in_matrix, true,
49  "Merge the fragments in the ratings matrix and delete them"
50  " after merging",
51  params())
52  , BOOL_MEMBER(wordrec_enable_assoc, true, "Associator Enable", params())
53  , BOOL_MEMBER(force_word_assoc, false,
54  "force associator to run regardless of what enable_assoc is."
55  " This is used for CJK where component grouping is necessary.",
56  CCUtil::params())
57  , INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", params())
58  , double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", params())
59  , INT_MEMBER(chop_debug, 0, "Chop debug", params())
60  , BOOL_MEMBER(chop_enable, 1, "Chop enable", params())
61  , BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", params())
62  , INT_MEMBER(chop_split_length, 10000, "Split Length", params())
63  , INT_MEMBER(chop_same_distance, 2, "Same distance", params())
64  , INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", params())
65  , INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile", params())
66  , BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params())
67  , INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", params())
68  , INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", params())
69  , double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", params())
70  , double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", params())
71  , double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", params())
72  , INT_MEMBER(chop_centered_maxwidth, 90,
73  "Width of (smaller) chopped blobs "
74  "above which we don't care that a chop is not near the center.",
75  params())
76  , double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", params())
77  , double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", params())
78  , double_MEMBER(chop_ok_split, 100.0, "OK split limit", params())
79  , double_MEMBER(chop_good_split, 50.0, "Good split limit", params())
80  , INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", params())
81  , BOOL_MEMBER(assume_fixed_pitch_char_segment, false,
82  "include fixed-pitch heuristics in char segmentation", params())
83  , INT_MEMBER(wordrec_debug_level, 0, "Debug level for wordrec", params())
84  , INT_MEMBER(wordrec_max_join_chunks, 4, "Max number of broken pieces to associate", params())
85  , BOOL_MEMBER(wordrec_skip_no_truth_words, false,
86  "Only run OCR for words that had truth recorded in BlamerBundle", params())
87  , BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages", params())
88  , BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors", params())
89  , INT_MEMBER(segsearch_debug_level, 0, "SegSearch debug level", params())
90  , INT_MEMBER(segsearch_max_pain_points, 2000,
91  "Maximum number of pain points stored in the queue", params())
92  , INT_MEMBER(segsearch_max_futile_classifications, 20,
93  "Maximum number of pain point classifications per chunk that"
94  " did not result in finding a better word choice.",
95  params())
96  , double_MEMBER(segsearch_max_char_wh_ratio, 2.0, "Maximum character width-to-height ratio",
97  params())
98  , BOOL_MEMBER(save_alt_choices, true,
99  "Save alternative paths found during chopping"
100  " and segmentation search",
101  params())
102  , pass2_ok_split(0.0f) {
103  prev_word_best_choice_ = nullptr;
104  language_model_ = std::make_unique<LanguageModel>(&get_fontinfo_table(), &(getDict()));
105  fill_lattice_ = nullptr;
106 }
107 
108 } // namespace tesseract
109 
110 #endif // DISABLED_LEGACY_ENGINE
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:368
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:374
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:370
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:324
virtual Dict & getDict()
Definition: classify.h:98
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:390
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:387
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:382