tesseract  5.0.0
textord.cpp
Go to the documentation of this file.
1 // File: textord.cpp
3 // Description: The top-level text line and word finding functionality.
4 // Author: Ray Smith
5 // Created: Fri Mar 13 14:43:01 PDT 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 # include "config_auto.h"
23 #endif
24 
25 #include "baselinedetect.h"
26 #include "drawtord.h"
27 #include "makerow.h"
28 #include "pageres.h"
29 #include "textord.h"
30 #include "tordmain.h"
31 #include "wordseg.h"
32 
33 namespace tesseract {
34 
36  : ccstruct_(ccstruct)
37  , use_cjk_fp_model_(false)
38  ,
39  // makerow.cpp ///////////////////////////////////////////
40  BOOL_MEMBER(textord_single_height_mode, false, "Script has no xheight, so use a single mode",
41  ccstruct_->params())
42  ,
43  // tospace.cpp ///////////////////////////////////////////
44  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", ccstruct_->params())
45  , BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
46  "Constrain relative values of inter and intra-word gaps for "
47  "old_to_method.",
48  ccstruct_->params())
49  , BOOL_MEMBER(tosp_only_use_prop_rows, true, "Block stats to use fixed pitch rows?",
50  ccstruct_->params())
51  , BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
52  "Force word breaks on punct to break long lines in non-space "
53  "delimited langs",
54  ccstruct_->params())
55  , BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", ccstruct_->params())
56  , BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", ccstruct_->params())
57  , BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
58  , BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
59  , BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", ccstruct_->params())
60  , BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", ccstruct_->params())
61  , BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
62  "Use row alone when inadequate cert spaces", ccstruct_->params())
63  , BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", ccstruct_->params())
64  , BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", ccstruct_->params())
65  , BOOL_MEMBER(tosp_fuzzy_limit_all, true, "Don't restrict kn->sp fuzzy limit to tables",
66  ccstruct_->params())
67  , BOOL_MEMBER(tosp_stats_use_xht_gaps, true, "Use within xht gap for wd breaks",
68  ccstruct_->params())
69  , BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", ccstruct_->params())
70  , BOOL_MEMBER(tosp_only_use_xht_gaps, false, "Only use within xht gap for wd breaks",
71  ccstruct_->params())
72  , BOOL_MEMBER(tosp_rule_9_test_punct, false, "Don't chng kn to space next to punct",
73  ccstruct_->params())
74  , BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", ccstruct_->params())
75  , BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", ccstruct_->params())
76  , BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", ccstruct_->params())
77  , INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params())
78  , INT_MEMBER(tosp_enough_space_samples_for_median, 3, "or should we use mean",
79  ccstruct_->params())
80  , INT_MEMBER(tosp_redo_kern_limit, 10, "No.samples reqd to reestimate for row",
81  ccstruct_->params())
82  , INT_MEMBER(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table",
83  ccstruct_->params())
84  , INT_MEMBER(tosp_short_row, 20, "No.gaps reqd with few cert spaces to use certs",
85  ccstruct_->params())
86  , INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", ccstruct_->params())
87  , double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
88  "Factor for defining space threshold in terms of space and "
89  "kern sizes",
90  ccstruct_->params())
91  , double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", ccstruct_->params())
92  , double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", ccstruct_->params())
93  , double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", ccstruct_->params())
94  , double_MEMBER(tosp_narrow_aspect_ratio, 0.48, "narrow if w/h less than this",
95  ccstruct_->params())
96  , double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", ccstruct_->params())
97  , double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", ccstruct_->params())
98  , double_MEMBER(tosp_fuzzy_space_factor, 0.6, "Fract of xheight for fuzz sp",
99  ccstruct_->params())
100  , double_MEMBER(tosp_fuzzy_space_factor1, 0.5, "Fract of xheight for fuzz sp",
101  ccstruct_->params())
102  , double_MEMBER(tosp_fuzzy_space_factor2, 0.72, "Fract of xheight for fuzz sp",
103  ccstruct_->params())
104  , double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", ccstruct_->params())
105  , double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", ccstruct_->params())
106  , double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", ccstruct_->params())
107  , double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", ccstruct_->params())
108  , double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", ccstruct_->params())
109  , double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", ccstruct_->params())
110  , double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", ccstruct_->params())
111  , double_MEMBER(tosp_enough_small_gaps, 0.65, "Fract of kerns reqd for isolated row stats",
112  ccstruct_->params())
113  , double_MEMBER(tosp_table_kn_sp_ratio, 2.25, "Min difference of kn & sp in table",
114  ccstruct_->params())
115  , double_MEMBER(tosp_table_xht_sp_ratio, 0.33, "Expect spaces bigger than this",
116  ccstruct_->params())
117  , double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, "Fuzzy if less than this",
118  ccstruct_->params())
119  , double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", ccstruct_->params())
120  , double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", ccstruct_->params())
121  , double_MEMBER(tosp_min_sane_kn_sp, 1.5, "Don't trust spaces less than this time kn",
122  ccstruct_->params())
123  , double_MEMBER(tosp_init_guess_kn_mult, 2.2, "Thresh guess - mult kn by this",
124  ccstruct_->params())
125  , double_MEMBER(tosp_init_guess_xht_mult, 0.28, "Thresh guess - mult xht by this",
126  ccstruct_->params())
127  , double_MEMBER(tosp_max_sane_kn_thresh, 5.0, "Multiplier on kn to limit thresh",
128  ccstruct_->params())
129  , double_MEMBER(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation",
130  ccstruct_->params())
131  , double_MEMBER(tosp_large_kerning, 0.19, "Limit use of xht gap with large kns",
132  ccstruct_->params())
133  , double_MEMBER(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns",
134  ccstruct_->params())
135  , double_MEMBER(tosp_near_lh_edge, 0, "Don't reduce box if the top left is non blank",
136  ccstruct_->params())
137  , double_MEMBER(tosp_silly_kn_sp_gap, 0.2, "Don't let sp minus kn get too small",
138  ccstruct_->params())
139  , double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, "How wide fuzzies need context",
140  ccstruct_->params())
141  ,
142  // tordmain.cpp ///////////////////////////////////////////
143  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", ccstruct_->params())
144  , BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", ccstruct_->params())
145  , BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", ccstruct_->params())
146  , INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", ccstruct_->params())
147  , INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", ccstruct_->params())
148  , double_MEMBER(textord_noise_area_ratio, 0.7, "Fraction of bounding box for noise",
149  ccstruct_->params())
150  , double_MEMBER(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess",
151  ccstruct_->params())
152  , double_MEMBER(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess",
153  ccstruct_->params())
154  , INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", ccstruct_->params())
155  , double_MEMBER(textord_noise_sizelimit, 0.5, "Fraction of x for big t count",
156  ccstruct_->params())
157  , INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", ccstruct_->params())
158  , double_MEMBER(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion",
159  ccstruct_->params())
160  , BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", ccstruct_->params())
161  , BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", ccstruct_->params())
162  , double_MEMBER(textord_noise_syfract, 0.2, "xh fract height error for norm blobs",
163  ccstruct_->params())
164  , double_MEMBER(textord_noise_sxfract, 0.4, "xh fract width error for norm blobs",
165  ccstruct_->params())
166  , double_MEMBER(textord_noise_hfract, 1.0 / 64,
167  "Height fraction to discard outlines as speckle noise", ccstruct_->params())
168  , INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", ccstruct_->params())
169  , double_MEMBER(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion",
170  ccstruct_->params())
171  , BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", ccstruct_->params())
172  , double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", ccstruct_->params())
173  , double_MEMBER(textord_blshift_xfraction, 9.99, "Min size of baseline shift",
174  ccstruct_->params()) {}
175 
176 // Make the textlines and words inside each block.
177 void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
178  Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
179  BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
180  TO_BLOCK_LIST *to_blocks) {
181  page_tr_.set_x(width);
182  page_tr_.set_y(height);
183  if (to_blocks->empty()) {
184  // AutoPageSeg was not used, so we need to find_components first.
185  find_components(binary_pix, blocks, to_blocks);
186  TO_BLOCK_IT it(to_blocks);
187  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
188  TO_BLOCK *to_block = it.data();
189  // Compute the edge offsets whether or not there is a grey_pix.
190  // We have by-passed auto page seg, so we have to run it here.
191  // By page segmentation mode there is no non-text to avoid running on.
192  to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
193  }
194  } else if (!PSM_SPARSE(pageseg_mode)) {
195  // AutoPageSeg does not need to find_components as it did that already.
196  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
197  filter_blobs(page_tr_, to_blocks, true);
198  }
199 
200  ASSERT_HOST(!to_blocks->empty());
201  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
202  const FCOORD anticlockwise90(0.0f, 1.0f);
203  const FCOORD clockwise90(0.0f, -1.0f);
204  TO_BLOCK_IT it(to_blocks);
205  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
206  TO_BLOCK *to_block = it.data();
207  BLOCK *block = to_block->block;
208  // Create a fake poly_block in block from its bounding box.
210  // Rotate the to_block along with its contained block and blobnbox lists.
211  to_block->rotate(anticlockwise90);
212  // Set the block's rotation values to obey the convention followed in
213  // layout analysis for vertical text.
214  block->set_re_rotation(clockwise90);
215  block->set_classify_rotation(clockwise90);
216  }
217  }
218 
219  TO_BLOCK_IT to_block_it(to_blocks);
220  TO_BLOCK *to_block = to_block_it.data();
221  // Make the rows in the block.
222  float gradient;
223  // Do it the old fashioned way.
224  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
225  gradient = make_rows(page_tr_, to_blocks);
226  } else if (!PSM_SPARSE(pageseg_mode)) {
227  // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
228  gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
229  } else {
230  gradient = 0.0f;
231  }
232  BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks);
233  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
234  baseline_detector.ComputeBaselineSplinesAndXheights(
235  page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, textord_show_final_rows, this);
236  // Now make the words in the lines.
237  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
238  // SINGLE_LINE uses the old word maker on the single line.
239  make_words(this, page_tr_, gradient, blocks, to_blocks);
240  } else {
241  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
242  // single word, and in SINGLE_CHAR mode, all the outlines
243  // go in a single blob.
244  TO_BLOCK *to_block = to_block_it.data();
245  make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(),
246  to_block->block->row_list());
247  }
248  // Remove empties.
249  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
250  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
251  // Compute the margins for each row in the block, to be used later for
252  // paragraph detection.
253  BLOCK_IT b_it(blocks);
254  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
255  b_it.data()->compute_row_margins();
256  }
257 #ifndef GRAPHICS_DISABLED
258  close_to_win();
259 #endif
260 }
261 
262 // If we were supposed to return only a single textline, and there is more
263 // than one, clean up and leave only the best.
264 void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res) {
265  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) {
266  return; // No cleanup required.
267  }
268  PAGE_RES_IT it(page_res);
269  // Find the best row, being the greatest mean word conf.
270  float row_total_conf = 0.0f;
271  int row_word_count = 0;
272  ROW_RES *best_row = nullptr;
273  float best_conf = 0.0f;
274  for (it.restart_page(); it.word() != nullptr; it.forward()) {
275  WERD_RES *word = it.word();
276  row_total_conf += word->best_choice->certainty();
277  ++row_word_count;
278  if (it.next_row() != it.row()) {
279  row_total_conf /= row_word_count;
280  if (best_row == nullptr || best_conf < row_total_conf) {
281  best_row = it.row();
282  best_conf = row_total_conf;
283  }
284  row_total_conf = 0.0f;
285  row_word_count = 0;
286  }
287  }
288  // Now eliminate any word not in the best row.
289  for (it.restart_page(); it.word() != nullptr; it.forward()) {
290  if (it.row() != best_row) {
291  it.DeleteCurrentWord();
292  }
293  }
294 }
295 
296 } // namespace tesseract.
#define ASSERT_HOST(x)
Definition: errcode.h:59
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:368
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:374
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:370
bool textord_show_final_rows
Definition: makerow.cpp:50
@ PSM_SINGLE_BLOCK_VERT_TEXT
Definition: publictypes.h:166
@ PSM_SINGLE_CHAR
Treat the image as a single character.
Definition: publictypes.h:172
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:203
void close_to_win()
Definition: drawtord.cpp:56
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:229
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:197
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:99
bool textord_heavy_nr
Definition: makerow.cpp:46
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:53
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:206
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:190
@ PT_VERTICAL_TEXT
Definition: publictypes.h:61
void rotate(const FCOORD &rotation)
Definition: blobbox.h:715
TO_ROW_LIST * get_rows()
Definition: blobbox.h:709
void ComputeEdgeOffsets(Image thresholds, Image grey)
Definition: blobbox.cpp:1042
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:185
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:138
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:111
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:132
WERD_CHOICE * best_choice
Definition: pageres.h:239
WERD_RES * restart_page()
Definition: pageres.h:710
WERD_RES * forward()
Definition: pageres.h:743
ROW_RES * row() const
Definition: pageres.h:766
WERD_RES * word() const
Definition: pageres.h:763
ROW_RES * next_row() const
Definition: pageres.h:775
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:63
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
void set_x(TDimension xin)
rewrite function
Definition: points.h:67
void set_y(TDimension yin)
rewrite function
Definition: points.h:71
float certainty() const
Definition: ratngs.h:311
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeStraightBaselines(bool use_box_bottoms)
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: textord.cpp:177
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
Definition: tordmain.cpp:238
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
Definition: textord.cpp:264
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:211
Textord(CCStruct *ccstruct)
Definition: textord.cpp:35