tesseract  5.0.0
makerow.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: makerow.h (Formerly makerows.h)
3  * Description: Code to arrange blobs into rows of text.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1992, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #ifndef MAKEROW_H
20 #define MAKEROW_H
21 
22 #include "blobbox.h"
23 #include "blobs.h"
24 #include "ocrblock.h"
25 #include "params.h"
26 #include "statistc.h"
27 
28 namespace tesseract {
29 
31  ASSIGN, // assign it to row
32  REJECT, // reject it - dual overlap
33  NEW_ROW
34 };
35 
41 };
42 
85 
86 inline void get_min_max_xheight(int block_linesize, int *min_height, int *max_height) {
87  *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh));
88  if (*min_height < textord_min_xheight) {
89  *min_height = textord_min_xheight;
90  }
91  *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
92 }
93 
94 inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
95  if (row->xheight <= 0) {
96  return ROW_INVALID;
97  }
98  return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND
99  : (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;
100 }
101 
// Returns true when `test` lies in the closed interval
// [num * (1 - margin), num * (1 + margin)].
// Both bounds are inclusive. For a negative `num` with a positive `margin`
// the two bounds swap order, so the interval is empty and the result is false.
inline bool within_error_margin(float test, float num, float margin) {
  // Written as !(>=) rather than (<) so that a NaN bound still yields false.
  if (!(test >= num * (1 - margin))) {
    return false;
  }
  return test <= num * (1 + margin);
}
105 
// Declarations only: the cross-reference for this header lists makerow.cpp as
// the definition site of every function in this group.
// NOTE(review): the "correct orientation" remark attached to `testing_on`
// does not obviously describe a flag of that name; confirm against makerow.cpp.
106 void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights,
107  STATS *floating_heights);
108 
109 float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks);
110 float make_rows(ICOORD page_tr, // top right
111  TO_BLOCK_LIST *port_blocks); // list of blocks
112 void make_initial_textrows(ICOORD page_tr, // top right
113  TO_BLOCK *block, // block to do
114  FCOORD rotation, // for drawing
115  bool testing_on); // correct orientation
116 void fit_lms_line(TO_ROW *row);
117 void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks
118  float &page_m, // average gradient (non-const reference; presumably an output - confirm)
119  float &page_err); // average error (non-const reference; presumably an output - confirm)
120 void vigorous_noise_removal(TO_BLOCK *block);
121 void cleanup_rows_making(ICOORD page_tr, // top right
122  TO_BLOCK *block, // block to do
123  float gradient, // gradient to fit
124  FCOORD rotation, // for drawing
125  int32_t block_edge, // edge of block
126  bool testing_on); // correct orientation
// Declarations only: the cross-reference for this header lists makerow.cpp as
// the definition site of all three functions.
// NOTE(review): "find lines" on delete_non_dropout_rows and "correct
// orientation" on `testing_on` look copy-pasted from other prototypes;
// confirm the intended wording against makerow.cpp.
127 void delete_non_dropout_rows( // find lines
128  TO_BLOCK *block, // block to do
129  float gradient, // global skew
130  FCOORD rotation, // deskew vector
131  int32_t block_edge, // left edge
132  bool testing_on // correct orientation
133 );
134 bool find_best_dropout_row( // find neighbours
135  TO_ROW *row, // row to test
136  int32_t distance, // dropout distance
137  float dist_limit, // threshold distance
138  int32_t line_index, // index of row
139  TO_ROW_IT *row_it, // current position
140  bool testing_on // correct orientation
141 );
142 TBOX deskew_block_coords( // returns the block box
143  TO_BLOCK *block, // block to do
144  float gradient // global skew
145 );
// Declarations only: the cross-reference for this header lists makerow.cpp as
// the definition site of all three functions.
// NOTE(review): all three prototypes carry the same "project blobs" remark,
// which looks copy-pasted; confirm what each one does in makerow.cpp.
146 void compute_line_occupation( // project blobs
147  TO_BLOCK *block, // block to do
148  float gradient, // global skew
149  int32_t min_y, // min coord in block
150  int32_t max_y, // max coord in block
151  int32_t *occupation, // output projection
152  int32_t *deltas // derivative
153 );
154 void compute_occupation_threshold( // project blobs
155  int32_t low_window, // below result point
156  int32_t high_window, // above result point
157  int32_t line_count, // array sizes
158  int32_t *occupation, // input projection
159  int32_t *thresholds // output thresholds
160 );
161 void compute_dropout_distances( // project blobs
162  int32_t *occupation, // input projection
163  int32_t *thresholds, // output thresholds (NOTE(review): same wording as compute_occupation_threshold; confirm direction)
164  int32_t line_count // array sizes
165 );
// Declarations only. The cross-reference for this header lists makerow.cpp as
// the definition site of expand_rows, adjust_row_limits and compute_row_stats.
// NOTE(review): no definition location is listed there for
// median_block_xheight; confirm that it is still implemented.
// NOTE(review): the repeated "find lines" and "correct orientation" remarks
// look copy-pasted; confirm the intended wording against makerow.cpp.
166 void expand_rows( // find lines
167  ICOORD page_tr, // top right
168  TO_BLOCK *block, // block to do
169  float gradient, // gradient to fit
170  FCOORD rotation, // for drawing
171  int32_t block_edge, // edge of block
172  bool testing_on // correct orientation
173 );
174 void adjust_row_limits( // tidy limits
175  TO_BLOCK *block // block to do
176 );
177 void compute_row_stats( // find lines
178  TO_BLOCK *block, // block to do
179  bool testing_on // correct orientation
180 );
181 float median_block_xheight( // find lines
182  TO_BLOCK *block, // block to do
183  float gradient // global skew
184 );
185 
// Declarations only: the cross-reference for this header lists makerow.cpp as
// the definition site of all four functions.
186 int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only,
187  int min_height, int max_height, float *xheight, float *ascrise);
188 
189 int32_t compute_row_descdrop(TO_ROW *row, // row to do
190  float gradient, // global skew
191  int xheight_blob_count, STATS *heights); // NOTE(review): the definition names the last parameter asc_heights
192 int32_t compute_height_modes(STATS *heights, // stats to search
193  int32_t min_height, // bottom of range
194  int32_t max_height, // top of range
195  int32_t *modes, // output array
196  int32_t maxmodes); // size of modes
197 void correct_row_xheight(TO_ROW *row, // row to fix
198  float xheight, // average values
199  float ascrise, float descdrop);
// Declarations only: the cross-reference for this header lists makerow.cpp as
// the definition site of every function in this group.
// NOTE(review): the "correct orientation" remark attached to `testing_on`
// does not obviously describe a flag of that name; confirm against makerow.cpp.
200 void separate_underlines(TO_BLOCK *block, // block to do
201  float gradient, // skew angle
202  FCOORD rotation, // inverse landscape
203  bool testing_on); // correct orientation
204 void pre_associate_blobs(ICOORD page_tr, // top right
205  TO_BLOCK *block, // block to do
206  FCOORD rotation, // inverse landscape
207  bool testing_on); // correct orientation
208 void fit_parallel_rows(TO_BLOCK *block, // block to do
209  float gradient, // gradient to fit
210  FCOORD rotation, // for drawing
211  int32_t block_edge, // edge of block
212  bool testing_on); // correct orientation
213 void fit_parallel_lms(float gradient, // forced gradient
214  TO_ROW *row); // row to fit
215 void make_baseline_spline(TO_ROW *row, // row to fit
216  TO_BLOCK *block); // block it came from
// Declaration only: the cross-reference for this header lists makerow.cpp as
// the definition site.
217 bool segment_baseline( // split baseline
218  TO_ROW *row, // row to fit
219  TO_BLOCK *block, // block it came from
220  int32_t &segments, // number of segments (non-const reference; presumably an output - confirm)
221  int32_t *xstarts // coords of segments
222 );
// Declaration only: the cross-reference for this header lists makerow.cpp as
// the definition site.
// NOTE(review): returns a raw double*; who owns the pointed-to storage is not
// visible from this declaration - confirm in makerow.cpp.
223 double *linear_spline_baseline( // split baseline
224  TO_ROW *row, // row to fit
225  TO_BLOCK *block, // block it came from
226  int32_t &segments, // number of segments (non-const reference; presumably an output - confirm)
227  int32_t xstarts[] // coords of segments
228 );
// Declarations only: the cross-reference for this header lists makerow.cpp as
// the definition site of all four functions.
// NOTE(review): "find lines" on assign_blobs_to_rows matches the remark on
// several unrelated prototypes and looks copy-pasted; confirm the wording.
229 void assign_blobs_to_rows( // find lines
230  TO_BLOCK *block, // block to do
231  float *gradient, // block skew
232  int pass, // identification
233  bool reject_misses, // discard big ones
234  bool make_new_rows, // add rows for unmatched
235  bool drawing_skew // draw smoothed skew
236 );
237 // find best row; the result is an OVERLAP_STATE (ASSIGN, REJECT or NEW_ROW)
238 OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, // iterator
239  TO_ROW *&best_row, // output row
240  float top, // top of blob
241  float bottom, // bottom of blob
242  float rowsize, // max row size
243  bool testing_blob // test flag (NOTE(review): purpose not visible here; confirm)
244 );
245 int blob_x_order( // sort function (takes two const void* items, returns int)
246  const void *item1, // items to compare
247  const void *item2);
248 
249 void mark_repeated_chars(TO_ROW *row);
250 
251 } // namespace tesseract
252 
253 #endif
@ TBOX
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44
bool textord_old_baselines
Definition: makerow.cpp:55
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:976
bool textord_show_final_rows
Definition: makerow.cpp:50
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:612
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:86
double textord_xheight_error_margin
Definition: makerow.cpp:93
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2050
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:1970
double textord_minxh
Definition: makerow.cpp:79
int textord_spline_minblobs
Definition: makerow.cpp:67
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1846
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:563
void compute_row_stats(TO_BLOCK *block, bool testing_on)
Definition: makerow.cpp:1163
double_VAR_H(classify_min_slope)
int32_t compute_height_modes(STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
Definition: makerow.cpp:1629
int textord_min_xheight
Definition: makerow.cpp:70
double textord_chop_width
Definition: makerow.cpp:76
bool textord_show_initial_rows
Definition: makerow.cpp:47
bool textord_show_expanded_rows
Definition: makerow.cpp:49
int textord_test_y
Definition: makerow.cpp:65
double textord_ascheight_mode_fraction
Definition: makerow.cpp:87
int textord_test_x
Definition: makerow.cpp:64
double textord_spline_shift_fraction
Definition: makerow.cpp:71
double textord_descx_ratio_min
Definition: makerow.cpp:91
ROW_CATEGORY
Definition: makerow.h:36
@ ROW_ASCENDERS_FOUND
Definition: makerow.h:37
@ ROW_DESCENDERS_FOUND
Definition: makerow.h:38
@ ROW_INVALID
Definition: makerow.h:40
@ ROW_UNKNOWN
Definition: makerow.h:39
bool textord_debug_xheights
Definition: makerow.cpp:59
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1129
int textord_spline_medianwin
Definition: makerow.cpp:68
double textord_skew_lag
Definition: makerow.cpp:73
double textord_excess_blobsize
Definition: makerow.cpp:81
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2563
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
Definition: makerow.cpp:2269
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1418
bool textord_parallel_baselines
Definition: makerow.cpp:53
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:254
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:296
double textord_min_blob_height_fraction
Definition: makerow.cpp:85
bool textord_show_parallel_rows
Definition: makerow.cpp:48
bool textord_fix_makerow_bug
Definition: makerow.cpp:58
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:508
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:315
double textord_occupancy_threshold
Definition: makerow.cpp:82
double textord_underline_width
Definition: makerow.cpp:83
double textord_skew_ile
Definition: makerow.cpp:72
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
Definition: makerow.cpp:2177
int textord_lms_line_trials
Definition: makerow.cpp:94
double textord_ascx_ratio_max
Definition: makerow.cpp:90
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:102
double textord_ascx_ratio_min
Definition: makerow.cpp:89
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:765
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:229
BOOL_VAR_H(wordrec_display_splits)
bool textord_heavy_nr
Definition: makerow.cpp:46
float median_block_xheight(TO_BLOCK *block, float gradient)
bool textord_test_landscape
Definition: makerow.cpp:52
bool textord_new_initial_xheight
Definition: makerow.cpp:95
INT_VAR_H(editor_image_xpos)
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1781
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:1928
OVERLAP_STATE
Definition: makerow.h:30
@ ASSIGN
Definition: makerow.h:31
@ NEW_ROW
Definition: makerow.h:33
@ REJECT
Definition: makerow.h:32
double textord_linespace_iqrlimit
Definition: makerow.cpp:74
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1480
double textord_min_linesize
Definition: makerow.cpp:80
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1690
int textord_min_blobs_in_row
Definition: makerow.cpp:66
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
Definition: makerow.cpp:2449
double textord_width_limit
Definition: makerow.cpp:75
int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
Definition: makerow.cpp:1576
double textord_xheight_mode_fraction
Definition: makerow.cpp:86
bool textord_fix_xheight_bug
Definition: makerow.cpp:57
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:94
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2540
bool textord_debug_blob
Definition: makerow.cpp:96
void compute_occupation_threshold(int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
Definition: makerow.cpp:852
bool textord_show_final_blobs
Definition: makerow.cpp:51
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:190
void compute_dropout_distances(int32_t *occupation, int32_t *thresholds, int32_t line_count)
Definition: makerow.cpp:933
bool textord_straight_baselines
Definition: makerow.cpp:54
void compute_line_occupation(TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
Definition: makerow.cpp:799
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
Definition: makerow.cpp:2080
double textord_descx_ratio_max
Definition: makerow.cpp:92
bool find_best_dropout_row(TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
Definition: makerow.cpp:696
bool textord_old_xheight
Definition: makerow.cpp:56
integer coordinate
Definition: points.h:36