tesseract  5.0.0
underlin.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: underlin.cpp (Formerly undrline.c)
3  * Description: Code to chop blobs apart from underlines.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1994, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "underlin.h"
20 
21 namespace tesseract {
22 
// Multiplied by the row's xheight in restore_underlined_blobs() to give the
// baseline_offset that is handed to find_underlined_blobs().
double_VAR(textord_underline_offset, 0.1, "Fraction of x to ignore");
// Not read by any function visible in this file.
// NOTE(review): presumably checked by the caller before
// restore_underlined_blobs() is invoked -- confirm.
BOOL_VAR(textord_restore_underlines, true, "Chop underlines & put back");
25 
26 /**********************************************************************
27  * restore_underlined_blobs
28  *
29  * Find underlined blobs and put them back in the row.
30  **********************************************************************/
31 
32 void restore_underlined_blobs( // get chop points
33  TO_BLOCK *block // block to do
34 ) {
35  int16_t chop_coord; // chop boundary
36  TBOX blob_box; // of underline
37  BLOBNBOX *u_line; // underline bit
38  TO_ROW *row; // best row for blob
39  ICOORDELT_LIST chop_cells; // blobs to cut out
40  // real underlines
41  BLOBNBOX_LIST residual_underlines;
42  C_OUTLINE_LIST left_coutlines;
43  C_OUTLINE_LIST right_coutlines;
44  ICOORDELT_IT cell_it = &chop_cells;
45  // under lines
46  BLOBNBOX_IT under_it = &block->underlines;
47  BLOBNBOX_IT ru_it = &residual_underlines;
48 
49  if (block->get_rows()->empty()) {
50  return; // Don't crash if there are no rows.
51  }
52  for (under_it.mark_cycle_pt(); !under_it.cycled_list(); under_it.forward()) {
53  u_line = under_it.extract();
54  blob_box = u_line->bounding_box();
55  row = most_overlapping_row(block->get_rows(), u_line);
56  if (row == nullptr) {
57  return; // Don't crash if there is no row.
58  }
59  find_underlined_blobs(u_line, &row->baseline, row->xheight,
60  row->xheight * textord_underline_offset, &chop_cells);
61  cell_it.set_to_list(&chop_cells);
62  for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) {
63  chop_coord = cell_it.data()->x();
64  if (cell_it.data()->y() - chop_coord > textord_fp_chop_error + 1) {
65  split_to_blob(u_line, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines,
66  &right_coutlines);
67  if (!left_coutlines.empty()) {
68  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
69  }
70  chop_coord = cell_it.data()->y();
71  split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5, &left_coutlines,
72  &right_coutlines);
73  if (!left_coutlines.empty()) {
74  row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
75  }
76  u_line = nullptr; // no more blobs to add
77  }
78  delete cell_it.extract();
79  }
80  if (!right_coutlines.empty()) {
81  split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5, &left_coutlines,
82  &right_coutlines);
83  if (!left_coutlines.empty()) {
84  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
85  }
86  }
87  delete u_line;
88  }
89  if (!ru_it.empty()) {
90  ru_it.move_to_first();
91  for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
92  under_it.add_after_then_move(ru_it.extract());
93  }
94  }
95 }
96 
97 /**********************************************************************
98  * most_overlapping_row
99  *
100  * Return the row which most overlaps the blob.
101  **********************************************************************/
102 
103 TO_ROW *most_overlapping_row( // find best row
104  TO_ROW_LIST *rows, // list of rows
105  BLOBNBOX *blob // blob to place
106 ) {
107  int16_t x = (blob->bounding_box().left() + blob->bounding_box().right()) / 2;
108  TO_ROW_IT row_it = rows; // row iterator
109  TO_ROW *row; // current row
110  TO_ROW *best_row; // output row
111  float overlap; // of blob & row
112  float bestover; // best overlap
113 
114  best_row = nullptr;
115  bestover = static_cast<float>(-INT32_MAX);
116  if (row_it.empty()) {
117  return nullptr;
118  }
119  row = row_it.data();
120  row_it.mark_cycle_pt();
121  while (row->baseline.y(x) + row->descdrop > blob->bounding_box().top() && !row_it.cycled_list()) {
122  best_row = row;
123  bestover = blob->bounding_box().top() - row->baseline.y(x) + row->descdrop;
124  row_it.forward();
125  row = row_it.data();
126  }
127  while (row->baseline.y(x) + row->xheight + row->ascrise >= blob->bounding_box().bottom() &&
128  !row_it.cycled_list()) {
129  overlap = row->baseline.y(x) + row->xheight + row->ascrise;
130  if (blob->bounding_box().top() < overlap) {
131  overlap = blob->bounding_box().top();
132  }
133  if (blob->bounding_box().bottom() > row->baseline.y(x) + row->descdrop) {
134  overlap -= blob->bounding_box().bottom();
135  } else {
136  overlap -= row->baseline.y(x) + row->descdrop;
137  }
138  if (overlap > bestover) {
139  bestover = overlap;
140  best_row = row;
141  }
142  row_it.forward();
143  row = row_it.data();
144  }
145  if (bestover < 0 &&
146  row->baseline.y(x) + row->xheight + row->ascrise - blob->bounding_box().bottom() > bestover) {
147  best_row = row;
148  }
149  return best_row;
150 }
151 
152 /**********************************************************************
153  * find_underlined_blobs
154  *
155  * Find the start and end coords of blobs in the underline.
156  **********************************************************************/
157 
158 void find_underlined_blobs( // get chop points
159  BLOBNBOX *u_line, // underlined unit
160  QSPLINE *baseline, // actual baseline
161  float xheight, // height of line
162  float baseline_offset, // amount to shrinke it
163  ICOORDELT_LIST *chop_cells // places to chop
164 ) {
165  ICOORD blob_chop; // sides of blob
166  TBOX blob_box = u_line->bounding_box();
167  // cell iterator
168  ICOORDELT_IT cell_it = chop_cells;
169  STATS upper_proj(blob_box.left(), blob_box.right() + 1);
170  STATS middle_proj(blob_box.left(), blob_box.right() + 1);
171  STATS lower_proj(blob_box.left(), blob_box.right() + 1);
172  C_OUTLINE_IT out_it; // outlines of blob
173 
174  ASSERT_HOST(u_line->cblob() != nullptr);
175 
176  out_it.set_to_list(u_line->cblob()->out_list());
177  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
178  vertical_cunderline_projection(out_it.data(), baseline, xheight, baseline_offset, &lower_proj,
179  &middle_proj, &upper_proj);
180  }
181 
182  for (auto x = blob_box.left(); x < blob_box.right(); x++) {
183  if (middle_proj.pile_count(x) > 0) {
184  auto y = x + 1;
185  for (; y < blob_box.right() && middle_proj.pile_count(y) > 0; y++) {
186  ;
187  }
188  blob_chop = ICOORD(x, y);
189  cell_it.add_after_then_move(new ICOORDELT(blob_chop));
190  x = y;
191  }
192  }
193 }
194 
195 /**********************************************************************
196  * vertical_cunderline_projection
197  *
198  * Compute the vertical projection of a outline from its outlines
199  * and add to the given STATS.
200  **********************************************************************/
201 
202 void vertical_cunderline_projection( // project outlines
203  C_OUTLINE *outline, // outline to project
204  QSPLINE *baseline, // actual baseline
205  float xheight, // height of line
206  float baseline_offset, // amount to shrinke it
207  STATS *lower_proj, // below baseline
208  STATS *middle_proj, // centre region
209  STATS *upper_proj // top region
210 ) {
211  ICOORD pos; // current point
212  ICOORD step; // edge step
213  int16_t lower_y, upper_y; // region limits
214  int32_t length; // of outline
215  int16_t stepindex; // current step
216  C_OUTLINE_IT out_it = outline->child();
217 
218  pos = outline->start_pos();
219  length = outline->pathlength();
220  for (stepindex = 0; stepindex < length; stepindex++) {
221  step = outline->step(stepindex);
222  if (step.x() > 0) {
223  lower_y = static_cast<int16_t>(floor(baseline->y(pos.x()) + baseline_offset + 0.5));
224  upper_y = static_cast<int16_t>(floor(baseline->y(pos.x()) + baseline_offset + xheight + 0.5));
225  if (pos.y() >= lower_y) {
226  lower_proj->add(pos.x(), -lower_y);
227  if (pos.y() >= upper_y) {
228  middle_proj->add(pos.x(), lower_y - upper_y);
229  upper_proj->add(pos.x(), upper_y - pos.y());
230  } else {
231  middle_proj->add(pos.x(), lower_y - pos.y());
232  }
233  } else {
234  lower_proj->add(pos.x(), -pos.y());
235  }
236  } else if (step.x() < 0) {
237  lower_y = static_cast<int16_t>(floor(baseline->y(pos.x() - 1) + baseline_offset + 0.5));
238  upper_y =
239  static_cast<int16_t>(floor(baseline->y(pos.x() - 1) + baseline_offset + xheight + 0.5));
240  if (pos.y() >= lower_y) {
241  lower_proj->add(pos.x() - 1, lower_y);
242  if (pos.y() >= upper_y) {
243  middle_proj->add(pos.x() - 1, upper_y - lower_y);
244  upper_proj->add(pos.x() - 1, pos.y() - upper_y);
245  } else {
246  middle_proj->add(pos.x() - 1, pos.y() - lower_y);
247  }
248  } else {
249  lower_proj->add(pos.x() - 1, pos.y());
250  }
251  }
252  pos += step;
253  }
254 
255  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
256  vertical_cunderline_projection(out_it.data(), baseline, xheight, baseline_offset, lower_proj,
257  middle_proj, upper_proj);
258  }
259 }
260 
261 } // namespace tesseract
#define ASSERT_HOST(x)
Definition: errcode.h:59
#define BOOL_VAR(name, val, comment)
Definition: params.h:359
#define double_VAR(name, val, comment)
Definition: params.h:365
int textord_fp_chop_error
Definition: fpchop.cpp:34
void split_to_blob(BLOBNBOX *blob, int16_t chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
Definition: fpchop.cpp:260
bool textord_restore_underlines
Definition: underlin.cpp:24
@ baseline
Definition: mfoutline.h:53
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:32
double textord_underline_offset
Definition: underlin.cpp:23
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:202
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
Definition: makerow.cpp:2449
void find_underlined_blobs(BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
Definition: underlin.cpp:158
const TBOX & bounding_box() const
Definition: blobbox.h:239
C_BLOB * cblob() const
Definition: blobbox.h:277
QSPLINE baseline
Definition: blobbox.h:676
void insert_blob(BLOBNBOX *blob)
Definition: blobbox.cpp:773
BLOBNBOX_LIST underlines
Definition: blobbox.h:777
TO_ROW_LIST * get_rows()
Definition: blobbox.h:709
int32_t pathlength() const
Definition: coutln.h:134
ICOORD step(int index) const
Definition: coutln.h:143
C_OUTLINE_LIST * child()
Definition: coutln.h:108
const ICOORD & start_pos() const
Definition: coutln.h:147
integer coordinate
Definition: points.h:36
TDimension y() const
access_function
Definition: points.h:62
TDimension x() const
access function
Definition: points.h:58
double y(double x) const
Definition: quspline.cpp:203
TDimension left() const
Definition: rect.h:82
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
void add(int32_t value, int32_t count)
Definition: statistc.cpp:99
int32_t pile_count(int32_t value) const
Definition: statistc.h:75
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70