tesseract  5.0.0
stepblob.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: stepblob.cpp (Formerly cblob.c)
3  * Description: Code for C_BLOB class.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1991, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23 
24 #include "stepblob.h"
25 
26 #include "points.h" // for operator+=, FCOORD, ICOORD
27 
28 #include <allheaders.h> // for pixCreate, pixGetDepth
29 #include <vector> // for std::vector
30 
31 namespace tesseract {
32 
33 class DENORM;
34 
35 // Max perimeter to width ratio for a baseline position above box bottom.
36 const double kMaxPerimeterWidthRatio = 8.0;
37 
38 /**********************************************************************
39  * position_outline
40  *
41  * Position the outline in the given list at the relevant place
42  * according to its nesting.
43  **********************************************************************/
44 static void position_outline( // put in place
45  C_OUTLINE *outline, // thing to place
46  C_OUTLINE_LIST *destlist // desstination list
47 ) {
48  C_OUTLINE_IT it = destlist; // iterator
49  // iterator on children
50  C_OUTLINE_IT child_it = outline->child();
51 
52  if (!it.empty()) {
53  do {
54  // outline from dest list
55  C_OUTLINE *dest_outline = it.data(); // get destination
56  // encloses dest
57  if (*dest_outline < *outline) {
58  // take off list
59  dest_outline = it.extract();
60  // put this in place
61  it.add_after_then_move(outline);
62  // make it a child
63  child_it.add_to_end(dest_outline);
64  while (!it.at_last()) {
65  it.forward(); // do rest of list
66  // check for other children
67  dest_outline = it.data();
68  if (*dest_outline < *outline) {
69  // take off list
70  dest_outline = it.extract();
71  child_it.add_to_end(dest_outline);
72  // make it a child
73  if (it.empty()) {
74  break;
75  }
76  }
77  }
78  return; // finished
79  }
80  // enclosed by dest
81  else if (*outline < *dest_outline) {
82  position_outline(outline, dest_outline->child());
83  // place in child list
84  return; // finished
85  }
86  it.forward();
87  } while (!it.at_first());
88  }
89  it.add_to_end(outline); // at outer level
90 }
91 
92 /**********************************************************************
93  * plot_outline_list
94  *
95  * Draw a list of outlines in the given colour and their children
96  * in the child colour.
97  **********************************************************************/
98 
99 #ifndef GRAPHICS_DISABLED
100 static void plot_outline_list( // draw outlines
101  C_OUTLINE_LIST *list, // outline to draw
102  ScrollView *window, // window to draw in
103  ScrollView::Color colour, // colour to use
104  ScrollView::Color child_colour // colour of children
105 ) {
106  C_OUTLINE *outline; // current outline
107  C_OUTLINE_IT it = list; // iterator
108 
109  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
110  outline = it.data();
111  // draw it
112  outline->plot(window, colour);
113  if (!outline->child()->empty()) {
114  plot_outline_list(outline->child(), window, child_colour, child_colour);
115  }
116  }
117 }
118 // Draws the outlines in the given colour, and child_colour, normalized
119 // using the given denorm, making use of sub-pixel accurate information
120 // if available.
121 static void plot_normed_outline_list(const DENORM &denorm, C_OUTLINE_LIST *list,
122  ScrollView::Color colour, ScrollView::Color child_colour,
123  ScrollView *window) {
124  C_OUTLINE_IT it(list);
125  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
126  C_OUTLINE *outline = it.data();
127  outline->plot_normed(denorm, colour, window);
128  if (!outline->child()->empty()) {
129  plot_normed_outline_list(denorm, outline->child(), child_colour, child_colour, window);
130  }
131  }
132 }
133 #endif
134 
135 /**********************************************************************
136  * reverse_outline_list
137  *
138  * Reverse a list of outlines and their children.
139  **********************************************************************/
140 
141 static void reverse_outline_list(C_OUTLINE_LIST *list) {
142  C_OUTLINE_IT it = list; // iterator
143 
144  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
145  C_OUTLINE *outline = it.data();
146  outline->reverse(); // reverse it
147  outline->set_flag(COUT_INVERSE, true);
148  if (!outline->child()->empty()) {
149  reverse_outline_list(outline->child());
150  }
151  }
152 }
153 
154 /**********************************************************************
155  * C_BLOB::C_BLOB
156  *
157  * Constructor to build a C_BLOB from a list of C_OUTLINEs.
158  * The C_OUTLINEs are not copied so the source list is emptied.
159  * The C_OUTLINEs are nested correctly in the blob.
160  **********************************************************************/
161 
162 C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) {
163  for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
164  C_OUTLINE *outline = ol_it.extract();
165  // Position this outline in appropriate position in the hierarchy.
166  position_outline(outline, &outlines);
167  }
169 }
170 
171 // Simpler constructor to build a blob from a single outline that has
172 // already been fully initialized.
174  C_OUTLINE_IT it(&outlines);
175  it.add_to_end(outline);
176 }
177 
178 // Builds a set of one or more blobs from a list of outlines.
179 // Input: one outline on outline_list contains all the others, but the
180 // nesting and order are undefined.
181 // If good_blob is true, the blob is added to good_blobs_it, unless
182 // an illegal (generation-skipping) parent-child relationship is found.
183 // If so, the parent blob goes to bad_blobs_it, and the immediate children
184 // are promoted to the top level, recursively being sent to good_blobs_it.
185 // If good_blob is false, all created blobs will go to the bad_blobs_it.
186 // Output: outline_list is empty. One or more blobs are added to
187 // good_blobs_it and/or bad_blobs_it.
188 void C_BLOB::ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list,
189  C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it) {
190  // List of top-level outlines with correctly nested children.
191  C_OUTLINE_LIST nested_outlines;
192  for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
193  C_OUTLINE *outline = ol_it.extract();
194  // Position this outline in appropriate position in the hierarchy.
195  position_outline(outline, &nested_outlines);
196  }
197  // Check for legal nesting and reassign as required.
198  for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) {
199  C_OUTLINE *outline = ol_it.extract();
200  bool blob_is_good = good_blob;
201  if (!outline->IsLegallyNested()) {
202  // The blob is illegally nested.
203  // Mark it bad, and add all its children to the top-level list.
204  blob_is_good = false;
205  ol_it.add_list_after(outline->child());
206  }
207  auto *blob = new C_BLOB(outline);
208  // Set inverse flag and reverse if needed.
209  blob->CheckInverseFlagAndDirection();
210  // Put on appropriate list.
211  if (!blob_is_good && bad_blobs_it != nullptr) {
212  bad_blobs_it->add_after_then_move(blob);
213  } else {
214  good_blobs_it->add_after_then_move(blob);
215  }
216  }
217 }
218 
219 // Sets the COUT_INVERSE flag appropriately on the outlines and their
220 // children recursively, reversing the outlines if needed so that
221 // everything has an anticlockwise top-level.
223  C_OUTLINE_IT ol_it(&outlines);
224  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
225  C_OUTLINE *outline = ol_it.data();
226  if (outline->turn_direction() < 0) {
227  outline->reverse();
228  reverse_outline_list(outline->child());
229  outline->set_flag(COUT_INVERSE, true);
230  } else {
231  outline->set_flag(COUT_INVERSE, false);
232  }
233  }
234 }
235 
236 // Build and return a fake blob containing a single fake outline with no
237 // steps.
239  C_OUTLINE_LIST outlines;
240  C_OUTLINE::FakeOutline(box, &outlines);
241  return new C_BLOB(&outlines);
242 }
243 
244 /**********************************************************************
245  * C_BLOB::bounding_box
246  *
247  * Return the bounding box of the blob.
248  **********************************************************************/
249 
250 TBOX C_BLOB::bounding_box() const { // bounding box
251  // This is a read-only iteration of the outlines.
252  C_OUTLINE_IT it = const_cast<C_OUTLINE_LIST *>(&outlines);
253  TBOX box; // bounding box
254 
255  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256  C_OUTLINE *outline = it.data();
257  box += outline->bounding_box();
258  }
259  return box;
260 }
261 
262 /**********************************************************************
263  * C_BLOB::area
264  *
265  * Return the area of the blob.
266  **********************************************************************/
267 
268 int32_t C_BLOB::area() { // area
269  C_OUTLINE_IT it = &outlines; // outlines of blob
270  int32_t total = 0; // total area
271 
272  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
273  C_OUTLINE *outline = it.data();
274  total += outline->area();
275  }
276  return total;
277 }
278 
279 /**********************************************************************
280  * C_BLOB::perimeter
281  *
282  * Return the perimeter of the top and 2nd level outlines.
283  **********************************************************************/
284 
285 int32_t C_BLOB::perimeter() {
286  C_OUTLINE_IT it = &outlines; // outlines of blob
287  int32_t total = 0; // total perimeter
288 
289  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
290  C_OUTLINE *outline = it.data();
291  total += outline->perimeter();
292  }
293  return total;
294 }
295 
296 /**********************************************************************
297  * C_BLOB::outer_area
298  *
299  * Return the area of the blob.
300  **********************************************************************/
301 
302 int32_t C_BLOB::outer_area() { // area
303  C_OUTLINE_IT it = &outlines; // outlines of blob
304  int32_t total = 0; // total area
305 
306  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
307  C_OUTLINE *outline = it.data();
308  total += outline->outer_area();
309  }
310  return total;
311 }
312 
313 /**********************************************************************
314  * C_BLOB::count_transitions
315  *
316  * Return the total x and y maxes and mins in the blob.
317  * Chlid outlines are not counted.
318  **********************************************************************/
319 
321  int32_t threshold // on size
322 ) {
323  C_OUTLINE_IT it = &outlines; // outlines of blob
324  int32_t total = 0; // total area
325 
326  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
327  C_OUTLINE *outline = it.data();
328  total += outline->count_transitions(threshold);
329  }
330  return total;
331 }
332 
333 /**********************************************************************
334  * C_BLOB::move
335  *
336  * Move C_BLOB by vector
337  **********************************************************************/
338 
339 void C_BLOB::move( // reposition blob
340  const ICOORD vec // by vector
341 ) {
342  C_OUTLINE_IT it(&outlines); // iterator
343 
344  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
345  it.data()->move(vec); // move each outline
346  }
347 }
348 
349 // Static helper for C_BLOB::rotate to allow recursion of child outlines.
350 static void RotateOutlineList(const FCOORD &rotation, C_OUTLINE_LIST *outlines) {
351  C_OUTLINE_LIST new_outlines;
352  C_OUTLINE_IT src_it(outlines);
353  C_OUTLINE_IT dest_it(&new_outlines);
354  while (!src_it.empty()) {
355  C_OUTLINE *old_outline = src_it.extract();
356  src_it.forward();
357  auto *new_outline = new C_OUTLINE(old_outline, rotation);
358  if (!old_outline->child()->empty()) {
359  RotateOutlineList(rotation, old_outline->child());
360  C_OUTLINE_IT child_it(new_outline->child());
361  child_it.add_list_after(old_outline->child());
362  }
363  delete old_outline;
364  dest_it.add_to_end(new_outline);
365  }
366  src_it.add_list_after(&new_outlines);
367 }
368 
369 /**********************************************************************
370  * C_BLOB::rotate
371  *
372  * Rotate C_BLOB by rotation.
373  * Warning! has to rebuild all the C_OUTLINEs.
374  **********************************************************************/
375 void C_BLOB::rotate(const FCOORD &rotation) {
376  RotateOutlineList(rotation, &outlines);
377 }
378 
379 // Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
380 // outline list and its children.
381 static void ComputeEdgeOffsetsOutlineList(int threshold, Image pix, C_OUTLINE_LIST *list) {
382  C_OUTLINE_IT it(list);
383  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
384  C_OUTLINE *outline = it.data();
385  if (pix != nullptr && pixGetDepth(pix) == 8) {
386  outline->ComputeEdgeOffsets(threshold, pix);
387  } else {
388  outline->ComputeBinaryOffsets();
389  }
390  if (!outline->child()->empty()) {
391  ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
392  }
393  }
394 }
395 
396 // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
397 // if the supplied pix is 8-bit or the binary edges if nullptr.
398 void C_BLOB::ComputeEdgeOffsets(int threshold, Image pix) {
399  ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
400 }
401 
402 // Estimates and returns the baseline position based on the shape of the
403 // outlines.
404 // We first find the minimum y-coord (y_mins) at each x-coord within the blob.
405 // If there is a run of some y or y+1 in y_mins that is longer than the total
406 // number of positions at bottom or bottom+1, subject to the additional
407 // condition that at least one side of the y/y+1 run is higher than y+1, so it
408 // is not a local minimum, then y, not the bottom, makes a good candidate
409 // baseline position for this blob. Eg
410 // | ---|
411 // | |
412 // |- -----------| <= Good candidate baseline position.
413 // |- -|
414 // | -|
415 // |---| <= Bottom of blob
417  TBOX box = bounding_box();
418  int left = box.left();
419  int width = box.width();
420  int bottom = box.bottom();
421  if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio) {
422  return bottom; // This is only for non-CJK blobs.
423  }
424  // Get the minimum y coordinate at each x-coordinate.
425  std::vector<int> y_mins(width + 1, box.top());
426  C_OUTLINE_IT it(&outlines);
427  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
428  C_OUTLINE *outline = it.data();
429  ICOORD pos = outline->start_pos();
430  for (int s = 0; s < outline->pathlength(); ++s) {
431  if (pos.y() < y_mins[pos.x() - left]) {
432  y_mins[pos.x() - left] = pos.y();
433  }
434  pos += outline->step(s);
435  }
436  }
437  // Find the total extent of the bottom or bottom + 1.
438  int bottom_extent = 0;
439  for (int x = 0; x <= width; ++x) {
440  if (y_mins[x] == bottom || y_mins[x] == bottom + 1) {
441  ++bottom_extent;
442  }
443  }
444  // Find the lowest run longer than the bottom extent that is not the bottom.
445  int best_min = box.top();
446  int prev_run = 0;
447  int prev_y = box.top();
448  int prev_prev_y = box.top();
449  for (int x = 0; x < width; x += prev_run) {
450  // Find the length of the current run.
451  int y_at_x = y_mins[x];
452  int run = 1;
453  while (x + run <= width && y_mins[x + run] == y_at_x) {
454  ++run;
455  }
456  if (y_at_x > bottom + 1) {
457  // Possible contender.
458  int total_run = run;
459  // Find extent of current value or +1 to the right of x.
460  while (x + total_run <= width &&
461  (y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1)) {
462  ++total_run;
463  }
464  // At least one end has to be higher so it is not a local max.
465  if (prev_prev_y > y_at_x + 1 || x + total_run > width || y_mins[x + total_run] > y_at_x + 1) {
466  // If the prev_run is at y + 1, then we can add that too. There cannot
467  // be a suitable run at y before that or we would have found it already.
468  if (prev_run > 0 && prev_y == y_at_x + 1) {
469  total_run += prev_run;
470  }
471  if (total_run > bottom_extent && y_at_x < best_min) {
472  best_min = y_at_x;
473  }
474  }
475  }
476  prev_run = run;
477  prev_prev_y = prev_y;
478  prev_y = y_at_x;
479  }
480  return best_min == box.top() ? bottom : best_min;
481 }
482 
483 static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Image pix) {
484  C_OUTLINE_IT it(list);
485  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
486  C_OUTLINE *outline = it.data();
487  outline->render(left, top, pix);
488  if (!outline->child()->empty()) {
489  render_outline_list(outline->child(), left, top, pix);
490  }
491  }
492 }
493 
494 static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Image pix) {
495  C_OUTLINE_IT it(list);
496  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
497  C_OUTLINE *outline = it.data();
498  outline->render_outline(left, top, pix);
499  }
500 }
501 
502 // Returns a Pix rendering of the blob. pixDestroy after use.
504  TBOX box = bounding_box();
505  Image pix = pixCreate(box.width(), box.height(), 1);
506  render_outline_list(&outlines, box.left(), box.top(), pix);
507  return pix;
508 }
509 
510 // Returns a Pix rendering of the outline of the blob. (no fill).
511 // pixDestroy after use.
513  TBOX box = bounding_box();
514  Image pix = pixCreate(box.width(), box.height(), 1);
515  render_outline_list_outline(&outlines, box.left(), box.top(), pix);
516  return pix;
517 }
518 
519 /**********************************************************************
520  * C_BLOB::plot
521  *
522  * Draw the C_BLOB in the given colour.
523  **********************************************************************/
524 
525 #ifndef GRAPHICS_DISABLED
526 void C_BLOB::plot(ScrollView *window, // window to draw in
527  ScrollView::Color blob_colour, // main colour
528  ScrollView::Color child_colour) { // for holes
529  plot_outline_list(&outlines, window, blob_colour, child_colour);
530 }
531 // Draws the blob in the given colour, and child_colour, normalized
532 // using the given denorm, making use of sub-pixel accurate information
533 // if available.
534 void C_BLOB::plot_normed(const DENORM &denorm, ScrollView::Color blob_colour,
535  ScrollView::Color child_colour, ScrollView *window) {
536  plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour, window);
537 }
538 #endif
539 
540 } // namespace tesseract
@ COUT_INVERSE
Definition: coutln.h:46
const double kMaxPerimeterWidthRatio
Definition: stepblob.cpp:36
int32_t pathlength() const
Definition: coutln.h:134
void set_flag(C_OUTLINE_FLAGS mask, bool value)
Definition: coutln.h:102
bool IsLegallyNested() const
Definition: coutln.cpp:620
int32_t area() const
Definition: coutln.cpp:257
ICOORD step(int index) const
Definition: coutln.h:143
const TBOX & bounding_box() const
Definition: coutln.h:113
int32_t perimeter() const
Definition: coutln.cpp:293
C_OUTLINE_LIST * child()
Definition: coutln.h:108
int16_t turn_direction() const
Definition: coutln.cpp:551
int32_t outer_area() const
Definition: coutln.cpp:313
void ComputeBinaryOffsets()
Definition: coutln.cpp:850
const ICOORD & start_pos() const
Definition: coutln.h:147
void render_outline(int left, int top, Image pix) const
Definition: coutln.cpp:926
int32_t count_transitions(int32_t threshold)
Definition: coutln.cpp:347
static void FakeOutline(const TBOX &box, C_OUTLINE_LIST *outlines)
Definition: coutln.cpp:241
void ComputeEdgeOffsets(int threshold, Image pix)
Definition: coutln.cpp:738
void render(int left, int top, Image pix) const
Definition: coutln.cpp:906
integer coordinate
Definition: points.h:36
TDimension y() const
access_function
Definition: points.h:62
TDimension x() const
access function
Definition: points.h:58
TDimension left() const
Definition: rect.h:82
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
TDimension top() const
Definition: rect.h:68
TDimension bottom() const
Definition: rect.h:75
int32_t count_transitions(int32_t threshold)
Definition: stepblob.cpp:320
void rotate(const FCOORD &rotation)
Definition: stepblob.cpp:375
Image render_outline()
Definition: stepblob.cpp:512
int32_t outer_area()
Definition: stepblob.cpp:302
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
void plot(ScrollView *window, ScrollView::Color blob_colour, ScrollView::Color child_colour)
Definition: stepblob.cpp:526
void plot_normed(const DENORM &denorm, ScrollView::Color blob_colour, ScrollView::Color child_colour, ScrollView *window)
Definition: stepblob.cpp:534
TBOX bounding_box() const
Definition: stepblob.cpp:250
void ComputeEdgeOffsets(int threshold, Image pix)
Definition: stepblob.cpp:398
int32_t perimeter()
Definition: stepblob.cpp:285
int16_t EstimateBaselinePosition()
Definition: stepblob.cpp:416
int32_t area()
Definition: stepblob.cpp:268
static void ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list, C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it)
Definition: stepblob.cpp:188
void move(const ICOORD vec)
Definition: stepblob.cpp:339
void CheckInverseFlagAndDirection()
Definition: stepblob.cpp:222