tesseract  5.0.0
tabfind.cpp
Go to the documentation of this file.
1 // File: tabfind.cpp
3 // Description: Subclass of BBGrid to find vertically aligned blobs.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2008, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifdef HAVE_CONFIG_H
20 # include "config_auto.h"
21 #endif
22 
23 #include "alignedblob.h"
24 #include "colpartitiongrid.h"
25 #include "detlinefit.h"
26 #include "host.h" // for NearlyEqual
27 #include "linefind.h"
28 #include "tabfind.h"
29 
30 #include <algorithm>
31 
32 namespace tesseract {
33 
34 // Multiple of box size to search for initial gaps.
35 const int kTabRadiusFactor = 5;
36 // Min and Max multiple of height to search vertically when extrapolating.
37 const int kMinVerticalSearch = 3;
38 const int kMaxVerticalSearch = 12;
39 const int kMaxRaggedSearch = 25;
40 // Minimum number of lines in a column width to make it interesting.
41 const int kMinLinesInColumn = 10;
42 // Minimum width of a column to be interesting.
43 const int kMinColumnWidth = 200;
44 // Minimum fraction of total column lines for a column to be interesting.
45 const double kMinFractionalLinesInColumn = 0.125;
46 // Fraction of height used as alignment tolerance for aligned tabs.
47 const double kAlignedFraction = 0.03125;
48 // Maximum gutter width (in absolute inch) that we care about
49 const double kMaxGutterWidthAbsolute = 2.00;
50 // Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs.
51 const int kRaggedGutterMultiple = 5;
52 // Min aspect ratio of tall objects to be considered a separator line.
53 // (These will be ignored in searching the gutter for obstructions.)
54 const double kLineFragmentAspectRatio = 10.0;
55 // Min number of points to accept after evaluation.
56 const int kMinEvaluatedTabs = 3;
57 // Up to 30 degrees is allowed for rotations of diacritic blobs.
58 // Keep this value slightly larger than kCosSmallAngle in blobbox.cpp
59 // so that the assert there never fails.
60 const double kCosMaxSkewAngle = 0.866025;
61 
62 static BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
63 static BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
64 
65 TabFind::TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines,
66  int vertical_x, int vertical_y, int resolution)
67  : AlignedBlob(gridsize, bleft, tright)
68  , resolution_(resolution)
69  , image_origin_(0, tright.y() - 1)
70  , v_it_(&vectors_) {
71  width_cb_ = nullptr;
72  v_it_.add_list_after(vlines);
73  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
74  using namespace std::placeholders; // for _1
75  width_cb_ = std::bind(&TabFind::CommonWidth, this, _1);
76 }
77 
78 TabFind::~TabFind() = default;
79 
81 
82 // Insert a list of blobs into the given grid (not necessarily this).
83 // If take_ownership is true, then the blobs are removed from the source list.
84 // See InsertBlob for the other arguments.
85 // It would seem to make more sense to swap this and grid, but this way
86 // around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
87 // while the grid that provides the tab stops(this) has to be derived from
88 // TabFind.
89 void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs,
91  BLOBNBOX_IT blob_it(blobs);
92  int b_count = 0;
93  int reject_count = 0;
94  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
95  BLOBNBOX *blob = blob_it.data();
96  // if (InsertBlob(true, true, blob, grid)) {
97  if (InsertBlob(h_spread, v_spread, blob, grid)) {
98  ++b_count;
99  } else {
100  ++reject_count;
101  }
102  }
103  if (textord_debug_tabfind) {
104  tprintf("Inserted %d blobs into grid, %d rejected.\n", b_count, reject_count);
105  }
106 }
107 
108 // Insert a single blob into the given grid (not necessarily this).
109 // If h_spread, then all cells covered horizontally by the box are
110 // used, otherwise, just the bottom-left. Similarly for v_spread.
111 // A side effect is that the left and right rule edges of the blob are
112 // set according to the tab vectors in this (not grid).
113 bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob,
115  TBOX box = blob->bounding_box();
116  blob->set_left_rule(LeftEdgeForBox(box, false, false));
117  blob->set_right_rule(RightEdgeForBox(box, false, false));
118  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
119  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
120  if (blob->joined_to_prev()) {
121  return false;
122  }
123  grid->InsertBBox(h_spread, v_spread, blob);
124  return true;
125 }
126 
127 // Calls SetBlobRuleEdges for all the blobs in the given block.
129  SetBlobRuleEdges(&block->blobs);
130  SetBlobRuleEdges(&block->small_blobs);
131  SetBlobRuleEdges(&block->noise_blobs);
132  SetBlobRuleEdges(&block->large_blobs);
133 }
134 
135 // Sets the left and right rule and crossing_rules for the blobs in the given
136 // list by fiding the next outermost tabvectors for each blob.
137 void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST *blobs) {
138  BLOBNBOX_IT blob_it(blobs);
139  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
140  BLOBNBOX *blob = blob_it.data();
141  TBOX box = blob->bounding_box();
142  blob->set_left_rule(LeftEdgeForBox(box, false, false));
143  blob->set_right_rule(RightEdgeForBox(box, false, false));
144  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
145  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
146  }
147 }
148 
149 // Returns the gutter width of the given TabVector between the given y limits.
150 // Also returns x-shift to be added to the vector to clear any intersecting
151 // blobs. The shift is deducted from the returned gutter.
152 // If ignore_unmergeables is true, then blobs of UnMergeableType are
153 // ignored as if they don't exist. (Used for text on image.)
154 // max_gutter_width is used as the maximum width worth searching for in case
155 // there is nothing near the TabVector.
156 int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables,
157  int max_gutter_width, int *required_shift) {
158  bool right_to_left = v.IsLeftTab();
159  int bottom_x = v.XAtY(bottom_y);
160  int top_x = v.XAtY(top_y);
161  int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
162  BlobGridSearch sidesearch(this);
163  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
164  int min_gap = max_gutter_width;
165  *required_shift = 0;
166  BLOBNBOX *blob = nullptr;
167  while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
168  const TBOX &box = blob->bounding_box();
169  if (box.bottom() >= top_y || box.top() <= bottom_y) {
170  continue; // Doesn't overlap enough.
171  }
172  if (box.height() >= gridsize() * 2 && box.height() > box.width() * kLineFragmentAspectRatio) {
173  // Skip likely separator line residue.
174  continue;
175  }
176  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type())) {
177  continue; // Skip non-text if required.
178  }
179  int mid_y = (box.bottom() + box.top()) / 2;
180  // We use the x at the mid-y so that the required_shift guarantees
181  // to clear all the blobs on the tab-stop. If we use the min/max
182  // of x at top/bottom of the blob, then exactness would be required,
183  // which is not a good thing.
184  int tab_x = v.XAtY(mid_y);
185  int gap;
186  if (right_to_left) {
187  gap = tab_x - box.right();
188  if (gap < 0 && box.left() - tab_x < *required_shift) {
189  *required_shift = box.left() - tab_x;
190  }
191  } else {
192  gap = box.left() - tab_x;
193  if (gap < 0 && box.right() - tab_x > *required_shift) {
194  *required_shift = box.right() - tab_x;
195  }
196  }
197  if (gap > 0 && gap < min_gap) {
198  min_gap = gap;
199  }
200  }
201  // Result may be negative, in which case, this is a really bad tabstop.
202  return min_gap - abs(*required_shift);
203 }
204 
205 // Find the gutter width and distance to inner neighbour for the given blob.
206 void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left,
207  BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap) {
208  const TBOX &box = bbox->bounding_box();
209  // The gutter and internal sides of the box.
210  int gutter_x = left ? box.left() : box.right();
211  int internal_x = left ? box.right() : box.left();
212  // On ragged edges, the gutter side of the box is away from the tabstop.
213  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
214  *gutter_width = max_gutter;
215  // If the box is away from the tabstop, we need to increase
216  // the allowed gutter width.
217  if (tab_gap > 0) {
218  *gutter_width += tab_gap;
219  }
220  bool debug = WithinTestRegion(2, box.left(), box.bottom());
221  if (debug) {
222  tprintf("Looking in gutter\n");
223  }
224  // Find the nearest blob on the outside of the column.
225  BLOBNBOX *gutter_bbox = AdjacentBlob(bbox, left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
226  *gutter_width, box.top(), box.bottom());
227  if (gutter_bbox != nullptr) {
228  const TBOX &gutter_box = gutter_bbox->bounding_box();
229  *gutter_width = left ? tab_x - gutter_box.right() : gutter_box.left() - tab_x;
230  }
231  if (*gutter_width >= max_gutter) {
232  // If there is no box because a tab was in the way, get the tab coord.
233  TBOX gutter_box(box);
234  if (left) {
235  gutter_box.set_left(tab_x - max_gutter - 1);
236  gutter_box.set_right(tab_x - max_gutter);
237  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
238  if (tab_gutter < tab_x - 1) {
239  *gutter_width = tab_x - tab_gutter;
240  }
241  } else {
242  gutter_box.set_left(tab_x + max_gutter);
243  gutter_box.set_right(tab_x + max_gutter + 1);
244  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
245  if (tab_gutter > tab_x + 1) {
246  *gutter_width = tab_gutter - tab_x;
247  }
248  }
249  }
250  if (*gutter_width > max_gutter) {
251  *gutter_width = max_gutter;
252  }
253  // Now look for a neighbour on the inside.
254  if (debug) {
255  tprintf("Looking for neighbour\n");
256  }
257  BLOBNBOX *neighbour = AdjacentBlob(bbox, !left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
258  *gutter_width, box.top(), box.bottom());
259  int neighbour_edge = left ? RightEdgeForBox(box, true, false) : LeftEdgeForBox(box, true, false);
260  if (neighbour != nullptr) {
261  const TBOX &n_box = neighbour->bounding_box();
262  if (debug) {
263  tprintf("Found neighbour:");
264  n_box.print();
265  }
266  if (left && n_box.left() < neighbour_edge) {
267  neighbour_edge = n_box.left();
268  } else if (!left && n_box.right() > neighbour_edge) {
269  neighbour_edge = n_box.right();
270  }
271  }
272  *neighbour_gap = left ? neighbour_edge - internal_x : internal_x - neighbour_edge;
273 }
274 
275 // Return the x-coord that corresponds to the right edge for the given
276 // box. If there is a rule line to the right that vertically overlaps it,
277 // then return the x-coord of the rule line, otherwise return the right
278 // edge of the page. For details see RightTabForBox below.
279 int TabFind::RightEdgeForBox(const TBOX &box, bool crossing, bool extended) {
280  TabVector *v = RightTabForBox(box, crossing, extended);
281  return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
282 }
283 // As RightEdgeForBox, but finds the left Edge instead.
284 int TabFind::LeftEdgeForBox(const TBOX &box, bool crossing, bool extended) {
285  TabVector *v = LeftTabForBox(box, crossing, extended);
286  return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
287 }
288 
289 // This comment documents how this function works.
290 // For its purpose and arguments, see the comment in tabfind.h.
291 // TabVectors are stored sorted by perpendicular distance of middle from
292 // the global mean vertical vector. Since the individual vectors can have
293 // differing directions, their XAtY for a given y is not necessarily in the
294 // right order. Therefore the search has to be run with a margin.
295 // The middle of a vector that passes through (x,y) cannot be higher than
296 // halfway from y to the top, or lower than halfway from y to the bottom
297 // of the coordinate range; therefore, the search margin is the range of
298 // sort keys between these halfway points. Any vector with a sort key greater
299 // than the upper margin must be to the right of x at y, and likewise any
300 // vector with a sort key less than the lower margin must pass to the left
301 // of x at y.
302 TabVector *TabFind::RightTabForBox(const TBOX &box, bool crossing, bool extended) {
303  if (v_it_.empty()) {
304  return nullptr;
305  }
306  int top_y = box.top();
307  int bottom_y = box.bottom();
308  int mid_y = (top_y + bottom_y) / 2;
309  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
310  int min_key, max_key;
311  SetupTabSearch(right, mid_y, &min_key, &max_key);
312  // Position the iterator at the first TabVector with sort_key >= min_key.
313  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key) {
314  v_it_.backward();
315  }
316  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key) {
317  v_it_.forward();
318  }
319  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
320  TabVector *best_v = nullptr;
321  int best_x = -1;
322  int key_limit = -1;
323  do {
324  TabVector *v = v_it_.data();
325  int x = v->XAtY(mid_y);
326  if (x >= right && (v->VOverlap(top_y, bottom_y) > 0 ||
327  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
328  if (best_v == nullptr || x < best_x) {
329  best_v = v;
330  best_x = x;
331  // We can guarantee that no better vector can be found if the
332  // sort key exceeds that of the best by max_key - min_key.
333  key_limit = v->sort_key() + max_key - min_key;
334  }
335  }
336  // Break when the search is done to avoid wrapping the iterator and
337  // thereby potentially slowing the next search.
338  if (v_it_.at_last() || (best_v != nullptr && v->sort_key() > key_limit)) {
339  break; // Prevent restarting list for next call.
340  }
341  v_it_.forward();
342  } while (!v_it_.at_first());
343  return best_v;
344 }
345 
346 // As RightTabForBox, but finds the left TabVector instead.
347 TabVector *TabFind::LeftTabForBox(const TBOX &box, bool crossing, bool extended) {
348  if (v_it_.empty()) {
349  return nullptr;
350  }
351  int top_y = box.top();
352  int bottom_y = box.bottom();
353  int mid_y = (top_y + bottom_y) / 2;
354  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
355  int min_key, max_key;
356  SetupTabSearch(left, mid_y, &min_key, &max_key);
357  // Position the iterator at the last TabVector with sort_key <= max_key.
358  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key) {
359  v_it_.forward();
360  }
361  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
362  v_it_.backward();
363  }
364  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
365  TabVector *best_v = nullptr;
366  int best_x = -1;
367  int key_limit = -1;
368  do {
369  TabVector *v = v_it_.data();
370  int x = v->XAtY(mid_y);
371  if (x <= left && (v->VOverlap(top_y, bottom_y) > 0 ||
372  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
373  if (best_v == nullptr || x > best_x) {
374  best_v = v;
375  best_x = x;
376  // We can guarantee that no better vector can be found if the
377  // sort key is less than that of the best by max_key - min_key.
378  key_limit = v->sort_key() - (max_key - min_key);
379  }
380  }
381  // Break when the search is done to avoid wrapping the iterator and
382  // thereby potentially slowing the next search.
383  if (v_it_.at_first() || (best_v != nullptr && v->sort_key() < key_limit)) {
384  break; // Prevent restarting list for next call.
385  }
386  v_it_.backward();
387  } while (!v_it_.at_last());
388  return best_v;
389 }
390 
391 // Return true if the given width is close to one of the common
392 // widths in column_widths_.
393 bool TabFind::CommonWidth(int width) {
394  width /= kColumnWidthFactor;
395  ICOORDELT_IT it(&column_widths_);
396  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
397  ICOORDELT *w = it.data();
398  if (w->x() - 1 <= width && width <= w->y() + 1) {
399  return true;
400  }
401  }
402  return false;
403 }
404 
405 // Return true if the sizes are more than a
406 // factor of 2 different.
407 bool TabFind::DifferentSizes(int size1, int size2) {
408  return size1 > size2 * 2 || size2 > size1 * 2;
409 }
410 
411 // Return true if the sizes are more than a
412 // factor of 5 different.
413 bool TabFind::VeryDifferentSizes(int size1, int size2) {
414  return size1 > size2 * 5 || size2 > size1 * 5;
415 }
416 
418 
419 // Top-level function to find TabVectors in an input page block.
420 // Returns false if the detected skew angle is impossible.
421 // Applies the detected skew angle to deskew the tabs, blobs and part_grid.
422 bool TabFind::FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block,
423  int min_gutter_width, double tabfind_aligned_gap_fraction,
424  ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew) {
425  ScrollView *tab_win =
426  FindInitialTabVectors(image_blobs, min_gutter_width, tabfind_aligned_gap_fraction, block);
427  ComputeColumnWidths(tab_win, part_grid);
429  SortVectors();
430  CleanupTabs();
431  if (!Deskew(hlines, image_blobs, block, deskew, reskew)) {
432  return false; // Skew angle is too large.
433  }
434  part_grid->Deskew(*deskew);
435  ApplyTabConstraints();
436 #ifndef GRAPHICS_DISABLED
437  if (textord_tabfind_show_finaltabs) {
438  tab_win = MakeWindow(640, 50, "FinalTabs");
439  DisplayBoxes(tab_win);
440  DisplayTabs("FinalTabs", tab_win);
441  tab_win = DisplayTabVectors(tab_win);
442  }
443 #endif // !GRAPHICS_DISABLED
444  return true;
445 }
446 
447 // Top-level function to not find TabVectors in an input page block,
448 // but setup for single column mode.
449 void TabFind::DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew,
450  FCOORD *reskew) {
451  InsertBlobsToGrid(false, false, image_blobs, this);
452  InsertBlobsToGrid(true, false, &block->blobs, this);
453  deskew->set_x(1.0f);
454  deskew->set_y(0.0f);
455  reskew->set_x(1.0f);
456  reskew->set_y(0.0f);
457 }
458 
459 // Cleans up the lists of blobs in the block ready for use by TabFind.
460 // Large blobs that look like text are moved to the main blobs list.
461 // Main blobs that are superseded by the image blobs are deleted.
463  BLOBNBOX_IT large_it = &block->large_blobs;
464  BLOBNBOX_IT blob_it = &block->blobs;
465  int b_count = 0;
466  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
467  BLOBNBOX *large_blob = large_it.data();
468  if (large_blob->owner() != nullptr) {
469  blob_it.add_to_end(large_it.extract());
470  ++b_count;
471  }
472  }
473  if (textord_debug_tabfind) {
474  tprintf("Moved %d large blobs to normal list\n", b_count);
475 #ifndef GRAPHICS_DISABLED
476  ScrollView *rej_win = MakeWindow(500, 300, "Image blobs");
477  block->plot_graded_blobs(rej_win);
478  block->plot_noise_blobs(rej_win);
479  rej_win->Update();
480 #endif // !GRAPHICS_DISABLED
481  }
482  block->DeleteUnownedNoise();
483 }
484 
485 // Helper function to setup search limits for *TabForBox.
486 void TabFind::SetupTabSearch(int x, int y, int *min_key, int *max_key) {
487  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
488  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
489  *min_key = std::min(key1, key2);
490  *max_key = std::max(key1, key2);
491 }
492 
493 #ifndef GRAPHICS_DISABLED
494 
496  // For every vector, display it.
497  TabVector_IT it(&vectors_);
498  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
499  TabVector *vector = it.data();
500  vector->Display(tab_win);
501  }
502  tab_win->Update();
503  return tab_win;
504 }
505 
506 #endif
507 
508 // PRIVATE CODE.
509 //
510 // First part of FindTabVectors, which may be used twice if the text
511 // is mostly of vertical alignment.
512 ScrollView *TabFind::FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width,
513  double tabfind_aligned_gap_fraction, TO_BLOCK *block) {
514 #ifndef GRAPHICS_DISABLED
515  if (textord_tabfind_show_initialtabs) {
516  ScrollView *line_win = MakeWindow(0, 0, "VerticalLines");
517  line_win = DisplayTabVectors(line_win);
518  }
519 #endif
520  // Prepare the grid.
521  if (image_blobs != nullptr) {
522  InsertBlobsToGrid(true, false, image_blobs, this);
523  }
524  InsertBlobsToGrid(true, false, &block->blobs, this);
525  ScrollView *initial_win = FindTabBoxes(min_gutter_width, tabfind_aligned_gap_fraction);
526  FindAllTabVectors(min_gutter_width);
527 
529  SortVectors();
530  EvaluateTabs();
531 #ifndef GRAPHICS_DISABLED
532  if (textord_tabfind_show_initialtabs && initial_win != nullptr) {
533  initial_win = DisplayTabVectors(initial_win);
534  }
535 #endif
536  MarkVerticalText();
537  return initial_win;
538 }
539 
540 #ifndef GRAPHICS_DISABLED
541 
542 // Helper displays all the boxes in the given vector on the given window.
543 static void DisplayBoxVector(const std::vector<BLOBNBOX *> &boxes, ScrollView *win) {
544  for (auto boxe : boxes) {
545  TBOX box = boxe->bounding_box();
546  int left_x = box.left();
547  int right_x = box.right();
548  int top_y = box.top();
549  int bottom_y = box.bottom();
550  ScrollView::Color box_color = boxe->BoxColor();
551  win->Pen(box_color);
552  win->Rectangle(left_x, bottom_y, right_x, top_y);
553  }
554  win->Update();
555 }
556 
557 #endif // !GRAPHICS_DISABLED
558 
559 // For each box in the grid, decide whether it is a candidate tab-stop,
560 // and if so add it to the left/right tab boxes.
561 ScrollView *TabFind::FindTabBoxes(int min_gutter_width, double tabfind_aligned_gap_fraction) {
562  left_tab_boxes_.clear();
563  right_tab_boxes_.clear();
564  // For every bbox in the grid, determine whether it uses a tab on an edge.
565  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
566  gsearch.StartFullSearch();
567  BLOBNBOX *bbox;
568  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
569  if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
570  // If it is any kind of tab, insert it into the vectors.
571  if (bbox->left_tab_type() != TT_NONE) {
572  left_tab_boxes_.push_back(bbox);
573  }
574  if (bbox->right_tab_type() != TT_NONE) {
575  right_tab_boxes_.push_back(bbox);
576  }
577  }
578  }
579  // Sort left tabs by left and right by right to see the outermost one first
580  // on a ragged tab.
581  std::sort(left_tab_boxes_.begin(), left_tab_boxes_.end(), StdSortByBoxLeft<BLOBNBOX>);
582  std::sort(right_tab_boxes_.begin(), right_tab_boxes_.end(), StdSortRightToLeft<BLOBNBOX>);
583  ScrollView *tab_win = nullptr;
584 #ifndef GRAPHICS_DISABLED
585  if (textord_tabfind_show_initialtabs) {
586  tab_win = MakeWindow(0, 100, "InitialTabs");
587  tab_win->Pen(ScrollView::BLUE);
588  tab_win->Brush(ScrollView::NONE);
589  // Display the left and right tab boxes.
590  DisplayBoxVector(left_tab_boxes_, tab_win);
591  DisplayBoxVector(right_tab_boxes_, tab_win);
592  tab_win = DisplayTabs("Tabs", tab_win);
593  }
594 #endif // !GRAPHICS_DISABLED
595  return tab_win;
596 }
597 
598 bool TabFind::TestBoxForTabs(BLOBNBOX *bbox, int min_gutter_width,
599  double tabfind_aligned_gap_fraction) {
600  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
601  TBOX box = bbox->bounding_box();
602  // If there are separator lines, get the column edges.
603  int left_column_edge = bbox->left_rule();
604  int right_column_edge = bbox->right_rule();
605  // The edges of the bounding box of the blob being processed.
606  int left_x = box.left();
607  int right_x = box.right();
608  int top_y = box.top();
609  int bottom_y = box.bottom();
610  int height = box.height();
611  bool debug = WithinTestRegion(3, left_x, top_y);
612  if (debug) {
613  tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", left_x, top_y, right_x,
614  bottom_y, left_column_edge, right_column_edge);
615  }
616  // Compute a search radius based on a multiple of the height.
617  int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
618  radsearch.StartRadSearch((left_x + right_x) / 2, (top_y + bottom_y) / 2, radius);
619  // In Vertical Page mode, once we have an estimate of the vertical line
620  // spacing, the minimum amount of gutter space before a possible tab is
621  // increased under the assumption that column partition is always larger
622  // than line spacing.
623  int min_spacing = static_cast<int>(height * tabfind_aligned_gap_fraction);
624  if (min_gutter_width > min_spacing) {
625  min_spacing = min_gutter_width;
626  }
627  int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
628  if (min_gutter_width > min_ragged_gutter) {
629  min_ragged_gutter = min_gutter_width;
630  }
631  int target_right = left_x - min_spacing;
632  int target_left = right_x + min_spacing;
633  // We will be evaluating whether the left edge could be a left tab, and
634  // whether the right edge could be a right tab.
635  // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
636  // that no blobs have been found in the gutter during the radial search.
637  // A box can also be a tab if there are objects in the gutter only above
638  // or only below, and there are aligned objects on the opposite side, but
639  // not too many unaligned objects. The maybe_(left/right)_tab_up counts
640  // aligned objects above and negatively counts unaligned objects above,
641  // and is set to -INT32_MAX if a gutter object is found above.
642  // The other 3 maybe ints work similarly for the other sides.
643  // These conditions are very strict, to minimize false positives, and really
644  // only aligned tabs and outermost ragged tab blobs will qualify, so we
645  // also have maybe_ragged_left/right with less stringent rules.
646  // A blob that is maybe_ragged_left/right will be further qualified later,
647  // using the min_ragged_gutter.
648  bool is_left_tab = true;
649  bool is_right_tab = true;
650  bool maybe_ragged_left = true;
651  bool maybe_ragged_right = true;
652  int maybe_left_tab_up = 0;
653  int maybe_right_tab_up = 0;
654  int maybe_left_tab_down = 0;
655  int maybe_right_tab_down = 0;
656  if (bbox->leader_on_left()) {
657  is_left_tab = false;
658  maybe_ragged_left = false;
659  maybe_left_tab_up = -INT32_MAX;
660  maybe_left_tab_down = -INT32_MAX;
661  }
662  if (bbox->leader_on_right()) {
663  is_right_tab = false;
664  maybe_ragged_right = false;
665  maybe_right_tab_up = -INT32_MAX;
666  maybe_right_tab_down = -INT32_MAX;
667  }
668  int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
669  BLOBNBOX *neighbour = nullptr;
670  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
671  if (neighbour == bbox) {
672  continue;
673  }
674  TBOX nbox = neighbour->bounding_box();
675  int n_left = nbox.left();
676  int n_right = nbox.right();
677  if (debug) {
678  tprintf("Neighbour at (%d,%d)->(%d,%d)\n", n_left, nbox.bottom(), n_right, nbox.top());
679  }
680  // If the neighbouring blob is the wrong side of a separator line, then it
681  // "doesn't exist" as far as we are concerned.
682  if (n_right > right_column_edge || n_left < left_column_edge ||
683  left_x < neighbour->left_rule() || right_x > neighbour->right_rule()) {
684  continue; // Separator line in the way.
685  }
686  int n_mid_x = (n_left + n_right) / 2;
687  int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
688  if (n_mid_x <= left_x && n_right >= target_right) {
689  if (debug) {
690  tprintf("Not a left tab\n");
691  }
692  is_left_tab = false;
693  if (n_mid_y < top_y) {
694  maybe_left_tab_down = -INT32_MAX;
695  }
696  if (n_mid_y > bottom_y) {
697  maybe_left_tab_up = -INT32_MAX;
698  }
699  } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
700  if (debug) {
701  tprintf("Maybe a left tab\n");
702  }
703  if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) {
704  ++maybe_left_tab_up;
705  }
706  if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) {
707  ++maybe_left_tab_down;
708  }
709  } else if (n_left < left_x && n_right >= left_x) {
710  // Overlaps but not aligned so negative points on a maybe.
711  if (debug) {
712  tprintf("Maybe Not a left tab\n");
713  }
714  if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) {
715  --maybe_left_tab_up;
716  }
717  if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) {
718  --maybe_left_tab_down;
719  }
720  }
721  if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
722  maybe_ragged_left = false;
723  if (debug) {
724  tprintf("Not a ragged left\n");
725  }
726  }
727  if (n_mid_x >= right_x && n_left <= target_left) {
728  if (debug) {
729  tprintf("Not a right tab\n");
730  }
731  is_right_tab = false;
732  if (n_mid_y < top_y) {
733  maybe_right_tab_down = -INT32_MAX;
734  }
735  if (n_mid_y > bottom_y) {
736  maybe_right_tab_up = -INT32_MAX;
737  }
738  } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
739  if (debug) {
740  tprintf("Maybe a right tab\n");
741  }
742  if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) {
743  ++maybe_right_tab_up;
744  }
745  if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) {
746  ++maybe_right_tab_down;
747  }
748  } else if (n_right > right_x && n_left <= right_x) {
749  // Overlaps but not aligned so negative points on a maybe.
750  if (debug) {
751  tprintf("Maybe Not a right tab\n");
752  }
753  if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) {
754  --maybe_right_tab_up;
755  }
756  if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) {
757  --maybe_right_tab_down;
758  }
759  }
760  if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
761  maybe_ragged_right = false;
762  if (debug) {
763  tprintf("Not a ragged right\n");
764  }
765  }
766  if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX &&
767  maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX) {
768  break;
769  }
770  }
771  if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
772  bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
773  } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
774  bbox->set_left_tab_type(TT_MAYBE_RAGGED);
775  } else {
776  bbox->set_left_tab_type(TT_NONE);
777  }
778  if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
779  bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
780  } else if (maybe_ragged_right && ConfirmRaggedRight(bbox, min_ragged_gutter)) {
781  bbox->set_right_tab_type(TT_MAYBE_RAGGED);
782  } else {
783  bbox->set_right_tab_type(TT_NONE);
784  }
785  if (debug) {
786  tprintf("Left result = %s, Right result=%s\n",
787  bbox->left_tab_type() == TT_MAYBE_ALIGNED
788  ? "Aligned"
789  : (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
790  bbox->right_tab_type() == TT_MAYBE_ALIGNED
791  ? "Aligned"
792  : (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
793  }
794  return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
795 }
796 
797 // Returns true if there is nothing in the rectangle of width min_gutter to
798 // the left of bbox.
799 bool TabFind::ConfirmRaggedLeft(BLOBNBOX *bbox, int min_gutter) {
800  TBOX search_box(bbox->bounding_box());
801  search_box.set_right(search_box.left());
802  search_box.set_left(search_box.left() - min_gutter);
803  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
804 }
805 
806 // Returns true if there is nothing in the rectangle of width min_gutter to
807 // the right of bbox.
808 bool TabFind::ConfirmRaggedRight(BLOBNBOX *bbox, int min_gutter) {
809  TBOX search_box(bbox->bounding_box());
810  search_box.set_left(search_box.right());
811  search_box.set_right(search_box.right() + min_gutter);
812  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
813 }
814 
815 // Returns true if there is nothing in the given search_box that vertically
816 // overlaps target_box other than target_box itself.
817 bool TabFind::NothingYOverlapsInBox(const TBOX &search_box, const TBOX &target_box) {
818  BlobGridSearch rsearch(this);
819  rsearch.StartRectSearch(search_box);
820  BLOBNBOX *blob;
821  while ((blob = rsearch.NextRectSearch()) != nullptr) {
822  const TBOX &box = blob->bounding_box();
823  if (box.y_overlap(target_box) && !(box == target_box)) {
824  return false;
825  }
826  }
827  return true;
828 }
829 
830 void TabFind::FindAllTabVectors(int min_gutter_width) {
831  // A list of vectors that will be created in estimating the skew.
832  TabVector_LIST dummy_vectors;
833  // An estimate of the vertical direction, revised as more lines are added.
834  int vertical_x = 0;
835  int vertical_y = 1;
836  // Find an estimate of the vertical direction by finding some tab vectors.
837  // Slowly up the search size until we get some vectors.
838  for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
839  search_size += kMinVerticalSearch) {
840  int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED, min_gutter_width,
841  &dummy_vectors, &vertical_x, &vertical_y);
842  vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED, min_gutter_width, &dummy_vectors,
843  &vertical_x, &vertical_y);
844  if (vector_count > 0) {
845  break;
846  }
847  }
848  // Get rid of the test vectors and reset the types of the tabs.
849  dummy_vectors.clear();
850  for (auto bbox : left_tab_boxes_) {
851  if (bbox->left_tab_type() == TT_CONFIRMED) {
852  bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
853  }
854  }
855  for (auto bbox : right_tab_boxes_) {
856  if (bbox->right_tab_type() == TT_CONFIRMED) {
857  bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
858  }
859  }
860  if (textord_debug_tabfind) {
861  tprintf("Beginning real tab search with vertical = %d,%d...\n", vertical_x, vertical_y);
862  }
863  // Now do the real thing ,but keep the vectors in the dummy_vectors list
864  // until they are all done, so we don't get the tab vectors confused with
865  // the rule line vectors.
866  FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED, min_gutter_width, &dummy_vectors, &vertical_x,
867  &vertical_y);
868  FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED, min_gutter_width, &dummy_vectors,
869  &vertical_x, &vertical_y);
870  FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED, min_gutter_width, &dummy_vectors, &vertical_x,
871  &vertical_y);
872  FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED, min_gutter_width, &dummy_vectors, &vertical_x,
873  &vertical_y);
874  // Now add the vectors to the vectors_ list.
875  TabVector_IT v_it(&vectors_);
876  v_it.add_list_after(&dummy_vectors);
877  // Now use the summed (mean) vertical vector as the direction for everything.
878  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
879 }
880 
881 // Helper for FindAllTabVectors finds the vectors of a particular type.
882 int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment, int min_gutter_width,
883  TabVector_LIST *vectors, int *vertical_x, int *vertical_y) {
884  TabVector_IT vector_it(vectors);
885  int vector_count = 0;
886  // Search the right or left tab boxes, looking for tab vectors.
887  bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
888  const std::vector<BLOBNBOX *> &boxes = right ? right_tab_boxes_ : left_tab_boxes_;
889  for (auto bbox : boxes) {
890  if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) ||
891  (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) {
892  TabVector *vector = FindTabVector(search_size_multiple, min_gutter_width, alignment, bbox,
893  vertical_x, vertical_y);
894  if (vector != nullptr) {
895  ++vector_count;
896  vector_it.add_to_end(vector);
897  }
898  }
899  }
900  return vector_count;
901 }
902 
903 // Finds a vector corresponding to a tabstop running through the
904 // given box of the given alignment type.
905 // search_size_multiple is a multiple of height used to control
906 // the size of the search.
907 // vertical_x and y are updated with an estimate of the real
908 // vertical direction. (skew finding.)
909 // Returns nullptr if no decent tabstop can be found.
910 TabVector *TabFind::FindTabVector(int search_size_multiple, int min_gutter_width,
911  TabAlignment alignment, BLOBNBOX *bbox, int *vertical_x,
912  int *vertical_y) {
913  int height = std::max(static_cast<int>(bbox->bounding_box().height()), gridsize());
914  AlignedBlobParams align_params(*vertical_x, *vertical_y, height, search_size_multiple,
915  min_gutter_width, resolution_, alignment);
916  // FindVerticalAlignment is in the parent (AlignedBlob) class.
917  return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
918 }
919 
920 // Set the vertical_skew_ member from the given vector and refit
921 // all vectors parallel to the skew vector.
922 void TabFind::SetVerticalSkewAndParallelize(int vertical_x, int vertical_y) {
923  // Fit the vertical vector into an ICOORD, which is 16 bit.
924  vertical_skew_.set_with_shrink(vertical_x, vertical_y);
925  if (textord_debug_tabfind) {
926  tprintf("Vertical skew vector=(%d,%d)\n", vertical_skew_.x(), vertical_skew_.y());
927  }
928  v_it_.set_to_list(&vectors_);
929  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
930  TabVector *v = v_it_.data();
931  v->Fit(vertical_skew_, true);
932  }
933  // Now sort the vectors as their direction has potentially changed.
934  SortVectors();
935 }
936 
937 // Sort all the current vectors using the given vertical direction vector.
938 void TabFind::SortVectors() {
939  vectors_.sort(TabVector::SortVectorsByKey);
940  v_it_.set_to_list(&vectors_);
941 }
942 
943 // Evaluate all the current tab vectors.
944 void TabFind::EvaluateTabs() {
945  TabVector_IT rule_it(&vectors_);
946  for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
947  TabVector *tab = rule_it.data();
948  if (!tab->IsSeparator()) {
949  tab->Evaluate(vertical_skew_, this);
950  if (tab->BoxCount() < kMinEvaluatedTabs) {
951  if (textord_debug_tabfind > 2) {
952  tab->Print("Too few boxes");
953  }
954  delete rule_it.extract();
955  v_it_.set_to_list(&vectors_);
956  } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) {
957  tab->Print("Evaluated tab");
958  }
959  }
960  }
961 }
962 
963 // Trace textlines from one side to the other of each tab vector, saving
964 // the most frequent column widths found in a list so that a given width
965 // can be tested for being a common width with a simple callback function.
966 void TabFind::ComputeColumnWidths(ScrollView *tab_win, ColPartitionGrid *part_grid) {
967 #ifndef GRAPHICS_DISABLED
968  if (tab_win != nullptr) {
969  tab_win->Pen(ScrollView::WHITE);
970  }
971 #endif // !GRAPHICS_DISABLED
972  // Accumulate column sections into a STATS
973  int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
974  STATS col_widths(0, col_widths_size + 1);
975  ApplyPartitionsToColumnWidths(part_grid, &col_widths);
976 #ifndef GRAPHICS_DISABLED
977  if (tab_win != nullptr) {
978  tab_win->Update();
979  }
980 #endif // !GRAPHICS_DISABLED
981  if (textord_debug_tabfind > 1) {
982  col_widths.print();
983  }
984  // Now make a list of column widths.
985  MakeColumnWidths(col_widths_size, &col_widths);
986  // Turn the column width into a range.
987  ApplyPartitionsToColumnWidths(part_grid, nullptr);
988 }
989 
990 // Finds column width and:
991 // if col_widths is not null (pass1):
992 // pair-up tab vectors with existing ColPartitions and accumulate widths.
993 // else (pass2):
994 // find the largest real partition width for each recorded column width,
995 // to be used as the minimum acceptable width.
996 void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid *part_grid, STATS *col_widths) {
997  // For every ColPartition in the part_grid, add partners to the tabvectors
998  // and accumulate the column widths.
999  ColPartitionGridSearch gsearch(part_grid);
1000  gsearch.StartFullSearch();
1001  ColPartition *part;
1002  while ((part = gsearch.NextFullSearch()) != nullptr) {
1003  BLOBNBOX_C_IT blob_it(part->boxes());
1004  if (blob_it.empty()) {
1005  continue;
1006  }
1007  BLOBNBOX *left_blob = blob_it.data();
1008  blob_it.move_to_last();
1009  BLOBNBOX *right_blob = blob_it.data();
1010  TabVector *left_vector = LeftTabForBox(left_blob->bounding_box(), true, false);
1011  if (left_vector == nullptr || left_vector->IsRightTab()) {
1012  continue;
1013  }
1014  TabVector *right_vector = RightTabForBox(right_blob->bounding_box(), true, false);
1015  if (right_vector == nullptr || right_vector->IsLeftTab()) {
1016  continue;
1017  }
1018 
1019  int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
1020  int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
1021  // Add to STATS of measurements if the width is significant.
1022  int width = line_right - line_left;
1023  if (col_widths != nullptr) {
1024  AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
1025  if (width >= kMinColumnWidth) {
1026  col_widths->add(width / kColumnWidthFactor, 1);
1027  }
1028  } else {
1029  width /= kColumnWidthFactor;
1030  ICOORDELT_IT it(&column_widths_);
1031  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1032  ICOORDELT *w = it.data();
1033  if (NearlyEqual<int>(width, w->y(), 1)) {
1034  int true_width = part->bounding_box().width() / kColumnWidthFactor;
1035  if (true_width <= w->y() && true_width > w->x()) {
1036  w->set_x(true_width);
1037  }
1038  break;
1039  }
1040  }
1041  }
1042  }
1043 }
1044 
1045 // Helper makes the list of common column widths in column_widths_ from the
1046 // input col_widths. Destroys the content of col_widths by repeatedly
1047 // finding the mode and erasing the peak.
1048 void TabFind::MakeColumnWidths(int col_widths_size, STATS *col_widths) {
1049  ICOORDELT_IT w_it(&column_widths_);
1050  int total_col_count = col_widths->get_total();
1051  while (col_widths->get_total() > 0) {
1052  int width = col_widths->mode();
1053  int col_count = col_widths->pile_count(width);
1054  col_widths->add(width, -col_count);
1055  // Get the entire peak.
1056  for (int left = width - 1; left > 0 && col_widths->pile_count(left) > 0; --left) {
1057  int new_count = col_widths->pile_count(left);
1058  col_count += new_count;
1059  col_widths->add(left, -new_count);
1060  }
1061  for (int right = width + 1; right < col_widths_size && col_widths->pile_count(right) > 0;
1062  ++right) {
1063  int new_count = col_widths->pile_count(right);
1064  col_count += new_count;
1065  col_widths->add(right, -new_count);
1066  }
1067  if (col_count > kMinLinesInColumn &&
1068  col_count > kMinFractionalLinesInColumn * total_col_count) {
1069  auto *w = new ICOORDELT(0, width);
1070  w_it.add_after_then_move(w);
1071  if (textord_debug_tabfind) {
1072  tprintf("Column of width %d has %d = %.2f%% lines\n", width * kColumnWidthFactor, col_count,
1073  100.0 * col_count / total_col_count);
1074  }
1075  }
1076  }
1077 }
1078 
1079 // Mark blobs as being in a vertical text line where that is the case.
1080 // Returns true if the majority of the image is vertical text lines.
1081 void TabFind::MarkVerticalText() {
1082  if (textord_debug_tabfind) {
1083  tprintf("Checking for vertical lines\n");
1084  }
1085  BlobGridSearch gsearch(this);
1086  gsearch.StartFullSearch();
1087  BLOBNBOX *blob = nullptr;
1088  while ((blob = gsearch.NextFullSearch()) != nullptr) {
1089  if (blob->region_type() < BRT_UNKNOWN) {
1090  continue;
1091  }
1092  if (blob->UniquelyVertical()) {
1093  blob->set_region_type(BRT_VERT_TEXT);
1094  }
1095  }
1096 }
1097 
1098 int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
1099  TabVector_IT it(lines);
1100  int prev_right = -1;
1101  int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
1102  STATS gaps(0, max_gap);
1103  STATS heights(0, max_gap);
1104  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1105  TabVector *v = it.data();
1106  TabVector *partner = v->GetSinglePartner();
1107  if (!v->IsLeftTab() || v->IsSeparator() || !partner) {
1108  continue;
1109  }
1110  heights.add(partner->startpt().x() - v->startpt().x(), 1);
1111  if (prev_right > 0 && v->startpt().x() > prev_right) {
1112  gaps.add(v->startpt().x() - prev_right, 1);
1113  }
1114  prev_right = partner->startpt().x();
1115  }
1116  if (textord_debug_tabfind) {
1117  tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n", gaps.get_total(),
1118  gaps.median(), heights.median());
1119  }
1120  if (gaps.get_total() < kMinLinesInColumn) {
1121  return 0;
1122  }
1123  return static_cast<int>(gaps.median());
1124 }
1125 
1126 // Find the next adjacent (looking to the left or right) blob on this text
1127 // line, with the constraint that it must vertically significantly overlap
1128 // the [top_y, bottom_y] range.
1129 // If ignore_images is true, then blobs with aligned_text() < 0 are treated
1130 // as if they do not exist.
1131 BLOBNBOX *TabFind::AdjacentBlob(const BLOBNBOX *bbox, bool look_left, bool ignore_images,
1132  double min_overlap_fraction, int gap_limit, int top_y,
1133  int bottom_y) {
1134  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
1135  const TBOX &box = bbox->bounding_box();
1136  int left = box.left();
1137  int right = box.right();
1138  int mid_x = (left + right) / 2;
1139  sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1140  int best_gap = 0;
1141  bool debug = WithinTestRegion(3, left, bottom_y);
1142  BLOBNBOX *result = nullptr;
1143  BLOBNBOX *neighbour = nullptr;
1144  while ((neighbour = sidesearch.NextSideSearch(look_left)) != nullptr) {
1145  if (debug) {
1146  tprintf("Adjacent blob: considering box:");
1147  neighbour->bounding_box().print();
1148  }
1149  if (neighbour == bbox || (ignore_images && neighbour->region_type() < BRT_UNKNOWN)) {
1150  continue;
1151  }
1152  const TBOX &nbox = neighbour->bounding_box();
1153  int n_top_y = nbox.top();
1154  int n_bottom_y = nbox.bottom();
1155  int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y);
1156  int height = top_y - bottom_y;
1157  int n_height = n_top_y - n_bottom_y;
1158  if (v_overlap > min_overlap_fraction * std::min(height, n_height) &&
1159  (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
1160  int n_left = nbox.left();
1161  int n_right = nbox.right();
1162  int h_gap = std::max(n_left, left) - std::min(n_right, right);
1163  int n_mid_x = (n_left + n_right) / 2;
1164  if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1165  if (h_gap > gap_limit) {
1166  // Hit a big gap before next tab so don't return anything.
1167  if (debug) {
1168  tprintf("Giving up due to big gap = %d vs %d\n", h_gap, gap_limit);
1169  }
1170  return result;
1171  }
1172  if (h_gap > 0 && (look_left ? neighbour->right_tab_type() : neighbour->left_tab_type()) >=
1173  TT_CONFIRMED) {
1174  // Hit a tab facing the wrong way. Stop in case we are crossing
1175  // the column boundary.
1176  if (debug) {
1177  tprintf("Collision with like tab of type %d at %d,%d\n",
1178  look_left ? neighbour->right_tab_type() : neighbour->left_tab_type(), n_left,
1179  nbox.bottom());
1180  }
1181  return result;
1182  }
1183  // This is a good fit to the line. Continue with this
1184  // neighbour as the bbox if the best gap.
1185  if (result == nullptr || h_gap < best_gap) {
1186  if (debug) {
1187  tprintf("Good result\n");
1188  }
1189  result = neighbour;
1190  best_gap = h_gap;
1191  } else {
1192  // The new one is worse, so we probably already have the best result.
1193  return result;
1194  }
1195  } else if (debug) {
1196  tprintf("Wrong way\n");
1197  }
1198  } else if (debug) {
1199  tprintf("Insufficient overlap\n");
1200  }
1201  }
1202  if (WithinTestRegion(3, left, box.top())) {
1203  tprintf("Giving up due to end of search\n");
1204  }
1205  return result; // Hit the edge and found nothing.
1206 }
1207 
1208 // Add a bi-directional partner relationship between the left
1209 // and the right. If one (or both) of the vectors is a separator,
1210 // extend a nearby extendable vector or create a new one of the
1211 // correct type, using the given left or right blob as a guide.
1212 void TabFind::AddPartnerVector(BLOBNBOX *left_blob, BLOBNBOX *right_blob, TabVector *left,
1213  TabVector *right) {
1214  const TBOX &left_box = left_blob->bounding_box();
1215  const TBOX &right_box = right_blob->bounding_box();
1216  if (left->IsSeparator()) {
1217  // Try to find a nearby left edge to extend.
1218  TabVector *v = LeftTabForBox(left_box, true, true);
1219  if (v != nullptr && v != left && v->IsLeftTab() &&
1220  v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
1221  left = v; // Found a good replacement.
1222  left->ExtendToBox(left_blob);
1223  } else {
1224  // Fake a vector.
1225  left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
1226  vectors_.add_sorted(TabVector::SortVectorsByKey, left);
1227  v_it_.move_to_first();
1228  }
1229  }
1230  if (right->IsSeparator()) {
1231  // Try to find a nearby left edge to extend.
1232  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1233  tprintf("Box edge (%d,%d-%d)", right_box.right(), right_box.bottom(), right_box.top());
1234  right->Print(" looking for improvement for");
1235  }
1236  TabVector *v = RightTabForBox(right_box, true, true);
1237  if (v != nullptr && v != right && v->IsRightTab() &&
1238  v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
1239  right = v; // Found a good replacement.
1240  right->ExtendToBox(right_blob);
1241  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1242  right->Print("Extended vector");
1243  }
1244  } else {
1245  // Fake a vector.
1246  right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_, right_blob);
1247  vectors_.add_sorted(TabVector::SortVectorsByKey, right);
1248  v_it_.move_to_first();
1249  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1250  right->Print("Created new vector");
1251  }
1252  }
1253  }
1254  left->AddPartner(right);
1255  right->AddPartner(left);
1256 }
1257 
1258 // Remove separators and unused tabs from the main vectors_ list
1259 // to the dead_vectors_ list.
1260 void TabFind::CleanupTabs() {
1261  // TODO(rays) Before getting rid of separators and unused vectors, it
1262  // would be useful to try moving ragged vectors outwards to see if this
1263  // allows useful extension. Could be combined with checking ends of partners.
1264  TabVector_IT it(&vectors_);
1265  TabVector_IT dead_it(&dead_vectors_);
1266  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1267  TabVector *v = it.data();
1268  if (v->IsSeparator() || v->Partnerless()) {
1269  dead_it.add_after_then_move(it.extract());
1270  v_it_.set_to_list(&vectors_);
1271  } else {
1272  v->FitAndEvaluateIfNeeded(vertical_skew_, this);
1273  }
1274  }
1275 }
1276 
1277 // Apply the given rotation to the given list of blobs.
1278 void TabFind::RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs) {
1279  BLOBNBOX_IT it(blobs);
1280  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1281  it.data()->rotate_box(rotation);
1282  }
1283 }
1284 
1285 // Recreate the grid with deskewed BLOBNBOXes.
1286 // Returns false if the detected skew angle is impossible.
1287 bool TabFind::Deskew(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block,
1288  FCOORD *deskew, FCOORD *reskew) {
1289  ComputeDeskewVectors(deskew, reskew);
1290  if (deskew->x() < kCosMaxSkewAngle) {
1291  return false;
1292  }
1293  RotateBlobList(*deskew, image_blobs);
1294  RotateBlobList(*deskew, &block->blobs);
1295  RotateBlobList(*deskew, &block->small_blobs);
1296  RotateBlobList(*deskew, &block->noise_blobs);
1297 
1298  // Rotate the horizontal vectors. The vertical vectors don't need
1299  // rotating as they can just be refitted.
1300  TabVector_IT h_it(hlines);
1301  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1302  TabVector *h = h_it.data();
1303  h->Rotate(*deskew);
1304  }
1305  TabVector_IT d_it(&dead_vectors_);
1306  for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
1307  TabVector *d = d_it.data();
1308  d->Rotate(*deskew);
1309  }
1310  SetVerticalSkewAndParallelize(0, 1);
1311  // Rebuild the grid to the new size.
1312  TBOX grid_box(bleft_, tright_);
1313  grid_box.rotate_large(*deskew);
1314  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1315  InsertBlobsToGrid(false, false, image_blobs, this);
1316  InsertBlobsToGrid(true, false, &block->blobs, this);
1317  return true;
1318 }
1319 
1320 // Flip the vertical and horizontal lines and rotate the grid ready
1321 // for working on the rotated image.
1322 // This also makes parameter adjustments for FindInitialTabVectors().
1323 void TabFind::ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate,
1324  TabVector_LIST *horizontal_lines, int *min_gutter_width) {
1325  // Rotate the horizontal and vertical vectors and swap them over.
1326  // Only the separators are kept and rotated; other tabs are used
1327  // to estimate the gutter width then thrown away.
1328  TabVector_LIST ex_verticals;
1329  TabVector_IT ex_v_it(&ex_verticals);
1330  TabVector_LIST vlines;
1331  TabVector_IT v_it(&vlines);
1332  while (!v_it_.empty()) {
1333  TabVector *v = v_it_.extract();
1334  if (v->IsSeparator()) {
1335  v->Rotate(rotate);
1336  ex_v_it.add_after_then_move(v);
1337  } else {
1338  v_it.add_after_then_move(v);
1339  }
1340  v_it_.forward();
1341  }
1342 
1343  // Adjust the min gutter width for better tabbox selection
1344  // in 2nd call to FindInitialTabVectors().
1345  int median_gutter = FindMedianGutterWidth(&vlines);
1346  if (median_gutter > *min_gutter_width) {
1347  *min_gutter_width = median_gutter;
1348  }
1349 
1350  TabVector_IT h_it(horizontal_lines);
1351  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1352  TabVector *h = h_it.data();
1353  h->Rotate(rotate);
1354  }
1355  v_it_.add_list_after(horizontal_lines);
1356  v_it_.move_to_first();
1357  h_it.set_to_list(horizontal_lines);
1358  h_it.add_list_after(&ex_verticals);
1359 
1360  // Rebuild the grid to the new size.
1361  TBOX grid_box(bleft(), tright());
1362  grid_box.rotate_large(rotate);
1363  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1364 }
1365 
1366 // Clear the grid and get rid of the tab vectors, but not separators,
1367 // ready to start again.
1369  v_it_.move_to_first();
1370  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1371  if (!v_it_.data()->IsSeparator()) {
1372  delete v_it_.extract();
1373  }
1374  }
1375  Clear();
1376 }
1377 
1378 // Reflect the separator tab vectors and the grids in the y-axis.
1379 // Can only be called after Reset!
1381  TabVector_LIST temp_list;
1382  TabVector_IT temp_it(&temp_list);
1383  v_it_.move_to_first();
1384  // The TabVector list only contains vertical lines, but they need to be
1385  // reflected and the list needs to be reversed, so they are still in
1386  // sort_key order.
1387  while (!v_it_.empty()) {
1388  TabVector *v = v_it_.extract();
1389  v_it_.forward();
1390  v->ReflectInYAxis();
1391  temp_it.add_before_then_move(v);
1392  }
1393  v_it_.add_list_after(&temp_list);
1394  v_it_.move_to_first();
1395  // Reset this grid with reflected bounding boxes.
1396  TBOX grid_box(bleft(), tright());
1397  int tmp = grid_box.left();
1398  grid_box.set_left(-grid_box.right());
1399  grid_box.set_right(-tmp);
1400  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1401 }
1402 
1403 // Compute the rotation required to deskew, and its inverse rotation.
1404 void TabFind::ComputeDeskewVectors(FCOORD *deskew, FCOORD *reskew) {
1405  double length = vertical_skew_ % vertical_skew_;
1406  length = sqrt(length);
1407  deskew->set_x(static_cast<float>(vertical_skew_.y() / length));
1408  deskew->set_y(static_cast<float>(vertical_skew_.x() / length));
1409  reskew->set_x(deskew->x());
1410  reskew->set_y(-deskew->y());
1411 }
1412 
1413 // Compute and apply constraints to the end positions of TabVectors so
1414 // that where possible partners end at the same y coordinate.
1415 void TabFind::ApplyTabConstraints() {
1416  TabVector_IT it(&vectors_);
1417  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1418  TabVector *v = it.data();
1419  v->SetupConstraints();
1420  }
1421  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1422  TabVector *v = it.data();
1423  // With the first and last partner, we want a common bottom and top,
1424  // respectively, and for each change of partner, we want a common
1425  // top of first with bottom of next.
1426  v->SetupPartnerConstraints();
1427  }
1428  // TODO(rays) The back-to-back pairs should really be done like the
1429  // front-to-front pairs, but there is no convenient way of producing the
1430  // list of partners like there is with the front-to-front.
1431  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1432  TabVector *v = it.data();
1433  if (!v->IsRightTab()) {
1434  continue;
1435  }
1436  // For each back-to-back pair of vectors, try for common top and bottom.
1437  TabVector_IT partner_it(it);
1438  for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1439  TabVector *partner = partner_it.data();
1440  if (!partner->IsLeftTab() || !v->VOverlap(*partner)) {
1441  continue;
1442  }
1443  v->SetupPartnerConstraints(partner);
1444  }
1445  }
1446  // Now actually apply the constraints to get common start/end points.
1447  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1448  TabVector *v = it.data();
1449  if (!v->IsSeparator()) {
1450  v->ApplyConstraints();
1451  }
1452  }
1453  // TODO(rays) Where constraint application fails, it would be good to try
1454  // checking the ends to see if they really should be moved.
1455 }
1456 
1457 } // namespace tesseract.
#define BOOL_VAR(name, val, comment)
Definition: params.h:359
@ TBOX
const int kColumnWidthFactor
Definition: tabfind.h:41
const int kTabRadiusFactor
Definition: tabfind.cpp:35
@ BRT_VERT_TEXT
Definition: blobbox.h:81
@ BRT_UNKNOWN
Definition: blobbox.h:80
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:51
const int kMinVerticalSearch
Definition: tabfind.cpp:37
const int kRaggedGutterMultiple
Definition: tabfind.cpp:51
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const double kMaxGutterWidthAbsolute
Definition: tabfind.cpp:49
const double kCosMaxSkewAngle
Definition: tabfind.cpp:60
const int kMaxVerticalSearch
Definition: tabfind.cpp:38
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919
const int kMaxRaggedSearch
Definition: tabfind.cpp:39
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
int textord_debug_tabfind
Definition: alignedblob.cpp:29
const double kMinFractionalLinesInColumn
Definition: tabfind.cpp:45
@ TA_RIGHT_ALIGNED
Definition: tabvector.h:45
@ TA_RIGHT_RAGGED
Definition: tabvector.h:46
@ TA_LEFT_ALIGNED
Definition: tabvector.h:42
@ TA_LEFT_RAGGED
Definition: tabvector.h:43
const double kAlignedFraction
Definition: alignedblob.cpp:46
const int kMinLinesInColumn
Definition: tabfind.cpp:41
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:116
const double kMinColumnWidth
const int kMinEvaluatedTabs
Definition: tabfind.cpp:56
@ TT_MAYBE_RAGGED
Definition: blobbox.h:64
@ TT_MAYBE_ALIGNED
Definition: blobbox.h:65
@ TT_CONFIRMED
Definition: blobbox.h:66
@ TT_NONE
Definition: blobbox.h:62
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:30
BlobRegionType region_type() const
Definition: blobbox.h:298
void set_left_rule(int new_left)
Definition: blobbox.h:331
const TBOX & bounding_box() const
Definition: blobbox.h:239
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:343
tesseract::ColPartition * owner() const
Definition: blobbox.h:367
BlobTextFlowType flow() const
Definition: blobbox.h:310
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:349
void set_right_rule(int new_right)
Definition: blobbox.h:337
bool joined_to_prev() const
Definition: blobbox.h:265
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:447
BLOBNBOX_LIST blobs
Definition: blobbox.h:776
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:779
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1058
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1050
void DeleteUnownedNoise()
Definition: blobbox.cpp:1024
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:780
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:778
integer coordinate
Definition: points.h:36
void set_with_shrink(int x, int y)
Set from the given x,y, shrinking the vector to fit if needed.
Definition: points.cpp:52
TDimension y() const
access_function
Definition: points.h:62
TDimension x() const
access function
Definition: points.h:58
void set_y(float yin)
rewrite function
Definition: points.h:217
void set_x(float xin)
rewrite function
Definition: points.h:213
float y() const
Definition: points.h:209
float x() const
Definition: points.h:206
TDimension left() const
Definition: rect.h:82
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
void set_right(int x)
Definition: rect.h:92
const ICOORD & botleft() const
Definition: rect.h:102
void set_left(int x)
Definition: rect.h:85
TDimension top() const
Definition: rect.h:68
void rotate_large(const FCOORD &vec)
Definition: rect.cpp:69
void print() const
Definition: rect.h:289
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
const ICOORD & topright() const
Definition: rect.h:114
static bool WithinTestRegion(int detail_level, int x, int y)
TabVector * FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
BBC * NextSideSearch(bool right_to_left)
Definition: bbgrid.h:802
void StartSideSearch(int x, int ymin, int ymax)
Definition: bbgrid.h:788
int gridsize() const
Definition: bbgrid.h:63
ICOORD tright_
Definition: bbgrid.h:91
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:488
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:529
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:633
void Deskew(const FCOORD &deskew)
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:1278
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:89
~TabFind() override
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
Definition: tabfind.cpp:1323
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:284
bool CommonWidth(int width)
Definition: tabfind.cpp:393
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:449
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:279
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:346
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:422
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
Definition: tabfind.cpp:206
void TidyBlobs(TO_BLOCK *block)
Definition: tabfind.cpp:462
static bool VeryDifferentSizes(int size1, int size2)
Definition: tabfind.cpp:413
void SetBlockRuleEdges(TO_BLOCK *block)
Definition: tabfind.cpp:128
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:486
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
Definition: tabfind.cpp:65
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:113
ICOORD vertical_skew_
Estimate of true vertical in this image.
Definition: tabfind.h:345
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:137
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:512
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:302
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
Definition: tabfind.cpp:156
TabVector_LIST * vectors()
Definition: tabfind.h:167
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:347
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:495
int XAtY(int y) const
Definition: tabvector.h:181
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:274
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:274
bool IsSeparator() const
Definition: tabvector.h:213
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:352
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:200
int sort_key() const
Definition: tabvector.h:150
bool IsLeftTab() const
Definition: tabvector.h:205
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:540
int VOverlap(const TabVector &other) const
Definition: tabvector.h:191
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:289
void Pen(Color color)
Definition: scrollview.cpp:723
static void Update()
Definition: scrollview.cpp:713
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:589