tesseract  5.0.0
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase tesseract::ColumnFinder

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
 ~TabFind () override
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~AlignedBlob () override
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BlobGrid () override
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Image pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 Estimate of true vertical in this image. More...
 
int resolution_
 Of source image in pixels per inch. More...
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. It also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines and (when available) tabstop boundaries, so, as the holder of the list of TabVectors, this class provides the search functions.

Definition at line 52 of file tabfind.h.

Constructor & Destructor Documentation

◆ TabFind()

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

Definition at line 65 of file tabfind.cpp.

68  , resolution_(resolution)
69  , image_origin_(0, tright.y() - 1)
70  , v_it_(&vectors_) {
71  width_cb_ = nullptr;
72  v_it_.add_list_after(vlines);
73  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
74  using namespace std::placeholders; // for _1
75  width_cb_ = std::bind(&TabFind::CommonWidth, this, _1);
76 }
TDimension y() const
access function
Definition: points.h:62
AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright)
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
bool CommonWidth(int width)
Definition: tabfind.cpp:393
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:346

◆ ~TabFind()

tesseract::TabFind::~TabFind ( )
override default

Member Function Documentation

◆ CommonWidth()

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

Definition at line 393 of file tabfind.cpp.

393  {
394  width /= kColumnWidthFactor;
395  ICOORDELT_IT it(&column_widths_);
396  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
397  ICOORDELT *w = it.data();
398  if (w->x() - 1 <= width && width <= w->y() + 1) {
399  return true;
400  }
401  }
402  return false;
403 }
const int kColumnWidthFactor
Definition: tabfind.h:41

◆ dead_vectors()

TabVector_LIST* tesseract::TabFind::dead_vectors ( )
inline protected

Definition at line 170 of file tabfind.h.

170  {
171  return &dead_vectors_;
172  }

◆ DifferentSizes()

bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

Definition at line 407 of file tabfind.cpp.

407  {
408  return size1 > size2 * 2 || size2 > size1 * 2;
409 }

◆ DisplayTabVectors()

ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView *  tab_win)
protected

Display the tab vectors found in this grid.

Definition at line 495 of file tabfind.cpp.

495  {
496  // For every vector, display it.
497  TabVector_IT it(&vectors_);
498  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
499  TabVector *vector = it.data();
500  vector->Display(tab_win);
501  }
502  tab_win->Update();
503  return tab_win;
504 }

◆ DontFindTabVectors()

void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Definition at line 449 of file tabfind.cpp.

450  {
451  InsertBlobsToGrid(false, false, image_blobs, this);
452  InsertBlobsToGrid(true, false, &block->blobs, this);
453  deskew->set_x(1.0f);
454  deskew->set_y(0.0f);
455  reskew->set_x(1.0f);
456  reskew->set_y(0.0f);
457 }
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:89

◆ FindInitialTabVectors()

ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
TO_BLOCK *  block 
)
protected

Definition at line 512 of file tabfind.cpp.

513  {
514 #ifndef GRAPHICS_DISABLED
515  if (textord_tabfind_show_initialtabs) {
516  ScrollView *line_win = MakeWindow(0, 0, "VerticalLines");
517  line_win = DisplayTabVectors(line_win);
518  }
519 #endif
520  // Prepare the grid.
521  if (image_blobs != nullptr) {
522  InsertBlobsToGrid(true, false, image_blobs, this);
523  }
524  InsertBlobsToGrid(true, false, &block->blobs, this);
525  ScrollView *initial_win = FindTabBoxes(min_gutter_width, tabfind_aligned_gap_fraction);
526  FindAllTabVectors(min_gutter_width);
527 
528  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
529  SortVectors();
530  EvaluateTabs();
531 #ifndef GRAPHICS_DISABLED
532  if (textord_tabfind_show_initialtabs && initial_win != nullptr) {
533  initial_win = DisplayTabVectors(initial_win);
534  }
535 #endif
536  MarkVerticalText();
537  return initial_win;
538 }
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:633
ICOORD vertical_skew_
Estimate of true vertical in this image.
Definition: tabfind.h:345
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:495
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:352

◆ FindTabVectors()

bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
ColPartitionGrid *  part_grid,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid. tabfind_aligned_gap_fraction should be the value of the parameter textord_tabfind_aligned_gap_fraction.

Definition at line 422 of file tabfind.cpp.

424  {
425  ScrollView *tab_win =
426  FindInitialTabVectors(image_blobs, min_gutter_width, tabfind_aligned_gap_fraction, block);
427  ComputeColumnWidths(tab_win, part_grid);
428  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
429  SortVectors();
430  CleanupTabs();
431  if (!Deskew(hlines, image_blobs, block, deskew, reskew)) {
432  return false; // Skew angle is too large.
433  }
434  part_grid->Deskew(*deskew);
435  ApplyTabConstraints();
436 #ifndef GRAPHICS_DISABLED
437  if (textord_tabfind_show_finaltabs) {
438  tab_win = MakeWindow(640, 50, "FinalTabs");
439  DisplayBoxes(tab_win);
440  DisplayTabs("FinalTabs", tab_win);
441  tab_win = DisplayTabVectors(tab_win);
442  }
443 #endif // !GRAPHICS_DISABLED
444  return true;
445 }
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:512

◆ GutterWidth()

int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector &  v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

Definition at line 156 of file tabfind.cpp.

157  {
158  bool right_to_left = v.IsLeftTab();
159  int bottom_x = v.XAtY(bottom_y);
160  int top_x = v.XAtY(top_y);
161  int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
162  BlobGridSearch sidesearch(this);
163  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
164  int min_gap = max_gutter_width;
165  *required_shift = 0;
166  BLOBNBOX *blob = nullptr;
167  while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
168  const TBOX &box = blob->bounding_box();
169  if (box.bottom() >= top_y || box.top() <= bottom_y) {
170  continue; // Doesn't overlap enough.
171  }
172  if (box.height() >= gridsize() * 2 && box.height() > box.width() * kLineFragmentAspectRatio) {
173  // Skip likely separator line residue.
174  continue;
175  }
176  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type())) {
177  continue; // Skip non-text if required.
178  }
179  int mid_y = (box.bottom() + box.top()) / 2;
180  // We use the x at the mid-y so that the required_shift guarantees
181  // to clear all the blobs on the tab-stop. If we use the min/max
182  // of x at top/bottom of the blob, then exactness would be required,
183  // which is not a good thing.
184  int tab_x = v.XAtY(mid_y);
185  int gap;
186  if (right_to_left) {
187  gap = tab_x - box.right();
188  if (gap < 0 && box.left() - tab_x < *required_shift) {
189  *required_shift = box.left() - tab_x;
190  }
191  } else {
192  gap = box.left() - tab_x;
193  if (gap < 0 && box.right() - tab_x > *required_shift) {
194  *required_shift = box.right() - tab_x;
195  }
196  }
197  if (gap > 0 && gap < min_gap) {
198  min_gap = gap;
199  }
200  }
201  // Result may be negative, in which case, this is a really bad tabstop.
202  return min_gap - abs(*required_shift);
203 }
@ TBOX
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:30
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:447

◆ GutterWidthAndNeighbourGap()

void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX *  bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

Definition at line 206 of file tabfind.cpp.

207  {
208  const TBOX &box = bbox->bounding_box();
209  // The gutter and internal sides of the box.
210  int gutter_x = left ? box.left() : box.right();
211  int internal_x = left ? box.right() : box.left();
212  // On ragged edges, the gutter side of the box is away from the tabstop.
213  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
214  *gutter_width = max_gutter;
215  // If the box is away from the tabstop, we need to increase
216  // the allowed gutter width.
217  if (tab_gap > 0) {
218  *gutter_width += tab_gap;
219  }
220  bool debug = WithinTestRegion(2, box.left(), box.bottom());
221  if (debug) {
222  tprintf("Looking in gutter\n");
223  }
224  // Find the nearest blob on the outside of the column.
225  BLOBNBOX *gutter_bbox = AdjacentBlob(bbox, left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
226  *gutter_width, box.top(), box.bottom());
227  if (gutter_bbox != nullptr) {
228  const TBOX &gutter_box = gutter_bbox->bounding_box();
229  *gutter_width = left ? tab_x - gutter_box.right() : gutter_box.left() - tab_x;
230  }
231  if (*gutter_width >= max_gutter) {
232  // If there is no box because a tab was in the way, get the tab coord.
233  TBOX gutter_box(box);
234  if (left) {
235  gutter_box.set_left(tab_x - max_gutter - 1);
236  gutter_box.set_right(tab_x - max_gutter);
237  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
238  if (tab_gutter < tab_x - 1) {
239  *gutter_width = tab_x - tab_gutter;
240  }
241  } else {
242  gutter_box.set_left(tab_x + max_gutter);
243  gutter_box.set_right(tab_x + max_gutter + 1);
244  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
245  if (tab_gutter > tab_x + 1) {
246  *gutter_width = tab_gutter - tab_x;
247  }
248  }
249  }
250  if (*gutter_width > max_gutter) {
251  *gutter_width = max_gutter;
252  }
253  // Now look for a neighbour on the inside.
254  if (debug) {
255  tprintf("Looking for neighbour\n");
256  }
257  BLOBNBOX *neighbour = AdjacentBlob(bbox, !left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
258  *gutter_width, box.top(), box.bottom());
259  int neighbour_edge = left ? RightEdgeForBox(box, true, false) : LeftEdgeForBox(box, true, false);
260  if (neighbour != nullptr) {
261  const TBOX &n_box = neighbour->bounding_box();
262  if (debug) {
263  tprintf("Found neighbour:");
264  n_box.print();
265  }
266  if (left && n_box.left() < neighbour_edge) {
267  neighbour_edge = n_box.left();
268  } else if (!left && n_box.right() > neighbour_edge) {
269  neighbour_edge = n_box.right();
270  }
271  }
272  *neighbour_gap = left ? neighbour_edge - internal_x : internal_x - neighbour_edge;
273 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:116
static bool WithinTestRegion(int detail_level, int x, int y)
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:284
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:279

◆ image_origin()

const ICOORD& tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

Definition at line 159 of file tabfind.h.

159  {
160  return image_origin_;
161  }

◆ InsertBlob()

bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX *  blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

Definition at line 113 of file tabfind.cpp.

114  {
115  TBOX box = blob->bounding_box();
116  blob->set_left_rule(LeftEdgeForBox(box, false, false));
117  blob->set_right_rule(RightEdgeForBox(box, false, false));
118  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
119  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
120  if (blob->joined_to_prev()) {
121  return false;
122  }
123  grid->InsertBBox(h_spread, v_spread, blob);
124  return true;
125 }
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:529

◆ InsertBlobsToGrid()

void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

Definition at line 89 of file tabfind.cpp.

90  {
91  BLOBNBOX_IT blob_it(blobs);
92  int b_count = 0;
93  int reject_count = 0;
94  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
95  BLOBNBOX *blob = blob_it.data();
96  // if (InsertBlob(true, true, blob, grid)) {
97  if (InsertBlob(h_spread, v_spread, blob, grid)) {
98  ++b_count;
99  } else {
100  ++reject_count;
101  }
102  }
103  if (textord_debug_tabfind) {
104  tprintf("Inserted %d blobs into grid, %d rejected.\n", b_count, reject_count);
105  }
106 }
int textord_debug_tabfind
Definition: alignedblob.cpp:29
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:113

◆ LeftEdgeForBox()

int tesseract::TabFind::LeftEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

Definition at line 284 of file tabfind.cpp.

284  {
285  TabVector *v = LeftTabForBox(box, crossing, extended);
286  return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
287 }
TDimension x() const
access function
Definition: points.h:58
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:347

◆ LeftTabForBox()

TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

Definition at line 347 of file tabfind.cpp.

347  {
348  if (v_it_.empty()) {
349  return nullptr;
350  }
351  int top_y = box.top();
352  int bottom_y = box.bottom();
353  int mid_y = (top_y + bottom_y) / 2;
354  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
355  int min_key, max_key;
356  SetupTabSearch(left, mid_y, &min_key, &max_key);
357  // Position the iterator at the last TabVector with sort_key <= max_key.
358  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key) {
359  v_it_.forward();
360  }
361  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
362  v_it_.backward();
363  }
364  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
365  TabVector *best_v = nullptr;
366  int best_x = -1;
367  int key_limit = -1;
368  do {
369  TabVector *v = v_it_.data();
370  int x = v->XAtY(mid_y);
371  if (x <= left && (v->VOverlap(top_y, bottom_y) > 0 ||
372  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
373  if (best_v == nullptr || x > best_x) {
374  best_v = v;
375  best_x = x;
376  // We can guarantee that no better vector can be found if the
377  // sort key is less than that of the best by max_key - min_key.
378  key_limit = v->sort_key() - (max_key - min_key);
379  }
380  }
381  // Break when the search is done to avoid wrapping the iterator and
382  // thereby potentially slowing the next search.
383  if (v_it_.at_first() || (best_v != nullptr && v->sort_key() < key_limit)) {
384  break; // Prevent restarting list for next call.
385  }
386  v_it_.backward();
387  } while (!v_it_.at_last());
388  return best_v;
389 }
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:486

◆ ReflectInYAxis()

void tesseract::TabFind::ReflectInYAxis ( )
protected

Definition at line 1380 of file tabfind.cpp.

1380  {
1381  TabVector_LIST temp_list;
1382  TabVector_IT temp_it(&temp_list);
1383  v_it_.move_to_first();
1384  // The TabVector list only contains vertical lines, but they need to be
1385  // reflected and the list needs to be reversed, so they are still in
1386  // sort_key order.
1387  while (!v_it_.empty()) {
1388  TabVector *v = v_it_.extract();
1389  v_it_.forward();
1390  v->ReflectInYAxis();
1391  temp_it.add_before_then_move(v);
1392  }
1393  v_it_.add_list_after(&temp_list);
1394  v_it_.move_to_first();
1395  // Reset this grid with reflected bounding boxes.
1396  TBOX grid_box(bleft(), tright());
1397  int tmp = grid_box.left();
1398  grid_box.set_left(-grid_box.right());
1399  grid_box.set_right(-tmp);
1400  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1401 }
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:488

◆ Reset()

void tesseract::TabFind::Reset ( )
protected

Definition at line 1368 of file tabfind.cpp.

1368  {
1369  v_it_.move_to_first();
1370  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1371  if (!v_it_.data()->IsSeparator()) {
1372  delete v_it_.extract();
1373  }
1374  }
1375  Clear();
1376 }

◆ ResetForVerticalText()

void tesseract::TabFind::ResetForVerticalText ( const FCOORD &  rotate,
const FCOORD &  rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

Definition at line 1323 of file tabfind.cpp.

1324  {
1325  // Rotate the horizontal and vertical vectors and swap them over.
1326  // Only the separators are kept and rotated; other tabs are used
1327  // to estimate the gutter width then thrown away.
1328  TabVector_LIST ex_verticals;
1329  TabVector_IT ex_v_it(&ex_verticals);
1330  TabVector_LIST vlines;
1331  TabVector_IT v_it(&vlines);
1332  while (!v_it_.empty()) {
1333  TabVector *v = v_it_.extract();
1334  if (v->IsSeparator()) {
1335  v->Rotate(rotate);
1336  ex_v_it.add_after_then_move(v);
1337  } else {
1338  v_it.add_after_then_move(v);
1339  }
1340  v_it_.forward();
1341  }
1342 
1343  // Adjust the min gutter width for better tabbox selection
1344  // in 2nd call to FindInitialTabVectors().
1345  int median_gutter = FindMedianGutterWidth(&vlines);
1346  if (median_gutter > *min_gutter_width) {
1347  *min_gutter_width = median_gutter;
1348  }
1349 
1350  TabVector_IT h_it(horizontal_lines);
1351  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1352  TabVector *h = h_it.data();
1353  h->Rotate(rotate);
1354  }
1355  v_it_.add_list_after(horizontal_lines);
1356  v_it_.move_to_first();
1357  h_it.set_to_list(horizontal_lines);
1358  h_it.add_list_after(&ex_verticals);
1359 
1360  // Rebuild the grid to the new size.
1361  TBOX grid_box(bleft(), tright());
1362  grid_box.rotate_large(rotate);
1363  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1364 }

◆ RightEdgeForBox()

int tesseract::TabFind::RightEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

Definition at line 279 of file tabfind.cpp.

279  {
280  TabVector *v = RightTabForBox(box, crossing, extended);
281  return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
282 }
ICOORD tright_
Definition: bbgrid.h:91
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:302

◆ RightTabForBox()

TabVector * tesseract::TabFind::RightTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return nullptr. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

Definition at line 302 of file tabfind.cpp.

302  {
303  if (v_it_.empty()) {
304  return nullptr;
305  }
306  int top_y = box.top();
307  int bottom_y = box.bottom();
308  int mid_y = (top_y + bottom_y) / 2;
309  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
310  int min_key, max_key;
311  SetupTabSearch(right, mid_y, &min_key, &max_key);
312  // Position the iterator at the first TabVector with sort_key >= min_key.
313  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key) {
314  v_it_.backward();
315  }
316  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key) {
317  v_it_.forward();
318  }
319  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
320  TabVector *best_v = nullptr;
321  int best_x = -1;
322  int key_limit = -1;
323  do {
324  TabVector *v = v_it_.data();
325  int x = v->XAtY(mid_y);
326  if (x >= right && (v->VOverlap(top_y, bottom_y) > 0 ||
327  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
328  if (best_v == nullptr || x < best_x) {
329  best_v = v;
330  best_x = x;
331  // We can guarantee that no better vector can be found if the
332  // sort key exceeds that of the best by max_key - min_key.
333  key_limit = v->sort_key() + max_key - min_key;
334  }
335  }
336  // Break when the search is done to avoid wrapping the iterator and
337  // thereby potentially slowing the next search.
338  if (v_it_.at_last() || (best_v != nullptr && v->sort_key() > key_limit)) {
339  break; // Prevent restarting list for next call.
340  }
341  v_it_.forward();
342  } while (!v_it_.at_first());
343  return best_v;
344 }

◆ RotateBlobList()

void tesseract::TabFind::RotateBlobList ( const FCOORD &  rotation,
BLOBNBOX_LIST *  blobs 
)
static protected

Definition at line 1278 of file tabfind.cpp.

1278  {
1279  BLOBNBOX_IT it(blobs);
1280  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1281  it.data()->rotate_box(rotation);
1282  }
1283 }

◆ SetBlobRuleEdges()

void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

Definition at line 137 of file tabfind.cpp.

137  {
138  BLOBNBOX_IT blob_it(blobs);
139  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
140  BLOBNBOX *blob = blob_it.data();
141  TBOX box = blob->bounding_box();
142  blob->set_left_rule(LeftEdgeForBox(box, false, false));
143  blob->set_right_rule(RightEdgeForBox(box, false, false));
144  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
145  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
146  }
147 }

◆ SetBlockRuleEdges()

void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK *  block)

Definition at line 128 of file tabfind.cpp.

128  {
129  SetBlobRuleEdges(&block->blobs);
130  SetBlobRuleEdges(&block->small_blobs);
131  SetBlobRuleEdges(&block->noise_blobs);
132  SetBlobRuleEdges(&block->large_blobs);
133 }
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:137

◆ SetupTabSearch()

void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

Definition at line 486 of file tabfind.cpp.

486  {
487  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
488  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
489  *min_key = std::min(key1, key2);
490  *max_key = std::max(key1, key2);
491 }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:274

◆ TidyBlobs()

void tesseract::TabFind::TidyBlobs ( TO_BLOCK *  block)
protected

Definition at line 462 of file tabfind.cpp.

462  {
463  BLOBNBOX_IT large_it = &block->large_blobs;
464  BLOBNBOX_IT blob_it = &block->blobs;
465  int b_count = 0;
466  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
467  BLOBNBOX *large_blob = large_it.data();
468  if (large_blob->owner() != nullptr) {
469  blob_it.add_to_end(large_it.extract());
470  ++b_count;
471  }
472  }
473  if (textord_debug_tabfind) {
474  tprintf("Moved %d large blobs to normal list\n", b_count);
475 #ifndef GRAPHICS_DISABLED
476  ScrollView *rej_win = MakeWindow(500, 300, "Image blobs");
477  block->plot_graded_blobs(rej_win);
478  block->plot_noise_blobs(rej_win);
479  rej_win->Update();
480 #endif // !GRAPHICS_DISABLED
481  }
482  block->DeleteUnownedNoise();
483 }

◆ vectors()

TabVector_LIST* tesseract::TabFind::vectors ( )
inline protected

Accessors

Definition at line 167 of file tabfind.h.

167  {
168  return &vectors_;
169  }

◆ VeryDifferentSizes()

bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 5 different.

Definition at line 413 of file tabfind.cpp.

413  {
414  return size1 > size2 * 5 || size2 > size1 * 5;
415 }

◆ WidthCB()

WidthCallback tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Definition at line 152 of file tabfind.h.

152  {
153  return width_cb_;
154  }

Member Data Documentation

◆ resolution_

int tesseract::TabFind::resolution_
protected

Of source image in pixels per inch.

Definition at line 346 of file tabfind.h.

◆ vertical_skew_

ICOORD tesseract::TabFind::vertical_skew_
protected

Estimate of true vertical in this image.

Definition at line 345 of file tabfind.h.


The documentation for this class was generated from the following files: