tesseract  5.0.0
tesseract::TestableTableFinder Class Reference
Inheritance diagram for tesseract::TestableTableFinder:
tesseract::TableFinder

Public Member Functions

void ExpectPartition (const TBOX &box)
 
void ExpectPartitionCount (int expected_count)
 
bool GapInXProjection (int *xprojection, int length)
 
bool HasLeaderAdjacent (const ColPartition &part)
 
void InsertLeaderPartition (ColPartition *part)
 
void InsertTextPartition (ColPartition *part)
 
void set_global_median_blob_width (int width)
 
void set_global_median_ledding (int ledding)
 
void set_global_median_xheight (int xheight)
 
void SplitAndInsertFragmentedTextPartition (ColPartition *part)
 
- Public Member Functions inherited from tesseract::TableFinder
 TableFinder ()
 
 ~TableFinder ()
 
void set_resolution (int resolution)
 
void set_left_to_right_language (bool order)
 
void Init (int grid_size, const ICOORD &bottom_left, const ICOORD &top_right)
 
void InsertCleanPartitions (ColPartitionGrid *grid, TO_BLOCK *block)
 
void LocateTables (ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb, const FCOORD &reskew)
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::TableFinder
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void InsertTextPartition (ColPartition *part)
 
void InsertFragmentedTextPartition (ColPartition *part)
 
void InsertLeaderPartition (ColPartition *part)
 
void InsertRulingPartition (ColPartition *part)
 
void InsertImagePartition (ColPartition *part)
 
void SplitAndInsertFragmentedTextPartition (ColPartition *part)
 
bool AllowTextPartition (const ColPartition &part) const
 
bool AllowBlob (const BLOBNBOX &blob) const
 
void MoveColSegmentsToGrid (ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid)
 
void InitializePartitions (ColPartitionSet **all_columns)
 
void SetVerticalSpacing (ColPartition *part)
 
void SetGlobalSpacings (ColPartitionGrid *grid)
 
void set_global_median_xheight (int xheight)
 
void set_global_median_blob_width (int width)
 
void set_global_median_ledding (int ledding)
 
void FindNeighbors ()
 
void MarkTablePartitions ()
 
void MarkPartitionsUsingLocalInformation ()
 
bool HasWideOrNoInterWordGap (ColPartition *part) const
 
bool HasLeaderAdjacent (const ColPartition &part)
 
void FilterFalseAlarms ()
 
void FilterParagraphEndings ()
 
void FilterHeaderAndFooter ()
 
void SmoothTablePartitionRuns ()
 
void GetColumnBlocks (ColPartitionSet **columns, ColSegment_LIST *col_segments)
 
void GroupColumnBlocks (ColSegment_LIST *current_segments, ColSegment_LIST *col_segments)
 
bool ConsecutiveBoxes (const TBOX &b1, const TBOX &b2)
 
void SetColumnsType (ColSegment_LIST *col_segments)
 
void GridMergeColumnBlocks ()
 
void GetTableColumns (ColSegment_LIST *table_columns)
 
void GetTableRegions (ColSegment_LIST *table_columns, ColSegment_LIST *table_regions)
 
void GridMergeTableRegions ()
 
bool BelongToOneTable (const TBOX &box1, const TBOX &box2)
 
void AdjustTableBoundaries ()
 
void GrowTableBox (const TBOX &table_box, TBOX *result_box)
 
void GrowTableToIncludePartials (const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
 
void GrowTableToIncludeLines (const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
 
bool HLineBelongsToTable (const ColPartition &part, const TBOX &table_box)
 
void IncludeLeftOutColumnHeaders (TBOX *table_box)
 
void DeleteSingleColumnTables ()
 
bool GapInXProjection (int *xprojection, int length)
 
void RecognizeTables ()
 
void DisplayColSegments (ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color)
 
void DisplayColPartitions (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color, ScrollView::Color table_color)
 
void DisplayColPartitions (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
 
void DisplayColPartitionConnections (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
 
void MakeTableBlocks (ColPartitionGrid *grid, ColPartitionSet **columns, const WidthCallback &width_cb)
 
- Static Protected Member Functions inherited from tesseract::TableFinder
static void SetPartitionSpacings (ColPartitionGrid *grid, ColPartitionSet **all_columns)
 
- Protected Attributes inherited from tesseract::TableFinder
int resolution_
 
int global_median_xheight_
 
int global_median_blob_width_
 
int global_median_ledding_
 
ColPartitionGrid clean_part_grid_
 
ColPartitionGrid leader_and_ruling_grid_
 
ColPartitionGrid fragmented_text_grid_
 
ColSegmentGrid col_seg_grid_
 
ColSegmentGrid table_grid_
 
bool left_to_right_language_
 

Detailed Description

Definition at line 22 of file tablefind_test.cc.

Member Function Documentation

◆ ExpectPartition()

void tesseract::TestableTableFinder::ExpectPartition ( const TBOX & box)
inline

Definition at line 33 of file tablefind_test.cc.

33  {
34  ColPartitionGridSearch gsearch(&fragmented_text_grid_);
35  gsearch.SetUniqueMode(true);
36  gsearch.StartFullSearch();
37  ColPartition *part = nullptr;
38  bool found = false;
39  while ((part = gsearch.NextFullSearch()) != nullptr) {
40  if (part->bounding_box().left() == box.left() &&
41  part->bounding_box().bottom() == box.bottom() &&
42  part->bounding_box().right() == box.right() && part->bounding_box().top() == box.top()) {
43  found = true;
44  }
45  }
46  EXPECT_TRUE(found);
47  }
ColPartitionGrid fragmented_text_grid_
Definition: tablefind.h:401

◆ ExpectPartitionCount()

void tesseract::TestableTableFinder::ExpectPartitionCount ( int  expected_count)
inline

Definition at line 48 of file tablefind_test.cc.

48  {
49  ColPartitionGridSearch gsearch(&fragmented_text_grid_);
50  gsearch.SetUniqueMode(true);
51  gsearch.StartFullSearch();
52  ColPartition *part = nullptr;
53  int count = 0;
54  while ((part = gsearch.NextFullSearch()) != nullptr) {
55  ++count;
56  }
57  EXPECT_EQ(expected_count, count);
58  }

◆ GapInXProjection()

bool tesseract::TableFinder::GapInXProjection

Definition at line 345 of file tablefind.cpp.

1838  {
1839  // Find peak value of the histogram
1840  int peak_value = 0;
1841  for (int i = 0; i < length; i++) {
1842  if (xprojection[i] > peak_value) {
1843  peak_value = xprojection[i];
1844  }
1845  }
1846  // Peak value represents the maximum number of horizontally
1847  // overlapping colpartitions, so this can be considered as the
1848  // number of rows in the table
1849  if (peak_value < kMinRowsInTable) {
1850  return false;
1851  }
1852  double projection_threshold = kSmallTableProjectionThreshold * peak_value;
1853  if (peak_value >= kLargeTableRowCount) {
1854  projection_threshold = kLargeTableProjectionThreshold * peak_value;
1855  }
1856  // Threshold the histogram
1857  for (int i = 0; i < length; i++) {
1858  xprojection[i] = (xprojection[i] >= projection_threshold) ? 1 : 0;
1859  }
1860  // Find the largest run of zeros between two ones
1861  int largest_gap = 0;
1862  int run_start = -1;
1863  for (int i = 1; i < length; i++) {
1864  // detect start of a run of zeros
1865  if (xprojection[i - 1] && !xprojection[i]) {
1866  run_start = i;
1867  }
1868  // detect end of a run of zeros and update the value of largest gap
1869  if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) {
1870  int gap = i - run_start;
1871  if (gap > largest_gap) {
1872  largest_gap = gap;
1873  }
1874  run_start = -1;
1875  }
1876  }
1877  return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_;
1878 }
const double kLargeTableProjectionThreshold
Definition: tablefind.cpp:107
const int kMinRowsInTable
Definition: tablefind.cpp:112
const int kLargeTableRowCount
Definition: tablefind.cpp:109
const double kSmallTableProjectionThreshold
Definition: tablefind.cpp:106
const double kMaxXProjectionGapFactor
Definition: tablefind.cpp:136

◆ HasLeaderAdjacent()

bool tesseract::TableFinder::HasLeaderAdjacent

Definition at line 244 of file tablefind.cpp.

969  {
970  if (part.flow() == BTFT_LEADER) {
971  return true;
972  }
973  // Search range is left and right bounded by an offset of the
974  // median xheight. This offset is to allow some tolerance to
975  // the leaders on the page in the event that the alignment is still
976  // a bit off.
977  const TBOX &box = part.bounding_box();
978  const int search_size = kAdjacentLeaderSearchPadding * global_median_xheight_;
979  const int top = box.top() + search_size;
980  const int bottom = box.bottom() - search_size;
981  ColPartitionGridSearch hsearch(&leader_and_ruling_grid_);
982  for (int direction = 0; direction < 2; ++direction) {
983  bool right_to_left = (direction == 0);
984  int x = right_to_left ? box.right() : box.left();
985  hsearch.StartSideSearch(x, bottom, top);
986  ColPartition *leader = nullptr;
987  while ((leader = hsearch.NextSideSearch(right_to_left)) != nullptr) {
988  // The leader could be a horizontal ruling in the grid.
989  // Make sure it is actually a leader.
990  if (leader->flow() != BTFT_LEADER) {
991  continue;
992  }
993  // This should not happen, they are in different grids.
994  ASSERT_HOST(&part != leader);
995  // Make sure the leader shares a page column with the partition,
996  // otherwise we are spreading across columns.
997  if (!part.IsInSameColumnAs(*leader)) {
998  break;
999  }
1000  // There should be a significant vertical overlap
1001  if (!leader->VSignificantCoreOverlap(part)) {
1002  continue;
1003  }
1004  // Leader passed all tests, so it is adjacent.
1005  return true;
1006  }
1007  }
1008  // No leaders are adjacent to the given partition.
1009  return false;
1010 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
@ TBOX
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919
const int kAdjacentLeaderSearchPadding
Definition: tablefind.cpp:117
@ BTFT_LEADER
Definition: blobbox.h:117
ColPartitionGrid leader_and_ruling_grid_
Definition: tablefind.h:397

◆ InsertLeaderPartition()

void tesseract::TableFinder::InsertLeaderPartition

Definition at line 169 of file tablefind.cpp.

411  {
412  ASSERT_HOST(part != nullptr);
413  if (!part->IsEmpty() && part->bounding_box().area() > 0) {
414  leader_and_ruling_grid_.InsertBBox(true, true, part);
415  } else {
416  delete part;
417  }
418 }
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:529

◆ InsertTextPartition()

void tesseract::TableFinder::InsertTextPartition

Definition at line 167 of file tablefind.cpp.

395  {
396  ASSERT_HOST(part != nullptr);
397  if (AllowTextPartition(*part)) {
398  clean_part_grid_.InsertBBox(true, true, part);
399  } else {
400  delete part;
401  }
402 }
ColPartitionGrid clean_part_grid_
Definition: tablefind.h:395
bool AllowTextPartition(const ColPartition &part) const
Definition: tablefind.cpp:490

◆ set_global_median_blob_width()

void tesseract::TableFinder::set_global_median_blob_width

Definition at line 214 of file tablefind.cpp.

766  {
767  global_median_blob_width_ = width;
768 }

◆ set_global_median_ledding()

void tesseract::TableFinder::set_global_median_ledding

Definition at line 219 of file tablefind.cpp.

769  {
770  global_median_ledding_ = ledding;
771 }

◆ set_global_median_xheight()

void tesseract::TableFinder::set_global_median_xheight

Definition at line 211 of file tablefind.cpp.

763  {
764  global_median_xheight_ = xheight;
765 }

◆ SplitAndInsertFragmentedTextPartition()

void tesseract::TableFinder::SplitAndInsertFragmentedTextPartition

Definition at line 172 of file tablefind.cpp.

437  {
438  ASSERT_HOST(part != nullptr);
439  // Bye bye empty partitions!
440  if (part->boxes()->empty()) {
441  delete part;
442  return;
443  }
444 
445  // The AllowBlob function prevents this.
446  ASSERT_HOST(part->median_width() > 0);
447  const double kThreshold = part->median_width() * kSplitPartitionSize;
448 
449  ColPartition *right_part = part;
450  bool found_split = true;
451  while (found_split) {
452  found_split = false;
453  BLOBNBOX_C_IT box_it(right_part->boxes());
454  // Blobs are sorted left side first. If blobs overlap,
455  // the previous blob may have a "more right" right side.
456  // Account for this by always keeping the largest "right"
457  // so far.
458  int previous_right = INT32_MIN;
459 
460  // Look for the next split in the partition.
461  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
462  const TBOX &box = box_it.data()->bounding_box();
463  if (previous_right != INT32_MIN &&
464  box.left() - previous_right > kThreshold) {
465  // We have a split position. Split the partition in two pieces.
466  // Insert the left piece in the grid and keep processing the right.
467  int mid_x = (box.left() + previous_right) / 2;
468  ColPartition *left_part = right_part;
469  right_part = left_part->SplitAt(mid_x);
470 
471  InsertFragmentedTextPartition(left_part);
472  found_split = true;
473  break;
474  }
475 
476  // The right side of the previous blobs.
477  previous_right = std::max(previous_right, static_cast<int>(box.right()));
478  }
479  }
480  // When a split is not found, the right part is minimized
481  // as much as possible, so process it.
482  InsertFragmentedTextPartition(right_part);
483 }
const double kSplitPartitionSize
Definition: tablefind.cpp:44
void InsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:403

The documentation for this class was generated from the following file: