tesseract  5.0.0
tesseract::StructuredTable Class Reference

#include <tablerecog.h>

Inheritance diagram for tesseract::StructuredTable:
tesseract::TestableStructuredTable

Public Member Functions

 StructuredTable ()
 
 ~StructuredTable ()=default
 
void Init ()
 
void set_text_grid (ColPartitionGrid *text)
 
void set_line_grid (ColPartitionGrid *lines)
 
void set_max_text_height (int height)
 
bool is_lined () const
 
unsigned row_count () const
 
unsigned column_count () const
 
unsigned cell_count () const
 
void set_bounding_box (const TBOX &box)
 
const TBOX & bounding_box () const
 
int median_cell_height ()
 
int median_cell_width ()
 
int row_height (unsigned row) const
 
int column_width (unsigned column) const
 
int space_above () const
 
int space_below () const
 
bool FindLinedStructure ()
 
bool FindWhitespacedStructure ()
 
bool DoesPartitionFit (const ColPartition &part) const
 
int CountFilledCells ()
 
int CountFilledCellsInRow (int row)
 
int CountFilledCellsInColumn (int column)
 
int CountFilledCells (unsigned row_start, unsigned row_end, unsigned column_start, unsigned column_end)
 
bool VerifyRowFilled (int row)
 
double CalculateCellFilledPercentage (unsigned row, unsigned column)
 
void Display (ScrollView *window, ScrollView::Color color)
 

Protected Member Functions

void ClearStructure ()
 
bool VerifyLinedTableCells ()
 
bool VerifyWhitespacedTable ()
 
void FindWhitespacedColumns ()
 
void FindWhitespacedRows ()
 
void CalculateMargins ()
 
void UpdateMargins (ColPartitionGrid *grid)
 
int FindVerticalMargin (ColPartitionGrid *grid, int start_x, bool decrease) const
 
int FindHorizontalMargin (ColPartitionGrid *grid, int start_y, bool decrease) const
 
void CalculateStats ()
 
void AbsorbNearbyLines ()
 
int CountVerticalIntersections (int x)
 
int CountHorizontalIntersections (int y)
 
int CountPartitions (const TBOX &box)
 

Static Protected Member Functions

static void FindCellSplitLocations (const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
 

Protected Attributes

ColPartitionGrid * text_grid_
 
ColPartitionGrid * line_grid_
 
TBOX bounding_box_
 
std::vector< int > cell_x_
 
std::vector< int > cell_y_
 
bool is_lined_
 
int space_above_
 
int space_below_
 
int space_left_
 
int space_right_
 
int median_cell_height_
 
int median_cell_width_
 
int max_text_height_
 

Detailed Description

Definition at line 70 of file tablerecog.h.

Constructor & Destructor Documentation

◆ StructuredTable()

tesseract::StructuredTable::StructuredTable ( )

Definition at line 66 of file tablerecog.cpp.

67  : text_grid_(nullptr)
68  , line_grid_(nullptr)
69  , is_lined_(false)
70  , space_above_(0)
71  , space_below_(0)
72  , space_left_(0)
73  , space_right_(0)
74  , median_cell_height_(0)
75  , median_cell_width_(0)
76  , max_text_height_(INT32_MAX) {}
ColPartitionGrid * text_grid_
Definition: tablerecog.h:231
ColPartitionGrid * line_grid_
Definition: tablerecog.h:232

◆ ~StructuredTable()

tesseract::StructuredTable::~StructuredTable ( )
default

Member Function Documentation

◆ AbsorbNearbyLines()

void tesseract::StructuredTable::AbsorbNearbyLines ( )
protected

Definition at line 551 of file tablerecog.cpp.

551  {
552  ColPartitionGridSearch gsearch(line_grid_);
553  gsearch.SetUniqueMode(true);
554 
555  // Is the closest line above good? Loop multiple times for tables with
556  // multi-line (sometimes 2) borders. Limit the number of lines by
557  // making sure they stay within a table cell or so.
558  ColPartition *line = nullptr;
559  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), bounding_box_.top());
560  while ((line = gsearch.NextVerticalSearch(false)) != nullptr) {
561  if (!line->IsHorizontalLine()) {
562  break;
563  }
564  TBOX text_search(bounding_box_.left(), bounding_box_.top() + 1, bounding_box_.right(),
565  line->MidY());
566  if (text_search.height() > median_cell_height_ * 2) {
567  break;
568  }
569  if (CountPartitions(text_search) > 0) {
570  break;
571  }
572  bounding_box_.set_top(line->MidY());
573  }
574  // As above, is the closest line below good?
575  line = nullptr;
576  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), bounding_box_.bottom());
577  while ((line = gsearch.NextVerticalSearch(true)) != nullptr) {
578  if (!line->IsHorizontalLine()) {
579  break;
580  }
581  TBOX text_search(bounding_box_.left(), line->MidY(), bounding_box_.right(),
582  bounding_box_.bottom() - 1);
583  if (text_search.height() > median_cell_height_ * 2) {
584  break;
585  }
586  if (CountPartitions(text_search) > 0) {
587  break;
588  }
589  bounding_box_.set_bottom(line->MidY());
590  }
591  // TODO(nbeato): vertical lines
592 }
@ TBOX
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919
TDimension left() const
Definition: rect.h:82
TDimension top() const
Definition: rect.h:68
void set_bottom(int y)
Definition: rect.h:78
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
void set_top(int y)
Definition: rect.h:71
int CountPartitions(const TBOX &box)
Definition: tablerecog.cpp:705

◆ bounding_box()

const TBOX & tesseract::StructuredTable::bounding_box ( ) const

Definition at line 104 of file tablerecog.cpp.

104  {
105  return bounding_box_;
106 }

◆ CalculateCellFilledPercentage()

double tesseract::StructuredTable::CalculateCellFilledPercentage ( unsigned  row,
unsigned  column 
)

Definition at line 272 of file tablerecog.cpp.

272  {
273  ASSERT_HOST(row <= row_count());
274  ASSERT_HOST(column <= column_count());
275  const TBOX kCellBox(cell_x_[column], cell_y_[row], cell_x_[column + 1], cell_y_[row + 1]);
276  ASSERT_HOST(!kCellBox.null_box());
277 
278  ColPartitionGridSearch gsearch(text_grid_);
279  gsearch.SetUniqueMode(true);
280  gsearch.StartRectSearch(kCellBox);
281  double area_covered = 0;
282  ColPartition *text = nullptr;
283  while ((text = gsearch.NextRectSearch()) != nullptr) {
284  if (text->IsTextType()) {
285  area_covered += text->bounding_box().intersection(kCellBox).area();
286  }
287  }
288  const int32_t current_area = kCellBox.area();
289  if (current_area == 0) {
290  return 1.0;
291  }
292  return std::min(1.0, area_covered / current_area);
293 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
std::vector< int > cell_y_
Definition: tablerecog.h:238
unsigned column_count() const
Definition: tablerecog.cpp:95
std::vector< int > cell_x_
Definition: tablerecog.h:237
unsigned row_count() const
Definition: tablerecog.cpp:92

◆ CalculateMargins()

void tesseract::StructuredTable::CalculateMargins ( )
protected

Definition at line 474 of file tablerecog.cpp.

474  {
475  space_above_ = INT32_MAX;
476  space_below_ = INT32_MAX;
477  space_right_ = INT32_MAX;
478  space_left_ = INT32_MAX;
479  UpdateMargins(text_grid_);
480  UpdateMargins(line_grid_);
481 }
void UpdateMargins(ColPartitionGrid *grid)
Definition: tablerecog.cpp:484

◆ CalculateStats()

void tesseract::StructuredTable::CalculateStats ( )
protected

Definition at line 529 of file tablerecog.cpp.

529  {
530  const int kMaxCellHeight = 1000;
531  const int kMaxCellWidth = 1000;
532  STATS height_stats(0, kMaxCellHeight + 1);
533  STATS width_stats(0, kMaxCellWidth + 1);
534 
535  for (unsigned i = 0; i < row_count(); ++i) {
536  height_stats.add(row_height(i), column_count());
537  }
538  for (unsigned i = 0; i < column_count(); ++i) {
539  width_stats.add(column_width(i), row_count());
540  }
541 
542  median_cell_height_ = static_cast<int>(height_stats.median() + 0.5);
543  median_cell_width_ = static_cast<int>(width_stats.median() + 0.5);
544 }
int row_height(unsigned row) const
Definition: tablerecog.cpp:113
int column_width(unsigned column) const
Definition: tablerecog.cpp:117

◆ cell_count()

unsigned tesseract::StructuredTable::cell_count ( ) const

Definition at line 98 of file tablerecog.cpp.

98  {
99  return row_count() * column_count();
100 }

◆ ClearStructure()

void tesseract::StructuredTable::ClearStructure ( )
protected

Definition at line 314 of file tablerecog.cpp.

314  {
315  cell_x_.clear();
316  cell_y_.clear();
317  is_lined_ = false;
318  space_above_ = 0;
319  space_below_ = 0;
320  space_left_ = 0;
321  space_right_ = 0;
322  median_cell_height_ = 0;
323  median_cell_width_ = 0;
324 }

◆ column_count()

unsigned tesseract::StructuredTable::column_count ( ) const

Definition at line 95 of file tablerecog.cpp.

95  {
96  return cell_x_.empty() ? 0 : cell_x_.size() - 1;
97 }

◆ column_width()

int tesseract::StructuredTable::column_width ( unsigned  column) const

Definition at line 117 of file tablerecog.cpp.

117  {
118  ASSERT_HOST(column < column_count());
119  return cell_x_[column + 1] - cell_x_[column];
120 }

◆ CountFilledCells() [1/2]

int tesseract::StructuredTable::CountFilledCells ( )

Definition at line 228 of file tablerecog.cpp.

228  {
229  return CountFilledCells(0, row_count() - 1, 0, column_count() - 1);
230 }

◆ CountFilledCells() [2/2]

int tesseract::StructuredTable::CountFilledCells ( unsigned  row_start,
unsigned  row_end,
unsigned  column_start,
unsigned  column_end 
)

Definition at line 237 of file tablerecog.cpp.

238  {
239  ASSERT_HOST(row_start <= row_end && row_end < row_count());
240  ASSERT_HOST(column_start <= column_end && column_end < column_count());
241  int cell_count = 0;
242  TBOX cell_box;
243  for (unsigned row = row_start; row <= row_end; ++row) {
244  cell_box.set_bottom(cell_y_[row]);
245  cell_box.set_top(cell_y_[row + 1]);
246  for (unsigned col = column_start; col <= column_end; ++col) {
247  cell_box.set_left(cell_x_[col]);
248  cell_box.set_right(cell_x_[col + 1]);
249  if (CountPartitions(cell_box) > 0) {
250  ++cell_count;
251  }
252  }
253  }
254  return cell_count;
255 }
unsigned cell_count() const
Definition: tablerecog.cpp:98

◆ CountFilledCellsInColumn()

int tesseract::StructuredTable::CountFilledCellsInColumn ( int  column)

Definition at line 234 of file tablerecog.cpp.

234  {
235  return CountFilledCells(0, row_count() - 1, column, column);
236 }

◆ CountFilledCellsInRow()

int tesseract::StructuredTable::CountFilledCellsInRow ( int  row)

Definition at line 231 of file tablerecog.cpp.

231  {
232  return CountFilledCells(row, row, 0, column_count() - 1);
233 }

◆ CountHorizontalIntersections()

int tesseract::StructuredTable::CountHorizontalIntersections ( int  y)
protected

Definition at line 677 of file tablerecog.cpp.

677  {
678  int count = 0;
679  // Make a small box to keep the search time down.
680  const int kGridSize = text_grid_->gridsize();
681  TBOX horizontal_box = bounding_box_;
682  horizontal_box.set_bottom(y - kGridSize);
683  horizontal_box.set_top(y + kGridSize);
684 
685  ColPartitionGridSearch gsearch(text_grid_);
686  gsearch.SetUniqueMode(true);
687  gsearch.StartRectSearch(horizontal_box);
688  ColPartition *text = nullptr;
689  while ((text = gsearch.NextRectSearch()) != nullptr) {
690  if (!text->IsTextType()) {
691  continue;
692  }
693 
694  const TBOX &box = text->bounding_box();
695  if (box.bottom() < y && y < box.top()) {
696  ++count;
697  }
698  }
699  return count;
700 }
int gridsize() const
Definition: bbgrid.h:63

◆ CountPartitions()

int tesseract::StructuredTable::CountPartitions ( const TBOX & box)
protected

Definition at line 705 of file tablerecog.cpp.

705  {
706  ColPartitionGridSearch gsearch(text_grid_);
707  gsearch.SetUniqueMode(true);
708  gsearch.StartRectSearch(box);
709  int count = 0;
710  ColPartition *text = nullptr;
711  while ((text = gsearch.NextRectSearch()) != nullptr) {
712  if (text->IsTextType()) {
713  ++count;
714  }
715  }
716  return count;
717 }

◆ CountVerticalIntersections()

int tesseract::StructuredTable::CountVerticalIntersections ( int  x)
protected

Definition at line 651 of file tablerecog.cpp.

651  {
652  int count = 0;
653  // Make a small box to keep the search time down.
654  const int kGridSize = text_grid_->gridsize();
655  TBOX vertical_box = bounding_box_;
656  vertical_box.set_left(x - kGridSize);
657  vertical_box.set_right(x + kGridSize);
658 
659  ColPartitionGridSearch gsearch(text_grid_);
660  gsearch.SetUniqueMode(true);
661  gsearch.StartRectSearch(vertical_box);
662  ColPartition *text = nullptr;
663  while ((text = gsearch.NextRectSearch()) != nullptr) {
664  if (!text->IsTextType()) {
665  continue;
666  }
667  const TBOX &box = text->bounding_box();
668  if (box.left() < x && x < box.right()) {
669  ++count;
670  }
671  }
672  return count;
673 }
void set_left(int x)
Definition: rect.h:85

◆ Display()

void tesseract::StructuredTable::Display ( ScrollView * window,
ScrollView::Color  color 
)

Definition at line 297 of file tablerecog.cpp.

297  {
298  window->Brush(ScrollView::NONE);
299  window->Pen(color);
300  window->Rectangle(bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(),
301  bounding_box_.top());
302  for (int i : cell_x_) {
303  window->Line(i, bounding_box_.bottom(), i, bounding_box_.top());
304  }
305  for (int i : cell_y_) {
306  window->Line(bounding_box_.left(), i, bounding_box_.right(), i);
307  }
308  window->UpdateWindow();
309 }

◆ DoesPartitionFit()

bool tesseract::StructuredTable::DoesPartitionFit ( const ColPartition & part) const

Definition at line 212 of file tablerecog.cpp.

212  {
213  const TBOX &box = part.bounding_box();
214  for (int i : cell_x_) {
215  if (box.left() < i && i < box.right()) {
216  return false;
217  }
218  }
219  for (int i : cell_y_) {
220  if (box.bottom() < i && i < box.top()) {
221  return false;
222  }
223  }
224  return true;
225 }

◆ FindCellSplitLocations()

void tesseract::StructuredTable::FindCellSplitLocations ( const std::vector< int > &  min_list,
const std::vector< int > &  max_list,
int  max_merged,
std::vector< int > *  locations 
)
static protected

Definition at line 608 of file tablerecog.cpp.

610  {
611  locations->clear();
612  ASSERT_HOST(min_list.size() == max_list.size());
613  if (min_list.empty()) {
614  return;
615  }
616  ASSERT_HOST(min_list.at(0) < max_list.at(0));
617  ASSERT_HOST(min_list.at(min_list.size() - 1) < max_list.at(max_list.size() - 1));
618 
619  locations->push_back(min_list.at(0));
620  unsigned min_index = 0;
621  unsigned max_index = 0;
622  int stacked_partitions = 0;
623  int last_cross_position = INT32_MAX;
624  // max_index will expire after min_index.
625  // However, we can't "increase" the hill size if min_index expired.
626  // So finish processing when min_index expires.
627  while (min_index < min_list.size()) {
628  // Increase the hill count.
629  if (min_list[min_index] < max_list[max_index]) {
630  ++stacked_partitions;
631  if (last_cross_position != INT32_MAX && stacked_partitions > max_merged) {
632  int mid = (last_cross_position + min_list[min_index]) / 2;
633  locations->push_back(mid);
634  last_cross_position = INT32_MAX;
635  }
636  ++min_index;
637  } else {
638  // Decrease the hill count.
639  --stacked_partitions;
640  if (last_cross_position == INT32_MAX && stacked_partitions <= max_merged) {
641  last_cross_position = max_list[max_index];
642  }
643  ++max_index;
644  }
645  }
646  locations->push_back(max_list.at(max_list.size() - 1));
647 }

◆ FindHorizontalMargin()

int tesseract::StructuredTable::FindHorizontalMargin ( ColPartitionGrid * grid,
int  start_y,
bool  decrease 
) const
protected

Definition at line 511 of file tablerecog.cpp.

511  {
512  ColPartitionGridSearch gsearch(grid);
513  gsearch.SetUniqueMode(true);
514  gsearch.StartSideSearch(border, bounding_box_.bottom(), bounding_box_.top());
515  ColPartition *part = nullptr;
516  while ((part = gsearch.NextSideSearch(decrease)) != nullptr) {
517  if (!part->IsTextType() && !part->IsVerticalLine()) {
518  continue;
519  }
520  int distance =
521  decrease ? border - part->bounding_box().right() : part->bounding_box().left() - border;
522  if (distance >= 0) {
523  return distance;
524  }
525  }
526  return INT32_MAX;
527 }
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44

◆ FindLinedStructure()

bool tesseract::StructuredTable::FindLinedStructure ( )

Definition at line 135 of file tablerecog.cpp.

135  {
136  ClearStructure();
137 
138  // Search for all of the lines in the current box.
139  // Update the cellular structure with the exact lines.
140  ColPartitionGridSearch box_search(line_grid_);
141  box_search.SetUniqueMode(true);
142  box_search.StartRectSearch(bounding_box_);
143  ColPartition *line = nullptr;
144 
145  while ((line = box_search.NextRectSearch()) != nullptr) {
146  if (line->IsHorizontalLine()) {
147  cell_y_.push_back(line->MidY());
148  }
149  if (line->IsVerticalLine()) {
150  cell_x_.push_back(line->MidX());
151  }
152  }
153 
154  // HasSignificantLines should guarantee cells.
155  // Because that code is a different class, just gracefully
156  // return false. This could be an assert.
157  if (cell_x_.size() < 3 || cell_y_.size() < 3) {
158  return false;
159  }
160 
161  // Sort and remove duplicates that may have occurred due to split lines.
162  std::sort(cell_x_.begin(), cell_x_.end());
163  auto last_x = std::unique(cell_x_.begin(), cell_x_.end());
164  cell_x_.erase(last_x, cell_x_.end());
165  std::sort(cell_y_.begin(), cell_y_.end());
166  auto last_y = std::unique(cell_y_.begin(), cell_y_.end());
167  cell_y_.erase(last_y, cell_y_.end());
168 
169  // The border should be the extents of line boxes, not middle.
170  cell_x_[0] = bounding_box_.left();
171  cell_x_[cell_x_.size() - 1] = bounding_box_.right();
172  cell_y_[0] = bounding_box_.bottom();
173  cell_y_[cell_y_.size() - 1] = bounding_box_.top();
174 
175  // Remove duplicates that may have occurred due to moving the borders.
176  last_x = std::unique(cell_x_.begin(), cell_x_.end());
177  cell_x_.erase(last_x, cell_x_.end());
178  last_y = std::unique(cell_y_.begin(), cell_y_.end());
179  cell_y_.erase(last_y, cell_y_.end());
180 
181  CalculateMargins();
182  CalculateStats();
183  is_lined_ = VerifyLinedTableCells();
184  return is_lined_;
185 }

◆ FindVerticalMargin()

int tesseract::StructuredTable::FindVerticalMargin ( ColPartitionGrid * grid,
int  start_x,
bool  decrease 
) const
protected

Definition at line 494 of file tablerecog.cpp.

494  {
495  ColPartitionGridSearch gsearch(grid);
496  gsearch.SetUniqueMode(true);
497  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), border);
498  ColPartition *part = nullptr;
499  while ((part = gsearch.NextVerticalSearch(decrease)) != nullptr) {
500  if (!part->IsTextType() && !part->IsHorizontalLine()) {
501  continue;
502  }
503  int distance =
504  decrease ? border - part->bounding_box().top() : part->bounding_box().bottom() - border;
505  if (distance >= 0) {
506  return distance;
507  }
508  }
509  return INT32_MAX;
510 }

◆ FindWhitespacedColumns()

void tesseract::StructuredTable::FindWhitespacedColumns ( )
protected

Definition at line 362 of file tablerecog.cpp.

362  {
363  // Set of the extents of all partitions on the page.
364  std::vector<int> left_sides;
365  std::vector<int> right_sides;
366 
367  // Look at each text partition. We want to find the partitions
368  // that have extremal left/right sides. These will give us a basis
369  // for the table columns.
370  ColPartitionGridSearch gsearch(text_grid_);
371  gsearch.SetUniqueMode(true);
372  gsearch.StartRectSearch(bounding_box_);
373  ColPartition *text = nullptr;
374  while ((text = gsearch.NextRectSearch()) != nullptr) {
375  if (!text->IsTextType()) {
376  continue;
377  }
378 
379  ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right());
380  int spacing = static_cast<int>(text->median_width() * kHorizontalSpacing / 2.0 + 0.5);
381  left_sides.push_back(text->bounding_box().left() - spacing);
382  right_sides.push_back(text->bounding_box().right() + spacing);
383  }
384  // It causes disaster below, so avoid it!
385  if (left_sides.empty() || right_sides.empty()) {
386  return;
387  }
388 
389  // Since data may be inserted in grid order, we sort the left/right sides.
390  std::sort(left_sides.begin(), left_sides.end());
391  std::sort(right_sides.begin(), right_sides.end());
392 
393  // At this point, in the "merged list", we expect to have a left side,
394  // followed by either more left sides or a right side. The last number
395  // should be a right side. We find places where the splits occur by looking
396  // for "valleys". If we want to force gap sizes or allow overlap, change
397  // the spacing above. If you want to let lines "slice" partitions as long
398  // as it is infrequent, change the following function.
399  FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold, &cell_x_);
400 }
const double kHorizontalSpacing
Definition: tablerecog.cpp:34
const int kCellSplitColumnThreshold
Definition: tablerecog.cpp:41
static void FindCellSplitLocations(const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
Definition: tablerecog.cpp:608

◆ FindWhitespacedRows()

void tesseract::StructuredTable::FindWhitespacedRows ( )
protected

Definition at line 407 of file tablerecog.cpp.

407  {
408  // Set of the extents of all partitions on the page.
409  std::vector<int> bottom_sides;
410  std::vector<int> top_sides;
411  // We will be "shrinking" partitions, so keep the min/max around to
412  // make sure the bottom/top lines do not intersect text.
413  int min_bottom = INT32_MAX;
414  int max_top = INT32_MIN;
415 
416  // Look at each text partition. We want to find the partitions
417  // that have extremal bottom/top sides. These will give us a basis
418  // for the table rows. Because the textlines can be skewed and close due
419  // to warping, the height of the partitions is toned down a little bit.
420  ColPartitionGridSearch gsearch(text_grid_);
421  gsearch.SetUniqueMode(true);
422  gsearch.StartRectSearch(bounding_box_);
423  ColPartition *text = nullptr;
424  while ((text = gsearch.NextRectSearch()) != nullptr) {
425  if (!text->IsTextType()) {
426  continue;
427  }
428 
429  ASSERT_HOST(text->bounding_box().bottom() < text->bounding_box().top());
430  min_bottom = std::min(min_bottom, static_cast<int>(text->bounding_box().bottom()));
431  max_top = std::max(max_top, static_cast<int>(text->bounding_box().top()));
432 
433  // Ignore "tall" text partitions, as these are usually false positive
434  // vertical text or multiple lines pulled together.
435  if (text->bounding_box().height() > max_text_height_) {
436  continue;
437  }
438 
439  int spacing = static_cast<int>(text->bounding_box().height() * kVerticalSpacing / 2.0 + 0.5);
440  int bottom = text->bounding_box().bottom() - spacing;
441  int top = text->bounding_box().top() + spacing;
442  // For horizontal text, the factor can be negative. This should
443  // probably cause a warning or failure. I haven't actually checked if
444  // it happens.
445  if (bottom >= top) {
446  continue;
447  }
448 
449  bottom_sides.push_back(bottom);
450  top_sides.push_back(top);
451  }
452  // It causes disaster below, so avoid it!
453  if (bottom_sides.empty() || top_sides.empty()) {
454  return;
455  }
456 
457  // Since data may be inserted in grid order, we sort the bottom/top sides.
458  std::sort(bottom_sides.begin(), bottom_sides.end());
459  std::sort(top_sides.begin(), top_sides.end());
460 
461  // At this point, in the "merged list", we expect to have a bottom side,
462  // followed by either more bottom sides or a top side. The last number
463  // should be a top side. We find places where the splits occur by looking
464  // for "valleys". If we want to force gap sizes or allow overlap, change
465  // the spacing above. If you want to let lines "slice" partitions as long
466  // as it is infrequent, change the following function.
467  FindCellSplitLocations(bottom_sides, top_sides, kCellSplitRowThreshold, &cell_y_);
468 
469  // Recover the min/max correctly since it was shifted.
470  cell_y_[0] = min_bottom;
471  cell_y_[cell_y_.size() - 1] = max_top;
472 }
const double kVerticalSpacing
Definition: tablerecog.cpp:37
const int kCellSplitRowThreshold
Definition: tablerecog.cpp:40

◆ FindWhitespacedStructure()

bool tesseract::StructuredTable::FindWhitespacedStructure ( )

Definition at line 188 of file tablerecog.cpp.

188  {
189  ClearStructure();
190  FindWhitespacedColumns();
191  FindWhitespacedRows();
192 
193  if (!VerifyWhitespacedTable()) {
194  return false;
195  } else {
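196  bounding_box_.set_left(cell_x_[0]);
197  bounding_box_.set_right(cell_x_[cell_x_.size() - 1]);
198  bounding_box_.set_bottom(cell_y_[0]);
199  bounding_box_.set_top(cell_y_[cell_y_.size() - 1]);
200  AbsorbNearbyLines();
201  CalculateMargins();
202  CalculateStats();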
203  return true;
204  }
205 }
void set_right(int x)
Definition: rect.h:92

◆ Init()

void tesseract::StructuredTable::Init ( )

Definition at line 78 of file tablerecog.cpp.

78 {}

◆ is_lined()

bool tesseract::StructuredTable::is_lined ( ) const

Definition at line 89 of file tablerecog.cpp.

89  {
90  return is_lined_;
91 }

◆ median_cell_height()

int tesseract::StructuredTable::median_cell_height ( )

Definition at line 107 of file tablerecog.cpp.

107  {
108  return median_cell_height_;
109 }

◆ median_cell_width()

int tesseract::StructuredTable::median_cell_width ( )

Definition at line 110 of file tablerecog.cpp.

110  {
111  return median_cell_width_;
112 }

◆ row_count()

unsigned tesseract::StructuredTable::row_count ( ) const

Definition at line 92 of file tablerecog.cpp.

92  {
93  return cell_y_.empty() ? 0 : cell_y_.size() - 1;
94 }

◆ row_height()

int tesseract::StructuredTable::row_height ( unsigned  row) const

Definition at line 113 of file tablerecog.cpp.

113  {
114  ASSERT_HOST(row < row_count());
115  return cell_y_[row + 1] - cell_y_[row];
116 }

◆ set_bounding_box()

void tesseract::StructuredTable::set_bounding_box ( const TBOX & box)

Definition at line 101 of file tablerecog.cpp.

101  {
102  bounding_box_ = box;
103 }

◆ set_line_grid()

void tesseract::StructuredTable::set_line_grid ( ColPartitionGrid * lines)

Definition at line 83 of file tablerecog.cpp.

83  {
84  line_grid_ = line_grid;
85 }

◆ set_max_text_height()

void tesseract::StructuredTable::set_max_text_height ( int  height)

Definition at line 86 of file tablerecog.cpp.

86  {
87  max_text_height_ = height;
88 }

◆ set_text_grid()

void tesseract::StructuredTable::set_text_grid ( ColPartitionGrid * text)

Definition at line 80 of file tablerecog.cpp.

80  {
81  text_grid_ = text_grid;
82 }

◆ space_above()

int tesseract::StructuredTable::space_above ( ) const

Definition at line 121 of file tablerecog.cpp.

121  {
122  return space_above_;
123 }

◆ space_below()

int tesseract::StructuredTable::space_below ( ) const

Definition at line 124 of file tablerecog.cpp.

124  {
125  return space_below_;
126 }

◆ UpdateMargins()

void tesseract::StructuredTable::UpdateMargins ( ColPartitionGrid * grid)
protected

Definition at line 484 of file tablerecog.cpp.

484  {
485  int below = FindVerticalMargin(grid, bounding_box_.bottom(), true);
486  space_below_ = std::min(space_below_, below);
487  int above = FindVerticalMargin(grid, bounding_box_.top(), false);
488  space_above_ = std::min(space_above_, above);
489  int left = FindHorizontalMargin(grid, bounding_box_.left(), true);
490  space_left_ = std::min(space_left_, left);
491  int right = FindHorizontalMargin(grid, bounding_box_.right(), false);
492  space_right_ = std::min(space_right_, right);
493 }
int FindVerticalMargin(ColPartitionGrid *grid, int start_x, bool decrease) const
Definition: tablerecog.cpp:494
int FindHorizontalMargin(ColPartitionGrid *grid, int start_y, bool decrease) const
Definition: tablerecog.cpp:511

◆ VerifyLinedTableCells()

bool tesseract::StructuredTable::VerifyLinedTableCells ( )
protected

Definition at line 328 of file tablerecog.cpp.

328  {
329  // Function only called when lines exist.
330  ASSERT_HOST(cell_y_.size() >= 2 && cell_x_.size() >= 2);
331  for (int i : cell_y_) {
332  if (CountHorizontalIntersections(i) > 0) {
333  return false;
334  }
335  }
336  for (int i : cell_x_) {
337  if (CountVerticalIntersections(i) > 0) {
338  return false;
339  }
340  }
341  return true;
342 }
int CountHorizontalIntersections(int y)
Definition: tablerecog.cpp:677
int CountVerticalIntersections(int x)
Definition: tablerecog.cpp:651

◆ VerifyRowFilled()

bool tesseract::StructuredTable::VerifyRowFilled ( int  row)

Definition at line 260 of file tablerecog.cpp.

260  {
261  for (unsigned i = 0; i < column_count(); ++i) {
262  auto area_filled = CalculateCellFilledPercentage(row, i);
263  if (area_filled >= kMinFilledArea) {
264  return true;
265  }
266  }
267  return false;
268 }
const double kMinFilledArea
Definition: tablerecog.cpp:60
double CalculateCellFilledPercentage(unsigned row, unsigned column)
Definition: tablerecog.cpp:272

◆ VerifyWhitespacedTable()

bool tesseract::StructuredTable::VerifyWhitespacedTable ( )
protected

Definition at line 352 of file tablerecog.cpp.

352  {
353  // criteria for a table, must be at least 2x3 or 3x2
354  return row_count() >= 2 && column_count() >= 2 && cell_count() >= 6;
355 }

Member Data Documentation

◆ bounding_box_

TBOX tesseract::StructuredTable::bounding_box_
protected

Definition at line 236 of file tablerecog.h.

◆ cell_x_

std::vector<int> tesseract::StructuredTable::cell_x_
protected

Definition at line 237 of file tablerecog.h.

◆ cell_y_

std::vector<int> tesseract::StructuredTable::cell_y_
protected

Definition at line 238 of file tablerecog.h.

◆ is_lined_

bool tesseract::StructuredTable::is_lined_
protected

Definition at line 239 of file tablerecog.h.

◆ line_grid_

ColPartitionGrid* tesseract::StructuredTable::line_grid_
protected

Definition at line 232 of file tablerecog.h.

◆ max_text_height_

int tesseract::StructuredTable::max_text_height_
protected

Definition at line 248 of file tablerecog.h.

◆ median_cell_height_

int tesseract::StructuredTable::median_cell_height_
protected

Definition at line 245 of file tablerecog.h.

◆ median_cell_width_

int tesseract::StructuredTable::median_cell_width_
protected

Definition at line 246 of file tablerecog.h.

◆ space_above_

int tesseract::StructuredTable::space_above_
protected

Definition at line 241 of file tablerecog.h.

◆ space_below_

int tesseract::StructuredTable::space_below_
protected

Definition at line 242 of file tablerecog.h.

◆ space_left_

int tesseract::StructuredTable::space_left_
protected

Definition at line 243 of file tablerecog.h.

◆ space_right_

int tesseract::StructuredTable::space_right_
protected

Definition at line 244 of file tablerecog.h.

◆ text_grid_

ColPartitionGrid* tesseract::StructuredTable::text_grid_
protected

Definition at line 231 of file tablerecog.h.


The documentation for this class was generated from the following files: