tesseract  5.0.0
tablefind.h
Go to the documentation of this file.
1 // File: tablefind.h
3 // Description: Helper classes to find tables from ColPartitions.
4 // Author: Faisal Shafait (faisal.shafait@dfki.de)
5 //
6 // (C) Copyright 2009, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_TEXTORD_TABLEFIND_H_
20 #define TESSERACT_TEXTORD_TABLEFIND_H_
21 
22 #include "colpartitiongrid.h"
23 #include "elst.h"
24 #include "rect.h"
25 
26 namespace tesseract {
27 
28 // Possible types for a column segment.
30 
31 class ColPartitionSet;
32 
33 // ColSegment holds rectangular blocks that represent segmentation of a page
34 // into regions containing single column text/table.
35 class ColSegment;
36 ELISTIZEH(ColSegment)
37 CLISTIZEH(ColSegment)
38 
39 class ColSegment : public ELIST_LINK {
40 public:
41  ColSegment();
42  ~ColSegment() = default;
43 
44  // Simple accessors and mutators
45  const TBOX &bounding_box() const {
46  return bounding_box_;
47  }
48 
49  void set_top(int y) {
50  bounding_box_.set_top(y);
51  }
52 
53  void set_bottom(int y) {
54  bounding_box_.set_bottom(y);
55  }
56 
57  void set_left(int x) {
58  bounding_box_.set_left(x);
59  }
60 
61  void set_right(int x) {
62  bounding_box_.set_right(x);
63  }
64 
65  void set_bounding_box(const TBOX &other) {
66  bounding_box_ = other;
67  }
68 
69  int get_num_table_cells() const {
70  return num_table_cells_;
71  }
72 
73  // set the number of table colpartitions covered by the bounding_box_
74  void set_num_table_cells(int n) {
75  num_table_cells_ = n;
76  }
77 
78  int get_num_text_cells() const {
79  return num_text_cells_;
80  }
81 
82  // set the number of text colpartitions covered by the bounding_box_
83  void set_num_text_cells(int n) {
84  num_text_cells_ = n;
85  }
86 
87  ColSegType type() const {
88  return type_;
89  }
90 
91  // set the type of the block based on the ratio of table to text
92  // colpartitions covered by it.
93  void set_type();
94 
95  // Provides a color for BBGrid to draw the rectangle.
96  ScrollView::Color BoxColor() const;
97 
98  // Insert a rectangle into bounding_box_
99  void InsertBox(const TBOX &other);
100 
101 private:
102  TBOX bounding_box_; // bounding box
103  int num_table_cells_;
104  int num_text_cells_;
105  ColSegType type_;
106 };
107 
108 // Typedef BBGrid of ColSegments
112 
113 // TableFinder is a utility class to find a set of tables given a set of
114 // ColPartitions and Columns. The TableFinder will mark candidate ColPartitions
115 // based on research in "Table Detection in Heterogeneous Documents".
116 // Usage flow is as follows:
117 // TableFinder finder;
118 // finder.InsertCleanPartitions(/* grid info */)
119 // finder.LocateTables(/* ColPartitions and Columns */);
120 // finder.Update TODO(nbeato)
122 public:
123  // Constructor is simple initializations
124  TableFinder();
125  ~TableFinder();
126 
127  // Set the resolution of the connected components in ppi.
128  void set_resolution(int resolution) {
129  resolution_ = resolution;
130  }
131  // Change the reading order. Initially it is left to right.
132  void set_left_to_right_language(bool order);
133 
134  // Initialize
135  void Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right);
136 
137  // Copy cleaned partitions from ColumnFinder's part_grid_ to this
138  // clean_part_grid_ and insert dot-like noise into period_grid_.
139  // It resizes the grids in this object to the dimensions of grid.
140  void InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block);
141 
142  // High level function to perform table detection
143  // Finds tables and updates the grid object with new partitions for the
144  // tables. The columns and width callbacks are used to merge tables.
145  // The reskew argument is only used to write the tables to the out.png
146  // if that feature is enabled.
147  void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns,
148  WidthCallback width_cb, const FCOORD &reskew);
149 
150 protected:
151  // Access for the grid dimensions.
152  // The results will not be correct until InsertCleanPartitions
153  // has been called. The values are taken from the grid passed as an argument
154  // to that function.
155  int gridsize() const;
156  int gridwidth() const;
157  int gridheight() const;
158  const ICOORD &bleft() const;
159  const ICOORD &tright() const;
160 
161  // Makes a window for debugging, see BBGrid
162  ScrollView *MakeWindow(int x, int y, const char *window_name);
163 
166  // Inserts text into the table finder.
167  void InsertTextPartition(ColPartition *part);
168  void InsertFragmentedTextPartition(ColPartition *part);
169  void InsertLeaderPartition(ColPartition *part);
170  void InsertRulingPartition(ColPartition *part);
171  void InsertImagePartition(ColPartition *part);
172  void SplitAndInsertFragmentedTextPartition(ColPartition *part);
173  bool AllowTextPartition(const ColPartition &part) const;
174  bool AllowBlob(const BLOBNBOX &blob) const;
175 
179 
180  // Utility function to move segments to col_seg_grid
181  // Note: Move includes ownership,
182  // so segments will be be owned by col_seg_grid
183  void MoveColSegmentsToGrid(ColSegment_LIST *segments,
184  ColSegmentGrid *col_seg_grid);
185 
189 
190  // Initialize the grid and partitions
191  void InitializePartitions(ColPartitionSet **all_columns);
192 
193  // Set left, right and top, bottom spacings of each colpartition.
194  // Left/right spacings are w.r.t the column boundaries
195  // Top/bottom spacings are w.r.t. previous and next colpartitions
196  static void SetPartitionSpacings(ColPartitionGrid *grid,
197  ColPartitionSet **all_columns);
198 
199  // Set spacing and closest neighbors above and below a given colpartition.
200  void SetVerticalSpacing(ColPartition *part);
201 
202  // Set global spacing estimates. This function is dependent on the
203  // partition spacings. So make sure SetPartitionSpacings is called
204  // on the same grid before this.
205  void SetGlobalSpacings(ColPartitionGrid *grid);
206  // Access to the global median xheight. The xheight is the height
207  // of a lowercase 'x' character on the page. This can be viewed as the
208  // average height of a lowercase letter in a textline. As a result
209  // it is used to make assumptions about spacing between words and
210  // table cells.
211  void set_global_median_xheight(int xheight);
212  // Access to the global median blob width. The width is useful
213  // when deciding if a partition is noise.
214  void set_global_median_blob_width(int width);
215  // Access to the global median ledding. The ledding is the distance between
216  // two adjacent text lines. This value can be used to get a rough estimate
217  // for the amount of space between two lines of text. As a result, it
218  // is used to calculate appropriate spacing between adjacent rows of text.
219  void set_global_median_ledding(int ledding);
220 
221  // Updates the nearest neighbors for each ColPartition in clean_part_grid_.
222  // The neighbors are most likely SingletonPartner calls after the neighbors
223  // are assigned. This is hear until it is decided to remove the
224  // nearest_neighbor code in ColPartition
225  void FindNeighbors();
226 
231 
232  // High level function to mark partitions as table rows/cells.
233  // When this function is done, the column partitions in clean_part_grid_
234  // should mostly be marked as tables.
235  void MarkTablePartitions();
236  // Marks partitions given a local view of a single partition
237  void MarkPartitionsUsingLocalInformation();
239  // Check if the partition has at least one large gap between words or no
240  // significant gap at all
241  // TODO(nbeato): Make const, prevented because blobnbox array access
242  bool HasWideOrNoInterWordGap(ColPartition *part) const;
243  // Checks if a partition is adjacent to leaders on the page
244  bool HasLeaderAdjacent(const ColPartition &part);
245  // Filter individual text partitions marked as table partitions
246  // consisting of paragraph endings, small section headings, and
247  // headers and footers.
248  void FilterFalseAlarms();
249  void FilterParagraphEndings();
250  void FilterHeaderAndFooter();
251  // Mark all ColPartitions as table cells that have a table cell above
252  // and below them
253  void SmoothTablePartitionRuns();
254 
267 
268  // Get Column segments from best_columns_
269  void GetColumnBlocks(ColPartitionSet **columns,
270  ColSegment_LIST *col_segments);
271 
272  // Group Column segments into consecutive single column regions.
273  void GroupColumnBlocks(ColSegment_LIST *current_segments,
274  ColSegment_LIST *col_segments);
275 
276  // Check if two boxes are consecutive within the same column
277  bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2);
278 
279  // Set the ratio of candidate table partitions in each column
280  void SetColumnsType(ColSegment_LIST *col_segments);
281 
282  // Merge Column Blocks that were split due to the presence of a table
283  void GridMergeColumnBlocks();
284 
290 
291  // Merge partititons cells into table columns
292  // Differs from paper by just looking at marked table partitions
293  // instead of similarity metric.
294  // Modified section 4.1 of paper.
295  void GetTableColumns(ColSegment_LIST *table_columns);
296 
297  // Finds regions within a column that potentially contain a table.
298  // Ie, the table columns from GetTableColumns are turned into boxes
299  // that span the entire page column (using ColumnBlocks found in
300  // earlier functions) in the x direction and the min/max extent of
301  // overlapping table columns in the y direction.
302  // Section 4.2 of paper.
303  void GetTableRegions(ColSegment_LIST *table_columns,
304  ColSegment_LIST *table_regions);
305 
308 
309  // Merge table regions corresponding to tables spanning multiple columns
310  void GridMergeTableRegions();
311  bool BelongToOneTable(const TBOX &box1, const TBOX &box2);
312 
313  // Adjust table boundaries by building a tight bounding box around all
314  // ColPartitions contained in it.
315  void AdjustTableBoundaries();
316 
317  // Grows a table to include partitions that are partially covered
318  // by the table. This includes lines and text. It does not include
319  // noise or images.
320  // On entry, result_box is the minimum size of the result. The results of the
321  // function will union the actual result with result_box.
322  void GrowTableBox(const TBOX &table_box, TBOX *result_box);
323  // Grow a table by increasing the size of the box to include
324  // partitions with significant overlap with the table.
325  void GrowTableToIncludePartials(const TBOX &table_box,
326  const TBOX &search_range, TBOX *result_box);
327  // Grow a table by expanding to the extents of significantly
328  // overlapping lines.
329  void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range,
330  TBOX *result_box);
331  // Checks whether the horizontal line belong to the table by looking at the
332  // side spacing of extra ColParitions that will be included in the table
333  // due to expansion
334  bool HLineBelongsToTable(const ColPartition &part, const TBOX &table_box);
335 
336  // Look for isolated column headers above the given table box and
337  // include them in the table
338  void IncludeLeftOutColumnHeaders(TBOX *table_box);
339 
340  // Remove false alarms consisting of a single column
341  void DeleteSingleColumnTables();
342 
343  // Return true if at least one gap larger than the global x-height
344  // exists in the horizontal projection
345  bool GapInXProjection(int *xprojection, int length);
346 
349  // This function will run the table recognizer and try to find better
350  // bounding boxes. The structures of the tables never leave this function
351  // right now. It just tries to prune and merge tables based on info it
352  // has available.
353  void RecognizeTables();
354 
358 
359  // Displays Colpartitions marked as table row. Overlays them on top of
360  // part_grid_.
361  void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols,
362  ScrollView::Color color);
363 
364  // Displays the colpartitions using a new coloring on an existing window.
365  // Note: This method is only for debug purpose during development and
366  // would not be part of checked in code
367  void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
368  ScrollView::Color text_color,
369  ScrollView::Color table_color);
370  void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
371  ScrollView::Color default_color);
372  void DisplayColPartitionConnections(ScrollView *win, ColPartitionGrid *grid,
373  ScrollView::Color default_color);
374 
375  // Merge all colpartitions in table regions to make them a single
376  // colpartition and revert types of isolated table cells not
377  // assigned to any table to their original types.
378  void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns,
379  const WidthCallback &width_cb);
380 
382  // Useful objects used during table find process.
384  // Resolution of the connected components in ppi.
386  // Estimate of median x-height over the page
388  // Estimate of the median blob width on the page
390  // Estimate of median leading on the page
392  // Grid to hold cleaned colpartitions after removing all
393  // colpartitions that consist of only noise blobs, and removing
394  // noise blobs from remaining colpartitions.
396  // Grid contains the leaders and ruling lines.
398  // Grid contains the broken down column partitions. It can be thought
399  // of as a "word" grid. However, it usually doesn't break apart text lines.
400  // It does break apart table data (most of the time).
402  // Grid of page column blocks
404  // Grid of detected tables
406  // The reading order of text. Defaults to true, for languages such as English.
408 };
409 
410 } // namespace tesseract.
411 
412 #endif // TESSERACT_TEXTORD_TABLEFIND_H_
#define CLISTIZEH(CLASSNAME)
Definition: clst.h:705
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:803
@ COL_TEXT
Definition: tablefind.h:29
@ COL_MIXED
Definition: tablefind.h:29
@ COL_TABLE
Definition: tablefind.h:29
@ COL_UNKNOWN
Definition: tablefind.h:29
@ COL_COUNT
Definition: tablefind.h:29
std::function< bool(int)> WidthCallback
Definition: tabfind.h:35
integer coordinate
Definition: points.h:36
void set_bounding_box(const TBOX &other)
Definition: tablefind.h:65
int get_num_table_cells() const
Definition: tablefind.h:69
void set_num_table_cells(int n)
Definition: tablefind.h:74
void set_num_text_cells(int n)
Definition: tablefind.h:83
ColSegType type() const
Definition: tablefind.h:87
void set_top(int y)
Definition: tablefind.h:49
void set_right(int x)
Definition: tablefind.h:61
int get_num_text_cells() const
Definition: tablefind.h:78
const TBOX & bounding_box() const
Definition: tablefind.h:45
void set_left(int x)
Definition: tablefind.h:57
void set_bottom(int y)
Definition: tablefind.h:53
void set_resolution(int resolution)
Definition: tablefind.h:128
ColPartitionGrid leader_and_ruling_grid_
Definition: tablefind.h:397
ColSegmentGrid col_seg_grid_
Definition: tablefind.h:403
ColSegmentGrid table_grid_
Definition: tablefind.h:405
ColPartitionGrid fragmented_text_grid_
Definition: tablefind.h:401
ColPartitionGrid clean_part_grid_
Definition: tablefind.h:395
#define TESS_API
Definition: export.h:34