tesseract  5.0.0
tablerecog_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <memory>
13 
14 #include "colpartition.h"
15 #include "colpartitiongrid.h"
16 #include "tablerecog.h"
17 
18 #include "include_gunit.h"
19 
20 namespace tesseract {
21 
23 public:
29 };
30 
32 public:
39 
40  void InjectCellY(int y) {
41  cell_y_.push_back(y);
42  std::sort(cell_y_.begin(), cell_y_.end());
43  }
44  void InjectCellX(int x) {
45  cell_x_.push_back(x);
46  std::sort(cell_x_.begin(), cell_x_.end());
47  }
48 
49  void ExpectCellX(int x_min, int second, int add, int almost_done, int x_max) {
50  ASSERT_EQ(0, (almost_done - second) % add);
51  EXPECT_EQ(3 + (almost_done - second) / add, cell_x_.size());
52  EXPECT_EQ(x_min, cell_x_.at(0));
53  EXPECT_EQ(x_max, cell_x_.at(cell_x_.size() - 1));
54  for (unsigned i = 1; i < cell_x_.size() - 1; ++i) {
55  EXPECT_EQ(second + add * (i - 1), cell_x_.at(i));
56  }
57  }
58 
59  void ExpectSortedX() {
60  EXPECT_GT(cell_x_.size(), 0);
61  for (unsigned i = 1; i < cell_x_.size(); ++i) {
62  EXPECT_LT(cell_x_.at(i - 1), cell_x_.at(i));
63  }
64  }
65 };
66 
67 class SharedTest : public testing::Test {
68 protected:
69  void SetUp() override {
70  std::locale::global(std::locale(""));
71  ICOORD bleft(0, 0);
72  ICOORD tright(1000, 1000);
73  text_grid_ = std::make_unique<ColPartitionGrid>(5, bleft, tright);
74  line_grid_ = std::make_unique<ColPartitionGrid>(5, bleft, tright);
75  }
76 
77  void TearDown() override {
78  tesseract::ColPartition_IT memory(&allocated_parts_);
79  for (memory.mark_cycle_pt(); !memory.cycled_list(); memory.forward()) {
80  memory.data()->DeleteBoxes();
81  }
82  }
83 
// NOTE(review): the line that declares this helper (embedded line 84) is
// missing from this listing; presumably `void InsertPartitions() {`, which the
// whitespaced-table tests call -- confirm against the upstream file.
//
// Inserts a regular lattice of partitions through InsertPartition(): one per
// 25 x 20 cell for x in [0, 500) and y in [0, 800), each box inset by one unit
// from its cell on every side.
85  for (int row = 0; row < 800; row += 20) {
86  for (int col = 0; col < 500; col += 25) {
87  InsertPartition(col + 1, row + 1, col + 24, row + 19);
88  }
89  }
90  }
91 
// Builds a partition covering the box (left, bottom)-(right, top), sets its
// median width and height to 3, inserts it into text_grid_, and records it in
// allocated_parts_ (the list TearDown() iterates).
92  void InsertPartition(int left, int bottom, int right, int top) {
93  TBOX box(left, bottom, right, top);
// NOTE(review): the statement that creates `part` (embedded line 94) is
// missing from this listing; presumably a ColPartition::FakePartition(box, ...)
// call -- confirm against the upstream file.
95  part->set_median_width(3);
96  part->set_median_height(3);
97  text_grid_->InsertBBox(true, true, part);
98 
// Append to the bookkeeping list so TearDown() can reach this partition.
99  tesseract::ColPartition_IT add_it(&allocated_parts_);
100  add_it.add_after_stay_put(part);
101  }
102 
103  void InsertLines() {
104  line_box_.set_to_given_coords(100 - line_grid_->gridsize(), 10 - line_grid_->gridsize(),
105  450 + line_grid_->gridsize(), 50 + line_grid_->gridsize());
106  for (int i = 10; i <= 50; i += 10) {
107  InsertHorizontalLine(100, 450, i);
108  }
109  for (int i = 100; i <= 450; i += 50) {
110  InsertVerticalLine(i, 10, 50);
111  }
112 
113  for (int i = 100; i <= 200; i += 20) {
114  InsertHorizontalLine(0, 100, i);
115  }
116  }
117 
// Inserts a horizontal line partition into line_grid_. Its box spans
// left..right in x and is gridsize() thick on either side of `y`. The
// partition is also recorded in allocated_parts_.
118  void InsertHorizontalLine(int left, int right, int y) {
119  TBOX box(left, y - line_grid_->gridsize(), right, y + line_grid_->gridsize());
// NOTE(review): the statement that creates `part` (embedded line 120) is
// missing from this listing; presumably a ColPartition::FakePartition(box, ...)
// call with horizontal-line types -- confirm against the upstream file.
121  line_grid_->InsertBBox(true, true, part);
122 
// Append to the bookkeeping list so TearDown() can reach this partition.
123  tesseract::ColPartition_IT add_it(&allocated_parts_);
124  add_it.add_after_stay_put(part);
125  }
// Inserts a vertical line partition into line_grid_. Its box spans
// bottom..top in y and is gridsize() thick on either side of `x`. The
// partition is also recorded in allocated_parts_.
126  void InsertVerticalLine(int x, int bottom, int top) {
127  TBOX box(x - line_grid_->gridsize(), bottom, x + line_grid_->gridsize(), top);
// NOTE(review): the statement that creates `part` (embedded line 128) is
// missing from this listing; presumably a ColPartition::FakePartition(box, ...)
// call with vertical-line types -- confirm against the upstream file.
129  line_grid_->InsertBBox(true, true, part);
130 
// Append to the bookkeeping list so TearDown() can reach this partition.
131  tesseract::ColPartition_IT add_it(&allocated_parts_);
132  add_it.add_after_stay_put(part);
133  }
134 
// NOTE(review): the line that declares this helper (embedded line 135) is
// missing from this listing; presumably `void InsertCellsInLines() {`, which
// the VerifyLinedTable* tests call -- confirm against the upstream file.
//
// Inserts one text partition per 50 x 10 cell anchored at x = 100, 150, ...,
// 450 and y = 10, 20, ..., 50, each box inset by one unit from its cell.
136  for (int y = 10; y <= 50; y += 10) {
137  for (int x = 100; x <= 450; x += 50) {
138  InsertPartition(x + 1, y + 1, x + 49, y + 9);
139  }
140  }
141  }
142 
// NOTE(review): embedded line 143 is missing from this listing; presumably the
// declaration of line_box_, which InsertLines() writes -- confirm upstream.
//
// Grid that receives the partitions created by InsertPartition().
144  std::unique_ptr<ColPartitionGrid> text_grid_;
// Grid that receives the partitions created by InsertHorizontalLine() and
// InsertVerticalLine().
145  std::unique_ptr<ColPartitionGrid> line_grid_;
// Every partition created by the Insert* helpers is appended here so that
// TearDown() can call DeleteBoxes() on it.
146  ColPartition_LIST allocated_parts_;
147 };
148 
// NOTE(review): the class header (embedded line 149) and the first statement
// of SetUp() (embedded line 152) are missing from this listing; presumably
// `class TableRecognizerTest : public SharedTest {` and a call to
// SharedTest::SetUp() -- confirm against the upstream file.
150 protected:
// Creates the recognizer under test, initialises it, and points it at the
// fixture's text and line grids.
151  void SetUp() override {
153  recognizer_ = std::make_unique<TestableTableRecognizer>();
154  recognizer_->Init();
155  recognizer_->set_text_grid(text_grid_.get());
156  recognizer_->set_line_grid(line_grid_.get());
157  }
158 
// Recognizer instance exercised by the TableRecognizerTest cases.
159  std::unique_ptr<TestableTableRecognizer> recognizer_;
160 };
161 
// NOTE(review): the class header (embedded line 162) and the first statement
// of SetUp() (embedded line 165) are missing from this listing; presumably
// `class StructuredTableTest : public SharedTest {` and a call to
// SharedTest::SetUp() -- confirm against the upstream file.
163 protected:
// Creates the table under test, initialises it, and points it at the
// fixture's text and line grids.
164  void SetUp() override {
166  table_ = std::make_unique<TestableStructuredTable>();
167  table_->Init();
168  table_->set_text_grid(text_grid_.get());
169  table_->set_line_grid(line_grid_.get());
170  }
171 
// Table instance exercised by the StructuredTableTest cases.
172  std::unique_ptr<TestableStructuredTable> table_;
173 };
174 
175 TEST_F(TableRecognizerTest, HasSignificantLinesBasicPass) {
176  InsertLines();
177  TBOX smaller_guess(120, 15, 370, 45);
178  TBOX larger_guess(90, 5, 490, 70);
179  EXPECT_TRUE(recognizer_->HasSignificantLines(line_box_));
180  EXPECT_TRUE(recognizer_->HasSignificantLines(larger_guess));
181  EXPECT_TRUE(recognizer_->HasSignificantLines(smaller_guess));
182 }
183 
184 TEST_F(TableRecognizerTest, HasSignificantLinesBasicFail) {
185  InsertLines();
186  TBOX box(370, 35, 500, 45);
187  EXPECT_FALSE(recognizer_->HasSignificantLines(box));
188 }
189 
190 TEST_F(TableRecognizerTest, HasSignificantLinesHorizontalOnlyFails) {
191  InsertLines();
192  TBOX box(0, 100, 200, 200);
193  EXPECT_FALSE(recognizer_->HasSignificantLines(box));
194 }
195 
196 TEST_F(TableRecognizerTest, FindLinesBoundingBoxBasic) {
197  InsertLines();
198  TBOX box(0, 0, 200, 50);
199  bool result = recognizer_->FindLinesBoundingBox(&box);
200  EXPECT_TRUE(result);
201  EXPECT_EQ(line_box_.left(), box.left());
202  EXPECT_EQ(line_box_.right(), box.right());
203  EXPECT_EQ(line_box_.bottom(), box.bottom());
204  EXPECT_EQ(line_box_.top(), box.top());
205 }
206 
// Runs RecognizeLinedTable() on a guess inside the lines from InsertLines()
// and checks the resulting bounds, the column/row/cell counts, and that the
// table is flagged as lined.
207 TEST_F(TableRecognizerTest, RecognizeLinedTableBasic) {
208  InsertLines();
209  TBOX guess(120, 15, 370, 45);
// NOTE(review): the declaration of `table` (embedded line 210) is missing from
// this listing; presumably a StructuredTable-derived object -- confirm against
// the upstream file.
211  table.set_text_grid(text_grid_.get());
212  table.set_line_grid(line_grid_.get());
213 
// The recognised table must match line_box_ on every side.
214  EXPECT_TRUE(recognizer_->RecognizeLinedTable(guess, &table));
215  EXPECT_EQ(line_box_.bottom(), table.bounding_box().bottom());
216  EXPECT_EQ(line_box_.top(), table.bounding_box().top());
217  EXPECT_EQ(line_box_.left(), table.bounding_box().left());
218  EXPECT_EQ(line_box_.right(), table.bounding_box().right());
219  EXPECT_EQ(line_box_.area(), table.bounding_box().area());
// Eight vertical and five horizontal lines enclose 7 columns x 4 rows.
220  EXPECT_EQ(7, table.column_count());
221  EXPECT_EQ(4, table.row_count());
222  EXPECT_EQ(28, table.cell_count());
223  EXPECT_TRUE(table.is_lined());
224 }
225 
// Runs RecognizeWhitespacedTable() over the lattice from InsertPartitions()
// and checks the resulting bounds, the column/row/cell counts, and that the
// table is not flagged as lined.
226 TEST_F(TableRecognizerTest, RecognizeWhitespacedTableBasic) {
227  InsertPartitions();
228  TBOX guess(0, 0, 500, 800);
229 
// NOTE(review): the declaration of `table` (embedded line 230) is missing from
// this listing; presumably a StructuredTable-derived object -- confirm against
// the upstream file.
231  table.set_text_grid(text_grid_.get());
232  table.set_line_grid(line_grid_.get());
233  EXPECT_TRUE(recognizer_->RecognizeWhitespacedTable(guess, &table));
// Expected bounds are the guess shrunk by one unit on every side.
234  EXPECT_EQ(1, table.bounding_box().bottom());
235  EXPECT_EQ(799, table.bounding_box().top());
236  EXPECT_EQ(1, table.bounding_box().left());
237  EXPECT_EQ(499, table.bounding_box().right());
238  EXPECT_EQ(798 * 498, table.bounding_box().area());
// One column per 25 units of width and one row per 20 units of height.
239  EXPECT_EQ(500 / 25, table.column_count());
240  EXPECT_EQ(800 / 20, table.row_count());
241  EXPECT_EQ(500 * 800 / 20 / 25, table.cell_count());
242  EXPECT_FALSE(table.is_lined());
243 }
244 
245 TEST_F(StructuredTableTest, CountVerticalIntersectionsAll) {
246  table_->set_bounding_box(TBOX(0, 0, 1000, 1000));
247  InsertPartition(0, 0, 100, 10);
248  InsertPartition(1, 12, 43, 21);
249  EXPECT_EQ(2, table_->CountVerticalIntersections(4));
250  EXPECT_EQ(2, table_->CountVerticalIntersections(20));
251  EXPECT_EQ(2, table_->CountVerticalIntersections(40));
252  EXPECT_EQ(1, table_->CountVerticalIntersections(50));
253  EXPECT_EQ(1, table_->CountVerticalIntersections(60));
254  EXPECT_EQ(1, table_->CountVerticalIntersections(80));
255  EXPECT_EQ(1, table_->CountVerticalIntersections(95));
256  EXPECT_EQ(0, table_->CountVerticalIntersections(104));
257  EXPECT_EQ(0, table_->CountVerticalIntersections(150));
258 }
259 
260 TEST_F(StructuredTableTest, CountHorizontalIntersectionsAll) {
261  table_->set_bounding_box(TBOX(0, 0, 1000, 1000));
262  InsertPartition(0, 3, 100, 10);
263  InsertPartition(110, 5, 200, 16);
264 
265  EXPECT_EQ(0, table_->CountHorizontalIntersections(0));
266  EXPECT_EQ(1, table_->CountHorizontalIntersections(4));
267  EXPECT_EQ(2, table_->CountHorizontalIntersections(8));
268  EXPECT_EQ(1, table_->CountHorizontalIntersections(12));
269  EXPECT_EQ(0, table_->CountHorizontalIntersections(20));
270 }
271 
272 TEST_F(StructuredTableTest, VerifyLinedTableBasicPass) {
273  for (int y = 10; y <= 50; y += 10) {
274  table_->InjectCellY(y);
275  }
276  for (int x = 100; x <= 450; x += 50) {
277  table_->InjectCellX(x);
278  }
279  InsertLines();
280  InsertCellsInLines();
281  table_->set_bounding_box(line_box_);
282  EXPECT_TRUE(table_->VerifyLinedTableCells());
283 }
284 
285 TEST_F(StructuredTableTest, VerifyLinedTableHorizontalFail) {
286  for (int y = 10; y <= 50; y += 10) {
287  table_->InjectCellY(y);
288  }
289  for (int x = 100; x <= 450; x += 50) {
290  table_->InjectCellX(x);
291  }
292  InsertLines();
293  InsertCellsInLines();
294  InsertPartition(101, 11, 299, 19);
295  table_->set_bounding_box(line_box_);
296  EXPECT_FALSE(table_->VerifyLinedTableCells());
297 }
298 
299 TEST_F(StructuredTableTest, VerifyLinedTableVerticalFail) {
300  for (int y = 10; y <= 50; y += 10) {
301  table_->InjectCellY(y);
302  }
303  for (int x = 100; x <= 450; x += 50) {
304  table_->InjectCellX(x);
305  }
306  InsertLines();
307  InsertCellsInLines();
308  InsertPartition(151, 21, 199, 39);
309  table_->set_bounding_box(line_box_);
310  EXPECT_FALSE(table_->VerifyLinedTableCells());
311 }
312 
313 TEST_F(StructuredTableTest, FindWhitespacedColumnsBasic) {
314  InsertPartitions();
315  TBOX guess(0, 0, 500, 800);
316  table_->set_bounding_box(guess);
317  table_->FindWhitespacedColumns();
318  table_->ExpectCellX(1, 25, 25, 475, 499);
319 }
320 
321 TEST_F(StructuredTableTest, FindWhitespacedColumnsSorted) {
322  InsertPartitions();
323  TBOX guess(0, 0, 500, 800);
324  table_->set_bounding_box(guess);
325  table_->FindWhitespacedColumns();
326  table_->ExpectSortedX();
327 }
328 
329 // TODO(nbeato): check failure cases
330 // TODO(nbeato): check Recognize processes correctly on trivial real examples.
331 
332 } // namespace tesseract
@ TBOX
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_HLINE
Definition: blobbox.h:76
@ BRT_VLINE
Definition: blobbox.h:77
@ BTFT_NONE
Definition: blobbox.h:111
TEST_F(EuroText, FastLatinOCR)
@ PT_HORZ_LINE
Definition: publictypes.h:66
@ PT_VERT_LINE
Definition: publictypes.h:67
@ PT_FLOWING_TEXT
Definition: publictypes.h:55
integer coordinate
Definition: points.h:36
TDimension left() const
Definition: rect.h:82
void set_to_given_coords(int x_min, int y_min, int x_max, int y_max)
Definition: rect.h:282
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
int32_t area() const
Definition: rect.h:134
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
void set_median_width(int width)
Definition: colpartition.h:144
void set_median_height(int height)
Definition: colpartition.h:138
std::vector< int > cell_y_
Definition: tablerecog.h:238
unsigned column_count() const
Definition: tablerecog.cpp:95
const TBOX & bounding_box() const
Definition: tablerecog.cpp:104
std::vector< int > cell_x_
Definition: tablerecog.h:237
unsigned cell_count() const
Definition: tablerecog.cpp:98
void set_line_grid(ColPartitionGrid *lines)
Definition: tablerecog.cpp:83
int CountHorizontalIntersections(int y)
Definition: tablerecog.cpp:677
int CountVerticalIntersections(int x)
Definition: tablerecog.cpp:651
void set_text_grid(ColPartitionGrid *text)
Definition: tablerecog.cpp:80
unsigned row_count() const
Definition: tablerecog.cpp:92
bool RecognizeLinedTable(const TBOX &guess_box, StructuredTable *table)
Definition: tablerecog.cpp:766
bool FindLinesBoundingBox(TBOX *bounding_box)
Definition: tablerecog.cpp:825
bool HasSignificantLines(const TBOX &guess)
Definition: tablerecog.cpp:784
bool RecognizeWhitespacedTable(const TBOX &guess_box, StructuredTable *table)
Definition: tablerecog.cpp:886
StructuredTable * RecognizeTable(const TBOX &guess_box)
Definition: tablerecog.cpp:741
void ExpectCellX(int x_min, int second, int add, int almost_done, int x_max)
void SetUp() override
ColPartition_LIST allocated_parts_
std::unique_ptr< ColPartitionGrid > text_grid_
std::unique_ptr< ColPartitionGrid > line_grid_
void TearDown() override
void InsertVerticalLine(int x, int bottom, int top)
void InsertHorizontalLine(int left, int right, int y)
void InsertPartition(int left, int bottom, int right, int top)
std::unique_ptr< TestableTableRecognizer > recognizer_
std::unique_ptr< TestableStructuredTable > table_