tesseract  5.0.0
colpartitionset.cpp
Go to the documentation of this file.
1 // File: colpartitionset.cpp
3 // Description: Class to hold a list of ColPartitions of the page that
4 // correspond roughly to columns.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23 
24 #include "colpartitionset.h"
25 #include "tablefind.h"
26 #include "workingpartset.h"
27 
28 namespace tesseract {
29 
30 // Minimum width of a column to be interesting as a multiple of resolution.
31 const double kMinColumnWidth = 2.0 / 3;
32 
33 ColPartitionSet::ColPartitionSet(ColPartition_LIST *partitions) {
34  ColPartition_IT it(&parts_);
35  it.add_list_after(partitions);
36  ComputeCoverage();
37 }
38 
40  ColPartition_IT it(&parts_);
41  it.add_after_then_move(part);
42  ComputeCoverage();
43 }
44 
45 // Returns the number of columns of good width.
47  int num_good_cols = 0;
48  // This is a read-only iteration of the list.
49  ColPartition_IT it(const_cast<ColPartition_LIST *>(&parts_));
50  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
51  if (it.data()->good_width()) {
52  ++num_good_cols;
53  }
54  }
55  return num_good_cols;
56 }
57 
58 // Return an element of the parts_ list from its index.
60  ColPartition_IT it(&parts_);
61  it.mark_cycle_pt();
62  for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward()) {
63  ;
64  }
65  if (it.cycled_list()) {
66  return nullptr;
67  }
68  return it.data();
69 }
70 
71 // Return the ColPartition that contains the given coords, if any, else nullptr.
73  ColPartition_IT it(&parts_);
74  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
75  ColPartition *part = it.data();
76  if (part->ColumnContains(x, y)) {
77  return part;
78  }
79  }
80  return nullptr;
81 }
82 
83 // Extract all the parts from the list, relinquishing ownership.
85  ColPartition_IT it(&parts_);
86  while (!it.empty()) {
87  it.extract();
88  it.forward();
89  }
90 }
91 
92 // Attempt to improve this by adding partitions or expanding partitions.
94  PartSetVector *src_sets) {
95  int set_size = src_sets->size();
96  // Iterate over the provided column sets, as each one may have something
97  // to improve this.
98  for (int i = 0; i < set_size; ++i) {
99  ColPartitionSet *column_set = src_sets->at(i);
100  if (column_set == nullptr) {
101  continue;
102  }
103  // Iterate over the parts in this and column_set, adding bigger or
104  // new parts in column_set to this.
105  ColPartition_IT part_it(&parts_);
106  ASSERT_HOST(!part_it.empty());
107  int prev_right = INT32_MIN;
108  part_it.mark_cycle_pt();
109  ColPartition_IT col_it(&column_set->parts_);
110  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
111  ColPartition *col_part = col_it.data();
112  if (col_part->blob_type() < BRT_UNKNOWN) {
113  continue; // Ignore image partitions.
114  }
115  int col_left = col_part->left_key();
116  int col_right = col_part->right_key();
117  // Sync-up part_it (in this) so it matches the col_part in column_set.
118  ColPartition *part = part_it.data();
119  while (!part_it.at_last() && part->right_key() < col_left) {
120  prev_right = part->right_key();
121  part_it.forward();
122  part = part_it.data();
123  }
124  int part_left = part->left_key();
125  int part_right = part->right_key();
126  if (part_right < col_left || col_right < part_left) {
127  // There is no overlap so this is a new partition.
128  AddPartition(col_part->ShallowCopy(), &part_it);
129  continue;
130  }
131  // Check the edges of col_part to see if they can improve part.
132  bool part_width_ok = cb(part->KeyWidth(part_left, part_right));
133  if (col_left < part_left && col_left > prev_right) {
134  // The left edge of the column is better and it doesn't overlap,
135  // so we can potentially expand it.
136  int col_box_left = col_part->BoxLeftKey();
137  bool tab_width_ok = cb(part->KeyWidth(col_left, part_right));
138  bool box_width_ok = cb(part->KeyWidth(col_box_left, part_right));
139  if (tab_width_ok || (!part_width_ok)) {
140  // The tab is leaving the good column metric at least as good as
141  // it was before, so use the tab.
142  part->CopyLeftTab(*col_part, false);
143  part->SetColumnGoodness(cb);
144  } else if (col_box_left < part_left &&
145  (box_width_ok || !part_width_ok)) {
146  // The box is leaving the good column metric at least as good as
147  // it was before, so use the box.
148  part->CopyLeftTab(*col_part, true);
149  part->SetColumnGoodness(cb);
150  }
151  part_left = part->left_key();
152  }
153  if (col_right > part_right &&
154  (part_it.at_last() ||
155  part_it.data_relative(1)->left_key() > col_right)) {
156  // The right edge is better, so we can possibly expand it.
157  int col_box_right = col_part->BoxRightKey();
158  bool tab_width_ok = cb(part->KeyWidth(part_left, col_right));
159  bool box_width_ok = cb(part->KeyWidth(part_left, col_box_right));
160  if (tab_width_ok || (!part_width_ok)) {
161  // The tab is leaving the good column metric at least as good as
162  // it was before, so use the tab.
163  part->CopyRightTab(*col_part, false);
164  part->SetColumnGoodness(cb);
165  } else if (col_box_right > part_right &&
166  (box_width_ok || !part_width_ok)) {
167  // The box is leaving the good column metric at least as good as
168  // it was before, so use the box.
169  part->CopyRightTab(*col_part, true);
170  part->SetColumnGoodness(cb);
171  }
172  }
173  }
174  }
175  ComputeCoverage();
176 }
177 
178 // If this set is good enough to represent a new partitioning into columns,
179 // add it to the vector of sets, otherwise delete it.
181  const WidthCallback &cb) {
182  bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
183  bounding_box_.bottom());
184  if (debug) {
185  tprintf("Considering new column candidate:\n");
186  Print();
187  }
188  if (!LegalColumnCandidate()) {
189  if (debug) {
190  tprintf("Not a legal column candidate:\n");
191  Print();
192  }
193  delete this;
194  return;
195  }
196  for (unsigned i = 0; i < column_sets->size(); ++i) {
197  ColPartitionSet *columns = column_sets->at(i);
198  // In ordering the column set candidates, good_coverage_ is king,
199  // followed by good_column_count_ and then bad_coverage_.
200  bool better = good_coverage_ > columns->good_coverage_;
201  if (good_coverage_ == columns->good_coverage_) {
202  better = good_column_count_ > columns->good_column_count_;
203  if (good_column_count_ == columns->good_column_count_) {
204  better = bad_coverage_ > columns->bad_coverage_;
205  }
206  }
207  if (better) {
208  // The new one is better so add it.
209  if (debug) {
210  tprintf("Good one\n");
211  }
212  column_sets->insert(column_sets->begin() + i, this);
213  return;
214  }
215  if (columns->CompatibleColumns(false, this, cb)) {
216  if (debug) {
217  tprintf("Duplicate\n");
218  }
219  delete this;
220  return; // It is not unique.
221  }
222  }
223  if (debug) {
224  tprintf("Added to end\n");
225  }
226  column_sets->push_back(this);
227 }
228 
229 // Return true if the partitions in other are all compatible with the columns
230 // in this.
232  const WidthCallback &cb) {
233  if (debug) {
234  tprintf("CompatibleColumns testing compatibility\n");
235  Print();
236  other->Print();
237  }
238  if (other->parts_.empty()) {
239  if (debug) {
240  tprintf("CompatibleColumns true due to empty other\n");
241  }
242  return true;
243  }
244  ColPartition_IT it(&other->parts_);
245  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
246  ColPartition *part = it.data();
247  if (part->blob_type() < BRT_UNKNOWN) {
248  if (debug) {
249  tprintf("CompatibleColumns ignoring image partition\n");
250  part->Print();
251  }
252  continue; // Image partitions are irrelevant to column compatibility.
253  }
254  int y = part->MidY();
255  int left = part->bounding_box().left();
256  int right = part->bounding_box().right();
257  ColPartition *left_col = ColumnContaining(left, y);
258  ColPartition *right_col = ColumnContaining(right, y);
259  if (right_col == nullptr || left_col == nullptr) {
260  if (debug) {
261  tprintf("CompatibleColumns false due to partition edge outside\n");
262  part->Print();
263  }
264  return false; // A partition edge lies outside of all columns
265  }
266  if (right_col != left_col && cb(right - left)) {
267  if (debug) {
268  tprintf("CompatibleColumns false due to good width in multiple cols\n");
269  part->Print();
270  }
271  return false; // Partition with a good width must be in a single column.
272  }
273 
274  ColPartition_IT it2 = it;
275  while (!it2.at_last()) {
276  it2.forward();
277  ColPartition *next_part = it2.data();
278  if (!BLOBNBOX::IsTextType(next_part->blob_type())) {
279  continue; // Non-text partitions are irrelevant.
280  }
281  int next_left = next_part->bounding_box().left();
282  if (next_left == right) {
283  break; // They share the same edge, so one must be a pull-out.
284  }
285  // Search to see if right and next_left fall within a single column.
286  ColPartition *next_left_col = ColumnContaining(next_left, y);
287  if (right_col == next_left_col) {
288  // There is a column break in this column.
289  // This can be due to a figure caption within a column, a pull-out
290  // block, or a simple broken textline that remains to be merged:
291  // all allowed, or a change in column layout: not allowed.
292  // If both partitions are of good width, then it is likely
293  // a change in column layout, otherwise probably an allowed situation.
294  if (part->good_width() && next_part->good_width()) {
295  if (debug) {
296  int next_right = next_part->bounding_box().right();
297  tprintf("CompatibleColumns false due to 2 parts of good width\n");
298  tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left,
299  next_right);
300  right_col->Print();
301  }
302  return false;
303  }
304  }
305  break;
306  }
307  }
308  if (debug) {
309  tprintf("CompatibleColumns true!\n");
310  }
311  return true;
312 }
313 
314 // Returns the total width of all blobs in the part_set that do not lie
315 // within an approved column. Used as a cost measure for using this
316 // column set over another that might be compatible.
318  int total_width = 0;
319  ColPartition_IT it(&part_set->parts_);
320  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
321  ColPartition *part = it.data();
322  if (!BLOBNBOX::IsTextType(part->blob_type())) {
323  continue; // Non-text partitions are irrelevant to column compatibility.
324  }
325  int y = part->MidY();
326  BLOBNBOX_C_IT box_it(part->boxes());
327  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
328  const TBOX &box = it.data()->bounding_box();
329  // Assume that the whole blob is outside any column iff its x-middle
330  // is outside.
331  int x = (box.left() + box.right()) / 2;
332  ColPartition *col = ColumnContaining(x, y);
333  if (col == nullptr) {
334  total_width += box.width();
335  }
336  }
337  }
338  return total_width;
339 }
340 
341 // Return true if this ColPartitionSet makes a legal column candidate by
342 // having legal individual partitions and non-overlapping adjacent pairs.
344  ColPartition_IT it(&parts_);
345  if (it.empty()) {
346  return false;
347  }
348  bool any_text_parts = false;
349  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
350  ColPartition *part = it.data();
351  if (BLOBNBOX::IsTextType(part->blob_type())) {
352  if (!part->IsLegal()) {
353  return false; // Individual partition is illegal.
354  }
355  any_text_parts = true;
356  }
357  if (!it.at_last()) {
358  ColPartition *next_part = it.data_relative(1);
359  if (next_part->left_key() < part->right_key()) {
360  return false;
361  }
362  }
363  }
364  return any_text_parts;
365 }
366 
367 // Return a copy of this. If good_only will only copy the Good ColPartitions.
369  ColPartition_LIST copy_parts;
370  ColPartition_IT src_it(&parts_);
371  ColPartition_IT dest_it(&copy_parts);
372  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
373  ColPartition *part = src_it.data();
374  if (BLOBNBOX::IsTextType(part->blob_type()) &&
375  (!good_only || part->good_width() || part->good_column())) {
376  dest_it.add_after_then_move(part->ShallowCopy());
377  }
378  }
379  if (dest_it.empty()) {
380  return nullptr;
381  }
382  return new ColPartitionSet(&copy_parts);
383 }
384 
385 // Return the bounding boxes of columns at the given y-range
386 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
387  ColSegment_LIST *segments) {
388  ColPartition_IT it(&parts_);
389  ColSegment_IT col_it(segments);
390  col_it.move_to_last();
391  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
392  ColPartition *part = it.data();
393  ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
394  ICOORD top_right(part->RightAtY(y_bottom), y_top);
395  auto *col_seg = new ColSegment();
396  col_seg->InsertBox(TBOX(bot_left, top_right));
397  col_it.add_after_then_move(col_seg);
398  }
399 }
400 
401 #ifndef GRAPHICS_DISABLED
402 
403 // Display the edges of the columns at the given y coords.
404 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
405  ScrollView *win) {
406  ColPartition_IT it(&parts_);
407  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
408  ColPartition *part = it.data();
409  win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
410  win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
411  }
412 }
413 
414 #endif // !GRAPHICS_DISABLED
415 
416 // Return the ColumnSpanningType that best explains the columns overlapped
417 // by the given coords(left,right,y), with the given margins.
418 // Also return the first and last column index touched by the coords and
419 // the leftmost spanned column.
420 // Column indices are 2n + 1 for real columns (0 based) and even values
421 // represent the gaps in between columns, with 0 being left of the leftmost.
422 // resolution refers to the ppi resolution of the image.
424  int resolution, int left, int right, int height, int y, int left_margin,
425  int right_margin, int *first_col, int *last_col, int *first_spanned_col) {
426  *first_col = -1;
427  *last_col = -1;
428  *first_spanned_col = -1;
429  int margin_columns = 0;
430  ColPartition_IT it(&parts_);
431  int col_index = 1;
432  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
433  ColPartition *part = it.data();
434  if (part->ColumnContains(left, y) ||
435  (it.at_first() && part->ColumnContains(left + height, y))) {
436  // In the default case, first_col is set, but columns_spanned remains
437  // zero, so first_col will get reset in the first column genuinely
438  // spanned, but we can tell the difference from a noise partition
439  // that touches no column.
440  *first_col = col_index;
441  if (part->ColumnContains(right, y) ||
442  (it.at_last() && part->ColumnContains(right - height, y))) {
443  // Both within a single column.
444  *last_col = col_index;
445  return CST_FLOWING;
446  }
447  if (left_margin <= part->LeftAtY(y)) {
448  // It completely spans this column.
449  *first_spanned_col = col_index;
450  margin_columns = 1;
451  }
452  } else if (part->ColumnContains(right, y) ||
453  (it.at_last() && part->ColumnContains(right - height, y))) {
454  if (*first_col < 0) {
455  // It started in-between.
456  *first_col = col_index - 1;
457  }
458  if (right_margin >= part->RightAtY(y)) {
459  // It completely spans this column.
460  if (margin_columns == 0) {
461  *first_spanned_col = col_index;
462  }
463  ++margin_columns;
464  }
465  *last_col = col_index;
466  break;
467  } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
468  // Neither left nor right are contained within, so it spans this
469  // column.
470  if (*first_col < 0) {
471  // It started in between the previous column and the current column.
472  *first_col = col_index - 1;
473  }
474  if (margin_columns == 0) {
475  *first_spanned_col = col_index;
476  }
477  *last_col = col_index;
478  } else if (right < part->LeftAtY(y)) {
479  // We have gone past the end.
480  *last_col = col_index - 1;
481  if (*first_col < 0) {
482  // It must lie completely between columns =>noise.
483  *first_col = col_index - 1;
484  }
485  break;
486  }
487  }
488  if (*first_col < 0) {
489  *first_col = col_index - 1; // The last in-between.
490  }
491  if (*last_col < 0) {
492  *last_col = col_index - 1; // The last in-between.
493  }
494  ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
495  ASSERT_HOST(*first_col <= *last_col);
496  if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
497  // Neither end was in a column, and it didn't span any, so it lies
498  // entirely between columns, therefore noise.
499  return CST_NOISE;
500  } else if (margin_columns <= 1) {
501  // An exception for headings that stick outside of single-column text.
502  if (margin_columns == 1 && parts_.singleton()) {
503  return CST_HEADING;
504  }
505  // It is a pullout, as left and right were not in the same column, but
506  // it doesn't go to the edge of its start and end.
507  return CST_PULLOUT;
508  }
509  // Its margins went to the edges of first and last columns => heading.
510  return CST_HEADING;
511 }
512 
513 // The column_set has changed. Close down all in-progress WorkingPartSets in
514 // columns that do not match and start new ones for the new columns in this.
515 // As ColPartitions are turned into BLOCKs, the used ones are put in
516 // used_parts, as they still need to be referenced in the grid.
518  const ICOORD &tright, int resolution,
519  ColPartition_LIST *used_parts,
520  WorkingPartSet_LIST *working_set_list) {
521  // Move the input list to a temporary location so we can delete its elements
522  // as we add them to the output working_set.
523  WorkingPartSet_LIST work_src;
524  WorkingPartSet_IT src_it(&work_src);
525  src_it.add_list_after(working_set_list);
526  src_it.move_to_first();
527  WorkingPartSet_IT dest_it(working_set_list);
528  // Completed blocks and to_blocks are accumulated and given to the first new
529  // one whenever we keep a column, or at the end.
530  BLOCK_LIST completed_blocks;
531  TO_BLOCK_LIST to_blocks;
532  WorkingPartSet *first_new_set = nullptr;
533  WorkingPartSet *working_set = nullptr;
534  ColPartition_IT col_it(&parts_);
535  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
536  ColPartition *column = col_it.data();
537  // Any existing column to the left of column is completed.
538  while (!src_it.empty() &&
539  ((working_set = src_it.data())->column() == nullptr ||
540  working_set->column()->right_key() <= column->left_key())) {
541  src_it.extract();
542  working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
543  &completed_blocks, &to_blocks);
544  delete working_set;
545  src_it.forward();
546  }
547  // Make a new between-column WorkingSet for before the current column.
548  working_set = new WorkingPartSet(nullptr);
549  dest_it.add_after_then_move(working_set);
550  if (first_new_set == nullptr) {
551  first_new_set = working_set;
552  }
553  // A matching column gets to stay, and first_new_set gets all the
554  // completed_sets.
555  working_set = src_it.empty() ? nullptr : src_it.data();
556  if (working_set != nullptr &&
557  working_set->column()->MatchingColumns(*column)) {
558  working_set->set_column(column);
559  dest_it.add_after_then_move(src_it.extract());
560  src_it.forward();
561  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
562  first_new_set = nullptr;
563  } else {
564  // Just make a new working set for the current column.
565  working_set = new WorkingPartSet(column);
566  dest_it.add_after_then_move(working_set);
567  }
568  }
569  // Complete any remaining src working sets.
570  while (!src_it.empty()) {
571  working_set = src_it.extract();
572  working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
573  &completed_blocks, &to_blocks);
574  delete working_set;
575  src_it.forward();
576  }
577  // Make a new between-column WorkingSet for after the last column.
578  working_set = new WorkingPartSet(nullptr);
579  dest_it.add_after_then_move(working_set);
580  if (first_new_set == nullptr) {
581  first_new_set = working_set;
582  }
583  // The first_new_set now gets any accumulated completed_parts/blocks.
584  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
585 }
586 
587 // Accumulate the widths and gaps into the given variables.
589  int *width_samples,
590  int *total_gap,
591  int *gap_samples) {
592  ColPartition_IT it(&parts_);
593  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
594  ColPartition *part = it.data();
595  *total_width += part->ColumnWidth();
596  ++*width_samples;
597  if (!it.at_last()) {
598  ColPartition *next_part = it.data_relative(1);
599  int part_left = part->right_key();
600  int part_right = next_part->left_key();
601  int gap = part->KeyWidth(part_left, part_right);
602  *total_gap += gap;
603  ++*gap_samples;
604  }
605  }
606 }
607 
608 // Provide debug output for this ColPartitionSet and all the ColPartitions.
610  ColPartition_IT it(&parts_);
611  tprintf(
612  "Partition set of %d parts, %d good, coverage=%d+%d"
613  " (%d,%d)->(%d,%d)\n",
614  it.length(), good_column_count_, good_coverage_, bad_coverage_,
615  bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(),
616  bounding_box_.top());
617  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
618  ColPartition *part = it.data();
619  part->Print();
620  }
621 }
622 
623 // PRIVATE CODE.
624 
625 // Add the given partition to the list in the appropriate place.
626 void ColPartitionSet::AddPartition(ColPartition *new_part,
627  ColPartition_IT *it) {
628  AddPartitionCoverageAndBox(*new_part);
629  int new_right = new_part->right_key();
630  if (it->data()->left_key() >= new_right) {
631  it->add_before_stay_put(new_part);
632  } else {
633  it->add_after_stay_put(new_part);
634  }
635 }
636 
637 // Compute the coverage and good column count. Coverage is the amount of the
638 // width of the page (in pixels) that is covered by ColPartitions, which are
639 // used to provide candidate column layouts.
640 // Coverage is split into good and bad. Good coverage is provided by
641 // ColPartitions of a frequent width (according to the callback function
642 // provided by TabFinder::WidthCB, which accesses stored statistics on the
643 // widths of ColPartitions) and bad coverage is provided by all other
644 // ColPartitions, even if they have tab vectors at both sides. Thus:
645 // |-----------------------------------------------------------------|
646 // | Double width heading |
647 // |-----------------------------------------------------------------|
648 // |-------------------------------| |-------------------------------|
649 // | Common width ColParition | | Common width ColPartition |
650 // |-------------------------------| |-------------------------------|
651 // the layout with two common-width columns has better coverage than the
652 // double width heading, because the coverage is "good," even though less in
653 // total coverage than the heading, because the heading coverage is "bad."
654 void ColPartitionSet::ComputeCoverage() {
655  // Count the number of good columns and sum their width.
656  ColPartition_IT it(&parts_);
657  good_column_count_ = 0;
658  good_coverage_ = 0;
659  bad_coverage_ = 0;
660  bounding_box_ = TBOX();
661  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
662  ColPartition *part = it.data();
663  AddPartitionCoverageAndBox(*part);
664  }
665 }
666 
667 // Adds the coverage, column count and box for a single partition,
668 // without adding it to the list. (Helper factored from ComputeCoverage.)
669 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition &part) {
670  bounding_box_ += part.bounding_box();
671  int coverage = part.ColumnWidth();
672  if (part.good_width()) {
673  good_coverage_ += coverage;
674  good_column_count_ += 2;
675  } else {
676  if (part.blob_type() < BRT_UNKNOWN) {
677  coverage /= 2;
678  }
679  if (part.good_column()) {
680  ++good_column_count_;
681  }
682  bad_coverage_ += coverage;
683  }
684 }
685 
686 } // namespace tesseract.
#define ASSERT_HOST(x)
Definition: errcode.h:59
@ TBOX
@ BRT_UNKNOWN
Definition: blobbox.h:80
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
std::function< bool(int)> WidthCallback
Definition: tabfind.h:35
std::vector< ColPartitionSet * > PartSetVector
const double kMinColumnWidth
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:435
integer coordinate
Definition: points.h:36
TDimension left() const
Definition: rect.h:82
TDimension width() const
Definition: rect.h:126
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
static bool WithinTestRegion(int detail_level, int x, int y)
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324
void SetColumnGoodness(const WidthCallback &cb)
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:186
BlobRegionType blob_type() const
Definition: colpartition.h:147
bool good_column() const
Definition: colpartition.h:165
int LeftAtY(int y) const
Definition: colpartition.h:340
ColPartition * ShallowCopy() const
void CopyRightTab(const ColPartition &src, bool take_box)
const TBOX & bounding_box() const
Definition: colpartition.h:108
int RightAtY(int y) const
Definition: colpartition.h:344
bool ColumnContains(int x, int y) const
Definition: colpartition.h:353
bool MatchingColumns(const ColPartition &other) const
void CopyLeftTab(const ColPartition &src, bool take_box)
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap, int *gap_samples)
void DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win)
bool CompatibleColumns(bool debug, ColPartitionSet *other, const WidthCallback &cb)
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
ColPartition * GetColumnByIndex(int index)
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
ColPartitionSet * Copy(bool good_only)
ColPartition * ColumnContaining(int x, int y)
void AddToColumnSetsIfUnique(PartSetVector *column_sets, const WidthCallback &cb)
int UnmatchedWidth(ColPartitionSet *part_set)
void ImproveColumnCandidate(const WidthCallback &cb, PartSetVector *src_sets)
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments)
void set_column(ColPartition *col)
ColPartition * column() const
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void InsertCompletedBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:511