tesseract  5.0.0
tesseract::ColPartitionGrid Class Reference

#include <colpartitiongrid.h>

Inheritance diagram for tesseract::ColPartitionGrid:
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT > tesseract::GridBase

Public Member Functions

 ColPartitionGrid ()=default
 
 ColPartitionGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~ColPartitionGrid () override=default
 
void HandleClick (int x, int y) override
 
void Merges (const std::function< bool(ColPartition *, TBOX *)> &box_cb, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb)
 
bool MergePart (const std::function< bool(ColPartition *, TBOX *)> &box_cb, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb, ColPartition *part)
 
int ComputeTotalOverlap (ColPartitionGrid **overlap_grid)
 
void FindOverlappingPartitions (const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
 
ColPartition * BestMergeCandidate (const ColPartition *part, ColPartition_CLIST *candidates, bool debug, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb, int *overlap_increase)
 
void SplitOverlappingPartitions (ColPartition_LIST *big_parts)
 
bool GridSmoothNeighbours (BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box, const FCOORD &rerotation)
 
void ReflectInYAxis ()
 
void Deskew (const FCOORD &deskew)
 
void ExtractPartitionsAsBlocks (BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void SetTabStops (TabFind *tabgrid)
 
bool MakeColPartSets (PartSetVector *part_sets)
 
ColPartitionSet * MakeSingleColumnSet (WidthCallback cb)
 
void ClaimBoxes ()
 
void ReTypeBlobs (BLOBNBOX_LIST *im_blobs)
 
void RecomputeBounds (int gridsize, const ICOORD &bleft, const ICOORD &tright, const ICOORD &vertical)
 
void GridFindMargins (ColPartitionSet **best_columns)
 
void ListFindMargins (ColPartitionSet **best_columns, ColPartition_LIST *parts)
 
void DeleteParts ()
 
void DeleteUnknownParts (TO_BLOCK *block)
 
void DeleteNonLeaderParts ()
 
void FindFigureCaptions ()
 
void FindPartitionPartners ()
 
void FindPartitionPartners (bool upper, ColPartition *part)
 
void FindVPartitionPartners (bool to_the_left, ColPartition *part)
 
void RefinePartitionPartners (bool get_desperate)
 
- Public Member Functions inherited from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(ColPartition *))
 
void InsertBBox (bool h_spread, bool v_spread, ColPartition *bbox)
 
void InsertPixPtBBox (int left, int bottom, Image pix, ColPartition *bbox)
 
void RemoveBBox (ColPartition *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Protected Attributes inherited from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
ColPartition_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

Definition at line 32 of file colpartitiongrid.h.

Constructor & Destructor Documentation

◆ ColPartitionGrid() [1/2]

tesseract::ColPartitionGrid::ColPartitionGrid ( )
default

◆ ColPartitionGrid() [2/2]

tesseract::ColPartitionGrid::ColPartitionGrid ( int  gridsize,
const ICOORD & bleft,
const ICOORD & tright 
)

Definition at line 67 of file colpartitiongrid.cpp.

69  : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(
70  gridsize, bleft, tright) {}
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75

◆ ~ColPartitionGrid()

tesseract::ColPartitionGrid::~ColPartitionGrid ( )
override default

Member Function Documentation

◆ BestMergeCandidate()

ColPartition * tesseract::ColPartitionGrid::BestMergeCandidate ( const ColPartition * part,
ColPartition_CLIST *  candidates,
bool  debug,
const std::function< bool(const ColPartition *, const ColPartition *)> &  confirm_cb,
int *  overlap_increase 
)

Definition at line 410 of file colpartitiongrid.cpp.

414  {
415  if (overlap_increase != nullptr) {
416  *overlap_increase = 0;
417  }
418  if (candidates->empty()) {
419  return nullptr;
420  }
421  int ok_overlap =
422  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
423  // The best neighbour to merge with is the one that causes least
424  // total pairwise overlap among all the neighbours.
425  // If more than one offers the same total overlap, choose the one
426  // with the least total area.
427  const TBOX &part_box = part->bounding_box();
428  ColPartition_C_IT it(candidates);
429  ColPartition *best_candidate = nullptr;
430  // Find the total combined box of all candidates and the original.
431  TBOX full_box(part_box);
432  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
433  ColPartition *candidate = it.data();
434  full_box += candidate->bounding_box();
435  }
436  // Keep valid neighbours in a list.
437  ColPartition_CLIST neighbours;
438  // Now run a rect search of the merged box for overlapping neighbours, as
439  // we need anything that might be overlapped by the merged box.
440  FindOverlappingPartitions(full_box, part, &neighbours);
441  if (debug) {
442  tprintf("Finding best merge candidate from %d, %d neighbours for box:",
443  candidates->length(), neighbours.length());
444  part_box.print();
445  }
446  // If the best increase in overlap is positive, then we also check the
447  // worst non-candidate overlap. This catches the case of multiple good
448  // candidates that overlap each other when merged. If the worst
449  // non-candidate overlap is better than the best overlap, then return
450  // the worst non-candidate overlap instead.
451  ColPartition_CLIST non_candidate_neighbours;
452  non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
453  &neighbours, candidates);
454  int worst_nc_increase = 0;
455  int best_increase = INT32_MAX;
456  int best_area = 0;
457  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
458  ColPartition *candidate = it.data();
459  if (confirm_cb != nullptr && !confirm_cb(part, candidate)) {
460  if (debug) {
461  tprintf("Candidate not confirmed:");
462  candidate->bounding_box().print();
463  }
464  continue;
465  }
466  int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
467  const TBOX &cand_box = candidate->bounding_box();
468  if (best_candidate == nullptr || increase < best_increase) {
469  best_candidate = candidate;
470  best_increase = increase;
471  best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
472  if (debug) {
473  tprintf("New best merge candidate has increase %d, area %d, over box:",
474  increase, best_area);
475  full_box.print();
476  candidate->Print();
477  }
478  } else if (increase == best_increase) {
479  int area = cand_box.bounding_union(part_box).area() - cand_box.area();
480  if (area < best_area) {
481  best_area = area;
482  best_candidate = candidate;
483  }
484  }
485  increase = IncreaseInOverlap(part, candidate, ok_overlap,
486  &non_candidate_neighbours);
487  if (increase > worst_nc_increase) {
488  worst_nc_increase = increase;
489  }
490  }
491  if (best_increase > 0) {
492  // If the worst non-candidate increase is less than the best increase
493  // including the candidates, then all the candidates can merge together
494  // and the increase in outside overlap would be less, so use that result,
495  // but only if each candidate is either a good diacritic merge with part,
496  // or an ok merge candidate with all the others.
497  // See TestCompatibleCandidates for more explanation and a picture.
498  if (worst_nc_increase < best_increase &&
499  TestCompatibleCandidates(*part, debug, candidates)) {
500  best_increase = worst_nc_increase;
501  }
502  }
503  if (overlap_increase != nullptr) {
504  *overlap_increase = best_increase;
505  }
506  return best_candidate;
507 }
@ TBOX
const double kTinyEnoughTextlineOverlapFraction
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)

◆ ClaimBoxes()

void tesseract::ColPartitionGrid::ClaimBoxes ( )

Definition at line 885 of file colpartitiongrid.cpp.

885  {
886  // Iterate the ColPartitions in the grid.
887  ColPartitionGridSearch gsearch(this);
888  gsearch.StartFullSearch();
889  ColPartition *part;
890  while ((part = gsearch.NextFullSearch()) != nullptr) {
891  part->ClaimBoxes();
892  }
893 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919

◆ ComputeTotalOverlap()

int tesseract::ColPartitionGrid::ComputeTotalOverlap ( ColPartitionGrid **  overlap_grid)

Definition at line 322 of file colpartitiongrid.cpp.

322  {
323  int total_overlap = 0;
324  // Iterate the ColPartitions in the grid.
325  ColPartitionGridSearch gsearch(this);
326  gsearch.StartFullSearch();
327  ColPartition *part;
328  while ((part = gsearch.NextFullSearch()) != nullptr) {
329  ColPartition_CLIST neighbors;
330  const TBOX &part_box = part->bounding_box();
331  FindOverlappingPartitions(part_box, part, &neighbors);
332  ColPartition_C_IT n_it(&neighbors);
333  bool any_part_overlap = false;
334  for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
335  const TBOX &n_box = n_it.data()->bounding_box();
336  int overlap = n_box.intersection(part_box).area();
337  if (overlap > 0 && overlap_grid != nullptr) {
338  if (*overlap_grid == nullptr) {
339  *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright());
340  }
341  (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy());
342  if (!any_part_overlap) {
343  (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy());
344  }
345  }
346  any_part_overlap = true;
347  total_overlap += overlap;
348  }
349  }
350  return total_overlap;
351 }

◆ DeleteNonLeaderParts()

void tesseract::ColPartitionGrid::DeleteNonLeaderParts ( )

Definition at line 1063 of file colpartitiongrid.cpp.

1063  {
1064  ColPartitionGridSearch gsearch(this);
1065  gsearch.StartFullSearch();
1066  ColPartition *part;
1067  while ((part = gsearch.NextFullSearch()) != nullptr) {
1068  if (part->flow() != BTFT_LEADER) {
1069  gsearch.RemoveBBox();
1070  if (part->ReleaseNonLeaderBoxes()) {
1071  InsertBBox(true, true, part);
1072  gsearch.RepositionIterator();
1073  } else {
1074  delete part;
1075  }
1076  }
1077  }
1078 }
@ BTFT_LEADER
Definition: blobbox.h:117
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:529

◆ DeleteParts()

void tesseract::ColPartitionGrid::DeleteParts ( )

Definition at line 1029 of file colpartitiongrid.cpp.

1029  {
1030  ColPartition_LIST dead_parts;
1031  ColPartition_IT dead_it(&dead_parts);
1032  ColPartitionGridSearch gsearch(this);
1033  gsearch.StartFullSearch();
1034  ColPartition *part;
1035  while ((part = gsearch.NextFullSearch()) != nullptr) {
1036  part->DisownBoxes();
1037  dead_it.add_to_end(part); // Parts will be deleted on return.
1038  }
1039  Clear();
1040 }

◆ DeleteUnknownParts()

void tesseract::ColPartitionGrid::DeleteUnknownParts ( TO_BLOCK * block)

Definition at line 1044 of file colpartitiongrid.cpp.

1044  {
1045  ColPartitionGridSearch gsearch(this);
1046  gsearch.StartFullSearch();
1047  ColPartition *part;
1048  while ((part = gsearch.NextFullSearch()) != nullptr) {
1049  if (part->blob_type() == BRT_UNKNOWN) {
1050  gsearch.RemoveBBox();
1051  // Once marked, the blobs will be swept up by DeleteUnownedNoise.
1052  part->set_flow(BTFT_NONTEXT);
1053  part->set_blob_type(BRT_NOISE);
1054  part->SetBlobTypes();
1055  part->DisownBoxes();
1056  delete part;
1057  }
1058  }
1059  block->DeleteUnownedNoise();
1060 }
@ BRT_NOISE
Definition: blobbox.h:75
@ BRT_UNKNOWN
Definition: blobbox.h:80
@ BTFT_NONTEXT
Definition: blobbox.h:112

◆ Deskew()

void tesseract::ColPartitionGrid::Deskew ( const FCOORD & deskew)

Definition at line 751 of file colpartitiongrid.cpp.

751  {
752  ColPartition_LIST parts;
753  ColPartition_IT part_it(&parts);
754  // Iterate the ColPartitions in the grid to extract them.
755  ColPartitionGridSearch gsearch(this);
756  gsearch.StartFullSearch();
757  ColPartition *part;
758  while ((part = gsearch.NextFullSearch()) != nullptr) {
759  part_it.add_after_then_move(part);
760  }
761  // Rebuild the grid to the new size.
762  TBOX grid_box(bleft_, tright_);
763  grid_box.rotate_large(deskew);
764  Init(gridsize(), grid_box.botleft(), grid_box.topright());
765  // Reinitializing the grid with rotated coords also clears all the
766  // pointers, so parts will now own the ColPartitions. (Briefly).
767  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
768  part = part_it.extract();
769  part->ComputeLimits();
770  InsertBBox(true, true, part);
771  }
772 }
ICOORD tright_
Definition: bbgrid.h:91
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:488

◆ ExtractPartitionsAsBlocks()

void tesseract::ColPartitionGrid::ExtractPartitionsAsBlocks ( BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 688 of file colpartitiongrid.cpp.

689  {
690  TO_BLOCK_IT to_block_it(to_blocks);
691  BLOCK_IT block_it(blocks);
692  // All partitions will be put on this list and deleted on return.
693  ColPartition_LIST parts;
694  ColPartition_IT part_it(&parts);
695  // Iterate the ColPartitions in the grid to extract them.
696  ColPartitionGridSearch gsearch(this);
697  gsearch.StartFullSearch();
698  ColPartition *part;
699  while ((part = gsearch.NextFullSearch()) != nullptr) {
700  part_it.add_after_then_move(part);
701  // The partition has to be at least vaguely like text.
702  BlobRegionType blob_type = part->blob_type();
703  if (BLOBNBOX::IsTextType(blob_type) ||
704  (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
705  PolyBlockType type =
706  blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT;
707  // Get metrics from the row that will be used for the block.
708  TBOX box = part->bounding_box();
709  int median_width = part->median_width();
710  int median_height = part->median_height();
711  // Turn the partition into a TO_ROW.
712  TO_ROW *row = part->MakeToRow();
713  if (row == nullptr) {
714  // This partition is dead.
715  part->DeleteBoxes();
716  continue;
717  }
718  auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
719  box.right(), box.top());
720  block->pdblk.set_poly_block(new POLY_BLOCK(box, type));
721  auto *to_block = new TO_BLOCK(block);
722  TO_ROW_IT row_it(to_block->get_rows());
723  row_it.add_after_then_move(row);
724  // We haven't differentially rotated vertical and horizontal text at
725  // this point, so use width or height as appropriate.
726  if (blob_type == BRT_VERT_TEXT) {
727  to_block->line_size = static_cast<float>(median_width);
728  to_block->line_spacing = static_cast<float>(box.width());
729  to_block->max_blob_size = static_cast<float>(box.width() + 1);
730  } else {
731  to_block->line_size = static_cast<float>(median_height);
732  to_block->line_spacing = static_cast<float>(box.height());
733  to_block->max_blob_size = static_cast<float>(box.height() + 1);
734  }
735  if (to_block->line_size == 0) {
736  to_block->line_size = 1;
737  }
738  block_it.add_to_end(block);
739  to_block_it.add_to_end(to_block);
740  } else {
741  // This partition is dead.
742  part->DeleteBoxes();
743  }
744  }
745  Clear();
746  // Now it is safe to delete the ColPartitions as parts goes out of scope.
747 }
BlobRegionType
Definition: blobbox.h:74
@ BRT_VERT_TEXT
Definition: blobbox.h:81
@ PT_VERTICAL_TEXT
Definition: publictypes.h:61
@ PT_FLOWING_TEXT
Definition: publictypes.h:55
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:435

◆ FindFigureCaptions()

void tesseract::ColPartitionGrid::FindFigureCaptions ( )

Definition at line 1081 of file colpartitiongrid.cpp.

1081  {
1082  // For each image region find its best candidate text caption region,
1083  // if any and mark it as such.
1084  ColPartitionGridSearch gsearch(this);
1085  gsearch.StartFullSearch();
1086  ColPartition *part;
1087  while ((part = gsearch.NextFullSearch()) != nullptr) {
1088  if (part->IsImageType()) {
1089  const TBOX &part_box = part->bounding_box();
1090  bool debug =
1091  AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom());
1092  ColPartition *best_caption = nullptr;
1093  int best_dist = 0; // Distance to best_caption.
1094  int best_upper = 0; // Direction of best_caption.
1095  // Handle both lower and upper directions.
1096  for (int upper = 0; upper < 2; ++upper) {
1097  ColPartition_C_IT partner_it(upper ? part->upper_partners()
1098  : part->lower_partners());
1099  // If there are no image partners, then this direction is ok.
1100  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1101  partner_it.forward()) {
1102  ColPartition *partner = partner_it.data();
1103  if (partner->IsImageType()) {
1104  break;
1105  }
1106  }
1107  if (!partner_it.cycled_list()) {
1108  continue;
1109  }
1110  // Find the nearest totally overlapping text partner.
1111  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1112  partner_it.forward()) {
1113  ColPartition *partner = partner_it.data();
1114  if (!partner->IsTextType() || partner->type() == PT_TABLE) {
1115  continue;
1116  }
1117  const TBOX &partner_box = partner->bounding_box();
1118  if (debug) {
1119  tprintf("Finding figure captions for image part:");
1120  part_box.print();
1121  tprintf("Considering partner:");
1122  partner_box.print();
1123  }
1124  if (partner_box.left() >= part_box.left() &&
1125  partner_box.right() <= part_box.right()) {
1126  int dist = partner_box.y_gap(part_box);
1127  if (best_caption == nullptr || dist < best_dist) {
1128  best_dist = dist;
1129  best_caption = partner;
1130  best_upper = upper;
1131  }
1132  }
1133  }
1134  }
1135  if (best_caption != nullptr) {
1136  if (debug) {
1137  tprintf("Best caption candidate:");
1138  best_caption->bounding_box().print();
1139  }
1140  // We have a candidate caption. Qualify it as being separable from
1141  // any body text. We are looking for either a small number of lines
1142  // or a big gap that indicates a separation from the body text.
1143  int line_count = 0;
1144  int biggest_gap = 0;
1145  int smallest_gap = INT16_MAX;
1146  int total_height = 0;
1147  int mean_height = 0;
1148  ColPartition *end_partner = nullptr;
1149  ColPartition *next_partner = nullptr;
1150  for (ColPartition *partner = best_caption;
1151  partner != nullptr && line_count <= kMaxCaptionLines;
1152  partner = next_partner) {
1153  if (!partner->IsTextType()) {
1154  end_partner = partner;
1155  break;
1156  }
1157  ++line_count;
1158  total_height += partner->bounding_box().height();
1159  next_partner = partner->SingletonPartner(best_upper);
1160  if (next_partner != nullptr) {
1161  int gap =
1162  partner->bounding_box().y_gap(next_partner->bounding_box());
1163  if (gap > biggest_gap) {
1164  biggest_gap = gap;
1165  end_partner = next_partner;
1166  mean_height = total_height / line_count;
1167  } else if (gap < smallest_gap) {
1168  smallest_gap = gap;
1169  }
1170  // If the gap looks big compared to the text size and the smallest
1171  // gap seen so far, then we can stop.
1172  if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
1173  biggest_gap > smallest_gap * kMinCaptionGapRatio) {
1174  break;
1175  }
1176  }
1177  }
1178  if (debug) {
1179  tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
1180  line_count, biggest_gap, smallest_gap, mean_height);
1181  if (end_partner != nullptr) {
1182  tprintf("End partner:");
1183  end_partner->bounding_box().print();
1184  }
1185  }
1186  if (next_partner == nullptr && line_count <= kMaxCaptionLines) {
1187  end_partner = nullptr; // No gap, but line count is small.
1188  }
1189  if (line_count <= kMaxCaptionLines) {
1190  // This is a qualified caption. Mark the text as caption.
1191  for (ColPartition *partner = best_caption;
1192  partner != nullptr && partner != end_partner;
1193  partner = next_partner) {
1194  partner->set_type(PT_CAPTION_TEXT);
1195  partner->SetBlobTypes();
1196  if (debug) {
1197  tprintf("Set caption type for partition:");
1198  partner->bounding_box().print();
1199  }
1200  next_partner = partner->SingletonPartner(best_upper);
1201  }
1202  }
1203  }
1204  }
1205  }
1206 }
const int kMaxCaptionLines
const double kMinCaptionGapHeightRatio
@ PT_CAPTION_TEXT
Definition: publictypes.h:62
const double kMinCaptionGapRatio
static bool WithinTestRegion(int detail_level, int x, int y)

◆ FindOverlappingPartitions()

void tesseract::ColPartitionGrid::FindOverlappingPartitions ( const TBOX & box,
const ColPartition * not_this,
ColPartition_CLIST *  parts 
)

Definition at line 356 of file colpartitiongrid.cpp.

358  {
359  ColPartitionGridSearch rsearch(this);
360  rsearch.StartRectSearch(box);
361  ColPartition *part;
362  while ((part = rsearch.NextRectSearch()) != nullptr) {
363  if (part != not_this) {
364  parts->add_sorted(SortByBoxLeft<ColPartition>, true, part);
365  }
366  }
367 }

◆ FindPartitionPartners() [1/2]

void tesseract::ColPartitionGrid::FindPartitionPartners ( )

Definition at line 1212 of file colpartitiongrid.cpp.

1212  {
1213  ColPartitionGridSearch gsearch(this);
1214  gsearch.StartFullSearch();
1215  ColPartition *part;
1216  while ((part = gsearch.NextFullSearch()) != nullptr) {
1217  if (part->IsVerticalType()) {
1218  FindVPartitionPartners(true, part);
1219  FindVPartitionPartners(false, part);
1220  } else {
1221  FindPartitionPartners(true, part);
1222  FindPartitionPartners(false, part);
1223  }
1224  }
1225 }
void FindVPartitionPartners(bool to_the_left, ColPartition *part)

◆ FindPartitionPartners() [2/2]

void tesseract::ColPartitionGrid::FindPartitionPartners ( bool  upper,
ColPartition * part 
)

Definition at line 1229 of file colpartitiongrid.cpp.

1229  {
1230  if (part->type() == PT_NOISE) {
1231  return; // Noise is not allowed to partner anything.
1232  }
1233  const TBOX &box = part->bounding_box();
1234  int top = part->median_top();
1235  int bottom = part->median_bottom();
1236  int height = top - bottom;
1237  int mid_y = (bottom + top) / 2;
1238  ColPartitionGridSearch vsearch(this);
1239  // Search down for neighbour below
1240  vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
1241  ColPartition *neighbour;
1242  ColPartition *best_neighbour = nullptr;
1243  int best_dist = INT32_MAX;
1244  while ((neighbour = vsearch.NextVerticalSearch(!upper)) != nullptr) {
1245  if (neighbour == part || neighbour->type() == PT_NOISE) {
1246  continue; // Noise is not allowed to partner anything.
1247  }
1248  int neighbour_bottom = neighbour->median_bottom();
1249  int neighbour_top = neighbour->median_top();
1250  int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1251  if (upper != (neighbour_y > mid_y)) {
1252  continue;
1253  }
1254  if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour)) {
1255  continue;
1256  }
1257  if (!part->TypesMatch(*neighbour)) {
1258  if (best_neighbour == nullptr) {
1259  best_neighbour = neighbour;
1260  }
1261  continue;
1262  }
1263  int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
1264  if (dist <= kMaxPartitionSpacing * height) {
1265  if (dist < best_dist) {
1266  best_dist = dist;
1267  best_neighbour = neighbour;
1268  }
1269  } else {
1270  break;
1271  }
1272  }
1273  if (best_neighbour != nullptr) {
1274  part->AddPartner(upper, best_neighbour);
1275  }
1276 }
const double kMaxPartitionSpacing

◆ FindVPartitionPartners()

void tesseract::ColPartitionGrid::FindVPartitionPartners ( bool  to_the_left,
ColPartition * part 
)

Definition at line 1280 of file colpartitiongrid.cpp.

1281  {
1282  if (part->type() == PT_NOISE) {
1283  return; // Noise is not allowed to partner anything.
1284  }
1285  const TBOX &box = part->bounding_box();
1286  int left = part->median_left();
1287  int right = part->median_right();
1288  int width = right >= left ? right - left : -1;
1289  int mid_x = (left + right) / 2;
1290  ColPartitionGridSearch hsearch(this);
1291  // Search left for neighbour to_the_left
1292  hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
1293  ColPartition *neighbour;
1294  ColPartition *best_neighbour = nullptr;
1295  int best_dist = INT32_MAX;
1296  while ((neighbour = hsearch.NextSideSearch(to_the_left)) != nullptr) {
1297  if (neighbour == part || neighbour->type() == PT_NOISE) {
1298  continue; // Noise is not allowed to partner anything.
1299  }
1300  int neighbour_left = neighbour->median_left();
1301  int neighbour_right = neighbour->median_right();
1302  int neighbour_x = (neighbour_left + neighbour_right) / 2;
1303  if (to_the_left != (neighbour_x < mid_x)) {
1304  continue;
1305  }
1306  if (!part->VOverlaps(*neighbour)) {
1307  continue;
1308  }
1309  if (!part->TypesMatch(*neighbour)) {
1310  continue; // Only match to other vertical text.
1311  }
1312  int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
1313  if (dist <= kMaxPartitionSpacing * width) {
1314  if (dist < best_dist || best_neighbour == nullptr) {
1315  best_dist = dist;
1316  best_neighbour = neighbour;
1317  }
1318  } else {
1319  break;
1320  }
1321  }
1322  // For vertical partitions, the upper partner is to the left, and lower is
1323  // to the right.
1324  if (best_neighbour != nullptr) {
1325  part->AddPartner(to_the_left, best_neighbour);
1326  }
1327 }

◆ GridFindMargins()

void tesseract::ColPartitionGrid::GridFindMargins ( ColPartitionSet **  best_columns)

Definition at line 988 of file colpartitiongrid.cpp.

988  {
989  // Iterate the ColPartitions in the grid.
990  ColPartitionGridSearch gsearch(this);
991  gsearch.StartFullSearch();
992  ColPartition *part;
993  while ((part = gsearch.NextFullSearch()) != nullptr) {
994  // Set up a rectangle search x-bounded by the column and y by the part.
995  ColPartitionSet *columns =
996  best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr;
997  FindPartitionMargins(columns, part);
998  const TBOX &box = part->bounding_box();
999  if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
1000  tprintf("Computed margins for part:");
1001  part->Print();
1002  }
1003  }
1004 }

◆ GridSmoothNeighbours()

bool tesseract::ColPartitionGrid::GridSmoothNeighbours ( BlobTextFlowType  source_type,
Image  nontext_map,
const TBOX & im_box,
const FCOORD & rerotation 
)

Definition at line 635 of file colpartitiongrid.cpp.

638  {
639  // Iterate the ColPartitions in the grid.
640  ColPartitionGridSearch gsearch(this);
641  gsearch.StartFullSearch();
642  ColPartition *part;
643  bool any_changed = false;
644  while ((part = gsearch.NextFullSearch()) != nullptr) {
645  if (part->flow() != source_type ||
646  BLOBNBOX::IsLineType(part->blob_type())) {
647  continue;
648  }
649  const TBOX &box = part->bounding_box();
650  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
651  if (SmoothRegionType(nontext_map, im_box, rotation, debug, part)) {
652  any_changed = true;
653  }
654  }
655  return any_changed;
656 }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:443

◆ HandleClick()

void tesseract::ColPartitionGrid::HandleClick ( int  x,
int  y 
)
override virtual

Reimplemented from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >.

Definition at line 73 of file colpartitiongrid.cpp.

73  {
74  BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x,
75  y);
76  // Run a radial search for partitions that overlap.
77  ColPartitionGridSearch radsearch(this);
78  radsearch.SetUniqueMode(true);
79  radsearch.StartRadSearch(x, y, 1);
80  ColPartition *neighbour;
81  FCOORD click(x, y);
82  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
83  const TBOX &nbox = neighbour->bounding_box();
84  if (nbox.contains(click)) {
85  tprintf("Block box:");
86  neighbour->bounding_box().print();
87  neighbour->Print();
88  }
89  }
90 }
virtual void HandleClick(int x, int y)
Definition: bbgrid.h:691

◆ ListFindMargins()

void tesseract::ColPartitionGrid::ListFindMargins ( ColPartitionSet **  best_columns,
ColPartition_LIST *  parts 
)

Definition at line 1011 of file colpartitiongrid.cpp.

1012  {
1013  ColPartition_IT part_it(parts);
1014  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
1015  ColPartition *part = part_it.data();
1016  ColPartitionSet *columns = nullptr;
1017  if (best_columns != nullptr) {
1018  const TBOX &part_box = part->bounding_box();
1019  // Get the columns from the y grid coord.
1020  int grid_x, grid_y;
1021  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
1022  columns = best_columns[grid_y];
1023  }
1024  FindPartitionMargins(columns, part);
1025  }
1026 }
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
Definition: bbgrid.cpp:53

◆ MakeColPartSets()

bool tesseract::ColPartitionGrid::MakeColPartSets ( PartSetVector * part_sets)

Definition at line 803 of file colpartitiongrid.cpp.

803  {
804  auto *part_lists = new ColPartition_LIST[gridheight()];
805  part_sets->reserve(gridheight());
806  // Iterate the ColPartitions in the grid to get parts onto lists for the
807  // y bottom of each.
808  ColPartitionGridSearch gsearch(this);
809  gsearch.StartFullSearch();
810  ColPartition *part;
811  bool any_parts_found = false;
812  while ((part = gsearch.NextFullSearch()) != nullptr) {
813  BlobRegionType blob_type = part->blob_type();
814  if (blob_type != BRT_NOISE &&
815  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
816  int grid_x, grid_y;
817  const TBOX &part_box = part->bounding_box();
818  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
819  ColPartition_IT part_it(&part_lists[grid_y]);
820  part_it.add_to_end(part);
821  any_parts_found = true;
822  }
823  }
824  if (any_parts_found) {
825  for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
826  ColPartitionSet *line_set = nullptr;
827  if (!part_lists[grid_y].empty()) {
828  line_set = new ColPartitionSet(&part_lists[grid_y]);
829  }
830  part_sets->push_back(line_set);
831  }
832  }
833  delete[] part_lists;
834  return any_parts_found;
835 }
int gridheight() const
Definition: bbgrid.h:69

◆ MakeSingleColumnSet()

ColPartitionSet * tesseract::ColPartitionGrid::MakeSingleColumnSet ( WidthCallback  cb)

Definition at line 841 of file colpartitiongrid.cpp.

841  {
842  ColPartition *single_column_part = nullptr;
843  // Iterate the ColPartitions in the grid to get parts onto lists for the
844  // y bottom of each.
845  ColPartitionGridSearch gsearch(this);
846  gsearch.StartFullSearch();
847  ColPartition *part;
848  while ((part = gsearch.NextFullSearch()) != nullptr) {
849  BlobRegionType blob_type = part->blob_type();
850  if (blob_type != BRT_NOISE &&
851  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
852  // Consider for single column.
853  BlobTextFlowType flow = part->flow();
854  if ((blob_type == BRT_TEXT &&
855  (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
856  flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
857  blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
858  if (single_column_part == nullptr) {
859  single_column_part = part->ShallowCopy();
860  single_column_part->set_blob_type(BRT_TEXT);
861  // Copy the tabs from itself to properly setup the margins.
862  single_column_part->CopyLeftTab(*single_column_part, false);
863  single_column_part->CopyRightTab(*single_column_part, false);
864  } else {
865  if (part->left_key() < single_column_part->left_key()) {
866  single_column_part->CopyLeftTab(*part, false);
867  }
868  if (part->right_key() > single_column_part->right_key()) {
869  single_column_part->CopyRightTab(*part, false);
870  }
871  }
872  }
873  }
874  }
875  if (single_column_part != nullptr) {
876  // Make a ColPartitionSet out of the single_column_part as a candidate
877  // for the single column case.
878  single_column_part->SetColumnGoodness(cb);
879  return new ColPartitionSet(single_column_part);
880  }
881  return nullptr;
882 }
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_POLYIMAGE
Definition: blobbox.h:79
@ BRT_RECTIMAGE
Definition: blobbox.h:78
BlobTextFlowType
Definition: blobbox.h:110
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:115
@ BTFT_CHAIN
Definition: blobbox.h:114
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:116

◆ MergePart()

bool tesseract::ColPartitionGrid::MergePart ( const std::function< bool(ColPartition *, TBOX *)> &  box_cb,
const std::function< bool(const ColPartition *, const ColPartition *)> &  confirm_cb,
ColPartition * part 
)

Definition at line 119 of file colpartitiongrid.cpp.

123  {
124  if (part->IsUnMergeableType()) {
125  return false;
126  }
127  bool any_done = false;
128  // Repeatedly merge part while we find a best merge candidate that works.
129  bool merge_done = false;
130  do {
131  merge_done = false;
132  TBOX box = part->bounding_box();
133  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
134  if (debug) {
135  tprintf("Merge candidate:");
136  box.print();
137  }
138  // Set up a rectangle search bounded by the part.
139  if (!box_cb(part, &box)) {
140  continue;
141  }
142  // Create a list of merge candidates.
143  ColPartition_CLIST merge_candidates;
144  FindMergeCandidates(part, box, debug, &merge_candidates);
145  // Find the best merge candidate based on minimal overlap increase.
146  int overlap_increase;
147  ColPartition *neighbour = BestMergeCandidate(part, &merge_candidates, debug,
148  confirm_cb, &overlap_increase);
149  if (neighbour != nullptr && overlap_increase <= 0) {
150  if (debug) {
151  tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
152  part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
153  overlap_increase);
154  }
155  // Looks like a good candidate so merge it.
156  RemoveBBox(neighbour);
157  // We will modify the box of part, so remove it from the grid, merge
158  // it and then re-insert it into the grid.
159  RemoveBBox(part);
160  part->Absorb(neighbour, nullptr);
161  InsertBBox(true, true, part);
162  merge_done = true;
163  any_done = true;
164  } else if (neighbour != nullptr) {
165  if (debug) {
166  tprintf("Overlapped when merged with increase %d: ", overlap_increase);
167  neighbour->bounding_box().print();
168  }
169  } else if (debug) {
170  tprintf("No candidate neighbour returned\n");
171  }
172  } while (merge_done);
173  return any_done;
174 }
ColPartition * BestMergeCandidate(const ColPartition *part, ColPartition_CLIST *candidates, bool debug, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb, int *overlap_increase)

◆ Merges()

void tesseract::ColPartitionGrid::Merges ( const std::function< bool(ColPartition *, TBOX *)> &  box_cb,
const std::function< bool(const ColPartition *, const ColPartition *)> &  confirm_cb 
)

Definition at line 99 of file colpartitiongrid.cpp.

102  {
103  // Iterate the ColPartitions in the grid.
104  ColPartitionGridSearch gsearch(this);
105  gsearch.StartFullSearch();
106  ColPartition *part;
107  while ((part = gsearch.NextFullSearch()) != nullptr) {
108  if (MergePart(box_cb, confirm_cb, part)) {
109  gsearch.RepositionIterator();
110  }
111  }
112 }
bool MergePart(const std::function< bool(ColPartition *, TBOX *)> &box_cb, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb, ColPartition *part)

◆ RecomputeBounds()

void tesseract::ColPartitionGrid::RecomputeBounds ( int  gridsize,
const ICOORD & bleft,
const ICOORD & tright,
const ICOORD & vertical 
)

Definition at line 960 of file colpartitiongrid.cpp.

962  {
963  ColPartition_LIST saved_parts;
964  ColPartition_IT part_it(&saved_parts);
965  // Iterate the ColPartitions in the grid to get parts onto a list.
966  ColPartitionGridSearch gsearch(this);
967  gsearch.StartFullSearch();
968  ColPartition *part;
969  while ((part = gsearch.NextFullSearch()) != nullptr) {
970  part_it.add_to_end(part);
971  }
972  // Reinitialize grid to the new size.
973  Init(gridsize, bleft, tright);
974  // Recompute the bounds of the parts and put them back in the new grid.
975  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
976  part = part_it.extract();
977  part->set_vertical(vertical);
978  part->ComputeLimits();
979  InsertBBox(true, true, part);
980  }
981 }

◆ RefinePartitionPartners()

void tesseract::ColPartitionGrid::RefinePartitionPartners ( bool  get_desperate)

Definition at line 1332 of file colpartitiongrid.cpp.

1332  {
1333  ColPartitionGridSearch gsearch(this);
1334  // Refine in type order so that chasing multiple partners can be done
1335  // before eliminating type mis-matching partners.
1336  for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
1337  // Iterate the ColPartitions in the grid.
1338  gsearch.StartFullSearch();
1339  ColPartition *part;
1340  while ((part = gsearch.NextFullSearch()) != nullptr) {
1341  part->RefinePartners(static_cast<PolyBlockType>(type), get_desperate,
1342  this);
1343  // Iterator may have been messed up by a merge.
1344  gsearch.RepositionIterator();
1345  }
1346  }
1347 }

◆ ReflectInYAxis()

void tesseract::ColPartitionGrid::ReflectInYAxis ( )

Definition at line 660 of file colpartitiongrid.cpp.

660  {
661  ColPartition_LIST parts;
662  ColPartition_IT part_it(&parts);
663  // Iterate the ColPartitions in the grid to extract them.
664  ColPartitionGridSearch gsearch(this);
665  gsearch.StartFullSearch();
666  ColPartition *part;
667  while ((part = gsearch.NextFullSearch()) != nullptr) {
668  part_it.add_after_then_move(part);
669  }
670  ICOORD bot_left(-tright().x(), bleft().y());
671  ICOORD top_right(-bleft().x(), tright().y());
672  // Reinitializing the grid with reflected coords also clears all the
673  // pointers, so parts will now own the ColPartitions. (Briefly).
674  Init(gridsize(), bot_left, top_right);
675  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
676  part = part_it.extract();
677  part->ReflectInYAxis();
678  InsertBBox(true, true, part);
679  }
680 }

◆ ReTypeBlobs()

void tesseract::ColPartitionGrid::ReTypeBlobs ( BLOBNBOX_LIST *  im_blobs)

Definition at line 898 of file colpartitiongrid.cpp.

898  {
899  BLOBNBOX_IT im_blob_it(im_blobs);
900  ColPartition_LIST dead_parts;
901  ColPartition_IT dead_part_it(&dead_parts);
902  // Iterate the ColPartitions in the grid.
903  ColPartitionGridSearch gsearch(this);
904  gsearch.StartFullSearch();
905  ColPartition *part;
906  while ((part = gsearch.NextFullSearch()) != nullptr) {
907  BlobRegionType blob_type = part->blob_type();
908  BlobTextFlowType flow = part->flow();
909  bool any_blobs_moved = false;
910  if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
911  BLOBNBOX_C_IT blob_it(part->boxes());
912  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
913  BLOBNBOX *blob = blob_it.data();
914  im_blob_it.add_after_then_move(blob);
915  }
916  } else if (blob_type != BRT_NOISE) {
917  // Make sure the blobs are marked with the correct type and flow.
918  BLOBNBOX_C_IT blob_it(part->boxes());
919  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
920  BLOBNBOX *blob = blob_it.data();
921  if (blob->region_type() == BRT_NOISE) {
922  // TODO(rays) Deprecated. Change this section to an assert to verify
923  // and then delete.
924  ASSERT_HOST(blob->cblob()->area() != 0);
925  blob->set_owner(nullptr);
926  blob_it.extract();
927  any_blobs_moved = true;
928  } else {
929  blob->set_region_type(blob_type);
930  if (blob->flow() != BTFT_LEADER) {
931  blob->set_flow(flow);
932  }
933  }
934  }
935  }
936  if (blob_type == BRT_NOISE || part->boxes()->empty()) {
937  BLOBNBOX_C_IT blob_it(part->boxes());
938  part->DisownBoxes();
939  dead_part_it.add_to_end(part);
940  gsearch.RemoveBBox();
941  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
942  BLOBNBOX *blob = blob_it.data();
943  if (blob->cblob()->area() == 0) {
944  // Any blob with zero area is a fake image blob and should be deleted.
945  delete blob->cblob();
946  delete blob;
947  }
948  }
949  } else if (any_blobs_moved) {
950  gsearch.RemoveBBox();
951  part->ComputeLimits();
952  InsertBBox(true, true, part);
953  gsearch.RepositionIterator();
954  }
955  }
956 }
#define ASSERT_HOST(x)
Definition: errcode.h:59

◆ SetTabStops()

void tesseract::ColPartitionGrid::SetTabStops ( TabFind * tabgrid)

Definition at line 775 of file colpartitiongrid.cpp.

775  {
776  // Iterate the ColPartitions in the grid.
777  ColPartitionGridSearch gsearch(this);
778  gsearch.StartFullSearch();
779  ColPartition *part;
780  while ((part = gsearch.NextFullSearch()) != nullptr) {
781  const TBOX &part_box = part->bounding_box();
782  TabVector *left_line = tabgrid->LeftTabForBox(part_box, true, false);
783  // If the overlapping line is not a left tab, try for non-overlapping.
784  if (left_line != nullptr && !left_line->IsLeftTab()) {
785  left_line = tabgrid->LeftTabForBox(part_box, false, false);
786  }
787  if (left_line != nullptr && left_line->IsLeftTab()) {
788  part->SetLeftTab(left_line);
789  }
790  TabVector *right_line = tabgrid->RightTabForBox(part_box, true, false);
791  if (right_line != nullptr && !right_line->IsRightTab()) {
792  right_line = tabgrid->RightTabForBox(part_box, false, false);
793  }
794  if (right_line != nullptr && right_line->IsRightTab()) {
795  part->SetRightTab(right_line);
796  }
797  part->SetColumnGoodness(tabgrid->WidthCB());
798  }
799 }

◆ SplitOverlappingPartitions()

void tesseract::ColPartitionGrid::SplitOverlappingPartitions ( ColPartition_LIST *  big_parts)

Definition at line 523 of file colpartitiongrid.cpp.

524  {
525  int ok_overlap =
526  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
527  // Iterate the ColPartitions in the grid.
528  ColPartitionGridSearch gsearch(this);
529  gsearch.StartFullSearch();
530  ColPartition *part;
531  while ((part = gsearch.NextFullSearch()) != nullptr) {
532  // Set up a rectangle search bounded by the part.
533  const TBOX &box = part->bounding_box();
534  ColPartitionGridSearch rsearch(this);
535  rsearch.SetUniqueMode(true);
536  rsearch.StartRectSearch(box);
537  int unresolved_overlaps = 0;
538 
539  ColPartition *neighbour;
540  while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
541  if (neighbour == part) {
542  continue;
543  }
544  const TBOX &neighbour_box = neighbour->bounding_box();
545  if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
546  part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false)) {
547  continue; // The overlap is OK both ways.
548  }
549 
550  // If removal of the biggest box from either partition eliminates the
551  // overlap, and it is much bigger than the box left behind, then
552  // it is either a drop-cap, an inter-line join, or some junk that
553  // we don't want anyway, so put it in the big_parts list.
554  if (!part->IsSingleton()) {
555  BLOBNBOX *excluded = part->BiggestBox();
556  TBOX shrunken = part->BoundsWithoutBox(excluded);
557  if (!shrunken.overlap(neighbour_box) &&
558  excluded->bounding_box().height() >
559  kBigPartSizeRatio * shrunken.height()) {
560  // Removing the biggest box fixes the overlap, so do it!
561  gsearch.RemoveBBox();
562  RemoveBadBox(excluded, part, big_parts);
563  InsertBBox(true, true, part);
564  gsearch.RepositionIterator();
565  break;
566  }
567  } else if (box.contains(neighbour_box)) {
568  ++unresolved_overlaps;
569  continue; // No amount of splitting will fix it.
570  }
571  if (!neighbour->IsSingleton()) {
572  BLOBNBOX *excluded = neighbour->BiggestBox();
573  TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
574  if (!shrunken.overlap(box) &&
575  excluded->bounding_box().height() >
576  kBigPartSizeRatio * shrunken.height()) {
577  // Removing the biggest box fixes the overlap, so do it!
578  rsearch.RemoveBBox();
579  RemoveBadBox(excluded, neighbour, big_parts);
580  InsertBBox(true, true, neighbour);
581  gsearch.RepositionIterator();
582  break;
583  }
584  }
585  int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
586  int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
587  ColPartition *right_part = nullptr;
588  if (neighbour_overlap_count <= part_overlap_count ||
589  part->IsSingleton()) {
590  // Try to split the neighbour to reduce overlap.
591  BLOBNBOX *split_blob = neighbour->OverlapSplitBlob(box);
592  if (split_blob != nullptr) {
593  rsearch.RemoveBBox();
594  right_part = neighbour->SplitAtBlob(split_blob);
595  InsertBBox(true, true, neighbour);
596  ASSERT_HOST(right_part != nullptr);
597  }
598  } else {
599  // Try to split part to reduce overlap.
600  BLOBNBOX *split_blob = part->OverlapSplitBlob(neighbour_box);
601  if (split_blob != nullptr) {
602  gsearch.RemoveBBox();
603  right_part = part->SplitAtBlob(split_blob);
604  InsertBBox(true, true, part);
605  ASSERT_HOST(right_part != nullptr);
606  }
607  }
608  if (right_part != nullptr) {
609  InsertBBox(true, true, right_part);
610  gsearch.RepositionIterator();
611  rsearch.RepositionIterator();
612  break;
613  }
614  }
615  if (unresolved_overlaps > 2 && part->IsSingleton()) {
616  // This part is no good so just add to big_parts.
617  RemoveBBox(part);
618  ColPartition_IT big_it(big_parts);
619  part->set_block_owned(true);
620  big_it.add_to_end(part);
621  gsearch.RepositionIterator();
622  }
623  }
624 }
const double kBigPartSizeRatio

The documentation for this class was generated from the following files: