tesseract  5.0.0
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
tesseract::ELIST2_LINK tesseract::TestableColPartition

Public Member Functions

 ColPartition ()=default
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_height () const
 
void set_median_height (int height)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uint8_t * color1 ()
 
uint8_t * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsPulloutType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DisownBoxesNoAssert ()
 
bool ReleaseNonLeaderBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, const WidthCallback &cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (const WidthCallback &cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
TO_ROW * MakeToRow ()
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from tesseract::ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)=delete
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static int SortByBBox (const void *p1, const void *p2)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 67 of file colpartition.h.

Constructor & Destructor Documentation

◆ ColPartition() [1/2]

tesseract::ColPartition::ColPartition ( )
default

◆ ColPartition() [2/2]

tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD & vertical 
)
Parameters
blob_type: is the blob_region_type_ of the blobs in this partition.
vertical: is the direction of logical vertical on the possibly skewed image.

Definition at line 91 of file colpartition.cpp.

92  : left_margin_(-INT32_MAX),
93  right_margin_(INT32_MAX),
94  median_bottom_(INT32_MAX),
95  median_top_(-INT32_MAX),
96  median_left_(INT32_MAX),
97  median_right_(-INT32_MAX),
98  blob_type_(blob_type),
99  vertical_(vertical) {
100  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
101 }
BlobRegionType blob_type() const
Definition: colpartition.h:147

◆ ~ColPartition()

tesseract::ColPartition::~ColPartition ( )

Definition at line 146 of file colpartition.cpp.

146  {
147  // Remove this as a partner of all partners, as we don't want them
148  // referring to a deleted object.
149  ColPartition_C_IT it(&upper_partners_);
150  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
151  it.data()->RemovePartner(false, this);
152  }
153  it.set_to_list(&lower_partners_);
154  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
155  it.data()->RemovePartner(true, this);
156  }
157 }

Member Function Documentation

◆ Absorb()

void tesseract::ColPartition::Absorb ( ColPartition * other,
const WidthCallback & cb 
)

Definition at line 674 of file colpartition.cpp.

674  {
675  // The result has to either own all of the blobs or none of them.
676  // Verify the flag is consistent.
677  ASSERT_HOST(owns_blobs() == other->owns_blobs());
678  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
679  // should always be true when this is called. So there is no issues.
680  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
681  bounding_box_.bottom()) ||
682  TabFind::WithinTestRegion(2, other->bounding_box_.left(),
683  other->bounding_box_.bottom())) {
684  tprintf("Merging:");
685  Print();
686  other->Print();
687  }
688 
689  // Update the special_blobs_densities_.
690  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
691  for (int type = 0; type < BSTT_COUNT; ++type) {
692  unsigned w1 = boxes_.length();
693  unsigned w2 = other->boxes_.length();
694  float new_val = special_blobs_densities_[type] * w1 +
695  other->special_blobs_densities_[type] * w2;
696  if (!w1 || !w2) {
697  ASSERT_HOST((w1 + w2) > 0);
698  special_blobs_densities_[type] = new_val / (w1 + w2);
699  }
700  }
701 
702  // Merge the two sorted lists.
703  BLOBNBOX_C_IT it(&boxes_);
704  BLOBNBOX_C_IT it2(&other->boxes_);
705  for (; !it2.empty(); it2.forward()) {
706  BLOBNBOX *bbox2 = it2.extract();
707  ColPartition *prev_owner = bbox2->owner();
708  if (prev_owner != other && prev_owner != nullptr) {
709  // A blob on other's list is owned by someone else; let them have it.
710  continue;
711  }
712  ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
713  if (prev_owner == other) {
714  bbox2->set_owner(this);
715  }
716  it.add_to_end(bbox2);
717  }
718  left_margin_ = std::min(left_margin_, other->left_margin_);
719  right_margin_ = std::max(right_margin_, other->right_margin_);
720  if (other->left_key_ < left_key_) {
721  left_key_ = other->left_key_;
722  left_key_tab_ = other->left_key_tab_;
723  }
724  if (other->right_key_ > right_key_) {
725  right_key_ = other->right_key_;
726  right_key_tab_ = other->right_key_tab_;
727  }
728  // Combine the flow and blob_type in a sensible way.
729  // Dominant flows stay.
730  if (!DominatesInMerge(flow_, other->flow_)) {
731  flow_ = other->flow_;
732  blob_type_ = other->blob_type_;
733  }
734  SetBlobTypes();
735  if (IsVerticalType()) {
736  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
737  last_add_was_vertical_ = true;
738  } else {
739  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
740  last_add_was_vertical_ = false;
741  }
742  ComputeLimits();
743  // Fix partner lists. other is going away, so remove it as a
744  // partner of all its partners and add this in its place.
745  for (int upper = 0; upper < 2; ++upper) {
746  ColPartition_CLIST partners;
747  ColPartition_C_IT part_it(&partners);
748  part_it.add_list_after(upper ? &other->upper_partners_
749  : &other->lower_partners_);
750  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
751  ColPartition *partner = part_it.extract();
752  partner->RemovePartner(!upper, other);
753  partner->RemovePartner(!upper, this);
754  partner->AddPartner(!upper, this);
755  }
756  }
757  delete other;
758  if (cb != nullptr) {
759  SetColumnGoodness(cb);
760  }
761 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:125
@ BSTT_COUNT
Definition: blobbox.h:99
TDimension left() const
Definition: rect.h:82
TDimension bottom() const
Definition: rect.h:75
static bool WithinTestRegion(int detail_level, int x, int y)
PolyBlockType type() const
Definition: colpartition.h:180
void SetColumnGoodness(const WidthCallback &cb)
bool IsVerticalType() const
Definition: colpartition.h:441

◆ AddBox()

void tesseract::ColPartition::AddBox ( BLOBNBOX * box)

Definition at line 180 of file colpartition.cpp.

180  {
181  TBOX box = bbox->bounding_box();
182  // Update the partition limits.
183  if (boxes_.empty()) {
184  bounding_box_ = box;
185  } else {
186  bounding_box_ += box;
187  }
188 
189  if (IsVerticalType()) {
190  if (!last_add_was_vertical_) {
191  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
192  last_add_was_vertical_ = true;
193  }
194  boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
195  } else {
196  if (last_add_was_vertical_) {
197  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
198  last_add_was_vertical_ = false;
199  }
200  boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
201  }
202  if (!left_key_tab_) {
203  left_key_ = BoxLeftKey();
204  }
205  if (!right_key_tab_) {
206  right_key_ = BoxRightKey();
207  }
208  if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) {
209  tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
210  box.left(), box.bottom(), box.right(), box.top(),
211  bounding_box_.left(), bounding_box_.right());
212  }
213 }
@ TBOX
TDimension right() const
Definition: rect.h:89

◆ AddPartner()

void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition * partner 
)

Definition at line 638 of file colpartition.cpp.

638  {
639  if (upper) {
640  partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true,
641  this);
642  upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
643  } else {
644  partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true,
645  this);
646  lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
647  }
648 }

◆ AddToWorkingSet()

void tesseract::ColPartition::AddToWorkingSet ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1411 of file colpartition.cpp.

1414  {
1415  if (block_owned_) {
1416  return; // Done it already.
1417  }
1418  block_owned_ = true;
1419  WorkingPartSet_IT it(working_sets);
1420  // If there is an upper partner use its working_set_ directly.
1421  ColPartition *partner = SingletonPartner(true);
1422  if (partner != nullptr && partner->working_set_ != nullptr) {
1423  working_set_ = partner->working_set_;
1424  working_set_->AddPartition(this);
1425  return;
1426  }
1427  if (partner != nullptr && textord_debug_bugs) {
1428  tprintf("Partition with partner has no working set!:");
1429  Print();
1430  partner->Print();
1431  }
1432  // Search for the column that the left edge fits in.
1433  WorkingPartSet *work_set = nullptr;
1434  it.move_to_first();
1435  int col_index = 0;
1436  for (it.mark_cycle_pt(); !it.cycled_list() && col_index != first_column_;
1437  it.forward(), ++col_index) {
1438  ;
1439  }
1440  if (textord_debug_tabfind >= 2) {
1441  tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1442  Print();
1443  }
1444  if (it.cycled_list() && textord_debug_bugs) {
1445  tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1446  }
1447  ASSERT_HOST(!it.cycled_list());
1448  work_set = it.data();
1449  // If last_column_ != first_column, then we need to scoop up all blocks
1450  // between here and the last_column_ and put back in work_set.
1451  if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1452  // Find the column that the right edge falls in.
1453  BLOCK_LIST completed_blocks;
1454  TO_BLOCK_LIST to_blocks;
1455  for (; !it.cycled_list() && col_index <= last_column_;
1456  it.forward(), ++col_index) {
1457  WorkingPartSet *end_set = it.data();
1458  end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1459  &completed_blocks, &to_blocks);
1460  }
1461  work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1462  }
1463  working_set_ = work_set;
1464  work_set->AddPartition(this);
1465 }
int textord_debug_tabfind
Definition: alignedblob.cpp:29
int textord_debug_bugs
Definition: alignedblob.cpp:30
bool IsPulloutType() const
Definition: colpartition.h:437
ColPartition * SingletonPartner(bool upper)
void AddPartition(ColPartition *part)

◆ BiggestBox()

BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 229 of file colpartition.cpp.

229  {
230  BLOBNBOX *biggest = nullptr;
231  BLOBNBOX_C_IT bb_it(&boxes_);
232  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
233  BLOBNBOX *bbox = bb_it.data();
234  if (IsVerticalType()) {
235  if (biggest == nullptr ||
236  bbox->bounding_box().width() > biggest->bounding_box().width()) {
237  biggest = bbox;
238  }
239  } else {
240  if (biggest == nullptr ||
241  bbox->bounding_box().height() > biggest->bounding_box().height()) {
242  biggest = bbox;
243  }
244  }
245  }
246  return biggest;
247 }

◆ blob_type()

BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 147 of file colpartition.h.

147  {
148  return blob_type_;
149  }

◆ block_owned()

bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 204 of file colpartition.h.

204  {
205  return block_owned_;
206  }

◆ bottom_spacing()

int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 219 of file colpartition.h.

219  {
220  return bottom_spacing_;
221  }

◆ bounding_box()

const TBOX& tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 108 of file colpartition.h.

108  {
109  return bounding_box_;
110  }

◆ BoundsWithoutBox()

TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX * box)

Definition at line 250 of file colpartition.cpp.

250  {
251  TBOX result;
252  BLOBNBOX_C_IT bb_it(&boxes_);
253  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
254  if (box != bb_it.data()) {
255  result += bb_it.data()->bounding_box();
256  }
257  }
258  return result;
259 }

◆ BoxColor()

ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1850 of file colpartition.cpp.

1850  {
1851  if (type_ == PT_UNKNOWN) {
1852  return BLOBNBOX::TextlineColor(blob_type_, flow_);
1853  }
1854  return POLY_BLOCK::ColorForPolyBlockType(type_);
1855 }
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:442
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:389

◆ boxes()

BLOBNBOX_CLIST* tesseract::ColPartition::boxes ( )
inline

Definition at line 186 of file colpartition.h.

186  {
187  return &boxes_;
188  }

◆ boxes_count()

int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 189 of file colpartition.h.

189  {
190  return boxes_.length();
191  }

◆ BoxLeftKey()

int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 332 of file colpartition.h.

332  {
333  return SortKey(bounding_box_.left(), MidY());
334  }
int SortKey(int x, int y) const
Definition: colpartition.h:316

◆ BoxRightKey()

int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 336 of file colpartition.h.

336  {
337  return SortKey(bounding_box_.right(), MidY());
338  }

◆ ClaimBoxes()

void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 263 of file colpartition.cpp.

263  {
264  BLOBNBOX_C_IT bb_it(&boxes_);
265  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
266  BLOBNBOX *bblob = bb_it.data();
267  ColPartition *other = bblob->owner();
268  if (other == nullptr) {
269  // Normal case: ownership is available.
270  bblob->set_owner(this);
271  } else {
272  ASSERT_HOST(other == this);
273  }
274  }
275 }

◆ clear_table_type()

void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 238 of file colpartition.h.

238  {
239  if (type_ == PT_TABLE) {
240  type_ = type_before_table_;
241  }
242  }

◆ color1()

uint8_t* tesseract::ColPartition::color1 ( )
inline

Definition at line 285 of file colpartition.h.

285  {
286  return color1_;
287  }

◆ color2()

uint8_t* tesseract::ColPartition::color2 ( )
inline

Definition at line 288 of file colpartition.h.

288  {
289  return color2_;
290  }

◆ column_set()

ColPartitionSet* tesseract::ColPartition::column_set ( ) const
inline

Definition at line 213 of file colpartition.h.

213  {
214  return column_set_;
215  }

◆ ColumnContains()

bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 353 of file colpartition.h.

353  {
354  return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
355  }
int LeftAtY(int y) const
Definition: colpartition.h:340
int RightAtY(int y) const
Definition: colpartition.h:344

◆ ColumnRange()

void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet * columns,
int *  first_col,
int *  last_col 
)

Definition at line 1107 of file colpartition.cpp.

1108  {
1109  int first_spanned_col = -1;
1110  ColumnSpanningType span_type = columns->SpanningType(
1111  resolution, bounding_box_.left(), bounding_box_.right(),
1112  std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
1113  left_margin_, right_margin_, first_col, last_col, &first_spanned_col);
1114  type_ = PartitionType(span_type);
1115 }
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
PolyBlockType PartitionType(ColumnSpanningType flow) const

◆ ColumnWidth()

int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 328 of file colpartition.h.

328  {
329  return KeyWidth(left_key_, right_key_);
330  }
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324

◆ ComputeLimits()

void tesseract::ColPartition::ComputeLimits ( )

Definition at line 905 of file colpartition.cpp.

905  {
906  bounding_box_ = TBOX(); // Clear it
907  BLOBNBOX_C_IT it(&boxes_);
908  BLOBNBOX *bbox = nullptr;
909  int non_leader_count = 0;
910  if (it.empty()) {
911  bounding_box_.set_left(left_margin_);
912  bounding_box_.set_right(right_margin_);
913  bounding_box_.set_bottom(0);
914  bounding_box_.set_top(0);
915  } else {
916  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
917  bbox = it.data();
918  bounding_box_ += bbox->bounding_box();
919  if (bbox->flow() != BTFT_LEADER) {
920  ++non_leader_count;
921  }
922  }
923  }
924  if (!left_key_tab_) {
925  left_key_ = BoxLeftKey();
926  }
927  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
928  // TODO(rays) investigate the causes of these error messages, to find
929  // out if they are genuinely harmful, or just indicative of junk input.
930  tprintf("Computed left-illegal partition\n");
931  Print();
932  }
933  if (!right_key_tab_) {
934  right_key_ = BoxRightKey();
935  }
936  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
937  tprintf("Computed right-illegal partition\n");
938  Print();
939  }
940  if (it.empty()) {
941  return;
942  }
943  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
944  blob_type() == BRT_POLYIMAGE) {
945  median_top_ = bounding_box_.top();
946  median_bottom_ = bounding_box_.bottom();
947  median_height_ = bounding_box_.height();
948  median_left_ = bounding_box_.left();
949  median_right_ = bounding_box_.right();
950  median_width_ = bounding_box_.width();
951  } else {
952  STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
953  STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
954  STATS height_stats(0, bounding_box_.height() + 1);
955  STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
956  STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
957  STATS width_stats(0, bounding_box_.width() + 1);
958  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
959  bbox = it.data();
960  if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
961  const TBOX &box = bbox->bounding_box();
962  int area = box.area();
963  top_stats.add(box.top(), area);
964  bottom_stats.add(box.bottom(), area);
965  height_stats.add(box.height(), area);
966  left_stats.add(box.left(), area);
967  right_stats.add(box.right(), area);
968  width_stats.add(box.width(), area);
969  }
970  }
971  median_top_ = static_cast<int>(top_stats.median() + 0.5);
972  median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
973  median_height_ = static_cast<int>(height_stats.median() + 0.5);
974  median_left_ = static_cast<int>(left_stats.median() + 0.5);
975  median_right_ = static_cast<int>(right_stats.median() + 0.5);
976  median_width_ = static_cast<int>(width_stats.median() + 0.5);
977  }
978 
979  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
980  tprintf("Made partition with bad right coords, %d < %d\n", right_margin_,
981  bounding_box_.right());
982  Print();
983  }
984  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
985  tprintf("Made partition with bad left coords, %d > %d\n", left_margin_,
986  bounding_box_.left());
987  Print();
988  }
989  // Fix partner lists. The bounding box has changed and partners are stored
990  // in bounding box order, so remove and reinsert this as a partner
991  // of all its partners.
992  for (int upper = 0; upper < 2; ++upper) {
993  ColPartition_CLIST partners;
994  ColPartition_C_IT part_it(&partners);
995  part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
996  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
997  ColPartition *partner = part_it.extract();
998  partner->RemovePartner(!upper, this);
999  partner->AddPartner(!upper, this);
1000  }
1001  }
1002  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1003  bounding_box_.bottom())) {
1004  tprintf("Recomputed box for partition %p\n", this);
1005  Print();
1006  }
1007 }
@ BRT_POLYIMAGE
Definition: blobbox.h:79
@ BRT_RECTIMAGE
Definition: blobbox.h:78
@ BTFT_LEADER
Definition: blobbox.h:117
void set_right(int x)
Definition: rect.h:92
void set_left(int x)
Definition: rect.h:85
TDimension top() const
Definition: rect.h:68
void set_bottom(int y)
Definition: rect.h:78
void set_top(int y)
Definition: rect.h:71
bool IsImageType() const
Definition: colpartition.h:429

◆ ComputeSpecialBlobsDensity()

void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 617 of file colpartition.cpp.

617  {
618  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
619  if (boxes_.empty()) {
620  return;
621  }
622 
623  BLOBNBOX_C_IT blob_it(&boxes_);
624  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
625  BLOBNBOX *blob = blob_it.data();
626  BlobSpecialTextType type = blob->special_text_type();
627  special_blobs_densities_[type]++;
628  }
629 
630  for (float &special_blobs_density : special_blobs_densities_) {
631  special_blobs_density /= boxes_.length();
632  }
633 }
BlobSpecialTextType
Definition: blobbox.h:92

◆ ConfirmNoTabViolation()

bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition & other) const

Definition at line 437 of file colpartition.cpp.

437  {
438  if (bounding_box_.right() < other.bounding_box_.left() &&
439  bounding_box_.right() < other.LeftBlobRule()) {
440  return false;
441  }
442  if (other.bounding_box_.right() < bounding_box_.left() &&
443  other.bounding_box_.right() < LeftBlobRule()) {
444  return false;
445  }
446  if (bounding_box_.left() > other.bounding_box_.right() &&
447  bounding_box_.left() > other.RightBlobRule()) {
448  return false;
449  }
450  if (other.bounding_box_.left() > bounding_box_.right() &&
451  other.bounding_box_.left() > RightBlobRule()) {
452  return false;
453  }
454  return true;
455 }

◆ CopyButDontOwnBlobs()

ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1835 of file colpartition.cpp.

1835  {
1836  ColPartition *copy = ShallowCopy();
1837  copy->set_owns_blobs(false);
1838  BLOBNBOX_C_IT inserter(copy->boxes());
1839  BLOBNBOX_C_IT traverser(boxes());
1840  for (traverser.mark_cycle_pt(); !traverser.cycled_list();
1841  traverser.forward()) {
1842  inserter.add_after_then_move(traverser.data());
1843  }
1844  return copy;
1845 }
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:186
ColPartition * ShallowCopy() const

◆ CopyLeftTab()

void tesseract::ColPartition::CopyLeftTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 552 of file colpartition.cpp.

552  {
553  left_key_tab_ = take_box ? false : src.left_key_tab_;
554  if (left_key_tab_) {
555  left_key_ = src.left_key_;
556  } else {
557  bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
558  left_key_ = BoxLeftKey();
559  }
560  if (left_margin_ > bounding_box_.left()) {
561  left_margin_ = src.left_margin_;
562  }
563 }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320

◆ CopyRightTab()

void tesseract::ColPartition::CopyRightTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 566 of file colpartition.cpp.

566  {
567  right_key_tab_ = take_box ? false : src.right_key_tab_;
568  if (right_key_tab_) {
569  right_key_ = src.right_key_;
570  } else {
571  bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
572  right_key_ = BoxRightKey();
573  }
574  if (right_margin_ < bounding_box_.right()) {
575  right_margin_ = src.right_margin_;
576  }
577 }

◆ CountOverlappingBoxes()

int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX & box)

Definition at line 1010 of file colpartition.cpp.

1010  {
1011  BLOBNBOX_C_IT it(&boxes_);
1012  int overlap_count = 0;
1013  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1014  BLOBNBOX *bbox = it.data();
1015  if (box.overlap(bbox->bounding_box())) {
1016  ++overlap_count;
1017  }
1018  }
1019  return overlap_count;
1020 }

◆ DeleteBoxes()

void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 326 of file colpartition.cpp.

326  {
327  // Although the boxes_ list is a C_LIST, in some cases it owns the
328  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
329  // and the BLOBNBOXes own the underlying C_BLOBs.
330  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
331  BLOBNBOX *bblob = bb_it.extract();
332  // TODO: remove next line, currently still needed for resultiterator_test.
333  delete bblob->remove_cblob();
334  delete bblob;
335  }
336 }

◆ desperately_merged()

bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 210 of file colpartition.h.

210  {
211  return desperately_merged_;
212  }

◆ DisownBoxes()

void tesseract::ColPartition::DisownBoxes ( )

Definition at line 279 of file colpartition.cpp.

279  {
280  BLOBNBOX_C_IT bb_it(&boxes_);
281  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
282  BLOBNBOX *bblob = bb_it.data();
283  ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr);
284  bblob->set_owner(nullptr);
285  }
286 }

◆ DisownBoxesNoAssert()

void tesseract::ColPartition::DisownBoxesNoAssert ( )

Definition at line 292 of file colpartition.cpp.

292  {
293  BLOBNBOX_C_IT bb_it(&boxes_);
294  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
295  BLOBNBOX *bblob = bb_it.data();
296  if (bblob->owner() == this) {
297  bblob->set_owner(nullptr);
298  }
299  }
300 }

◆ FakePartition()

ColPartition * tesseract::ColPartition::FakePartition ( const TBOX & box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 108 of file colpartition.cpp.

111  {
112  auto *part = new ColPartition(blob_type, ICOORD(0, 1));
113  part->set_type(block_type);
114  part->set_flow(flow);
115  part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
116  part->set_left_margin(box.left());
117  part->set_right_margin(box.right());
118  part->SetBlobTypes();
119  part->ComputeLimits();
120  part->ClaimBoxes();
121  return part;
122 }
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
BlobTextFlowType flow() const
Definition: colpartition.h:153

◆ flow()

BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 153 of file colpartition.h.

153  {
154  return flow_;
155  }

◆ good_blob_score()

int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 159 of file colpartition.h.

159  {
160  return good_blob_score_;
161  }

◆ good_column()

bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 165 of file colpartition.h.

165  {
166  return good_column_;
167  }

◆ good_width()

bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 162 of file colpartition.h.

162  {
163  return good_width_;
164  }

◆ HasGoodBaseline()

bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1343 of file colpartition.cpp.

1343  {
1344  // Approximation of the baseline.
1345  DetLineFit linepoints;
1346  // Calculation of the mean height on this line segment. Note that these
1347  // variable names apply to the context of a horizontal line, and work
1348  // analogously, rather than literally in the case of a vertical line.
1349  int total_height = 0;
1350  int coverage = 0;
1351  int height_count = 0;
1352  int width = 0;
1353  BLOBNBOX_C_IT it(&boxes_);
1354  TBOX box(it.data()->bounding_box());
1355  // Accumulate points representing the baseline at the middle of each blob,
1356  // but add an additional point for each end of the line. This makes it
1357  // harder to fit a severe skew angle, as it is most likely not right.
1358  if (IsVerticalType()) {
1359  // For a vertical line, use the right side as the baseline.
1360  ICOORD first_pt(box.right(), box.bottom());
1361  // Use the bottom-right of the first (bottom) box, the top-right of the
1362  // last, and the middle-right of all others.
1363  linepoints.Add(first_pt);
1364  for (it.forward(); !it.at_last(); it.forward()) {
1365  BLOBNBOX *blob = it.data();
1366  box = blob->bounding_box();
1367  ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1368  linepoints.Add(box_pt);
1369  total_height += box.width();
1370  coverage += box.height();
1371  ++height_count;
1372  }
1373  box = it.data()->bounding_box();
1374  ICOORD last_pt(box.right(), box.top());
1375  linepoints.Add(last_pt);
1376  width = last_pt.y() - first_pt.y();
1377 
1378  } else {
1379  // Horizontal lines use the bottom as the baseline.
1380  TBOX box(it.data()->bounding_box());
1381  // Use the bottom-left of the first box, the bottom-right of the last,
1382  // and the middle of all others.
1383  ICOORD first_pt(box.left(), box.bottom());
1384  linepoints.Add(first_pt);
1385  for (it.forward(); !it.at_last(); it.forward()) {
1386  BLOBNBOX *blob = it.data();
1387  box = blob->bounding_box();
1388  ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1389  linepoints.Add(box_pt);
1390  total_height += box.height();
1391  coverage += box.width();
1392  ++height_count;
1393  }
1394  box = it.data()->bounding_box();
1395  ICOORD last_pt(box.right(), box.bottom());
1396  linepoints.Add(last_pt);
1397  width = last_pt.x() - first_pt.x();
1398  }
1399  // Maximum median error allowed to be a good text line.
1400  if (height_count == 0) {
1401  return false;
1402  }
1403  double max_error = kMaxBaselineError * total_height / height_count;
1404  ICOORD start_pt, end_pt;
1405  double error = linepoints.Fit(&start_pt, &end_pt);
1406  return error < max_error && coverage >= kMinBaselineCoverage * width;
1407 }
const double kMinBaselineCoverage
const double kMaxBaselineError

◆ HCoreOverlap()

int tesseract::ColPartition::HCoreOverlap ( const ColPartition & other) const
inline

Definition at line 384 of file colpartition.h.

384  {
385  return std::min(median_right_, other.median_right_) -
386  std::max(median_left_, other.median_left_);
387  }

◆ HOverlaps()

bool tesseract::ColPartition::HOverlaps ( const ColPartition & other) const
inline

Definition at line 365 of file colpartition.h.

365  {
366  return bounding_box_.x_overlap(other.bounding_box_);
367  }
bool x_overlap(const TBOX &box) const
Definition: rect.h:409

◆ inside_table_column()

bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 243 of file colpartition.h.

243  {
244  return inside_table_column_;
245  }

◆ IsEmpty()

bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 357 of file colpartition.h.

357  {
358  return boxes_.empty();
359  }

◆ IsHorizontalLine()

bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 459 of file colpartition.h.

459  {
460  return IsHorizontalType() && IsLineType();
461  }
bool IsHorizontalType() const
Definition: colpartition.h:445

◆ IsHorizontalType()

bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 445 of file colpartition.h.

445  {
446  return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
447  }
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_HLINE
Definition: blobbox.h:76

◆ IsImageType()

bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 429 of file colpartition.h.

429  {
430  return PTIsImageType(type_);
431  }
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:77

◆ IsInSameColumnAs()

bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition & part) const

Definition at line 2270 of file colpartition.cpp.

2270  {
2271  // Overlap does not occur when last < part.first or first > part.last.
2272  // In other words, one is completely to the side of the other.
2273  // This is just DeMorgan's law applied to that so the function returns true.
2274  return (last_column_ >= part.first_column_) &&
2275  (first_column_ <= part.last_column_);
2276 }

◆ IsLeftOf()

bool tesseract::ColPartition::IsLeftOf ( const ColPartition & other) const
inline

Definition at line 349 of file colpartition.h.

349  {
350  return bounding_box_.right() < other.bounding_box_.right();
351  }

◆ IsLegal()

bool tesseract::ColPartition::IsLegal ( )

Definition at line 364 of file colpartition.cpp.

364  {
365  if (bounding_box_.left() > bounding_box_.right()) {
366  if (textord_debug_bugs) {
367  tprintf("Bounding box invalid\n");
368  Print();
369  }
370  return false; // Bounding box invalid.
371  }
372  if (left_margin_ > bounding_box_.left() ||
373  right_margin_ < bounding_box_.right()) {
374  if (textord_debug_bugs) {
375  tprintf("Margins invalid\n");
376  Print();
377  }
378  return false; // Margins invalid.
379  }
380  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
381  if (textord_debug_bugs) {
382  tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(),
383  right_key_, BoxRightKey());
384  Print();
385  }
386  return false; // Keys inside the box.
387  }
388  return true;
389 }

◆ IsLineType()

bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 425 of file colpartition.h.

425  {
426  return PTIsLineType(type_);
427  }
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:73

◆ IsPulloutType()

bool tesseract::ColPartition::IsPulloutType ( ) const
inline

Definition at line 437 of file colpartition.h.

437  {
438  return PTIsPulloutType(type_);
439  }
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:89

◆ IsSingleton()

bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 361 of file colpartition.h.

361  {
362  return boxes_.singleton();
363  }

◆ IsTextType()

bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 433 of file colpartition.h.

433  {
434  return PTIsTextType(type_);
435  }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82

◆ IsUnMergeableType()

bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 449 of file colpartition.h.

449  {
450  return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
451  }
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:447

◆ IsVerticalLine()

bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 454 of file colpartition.h.

454  {
455  return IsVerticalType() && IsLineType();
456  }

◆ IsVerticalType()

bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 441 of file colpartition.h.

441  {
442  return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
443  }
@ BRT_VLINE
Definition: blobbox.h:77
@ BRT_VERT_TEXT
Definition: blobbox.h:81

◆ KeyWidth()

int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 324 of file colpartition.h.

324  {
325  return (right_key - left_key) / vertical_.y();
326  }
TDimension y() const
access_function
Definition: points.h:62

◆ left_key()

int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 171 of file colpartition.h.

171  {
172  return left_key_;
173  }

◆ left_key_tab()

bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 168 of file colpartition.h.

168  {
169  return left_key_tab_;
170  }

◆ left_margin()

int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 111 of file colpartition.h.

111  {
112  return left_margin_;
113  }

◆ LeftAtY()

int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 340 of file colpartition.h.

340  {
341  return XAtY(left_key_, y);
342  }

◆ LeftBlobRule()

int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 580 of file colpartition.cpp.

580  {
581  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
582  return it.data()->left_rule();
583 }

◆ LineSpacingBlocks()

void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1473 of file colpartition.cpp.

1478  {
1479  int page_height = tright.y() - bleft.y();
1480  // Compute the initial spacing stats.
1481  ColPartition_IT it(block_parts);
1482  int part_count = 0;
1483  int max_line_height = 0;
1484 
1485  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1486  // because their line spacing with their neighbors maybe smaller and their
1487  // height may be slightly larger.
1488 
1489  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1490  ColPartition *part = it.data();
1491  ASSERT_HOST(!part->boxes()->empty());
1492  STATS side_steps(0, part->bounding_box().height());
1493  if (part->bounding_box().height() > max_line_height) {
1494  max_line_height = part->bounding_box().height();
1495  }
1496  BLOBNBOX_C_IT blob_it(part->boxes());
1497  int prev_bottom = blob_it.data()->bounding_box().bottom();
1498  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1499  BLOBNBOX *blob = blob_it.data();
1500  int bottom = blob->bounding_box().bottom();
1501  int step = bottom - prev_bottom;
1502  if (step < 0) {
1503  step = -step;
1504  }
1505  side_steps.add(step, 1);
1506  prev_bottom = bottom;
1507  }
1508  part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1509  if (!it.at_last()) {
1510  ColPartition *next_part = it.data_relative(1);
1511  part->set_bottom_spacing(part->median_bottom() -
1512  next_part->median_bottom());
1513  part->set_top_spacing(part->median_top() - next_part->median_top());
1514  } else {
1515  part->set_bottom_spacing(page_height);
1516  part->set_top_spacing(page_height);
1517  }
1518  if (textord_debug_tabfind) {
1519  part->Print();
1520  tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1521  side_steps.median(), part->top_spacing(), part->bottom_spacing());
1522  }
1523  ++part_count;
1524  }
1525  if (part_count == 0) {
1526  return;
1527  }
1528 
1529  SmoothSpacings(resolution, page_height, block_parts);
1530 
1531  // Move the partitions into individual block lists and make the blocks.
1532  BLOCK_IT block_it(completed_blocks);
1533  TO_BLOCK_IT to_block_it(to_blocks);
1534  ColPartition_LIST spacing_parts;
1535  ColPartition_IT sp_block_it(&spacing_parts);
1536  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1537  for (it.mark_cycle_pt(); !it.empty();) {
1538  ColPartition *part = it.extract();
1539  sp_block_it.add_to_end(part);
1540  it.forward();
1541  if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1542  !part->SpacingsEqual(*it.data(), resolution)) {
1543  // There is a spacing boundary. Check to see if it.data() belongs
1544  // better in the current block or the next one.
1545  if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1546  ColPartition *next_part = it.data();
1547  // If there is a size match one-way, then the middle line goes with
1548  // its matched size, otherwise it goes with the smallest spacing.
1549  ColPartition *third_part = it.at_last() ? nullptr : it.data_relative(1);
1550  if (textord_debug_tabfind) {
1551  tprintf(
1552  "Spacings unequal: upper:%d/%d, lower:%d/%d,"
1553  " sizes %d %d %d\n",
1554  part->top_spacing(), part->bottom_spacing(),
1555  next_part->top_spacing(), next_part->bottom_spacing(),
1556  part->median_height(), next_part->median_height(),
1557  third_part != nullptr ? third_part->median_height() : 0);
1558  }
1559  // We can only consider adding the next line to the block if the sizes
1560  // match and the lines are close enough for their size.
1561  if (part->SizesSimilar(*next_part) &&
1562  next_part->median_height() * kMaxSameBlockLineSpacing >
1563  part->bottom_spacing() &&
1564  part->median_height() * kMaxSameBlockLineSpacing >
1565  part->top_spacing()) {
1566  // Even now, we can only add it as long as the third line doesn't
1567  // match in the same way and have a smaller bottom spacing.
1568  if (third_part == nullptr || !next_part->SizesSimilar(*third_part) ||
1569  third_part->median_height() * kMaxSameBlockLineSpacing <=
1570  next_part->bottom_spacing() ||
1571  next_part->median_height() * kMaxSameBlockLineSpacing <=
1572  next_part->top_spacing() ||
1573  next_part->bottom_spacing() > part->bottom_spacing()) {
1574  // Add to the current block.
1575  sp_block_it.add_to_end(it.extract());
1576  it.forward();
1577  if (textord_debug_tabfind) {
1578  tprintf("Added line to current block.\n");
1579  }
1580  }
1581  }
1582  }
1583  TO_BLOCK *to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1584  if (to_block != nullptr) {
1585  to_block_it.add_to_end(to_block);
1586  block_it.add_to_end(to_block->block);
1587  }
1588  sp_block_it.set_to_list(&spacing_parts);
1589  } else {
1590  if (textord_debug_tabfind && !it.empty()) {
1591  ColPartition *next_part = it.data();
1592  tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1593  part->top_spacing(), part->bottom_spacing(),
1594  next_part->top_spacing(), next_part->bottom_spacing(),
1595  part->median_height(), next_part->median_height());
1596  }
1597  }
1598  }
1599 }
const double kMaxSameBlockLineSpacing
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)

◆ lower_partners()

ColPartition_CLIST* tesseract::ColPartition::lower_partners ( )
inline

Definition at line 198 of file colpartition.h.

198  {
199  return &lower_partners_;
200  }

◆ MakeBigPartition()

ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX * box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 129 of file colpartition.cpp.

130  {
131  box->set_owner(nullptr);
132  auto *single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
133  single->set_flow(BTFT_NONE);
134  single->AddBox(box);
135  single->ComputeLimits();
136  single->ClaimBoxes();
137  single->SetBlobTypes();
138  single->set_block_owned(true);
139  if (big_part_list != nullptr) {
140  ColPartition_IT part_it(big_part_list);
141  part_it.add_to_end(single);
142  }
143  return single;
144 }
@ BRT_UNKNOWN
Definition: blobbox.h:80
@ BTFT_NONE
Definition: blobbox.h:111

◆ MakeBlock()

TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1697 of file colpartition.cpp.

1699  {
1700  if (block_parts->empty()) {
1701  return nullptr; // Nothing to do.
1702  }
1703  // If the block_parts are not in reading order, then it will make an invalid
1704  // block polygon and bounding_box, so sort by bounding box now just to make
1705  // sure.
1706  block_parts->sort(&ColPartition::SortByBBox);
1707  ColPartition_IT it(block_parts);
1708  ColPartition *part = it.data();
1709  PolyBlockType type = part->type();
1710  if (type == PT_VERTICAL_TEXT) {
1711  return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1712  }
1713  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1714  // put the average spacing in each partition, so we can just take the
1715  // linespacing from the first partition.
1716  int line_spacing = part->bottom_spacing();
1717  if (line_spacing < part->median_height()) {
1718  line_spacing = part->bounding_box().height();
1719  }
1720  ICOORDELT_LIST vertices;
1721  ICOORDELT_IT vert_it(&vertices);
1722  ICOORD start, end;
1723  int min_x = INT32_MAX;
1724  int max_x = -INT32_MAX;
1725  int min_y = INT32_MAX;
1726  int max_y = -INT32_MAX;
1727  int iteration = 0;
1728  do {
1729  if (iteration == 0) {
1730  ColPartition::LeftEdgeRun(&it, &start, &end);
1731  } else {
1732  ColPartition::RightEdgeRun(&it, &start, &end);
1733  }
1734  ClipCoord(bleft, tright, &start);
1735  ClipCoord(bleft, tright, &end);
1736  vert_it.add_after_then_move(new ICOORDELT(start));
1737  vert_it.add_after_then_move(new ICOORDELT(end));
1738  UpdateRange(start.x(), &min_x, &max_x);
1739  UpdateRange(end.x(), &min_x, &max_x);
1740  UpdateRange(start.y(), &min_y, &max_y);
1741  UpdateRange(end.y(), &min_y, &max_y);
1742  if ((iteration == 0 && it.at_first()) || (iteration == 1 && it.at_last())) {
1743  ++iteration;
1744  it.move_to_last();
1745  }
1746  } while (iteration < 2);
1747  if (textord_debug_tabfind) {
1748  tprintf("Making block at (%d,%d)->(%d,%d)\n", min_x, min_y, max_x, max_y);
1749  }
1750  auto *block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1751  block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
1752  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1753 }
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:122
@ PT_VERTICAL_TEXT
Definition: publictypes.h:61
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:712

◆ MakeLinePartition()

ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD & vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 161 of file colpartition.cpp.

163  {
164  auto *part = new ColPartition(blob_type, vertical);
165  part->bounding_box_ = TBOX(left, bottom, right, top);
166  part->median_bottom_ = bottom;
167  part->median_top_ = top;
168  part->median_height_ = top - bottom;
169  part->median_left_ = left;
170  part->median_right_ = right;
171  part->median_width_ = right - left;
172  part->left_key_ = part->BoxLeftKey();
173  part->right_key_ = part->BoxRightKey();
174  return part;
175 }

◆ MakeToRow()

TO_ROW * tesseract::ColPartition::MakeToRow ( )

Definition at line 1784 of file colpartition.cpp.

1784  {
1785  BLOBNBOX_C_IT blob_it(&boxes_);
1786  TO_ROW *row = nullptr;
1787  int line_size = IsVerticalType() ? median_width_ : median_height_;
1788  // Add all the blobs to a single TO_ROW.
1789  for (; !blob_it.empty(); blob_it.forward()) {
1790  BLOBNBOX *blob = blob_it.extract();
1791  // blob->compute_bounding_box();
1792  int top = blob->bounding_box().top();
1793  int bottom = blob->bounding_box().bottom();
1794  if (row == nullptr) {
1795  row =
1796  new TO_ROW(blob, static_cast<float>(top), static_cast<float>(bottom),
1797  static_cast<float>(line_size));
1798  } else {
1799  row->add_blob(blob, static_cast<float>(top), static_cast<float>(bottom),
1800  static_cast<float>(line_size));
1801  }
1802  }
1803  return row;
1804 }

◆ MakeVerticalTextBlock()

TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1757 of file colpartition.cpp.

1760  {
1761  if (block_parts->empty()) {
1762  return nullptr; // Nothing to do.
1763  }
1764  ColPartition_IT it(block_parts);
1765  ColPartition *part = it.data();
1766  TBOX block_box = part->bounding_box();
1767  int line_spacing = block_box.width();
1768  PolyBlockType type = it.data()->type();
1769  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1770  block_box += it.data()->bounding_box();
1771  }
1772  if (textord_debug_tabfind) {
1773  tprintf("Making block at:");
1774  block_box.print();
1775  }
1776  auto *block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1777  block_box.right(), block_box.top());
1778  block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
1779  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1780 }

◆ MarkAsLeaderIfMonospaced()

bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1131 of file colpartition.cpp.

1131  {
1132  bool result = false;
1133  // Gather statistics on the gaps between blobs and the widths of the blobs.
1134  int part_width = bounding_box_.width();
1135  STATS gap_stats(0, part_width);
1136  STATS width_stats(0, part_width);
1137  BLOBNBOX_C_IT it(&boxes_);
1138  BLOBNBOX *prev_blob = it.data();
1139  prev_blob->set_flow(BTFT_NEIGHBOURS);
1140  width_stats.add(prev_blob->bounding_box().width(), 1);
1141  int blob_count = 1;
1142  for (it.forward(); !it.at_first(); it.forward()) {
1143  BLOBNBOX *blob = it.data();
1144  int left = blob->bounding_box().left();
1145  int right = blob->bounding_box().right();
1146  gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1147  width_stats.add(right - left, 1);
1148  blob->set_flow(BTFT_NEIGHBOURS);
1149  prev_blob = blob;
1150  ++blob_count;
1151  }
1152  double median_gap = gap_stats.median();
1153  double median_width = width_stats.median();
1154  double max_width = std::max(median_gap, median_width);
1155  double min_width = std::min(median_gap, median_width);
1156  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1157  if (textord_debug_tabfind >= 4) {
1158  tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", gap_iqr, blob_count,
1159  max_width * kMaxLeaderGapFractionOfMax,
1160  min_width * kMaxLeaderGapFractionOfMin);
1161  }
1162  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1163  gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1164  blob_count >= kMinLeaderCount) {
1165  // This is stable enough to be called a leader, so check the widths.
1166  // Since leader dashes can join, run a dp cutting algorithm and go
1167  // on the cost.
1168  int offset = static_cast<int>(ceil(gap_iqr * 2));
1169  int min_step = static_cast<int>(median_gap + median_width + 0.5);
1170  int max_step = min_step + offset;
1171  min_step -= offset;
1172  // Pad the buffer with min_step/2 on each end.
1173  int part_left = bounding_box_.left() - min_step / 2;
1174  part_width += min_step;
1175  auto *projection = new DPPoint[part_width];
1176  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1177  BLOBNBOX *blob = it.data();
1178  int left = blob->bounding_box().left();
1179  int right = blob->bounding_box().right();
1180  int height = blob->bounding_box().height();
1181  for (int x = left; x < right; ++x) {
1182  projection[left - part_left].AddLocalCost(height);
1183  }
1184  }
1185  DPPoint *best_end =
1186  DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance,
1187  part_width, projection);
1188  if (best_end != nullptr && best_end->total_cost() < blob_count) {
1189  // Good enough. Call it a leader.
1190  result = true;
1191  bool modified_blob_list = false;
1192  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1193  BLOBNBOX *blob = it.data();
1194  // If the first or last blob is spaced too much, don't mark it.
1195  if (it.at_first()) {
1196  int gap = it.data_relative(1)->bounding_box().left() -
1197  blob->bounding_box().right();
1198  if (blob->bounding_box().width() + gap > max_step) {
1199  it.extract();
1200  modified_blob_list = true;
1201  continue;
1202  }
1203  }
1204  if (it.at_last()) {
1205  int gap = blob->bounding_box().left() -
1206  it.data_relative(-1)->bounding_box().right();
1207  if (blob->bounding_box().width() + gap > max_step) {
1208  it.extract();
1209  modified_blob_list = true;
1210  break;
1211  }
1212  }
1213  blob->set_region_type(BRT_TEXT);
1214  blob->set_flow(BTFT_LEADER);
1215  }
1216  if (modified_blob_list) {
1217  ComputeLimits();
1218  }
1219  blob_type_ = BRT_TEXT;
1220  flow_ = BTFT_LEADER;
1221  } else if (textord_debug_tabfind) {
1222  if (best_end == nullptr) {
1223  tprintf("No path\n");
1224  } else {
1225  tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
1226  blob_count);
1227  }
1228  }
1229  delete[] projection;
1230  }
1231  return result;
1232 }
const double kMaxLeaderGapFractionOfMin
const int kMinLeaderCount
@ BTFT_NEIGHBOURS
Definition: blobbox.h:113
const double kMaxLeaderGapFractionOfMax
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:31
int64_t CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:70

◆ MatchingColumns()

bool tesseract::ColPartition::MatchingColumns ( const ColPartition & other) const

Definition at line 392 of file colpartition.cpp.

392  {
393  int y = (MidY() + other.MidY()) / 2;
394  if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
395  LeftAtY(y) / kColumnWidthFactor, 1)) {
396  return false;
397  }
398  if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
399  RightAtY(y) / kColumnWidthFactor, 1)) {
400  return false;
401  }
402  return true;
403 }
const int kColumnWidthFactor
Definition: tabfind.h:41
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:51

◆ MatchingSizes()

bool tesseract::ColPartition::MatchingSizes ( const ColPartition & other) const

Definition at line 428 of file colpartition.cpp.

428  {
429  if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT) {
430  return !TabFind::DifferentSizes(median_width_, other.median_width_);
431  } else {
432  return !TabFind::DifferentSizes(median_height_, other.median_height_);
433  }
434 }
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407

◆ MatchingStrokeWidth()

bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition & other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 458 of file colpartition.cpp.

460  {
461  int match_count = 0;
462  int nonmatch_count = 0;
463  BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
464  BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST *>(&other.boxes_));
465  box_it.mark_cycle_pt();
466  other_it.mark_cycle_pt();
467  while (!box_it.cycled_list() && !other_it.cycled_list()) {
468  if (box_it.data()->MatchingStrokeWidth(
469  *other_it.data(), fractional_tolerance, constant_tolerance)) {
470  ++match_count;
471  } else {
472  ++nonmatch_count;
473  }
474  box_it.forward();
475  other_it.forward();
476  }
477  return match_count > nonmatch_count;
478 }

◆ MatchingTextColor()

bool tesseract::ColPartition::MatchingTextColor ( const ColPartition & other) const

Definition at line 406 of file colpartition.cpp.

406  {
407  if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
408  other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise) {
409  return false; // Too noisy.
410  }
411 
412  // Colors must match for other to count.
413  double d_this1_o =
414  ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_);
415  double d_this2_o =
416  ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_);
417  double d_o1_this =
418  ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_);
419  double d_o2_this =
420  ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_);
421  // All 4 distances must be small enough.
422  return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
423  d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
424 }
const int kMaxColorDistance
const int kMaxRMSColorNoise
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:372

◆ median_bottom()

int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 126 of file colpartition.h.

126  {
127  return median_bottom_;
128  }

◆ median_height()

int tesseract::ColPartition::median_height ( ) const
inline

Definition at line 135 of file colpartition.h.

135  {
136  return median_height_;
137  }

◆ median_left()

int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 129 of file colpartition.h.

129  {
130  return median_left_;
131  }

◆ median_right()

int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 132 of file colpartition.h.

132  {
133  return median_right_;
134  }

◆ median_top()

int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 123 of file colpartition.h.

123  {
124  return median_top_;
125  }

◆ median_width()

int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 141 of file colpartition.h.

141  {
142  return median_width_;
143  }

◆ MedianY()

int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 308 of file colpartition.h.

308  {
309  return (median_top_ + median_bottom_) / 2;
310  }

◆ MidX()

int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 312 of file colpartition.h.

312  {
313  return (bounding_box_.left() + bounding_box_.right()) / 2;
314  }

◆ MidY()

int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 304 of file colpartition.h.

304  {
305  return (bounding_box_.top() + bounding_box_.bottom()) / 2;
306  }

◆ nearest_neighbor_above()

ColPartition* tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 249 of file colpartition.h.

249  {
250  return nearest_neighbor_above_;
251  }

◆ nearest_neighbor_below()

ColPartition* tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 255 of file colpartition.h.

255  {
256  return nearest_neighbor_below_;
257  }

◆ OKDiacriticMerge()

bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition &  candidate,
bool  debug 
) const

Definition at line 486 of file colpartition.cpp.

487  {
488  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
489  int min_top = INT32_MAX;
490  int max_bottom = -INT32_MAX;
491  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
492  BLOBNBOX *blob = it.data();
493  if (!blob->IsDiacritic()) {
494  if (debug) {
495  tprintf("Blob is not a diacritic:");
496  blob->bounding_box().print();
497  }
498  return false; // All blobs must have diacritic bases.
499  }
500  if (blob->base_char_top() < min_top) {
501  min_top = blob->base_char_top();
502  }
503  if (blob->base_char_bottom() > max_bottom) {
504  max_bottom = blob->base_char_bottom();
505  }
506  }
507  // If the intersection of all vertical ranges of all base characters
508  // overlaps the median range of this, then it is OK.
509  bool result =
510  min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_;
511  if (debug) {
512  if (result) {
513  tprintf("OKDiacritic!\n");
514  } else {
515  tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top,
516  median_bottom_, median_top_);
517  }
518  }
519  return result;
520 }

◆ OKMergeOverlap()

bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition &  merge1,
const ColPartition &  merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 773 of file colpartition.cpp.

775  {
776  // Vertical partitions are not allowed to be involved.
777  if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
778  if (debug) {
779  tprintf("Vertical partition\n");
780  }
781  return false;
782  }
783  // The merging partitions must strongly overlap each other.
784  if (!merge1.VSignificantCoreOverlap(merge2)) {
785  if (debug) {
786  tprintf("Voverlap %d (%d)\n", merge1.VCoreOverlap(merge2),
787  merge1.VSignificantCoreOverlap(merge2));
788  }
789  return false;
790  }
791  // The merged box must not overlap the median bounds of this.
792  TBOX merged_box(merge1.bounding_box());
793  merged_box += merge2.bounding_box();
794  if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
795  merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
796  merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
797  if (debug) {
798  tprintf("Excessive box overlap\n");
799  }
800  return false;
801  }
802  // Looks OK!
803  return true;
804 }

◆ OverlapSplitBlob()

BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX &  box)

Definition at line 808 of file colpartition.cpp.

808  {
809  if (boxes_.empty() || boxes_.singleton()) {
810  return nullptr;
811  }
812  BLOBNBOX_C_IT it(&boxes_);
813  TBOX left_box(it.data()->bounding_box());
814  for (it.forward(); !it.at_first(); it.forward()) {
815  BLOBNBOX *bbox = it.data();
816  left_box += bbox->bounding_box();
817  if (left_box.overlap(box)) {
818  return bbox;
819  }
820  }
821  return nullptr;
822 }

◆ owns_blobs()

bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 291 of file colpartition.h.

291  {
292  return owns_blobs_;
293  }

◆ PartitionType()

PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 1056 of file colpartition.cpp.

1056  {
1057  if (flow == CST_NOISE) {
1058  if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1059  blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) {
1060  return PT_NOISE;
1061  }
1062  flow = CST_FLOWING;
1063  }
1064 
1065  switch (blob_type_) {
1066  case BRT_NOISE:
1067  return PT_NOISE;
1068  case BRT_HLINE:
1069  return PT_HORZ_LINE;
1070  case BRT_VLINE:
1071  return PT_VERT_LINE;
1072  case BRT_RECTIMAGE:
1073  case BRT_POLYIMAGE:
1074  switch (flow) {
1075  case CST_FLOWING:
1076  return PT_FLOWING_IMAGE;
1077  case CST_HEADING:
1078  return PT_HEADING_IMAGE;
1079  case CST_PULLOUT:
1080  return PT_PULLOUT_IMAGE;
1081  default:
1082  ASSERT_HOST(!"Undefined flow type for image!");
1083  }
1084  break;
1085  case BRT_VERT_TEXT:
1086  return PT_VERTICAL_TEXT;
1087  case BRT_TEXT:
1088  case BRT_UNKNOWN:
1089  default:
1090  switch (flow) {
1091  case CST_FLOWING:
1092  return PT_FLOWING_TEXT;
1093  case CST_HEADING:
1094  return PT_HEADING_TEXT;
1095  case CST_PULLOUT:
1096  return PT_PULLOUT_TEXT;
1097  default:
1098  ASSERT_HOST(!"Undefined flow type for text!");
1099  }
1100  }
1101  ASSERT_HOST(!"Should never get here!");
1102  return PT_NOISE;
1103 }
@ BRT_NOISE
Definition: blobbox.h:75
@ PT_PULLOUT_IMAGE
Definition: publictypes.h:65
@ PT_HEADING_IMAGE
Definition: publictypes.h:64
@ PT_HORZ_LINE
Definition: publictypes.h:66
@ PT_FLOWING_IMAGE
Definition: publictypes.h:63
@ PT_VERT_LINE
Definition: publictypes.h:67
@ PT_PULLOUT_TEXT
Definition: publictypes.h:57
@ PT_HEADING_TEXT
Definition: publictypes.h:56
@ PT_FLOWING_TEXT
Definition: publictypes.h:55

◆ Print()

void tesseract::ColPartition::Print ( ) const

Definition at line 1862 of file colpartition.cpp.

1862  {
1863  int y = MidY();
1864  tprintf(
1865  "ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1866  " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1867  " ts=%d bs=%d ls=%d rs=%d\n",
1868  boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B',
1869  LeftAtY(y), bounding_box_.left(), median_left_, bounding_box_.bottom(),
1870  median_bottom_, bounding_box_.right(), RightAtY(y),
1871  right_key_tab_ ? 'T' : 'B', right_margin_, median_right_,
1872  bounding_box_.top(), median_top_, good_width_, good_column_, type_,
1873  kBlobTypes[blob_type_], flow_, first_column_, last_column_,
1874  boxes_.length(), space_above_, space_below_, space_to_left_,
1875  space_to_right_);
1876 }

◆ PrintColors()

void tesseract::ColPartition::PrintColors ( )

Definition at line 1879 of file colpartition.cpp.

1879  {
1880  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED],
1881  color1_[COLOR_GREEN], color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL],
1882  color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1883 }

◆ RefinePartners()

void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid *  grid 
)

Definition at line 1956 of file colpartition.cpp.

1957  {
1958  if (TypesSimilar(type_, type)) {
1959  RefinePartnersInternal(true, get_desperate, grid);
1960  RefinePartnersInternal(false, get_desperate, grid);
1961  } else if (type == PT_COUNT) {
1962  // This is the final pass. Make sure only the correctly typed
1963  // partners survive, however many there are.
1964  RefinePartnersByType(true, &upper_partners_);
1965  RefinePartnersByType(false, &lower_partners_);
1966  // It is possible for a merge to have given a partition multiple
1967  // partners again, so the last resort is to use overlap which is
1968  // guaranteed to leave at most one partner left.
1969  if (!upper_partners_.empty() && !upper_partners_.singleton()) {
1970  RefinePartnersByOverlap(true, &upper_partners_);
1971  }
1972  if (!lower_partners_.empty() && !lower_partners_.singleton()) {
1973  RefinePartnersByOverlap(false, &lower_partners_);
1974  }
1975  }
1976 }
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:418

◆ ReflectInYAxis()

void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 342 of file colpartition.cpp.

342  {
343  BLOBNBOX_CLIST reversed_boxes;
344  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
345  // Reverse the order of the boxes_.
346  BLOBNBOX_C_IT bb_it(&boxes_);
347  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
348  reversed_it.add_before_then_move(bb_it.extract());
349  }
350  bb_it.add_list_after(&reversed_boxes);
351  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
352  int tmp = left_margin_;
353  left_margin_ = -right_margin_;
354  right_margin_ = -tmp;
355  ComputeLimits();
356 }

◆ ReleaseNonLeaderBoxes()

bool tesseract::ColPartition::ReleaseNonLeaderBoxes ( )

Definition at line 306 of file colpartition.cpp.

306  {
307  BLOBNBOX_C_IT bb_it(&boxes_);
308  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
309  BLOBNBOX *bblob = bb_it.data();
310  if (bblob->flow() != BTFT_LEADER) {
311  if (bblob->owner() == this) {
312  bblob->set_owner(nullptr);
313  }
314  bb_it.extract();
315  }
316  }
317  if (bb_it.empty()) {
318  return false;
319  }
320  flow_ = BTFT_LEADER;
321  ComputeLimits();
322  return true;
323 }

◆ RemoveBox()

void tesseract::ColPartition::RemoveBox ( BLOBNBOX *  box)

Definition at line 216 of file colpartition.cpp.

216  {
217  BLOBNBOX_C_IT bb_it(&boxes_);
218  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
219  if (box == bb_it.data()) {
220  bb_it.extract();
221  ComputeLimits();
222  return;
223  }
224  }
225 }

◆ RemovePartner()

void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition *  partner 
)

Definition at line 653 of file colpartition.cpp.

653  {
654  ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
655  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
656  if (it.data() == partner) {
657  it.extract();
658  break;
659  }
660  }
661 }

◆ right_key()

int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 177 of file colpartition.h.

177  {
178  return right_key_;
179  }

◆ right_key_tab()

bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 174 of file colpartition.h.

174  {
175  return right_key_tab_;
176  }

◆ right_margin()

int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 117 of file colpartition.h.

117  {
118  return right_margin_;
119  }

◆ RightAtY()

int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 344 of file colpartition.h.

344  {
345  return XAtY(right_key_, y);
346  }

◆ RightBlobRule()

int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 585 of file colpartition.cpp.

585  {
586  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
587  it.move_to_last();
588  return it.data()->right_rule();
589 }

◆ set_blob_type()

void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 150 of file colpartition.h.

150  {
151  blob_type_ = t;
152  }

◆ set_block_owned()

void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 207 of file colpartition.h.

207  {
208  block_owned_ = owned;
209  }

◆ set_bottom_spacing()

void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 222 of file colpartition.h.

222  {
223  bottom_spacing_ = spacing;
224  }

◆ set_first_column()

void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 729 of file colpartition.h.

729  {
730  first_column_ = column;
731  }

◆ set_flow()

void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 156 of file colpartition.h.

156  {
157  flow_ = f;
158  }

◆ set_inside_table_column()

void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 246 of file colpartition.h.

246  {
247  inside_table_column_ = val;
248  }

◆ set_last_column()

void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 732 of file colpartition.h.

732  {
733  last_column_ = column;
734  }

◆ set_left_margin()

void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 114 of file colpartition.h.

114  {
115  left_margin_ = margin;
116  }

◆ set_median_height()

void tesseract::ColPartition::set_median_height ( int  height)
inline

Definition at line 138 of file colpartition.h.

138  {
139  median_height_ = height;
140  }

◆ set_median_width()

void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 144 of file colpartition.h.

144  {
145  median_width_ = width;
146  }

◆ set_nearest_neighbor_above()

void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition *  part)
inline

Definition at line 252 of file colpartition.h.

252  {
253  nearest_neighbor_above_ = part;
254  }

◆ set_nearest_neighbor_below()

void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition *  part)
inline

Definition at line 258 of file colpartition.h.

258  {
259  nearest_neighbor_below_ = part;
260  }

◆ set_owns_blobs()

void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 294 of file colpartition.h.

294  {
295  // Do NOT change ownership flag when there are blobs in the list.
296  // Immediately set the ownership flag when creating copies.
297  ASSERT_HOST(boxes_.empty());
298  owns_blobs_ = owns_blobs;
299  }

◆ set_right_margin()

void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 120 of file colpartition.h.

120  {
121  right_margin_ = margin;
122  }

◆ set_side_step()

void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 216 of file colpartition.h.

216  {
217  side_step_ = step;
218  }

◆ set_space_above()

void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 264 of file colpartition.h.

264  {
265  space_above_ = space;
266  }

◆ set_space_below()

void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 270 of file colpartition.h.

270  {
271  space_below_ = space;
272  }

◆ set_space_to_left()

void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 276 of file colpartition.h.

276  {
277  space_to_left_ = space;
278  }

◆ set_space_to_right()

void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 282 of file colpartition.h.

282  {
283  space_to_right_ = space;
284  }

◆ set_table_type()

void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 232 of file colpartition.h.

232  {
233  if (type_ != PT_TABLE) {
234  type_before_table_ = type_;
235  type_ = PT_TABLE;
236  }
237  }

◆ set_top_spacing()

void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 228 of file colpartition.h.

228  {
229  top_spacing_ = spacing;
230  }

◆ set_type()

void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 183 of file colpartition.h.

183  {
184  type_ = t;
185  }

◆ set_vertical()

void tesseract::ColPartition::set_vertical ( const ICOORD &  v)
inline

Definition at line 192 of file colpartition.h.

192  {
193  vertical_ = v;
194  }

◆ set_working_set()

void tesseract::ColPartition::set_working_set ( WorkingPartSet *  working_set)
inline

Definition at line 201 of file colpartition.h.

201  {
202  working_set_ = working_set;
203  }

◆ SetBlobTypes()

void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1326 of file colpartition.cpp.

1326  {
1327  if (!owns_blobs()) {
1328  return;
1329  }
1330  BLOBNBOX_C_IT it(&boxes_);
1331  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1332  BLOBNBOX *blob = it.data();
1333  if (blob->flow() != BTFT_LEADER) {
1334  blob->set_flow(flow_);
1335  }
1336  blob->set_region_type(blob_type_);
1337  ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
1338  }
1339 }

◆ SetColumnGoodness()

void tesseract::ColPartition::SetColumnGoodness ( const WidthCallback &  cb)

Definition at line 1118 of file colpartition.cpp.

1118  {
1119  int y = MidY();
1120  int width = RightAtY(y) - LeftAtY(y);
1121  good_width_ = cb(width);
1122  good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1123 }

◆ SetLeftTab()

void tesseract::ColPartition::SetLeftTab ( const TabVector *  tab_vector)

Definition at line 525 of file colpartition.cpp.

525  {
526  if (tab_vector != nullptr) {
527  left_key_ = tab_vector->sort_key();
528  left_key_tab_ = left_key_ <= BoxLeftKey();
529  } else {
530  left_key_tab_ = false;
531  }
532  if (!left_key_tab_) {
533  left_key_ = BoxLeftKey();
534  }
535 }

◆ SetPartitionType()

void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet *  columns 
)

Definition at line 1024 of file colpartition.cpp.

1024  {
1025  int first_spanned_col = -1;
1026  ColumnSpanningType span_type = columns->SpanningType(
1027  resolution, bounding_box_.left(), bounding_box_.right(),
1028  std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
1029  left_margin_, right_margin_, &first_column_, &last_column_,
1030  &first_spanned_col);
1031  column_set_ = columns;
1032  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
1033  !IsLineType()) {
1034  // Unequal columns may indicate that the pullout spans one of the columns
1035  // it lies in, so force it to be allocated to just that column.
1036  if (first_spanned_col >= 0) {
1037  first_column_ = first_spanned_col;
1038  last_column_ = first_spanned_col;
1039  } else {
1040  if ((first_column_ & 1) == 0) {
1041  last_column_ = first_column_;
1042  } else if ((last_column_ & 1) == 0) {
1043  first_column_ = last_column_;
1044  } else {
1045  first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1046  }
1047  }
1048  }
1049  type_ = PartitionType(span_type);
1050 }

◆ SetRegionAndFlowTypesFromProjectionValue()

void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1241 of file colpartition.cpp.

1241  {
1242  int blob_count = 0; // Total # blobs.
1243  int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1244  int noisy_count = 0; // Total # neighbours marked as noise.
1245  int hline_count = 0;
1246  int vline_count = 0;
1247  BLOBNBOX_C_IT it(&boxes_);
1248  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1249  BLOBNBOX *blob = it.data();
1250  ++blob_count;
1251  noisy_count += blob->NoisyNeighbours();
1252  good_blob_score_ += blob->GoodTextBlob();
1253  if (blob->region_type() == BRT_HLINE) {
1254  ++hline_count;
1255  }
1256  if (blob->region_type() == BRT_VLINE) {
1257  ++vline_count;
1258  }
1259  }
1260  flow_ = BTFT_NEIGHBOURS;
1261  blob_type_ = BRT_UNKNOWN;
1262  if (hline_count > vline_count) {
1263  flow_ = BTFT_NONE;
1264  blob_type_ = BRT_HLINE;
1265  } else if (vline_count > hline_count) {
1266  flow_ = BTFT_NONE;
1267  blob_type_ = BRT_VLINE;
1268  } else if (value < -1 || 1 < value) {
1269  int long_side;
1270  int short_side;
1271  if (value > 0) {
1272  long_side = bounding_box_.width();
1273  short_side = bounding_box_.height();
1274  blob_type_ = BRT_TEXT;
1275  } else {
1276  long_side = bounding_box_.height();
1277  short_side = bounding_box_.width();
1278  blob_type_ = BRT_VERT_TEXT;
1279  }
1280  // We will combine the old metrics using aspect ratio and blob counts
1281  // with the input value by allowing a strong indication to flip the
1282  // STRONG_CHAIN/CHAIN flow values.
1283  int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1284  if (short_side > kHorzStrongTextlineHeight) {
1285  ++strong_score;
1286  }
1287  if (short_side * kHorzStrongTextlineAspect < long_side) {
1288  ++strong_score;
1289  }
1290  if (abs(value) >= kMinStrongTextValue) {
1291  flow_ = BTFT_STRONG_CHAIN;
1292  } else if (abs(value) >= kMinChainTextValue) {
1293  flow_ = BTFT_CHAIN;
1294  } else {
1295  flow_ = BTFT_NEIGHBOURS;
1296  }
1297  // Upgrade chain to strong chain if the other indicators are good
1298  if (flow_ == BTFT_CHAIN && strong_score == 3) {
1299  flow_ = BTFT_STRONG_CHAIN;
1300  }
1301  // Downgrade strong vertical text to chain if the indicators are bad.
1302  if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2) {
1303  flow_ = BTFT_CHAIN;
1304  }
1305  }
1306  if (flow_ == BTFT_NEIGHBOURS) {
1307  // Check for noisy neighbours.
1308  if (noisy_count >= blob_count) {
1309  flow_ = BTFT_NONTEXT;
1310  blob_type_ = BRT_NOISE;
1311  }
1312  }
1313  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1314  bounding_box_.bottom())) {
1315  tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1316  blob_count, noisy_count, good_blob_score_);
1317  tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_,
1318  blob_type_);
1319  Print();
1320  }
1321  SetBlobTypes();
1322 }
const int kMinChainTextValue
const int kHorzStrongTextlineCount
const int kHorzStrongTextlineHeight
const int kHorzStrongTextlineAspect
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:115
@ BTFT_CHAIN
Definition: blobbox.h:114
@ BTFT_NONTEXT
Definition: blobbox.h:112
const int kMinStrongTextValue

◆ SetRightTab()

void tesseract::ColPartition::SetRightTab ( const TabVector *  tab_vector)

Definition at line 538 of file colpartition.cpp.

538  {
539  if (tab_vector != nullptr) {
540  right_key_ = tab_vector->sort_key();
541  right_key_tab_ = right_key_ >= BoxRightKey();
542  } else {
543  right_key_tab_ = false;
544  }
545  if (!right_key_tab_) {
546  right_key_ = BoxRightKey();
547  }
548 }

◆ SetSpecialBlobsDensity()

void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 611 of file colpartition.cpp.

612  {
614  special_blobs_densities_[type] = density;
615 }

◆ ShallowCopy()

ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1808 of file colpartition.cpp.

1808  {
1809  auto *part = new ColPartition(blob_type_, vertical_);
1810  part->left_margin_ = left_margin_;
1811  part->right_margin_ = right_margin_;
1812  part->bounding_box_ = bounding_box_;
1813  memcpy(part->special_blobs_densities_, special_blobs_densities_,
1814  sizeof(special_blobs_densities_));
1815  part->median_bottom_ = median_bottom_;
1816  part->median_top_ = median_top_;
1817  part->median_height_ = median_height_;
1818  part->median_left_ = median_left_;
1819  part->median_right_ = median_right_;
1820  part->median_width_ = median_width_;
1821  part->good_width_ = good_width_;
1822  part->good_column_ = good_column_;
1823  part->left_key_tab_ = left_key_tab_;
1824  part->right_key_tab_ = right_key_tab_;
1825  part->type_ = type_;
1826  part->flow_ = flow_;
1827  part->left_key_ = left_key_;
1828  part->right_key_ = right_key_;
1829  part->first_column_ = first_column_;
1830  part->last_column_ = last_column_;
1831  part->owns_blobs_ = false;
1832  return part;
1833 }

◆ SingletonPartner()

ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 664 of file colpartition.cpp.

664  {
665  ColPartition_CLIST *partners = upper ? &upper_partners_ : &lower_partners_;
666  if (!partners->singleton()) {
667  return nullptr;
668  }
669  ColPartition_C_IT it(partners);
670  return it.data();
671 }

◆ SmoothPartnerRun()

void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1886 of file colpartition.cpp.

1886  {
1887  STATS left_stats(0, working_set_count);
1888  STATS right_stats(0, working_set_count);
1889  PolyBlockType max_type = type_;
1890  ColPartition *partner;
1891  for (partner = SingletonPartner(false); partner != nullptr;
1892  partner = partner->SingletonPartner(false)) {
1893  if (partner->type_ > max_type) {
1894  max_type = partner->type_;
1895  }
1896  if (column_set_ == partner->column_set_) {
1897  left_stats.add(partner->first_column_, 1);
1898  right_stats.add(partner->last_column_, 1);
1899  }
1900  }
1901  type_ = max_type;
1902  // TODO(rays) Either establish that it isn't necessary to set the columns,
1903  // or find a way to do it that does not cause an assert failure in
1904  // AddToWorkingSet.
1905 #if 0
1906  first_column_ = left_stats.mode();
1907  last_column_ = right_stats.mode();
1908  if (last_column_ < first_column_)
1909  last_column_ = first_column_;
1910 #endif
1911 
1912  for (partner = SingletonPartner(false); partner != nullptr;
1913  partner = partner->SingletonPartner(false)) {
1914  partner->type_ = max_type;
1915 #if 0 // See TODO above
1916  if (column_set_ == partner->column_set_) {
1917  partner->first_column_ = first_column_;
1918  partner->last_column_ = last_column_;
1919  }
1920 #endif
1921  }
1922 }

◆ SortByBBox()

static int tesseract::ColPartition::SortByBBox ( const void *  p1,
const void *  p2 
)
inline static

Definition at line 712 of file colpartition.h.

712  {
713  const ColPartition *part1 = *static_cast<const ColPartition *const *>(p1);
714  const ColPartition *part2 = *static_cast<const ColPartition *const *>(p2);
715  int mid_y1 = part1->bounding_box_.y_middle();
716  int mid_y2 = part2->bounding_box_.y_middle();
717  if ((part2->bounding_box_.bottom() <= mid_y1 &&
718  mid_y1 <= part2->bounding_box_.top()) ||
719  (part1->bounding_box_.bottom() <= mid_y2 &&
720  mid_y2 <= part1->bounding_box_.top())) {
721  // Sort by increasing x.
722  return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
723  }
724  // Sort by decreasing y.
725  return mid_y2 - mid_y1;
726  }

◆ SortKey()

int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 316 of file colpartition.h.

316  {
317  return TabVector::SortKey(vertical_, x, y);
318  }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:274

◆ space_above()

int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 261 of file colpartition.h.

261  {
262  return space_above_;
263  }

◆ space_below()

int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 267 of file colpartition.h.

267  {
268  return space_below_;
269  }

◆ space_to_left()

int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 273 of file colpartition.h.

273  {
274  return space_to_left_;
275  }

◆ space_to_right()

int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 279 of file colpartition.h.

279  {
280  return space_to_right_;
281  }

◆ SpecialBlobsCount()

int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 596 of file colpartition.cpp.

596  {
598  BLOBNBOX_C_IT blob_it(&boxes_);
599  int count = 0;
600  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
601  BLOBNBOX *blob = blob_it.data();
602  BlobSpecialTextType blob_type = blob->special_text_type();
603  if (blob_type == type) {
604  count++;
605  }
606  }
607 
608  return count;
609 }

◆ SpecialBlobsDensity()

float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 591 of file colpartition.cpp.

591  {
593  return special_blobs_densities_[type];
594 }

◆ SplitAt()

ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 865 of file colpartition.cpp.

865  {
866  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right()) {
867  return nullptr; // There will be no change.
868  }
869  ColPartition *split_part = ShallowCopy();
870  split_part->set_owns_blobs(owns_blobs());
871  BLOBNBOX_C_IT it(&boxes_);
872  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
873  BLOBNBOX *bbox = it.data();
874  ColPartition *prev_owner = bbox->owner();
875  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
876  const TBOX &box = bbox->bounding_box();
877  if (box.left() >= split_x) {
878  split_part->AddBox(it.extract());
879  if (owns_blobs() && prev_owner != nullptr) {
880  bbox->set_owner(split_part);
881  }
882  }
883  }
884  if (it.empty()) {
885  // Possible if split-x passes through the first blob.
886  it.add_list_after(&split_part->boxes_);
887  }
888  ASSERT_HOST(!it.empty());
889  if (split_part->IsEmpty()) {
890  // Split part ended up with nothing. Possible if split_x passes
891  // through the last blob.
892  delete split_part;
893  return nullptr;
894  }
895  right_key_tab_ = false;
896  split_part->left_key_tab_ = false;
897  right_margin_ = split_x;
898  split_part->left_margin_ = split_x;
899  ComputeLimits();
900  split_part->ComputeLimits();
901  return split_part;
902 }

◆ SplitAtBlob()

ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX *  split_blob)

Definition at line 828 of file colpartition.cpp.

828  {
829  ColPartition *split_part = ShallowCopy();
830  split_part->set_owns_blobs(owns_blobs());
831  BLOBNBOX_C_IT it(&boxes_);
832  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
833  BLOBNBOX *bbox = it.data();
834  ColPartition *prev_owner = bbox->owner();
835  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
836  if (bbox == split_blob || !split_part->boxes_.empty()) {
837  split_part->AddBox(it.extract());
838  if (owns_blobs() && prev_owner != nullptr) {
839  bbox->set_owner(split_part);
840  }
841  }
842  }
843  ASSERT_HOST(!it.empty());
844  if (split_part->IsEmpty()) {
845  // Split part ended up with nothing. Possible if split_blob is not
846  // in the list of blobs.
847  delete split_part;
848  return nullptr;
849  }
850  right_key_tab_ = false;
851  split_part->left_key_tab_ = false;
852  ComputeLimits();
853  // TODO(nbeato) Merge Ray's CL like this:
854  // if (owns_blobs())
855  // SetBlobTextlineGoodness();
856  split_part->ComputeLimits();
857  // TODO(nbeato) Merge Ray's CL like this:
858  // if (split_part->owns_blobs())
859  // split_part->SetBlobTextlineGoodness();
860  return split_part;
861 }

◆ top_spacing()

int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 225 of file colpartition.h.

225  {
226  return top_spacing_;
227  }

◆ type()

PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 180 of file colpartition.h.

180  {
181  return type_;
182  }

◆ TypesMatch() [1/2]

static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inline static

Definition at line 412 of file colpartition.h.

412  {
413  return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
414  !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
415  }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:443

◆ TypesMatch() [2/2]

bool tesseract::ColPartition::TypesMatch ( const ColPartition &  other) const
inline

Definition at line 409 of file colpartition.h.

409  {
410  return TypesMatch(blob_type_, other.blob_type_);
411  }
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:409

◆ TypesSimilar()

static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inline static

Definition at line 418 of file colpartition.h.

418  {
419  return (type1 == type2 ||
420  (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
421  (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
422  }
@ PT_INLINE_EQUATION
Definition: publictypes.h:59

◆ upper_partners()

ColPartition_CLIST* tesseract::ColPartition::upper_partners ( )
inline

Definition at line 195 of file colpartition.h.

195  {
196  return &upper_partners_;
197  }

◆ VCoreOverlap()

int tesseract::ColPartition::VCoreOverlap ( const ColPartition &  other) const
inline

Definition at line 375 of file colpartition.h.

375  {
376  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
377  return 0;
378  }
379  return std::min(median_top_, other.median_top_) -
380  std::max(median_bottom_, other.median_bottom_);
381  }

◆ VOverlaps()

bool tesseract::ColPartition::VOverlaps ( const ColPartition &  other) const
inline

Definition at line 370 of file colpartition.h.

370  {
371  return bounding_box_.y_gap(other.bounding_box_) < 0;
372  }
int y_gap(const TBOX &box) const
Definition: rect.h:245

◆ VSignificantCoreOverlap()

bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition &  other) const
inline

Definition at line 390 of file colpartition.h.

390  {
391  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
392  return false;
393  }
394  int overlap = VCoreOverlap(other);
395  int height = std::min(median_top_ - median_bottom_,
396  other.median_top_ - other.median_bottom_);
397  return overlap * 3 > height;
398  }
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:375

◆ WithinSameMargins()

bool tesseract::ColPartition::WithinSameMargins ( const ColPartition &  other) const
inline

Definition at line 401 of file colpartition.h.

401  {
402  return left_margin_ <= other.bounding_box_.left() &&
403  bounding_box_.left() >= other.left_margin_ &&
404  bounding_box_.right() <= other.right_margin_ &&
405  right_margin_ >= other.bounding_box_.right();
406  }

◆ XAtY()

int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 320 of file colpartition.h.

320  {
321  return TabVector::XAtY(vertical_, sort_key, y);
322  }
int XAtY(int y) const
Definition: tabvector.h:181

The documentation for this class was generated from the following files: