tesseract  5.0.0
tesseract::TabVector Class Reference

#include <tabvector.h>

Inheritance diagram for tesseract::TabVector:
tesseract::ELIST2_LINK

Public Member Functions

 TabVector ()=default
 
 ~TabVector ()=default
 
 TabVector (const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew, BLOBNBOX *blob)
 
TabVector * ShallowCopy () const
 
const ICOORD & startpt () const
 
const ICOORD & endpt () const
 
int extended_ymax () const
 
int extended_ymin () const
 
int sort_key () const
 
int mean_width () const
 
void set_top_constraints (TabConstraint_LIST *constraints)
 
void set_bottom_constraints (TabConstraint_LIST *constraints)
 
TabVector_CLIST * partners ()
 
void set_startpt (const ICOORD &start)
 
void set_endpt (const ICOORD &end)
 
bool intersects_other_lines () const
 
void set_intersects_other_lines (bool value)
 
int XAtY (int y) const
 
int VOverlap (const TabVector &other) const
 
int VOverlap (int top_y, int bottom_y) const
 
int ExtendedOverlap (int top_y, int bottom_y) const
 
bool IsLeftTab () const
 
bool IsRightTab () const
 
bool IsSeparator () const
 
bool IsCenterTab () const
 
bool IsRagged () const
 
bool IsLeftOf (const TabVector &other) const
 
bool Partnerless ()
 
int BoxCount ()
 
void Freeze ()
 
void XYFlip ()
 
void ReflectInYAxis ()
 
void ExtendToBox (BLOBNBOX *blob)
 
void SetYStart (int start_y)
 
void SetYEnd (int end_y)
 
void Rotate (const FCOORD &rotation)
 
void SetupConstraints ()
 
void SetupPartnerConstraints ()
 
void SetupPartnerConstraints (TabVector *partner)
 
void ApplyConstraints ()
 
bool SimilarTo (const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const
 
void MergeWith (const ICOORD &vertical, TabVector *other)
 
void AddPartner (TabVector *partner)
 
bool IsAPartner (const TabVector *other)
 
void Print (const char *prefix)
 
void Debug (const char *prefix)
 
void Display (ScrollView *tab_win)
 
void FitAndEvaluateIfNeeded (const ICOORD &vertical, TabFind *finder)
 
void Evaluate (const ICOORD &vertical, TabFind *finder)
 
bool Fit (ICOORD vertical, bool force_parallel)
 
TabVector * VerticalTextlinePartner ()
 
TabVector * GetSinglePartner ()
 
- Public Member Functions inherited from tesseract::ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)=delete
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static TabVector * FitVector (TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y)
 
static int SortKey (const ICOORD &vertical, int x, int y)
 
static int XAtY (const ICOORD &vertical, int sort_key, int y)
 
static int SortVectorsByKey (const void *v1, const void *v2)
 
static void MergeSimilarTabVectors (const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
 

Detailed Description

Definition at line 105 of file tabvector.h.

Constructor & Destructor Documentation

◆ TabVector() [1/2]

tesseract::TabVector::TabVector ( )
default

◆ ~TabVector()

tesseract::TabVector::~TabVector ( )
default

◆ TabVector() [2/2]

tesseract::TabVector::TabVector ( const TabVector &  src,
TabAlignment  alignment,
const ICOORD &  vertical_skew,
BLOBNBOX *  blob 
)

Definition at line 196 of file tabvector.cpp.

198  : extended_ymin_(src.extended_ymin_)
199  , extended_ymax_(src.extended_ymax_)
200  , needs_refit_(true)
201  , needs_evaluation_(true)
202  , alignment_(alignment) {
203  BLOBNBOX_C_IT it(&boxes_);
204  it.add_to_end(blob);
205  TBOX box = blob->bounding_box();
206  if (IsLeftTab()) {
207  startpt_ = box.botleft();
208  endpt_ = box.topleft();
209  } else {
210  startpt_ = box.botright();
211  endpt_ = box.topright();
212  }
213  sort_key_ =
214  SortKey(vertical_skew, (startpt_.x() + endpt_.x()) / 2, (startpt_.y() + endpt_.y()) / 2);
215  if (textord_debug_tabfind > 3) {
216  Print("Constructed a new tab vector:");
217  }
218 }
@ TBOX
int textord_debug_tabfind
Definition: alignedblob.cpp:29
TDimension y() const
access function
Definition: points.h:62
TDimension x() const
access function
Definition: points.h:58
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:274
bool IsLeftTab() const
Definition: tabvector.h:205
void Print(const char *prefix)
Definition: tabvector.cpp:518

Member Function Documentation

◆ AddPartner()

void tesseract::TabVector::AddPartner ( TabVector *  partner)

Definition at line 488 of file tabvector.cpp.

488  {
489  if (IsSeparator() || partner->IsSeparator()) {
490  return;
491  }
492  TabVector_C_IT it(&partners_);
493  if (!it.empty()) {
494  it.move_to_last();
495  if (it.data() == partner) {
496  return;
497  }
498  }
499  it.add_after_then_move(partner);
500 }
bool IsSeparator() const
Definition: tabvector.h:213

◆ ApplyConstraints()

void tesseract::TabVector::ApplyConstraints ( )

Definition at line 342 of file tabvector.cpp.

342  {
343  if (top_constraints_ != nullptr) {
344  TabConstraint::ApplyConstraints(top_constraints_);
345  }
346  if (bottom_constraints_ != nullptr) {
347  TabConstraint::ApplyConstraints(bottom_constraints_);
348  }
349 }
static void ApplyConstraints(TabConstraint_LIST *constraints)
Definition: tabvector.cpp:122

◆ BoxCount()

int tesseract::TabVector::BoxCount ( )
inline

Definition at line 237 of file tabvector.h.

237  {
238  return boxes_.length();
239  }

◆ Debug()

void tesseract::TabVector::Debug ( const char *  prefix)

Definition at line 527 of file tabvector.cpp.

527  {
528  Print(prefix);
529  BLOBNBOX_C_IT it(&boxes_);
530  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
531  BLOBNBOX *bbox = it.data();
532  const TBOX &box = bbox->bounding_box();
533  tprintf("Box at (%d,%d)->(%d,%d)\n", box.left(), box.bottom(), box.right(), box.top());
534  }
535 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

◆ Display()

void tesseract::TabVector::Display ( ScrollView *  tab_win)

Definition at line 540 of file tabvector.cpp.

540  {
541  if (textord_debug_printable) {
542  tab_win->Pen(ScrollView::BLUE);
543  } else if (alignment_ == TA_LEFT_ALIGNED) {
544  tab_win->Pen(ScrollView::LIME_GREEN);
545  } else if (alignment_ == TA_LEFT_RAGGED) {
546  tab_win->Pen(ScrollView::DARK_GREEN);
547  } else if (alignment_ == TA_RIGHT_ALIGNED) {
548  tab_win->Pen(ScrollView::PINK);
549  } else if (alignment_ == TA_RIGHT_RAGGED) {
550  tab_win->Pen(ScrollView::CORAL);
551  } else {
552  tab_win->Pen(ScrollView::WHITE);
553  }
554  tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());
555  tab_win->Pen(ScrollView::GREY);
556  tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
557  tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());
558  auto score_string = std::to_string(percent_score_);
559  tab_win->TextAttributes("Times", 50, false, false, false);
560  tab_win->Text(startpt_.x(), startpt_.y(), score_string.c_str());
561 }
bool textord_debug_printable
Definition: alignedblob.cpp:43
@ TA_RIGHT_ALIGNED
Definition: tabvector.h:45
@ TA_RIGHT_RAGGED
Definition: tabvector.h:46
@ TA_LEFT_ALIGNED
Definition: tabvector.h:42
@ TA_LEFT_RAGGED
Definition: tabvector.h:43

◆ endpt()

const ICOORD& tesseract::TabVector::endpt ( ) const
inline

Definition at line 141 of file tabvector.h.

141  {
142  return endpt_;
143  }

◆ Evaluate()

void tesseract::TabVector::Evaluate ( const ICOORD &  vertical,
TabFind *  finder 
)

Definition at line 581 of file tabvector.cpp.

581  {
582  bool debug = false;
583  needs_evaluation_ = false;
584  int length = endpt_.y() - startpt_.y();
585  if (length == 0 || boxes_.empty()) {
586  percent_score_ = 0;
587  Print("Zero length in evaluate");
588  return;
589  }
590  // Compute the mean box height.
591  BLOBNBOX_C_IT it(&boxes_);
592  int mean_height = 0;
593  int height_count = 0;
594  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
595  BLOBNBOX *bbox = it.data();
596  const TBOX &box = bbox->bounding_box();
597  int height = box.height();
598  mean_height += height;
599  ++height_count;
600  }
601  if (height_count > 0) {
602  mean_height /= height_count;
603  }
604  int max_gutter = kGutterMultiple * mean_height;
605  if (IsRagged()) {
606  // Ragged edges face a tougher test in that the gap must always be within
607  // the height of the blob.
608  max_gutter = kGutterToNeighbourRatio * mean_height;
609  }
610 
611  STATS gutters(0, max_gutter + 1);
612  // Evaluate the boxes for their goodness, calculating the coverage as we go.
613  // Remove boxes that are not good and shorten the list to the first and
614  // last good boxes.
615  int num_deleted_boxes = 0;
616  bool text_on_image = false;
617  int good_length = 0;
618  const TBOX *prev_good_box = nullptr;
619  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
620  BLOBNBOX *bbox = it.data();
621  const TBOX &box = bbox->bounding_box();
622  int mid_y = (box.top() + box.bottom()) / 2;
623  if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
624  if (!debug) {
625  tprintf("After already deleting %d boxes, ", num_deleted_boxes);
626  Print("Starting evaluation");
627  }
628  debug = true;
629  }
630  // A good box is one where the nearest neighbour on the inside is closer
631  // than half the distance to the nearest neighbour on the outside
632  // (of the putative column).
633  bool left = IsLeftTab();
634  int tab_x = XAtY(mid_y);
635  int gutter_width;
636  int neighbour_gap;
637  finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width,
638  &neighbour_gap);
639  if (debug) {
640  tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", box.left(), box.bottom(),
641  box.right(), box.top(), gutter_width, neighbour_gap);
642  }
643  // Now we can make the test.
644  if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
645  // A good box contributes its height to the good_length.
646  good_length += box.top() - box.bottom();
647  gutters.add(gutter_width, 1);
648  // Two good boxes together contribute the gap between them
649  // to the good_length as well, as long as the gap is not
650  // too big.
651  if (prev_good_box != nullptr) {
652  int vertical_gap = box.bottom() - prev_good_box->top();
653  double size1 = sqrt(static_cast<double>(prev_good_box->area()));
654  double size2 = sqrt(static_cast<double>(box.area()));
655  if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2)) {
656  good_length += vertical_gap;
657  }
658  if (debug) {
659  tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n", vertical_gap,
660  kMaxFillinMultiple * std::min(size1, size2), good_length);
661  }
662  } else {
663  // Adjust the start to the first good box.
664  SetYStart(box.bottom());
665  }
666  prev_good_box = &box;
667  if (bbox->flow() == BTFT_TEXT_ON_IMAGE) {
668  text_on_image = true;
669  }
670  } else {
671  // Get rid of boxes that are not good.
672  if (debug) {
673  tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", box.left(), box.bottom(),
674  box.right(), box.top(), gutter_width, neighbour_gap);
675  }
676  it.extract();
677  ++num_deleted_boxes;
678  }
679  }
680  if (debug) {
681  Print("Evaluating:");
682  }
683  // If there are any good boxes, do it again, except this time get rid of
684  // boxes that have a gutter that is a small fraction of the mean gutter.
685  // This filters out ends that run into a coincidental gap in the text.
686  int search_top = endpt_.y();
687  int search_bottom = startpt_.y();
688  int median_gutter = IntCastRounded(gutters.median());
689  if (gutters.get_total() > 0) {
690  prev_good_box = nullptr;
691  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
692  BLOBNBOX *bbox = it.data();
693  const TBOX &box = bbox->bounding_box();
694  int mid_y = (box.top() + box.bottom()) / 2;
695  // A good box is one where the gutter width is at least some constant
696  // fraction of the mean gutter width.
697  bool left = IsLeftTab();
698  int tab_x = XAtY(mid_y);
699  int max_gutter = kGutterMultiple * mean_height;
700  if (IsRagged()) {
701  // Ragged edges face a tougher test in that the gap must always be
702  // within the height of the blob.
703  max_gutter = kGutterToNeighbourRatio * mean_height;
704  }
705  int gutter_width;
706  int neighbour_gap;
707  finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width,
708  &neighbour_gap);
709  // Now we can make the test.
710  if (gutter_width >= median_gutter * kMinGutterFraction) {
711  if (prev_good_box == nullptr) {
712  // Adjust the start to the first good box.
713  SetYStart(box.bottom());
714  search_bottom = box.top();
715  }
716  prev_good_box = &box;
717  search_top = box.bottom();
718  } else {
719  // Get rid of boxes that are not good.
720  if (debug) {
721  tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n", box.left(),
722  box.bottom(), box.right(), box.top(), gutter_width, median_gutter);
723  }
724  it.extract();
725  ++num_deleted_boxes;
726  }
727  }
728  }
729  // If there has been a good box, adjust the end.
730  if (prev_good_box != nullptr) {
731  SetYEnd(prev_good_box->top());
732  // Compute the percentage of the vector that is occupied by good boxes.
733  int length = endpt_.y() - startpt_.y();
734  percent_score_ = 100 * good_length / length;
735  if (num_deleted_boxes > 0) {
736  needs_refit_ = true;
737  FitAndEvaluateIfNeeded(vertical, finder);
738  if (boxes_.empty()) {
739  return;
740  }
741  }
742  // Test the gutter over the whole vector, instead of just at the boxes.
743  int required_shift;
744  if (search_bottom > search_top) {
745  search_bottom = startpt_.y();
746  search_top = endpt_.y();
747  }
748  double min_gutter_width = kLineCountReciprocal / boxes_.length();
749  min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
750  min_gutter_width *= mean_height;
751  int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
752  if (median_gutter > max_gutter_width) {
753  max_gutter_width = median_gutter;
754  }
755  int gutter_width = finder->GutterWidth(search_bottom, search_top, *this, text_on_image,
756  max_gutter_width, &required_shift);
757  if (gutter_width < min_gutter_width) {
758  if (debug) {
759  tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n", gutter_width,
760  min_gutter_width);
761  }
762  boxes_.shallow_clear();
763  percent_score_ = 0;
764  } else if (debug) {
765  tprintf("Final gutter %d, vs limit of %g, required shift = %d\n", gutter_width,
766  min_gutter_width, required_shift);
767  }
768  } else {
769  // There are no good boxes left, so score is 0.
770  percent_score_ = 0;
771  }
772 
773  if (debug) {
774  Print("Evaluation complete:");
775  }
776 }
const double kMinAlignedGutter
Definition: tabvector.cpp:51
int IntCastRounded(double x)
Definition: helpers.h:175
const int kGutterMultiple
Definition: tabvector.cpp:36
const double kMinRaggedGutter
Definition: tabvector.cpp:53
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:116
const double kLineCountReciprocal
Definition: tabvector.cpp:49
const int kMaxFillinMultiple
Definition: tabvector.cpp:45
const double kMinGutterFraction
Definition: tabvector.cpp:47
const int kGutterToNeighbourRatio
Definition: tabvector.cpp:38
static bool WithinTestRegion(int detail_level, int x, int y)
int XAtY(int y) const
Definition: tabvector.h:181
void SetYEnd(int end_y)
Definition: tabvector.cpp:268
bool IsRagged() const
Definition: tabvector.h:221
void SetYStart(int start_y)
Definition: tabvector.cpp:263
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:566

◆ extended_ymax()

int tesseract::TabVector::extended_ymax ( ) const
inline

Definition at line 144 of file tabvector.h.

144  {
145  return extended_ymax_;
146  }

◆ extended_ymin()

int tesseract::TabVector::extended_ymin ( ) const
inline

Definition at line 147 of file tabvector.h.

147  {
148  return extended_ymin_;
149  }

◆ ExtendedOverlap()

int tesseract::TabVector::ExtendedOverlap ( int  top_y,
int  bottom_y 
) const
inline

Definition at line 200 of file tabvector.h.

200  {
201  return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
202  }

◆ ExtendToBox()

void tesseract::TabVector::ExtendToBox ( BLOBNBOX *  blob)

Definition at line 238 of file tabvector.cpp.

238  {
239  TBOX new_box = new_blob->bounding_box();
240  BLOBNBOX_C_IT it(&boxes_);
241  if (!it.empty()) {
242  BLOBNBOX *blob = it.data();
243  TBOX box = blob->bounding_box();
244  while (!it.at_last() && box.top() <= new_box.top()) {
245  if (blob == new_blob) {
246  return; // We have it already.
247  }
248  it.forward();
249  blob = it.data();
250  box = blob->bounding_box();
251  }
252  if (box.top() >= new_box.top()) {
253  it.add_before_stay_put(new_blob);
254  needs_refit_ = true;
255  return;
256  }
257  }
258  needs_refit_ = true;
259  it.add_after_stay_put(new_blob);
260 }

◆ Fit()

bool tesseract::TabVector::Fit ( ICOORD  vertical,
bool  force_parallel 
)

Definition at line 783 of file tabvector.cpp.

783  {
784  needs_refit_ = false;
785  if (boxes_.empty()) {
786  // Don't refit something with no boxes, as that only happens
787  // in Evaluate, and we don't want to end up with a zero vector.
788  if (!force_parallel) {
789  return false;
790  }
791  // If we are forcing parallel, then we just need to set the sort_key_.
792  ICOORD midpt = startpt_;
793  midpt += endpt_;
794  midpt /= 2;
795  sort_key_ = SortKey(vertical, midpt.x(), midpt.y());
796  return startpt_.y() != endpt_.y();
797  }
798  if (!force_parallel && !IsRagged()) {
799  // Use a fitted line as the vertical.
800  DetLineFit linepoints;
801  BLOBNBOX_C_IT it(&boxes_);
802  // Fit a line to all the boxes in the list.
803  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
804  BLOBNBOX *bbox = it.data();
805  const TBOX &box = bbox->bounding_box();
806  int x1 = IsRightTab() ? box.right() : box.left();
807  ICOORD boxpt(x1, box.bottom());
808  linepoints.Add(boxpt);
809  if (it.at_last()) {
810  ICOORD top_pt(x1, box.top());
811  linepoints.Add(top_pt);
812  }
813  }
814  linepoints.Fit(&startpt_, &endpt_);
815  if (startpt_.y() != endpt_.y()) {
816  vertical = endpt_;
817  vertical -= startpt_;
818  }
819  }
820  int start_y = startpt_.y();
821  int end_y = endpt_.y();
822  sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX;
823  BLOBNBOX_C_IT it(&boxes_);
824  // Choose a line parallel to the vertical such that all boxes are on the
825  // correct side of it.
826  mean_width_ = 0;
827  int width_count = 0;
828  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
829  BLOBNBOX *bbox = it.data();
830  const TBOX &box = bbox->bounding_box();
831  mean_width_ += box.width();
832  ++width_count;
833  int x1 = IsRightTab() ? box.right() : box.left();
834  // Test both the bottom and the top, as one will be more extreme, depending
835  // on the direction of skew.
836  int bottom_y = box.bottom();
837  int top_y = box.top();
838  int key = SortKey(vertical, x1, bottom_y);
839  if (IsLeftTab() == (key < sort_key_)) {
840  sort_key_ = key;
841  startpt_ = ICOORD(x1, bottom_y);
842  }
843  key = SortKey(vertical, x1, top_y);
844  if (IsLeftTab() == (key < sort_key_)) {
845  sort_key_ = key;
846  startpt_ = ICOORD(x1, top_y);
847  }
848  if (it.at_first()) {
849  start_y = bottom_y;
850  }
851  if (it.at_last()) {
852  end_y = top_y;
853  }
854  }
855  if (width_count > 0) {
856  mean_width_ = (mean_width_ + width_count - 1) / width_count;
857  }
858  endpt_ = startpt_ + vertical;
859  needs_evaluation_ = true;
860  if (start_y != end_y) {
861  // Set the ends of the vector to fully include the first and last blobs.
862  startpt_.set_x(XAtY(vertical, sort_key_, start_y));
863  startpt_.set_y(start_y);
864  endpt_.set_x(XAtY(vertical, sort_key_, end_y));
865  endpt_.set_y(end_y);
866  return true;
867  }
868  return false;
869 }
void set_x(TDimension xin)
rewrite function
Definition: points.h:67
void set_y(TDimension yin)
rewrite function
Definition: points.h:71
bool IsRightTab() const
Definition: tabvector.h:209

◆ FitAndEvaluateIfNeeded()

void tesseract::TabVector::FitAndEvaluateIfNeeded ( const ICOORD &  vertical,
TabFind *  finder 
)

Definition at line 566 of file tabvector.cpp.

566  {
567  if (needs_refit_) {
568  Fit(vertical, true);
569  }
570  if (needs_evaluation_) {
571  Evaluate(vertical, finder);
572  }
573 }
void Evaluate(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:581
bool Fit(ICOORD vertical, bool force_parallel)
Definition: tabvector.cpp:783

◆ FitVector()

TabVector * tesseract::TabVector::FitVector ( TabAlignment  alignment,
ICOORD  vertical,
int  extended_start_y,
int  extended_end_y,
BLOBNBOX_CLIST *  good_points,
int *  vertical_x,
int *  vertical_y 
)
static

Definition at line 176 of file tabvector.cpp.

178  {
179  auto *vector = new TabVector(extended_start_y, extended_end_y, alignment, good_points);
180  if (!vector->Fit(vertical, false)) {
181  delete vector;
182  return nullptr;
183  }
184  if (!vector->IsRagged()) {
185  vertical = vector->endpt_ - vector->startpt_;
186  int weight = vector->BoxCount();
187  *vertical_x += vertical.x() * weight;
188  *vertical_y += vertical.y() * weight;
189  }
190  return vector;
191 }

◆ Freeze()

void tesseract::TabVector::Freeze ( )
inline

Definition at line 242 of file tabvector.h.

242  {
243  boxes_.shallow_clear();
244  }

◆ GetSinglePartner()

TabVector * tesseract::TabVector::GetSinglePartner ( )

Definition at line 872 of file tabvector.cpp.

872  {
873  if (!partners_.singleton()) {
874  return nullptr;
875  }
876  TabVector_C_IT partner_it(&partners_);
877  TabVector *partner = partner_it.data();
878  return partner;
879 }

◆ intersects_other_lines()

bool tesseract::TabVector::intersects_other_lines ( ) const
inline

Definition at line 171 of file tabvector.h.

171  {
172  return intersects_other_lines_;
173  }

◆ IsAPartner()

bool tesseract::TabVector::IsAPartner ( const TabVector *  other)

Definition at line 503 of file tabvector.cpp.

503  {
504  TabVector_C_IT it(&partners_);
505  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
506  if (it.data() == other) {
507  return true;
508  }
509  }
510  return false;
511 }

◆ IsCenterTab()

bool tesseract::TabVector::IsCenterTab ( ) const
inline

Definition at line 217 of file tabvector.h.

217  {
218  return alignment_ == TA_CENTER_JUSTIFIED;
219  }
@ TA_CENTER_JUSTIFIED
Definition: tabvector.h:44

◆ IsLeftOf()

bool tesseract::TabVector::IsLeftOf ( const TabVector &  other) const
inline

Definition at line 227 of file tabvector.h.

227  {
228  return sort_key_ < other.sort_key_;
229  }

◆ IsLeftTab()

bool tesseract::TabVector::IsLeftTab ( ) const
inline

Definition at line 205 of file tabvector.h.

205  {
206  return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
207  }

◆ IsRagged()

bool tesseract::TabVector::IsRagged ( ) const
inline

Definition at line 221 of file tabvector.h.

221  {
222  return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
223  }

◆ IsRightTab()

bool tesseract::TabVector::IsRightTab ( ) const
inline

Definition at line 209 of file tabvector.h.

209  {
210  return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
211  }

◆ IsSeparator()

bool tesseract::TabVector::IsSeparator ( ) const
inline

Definition at line 213 of file tabvector.h.

213  {
214  return alignment_ == TA_SEPARATOR;
215  }
@ TA_SEPARATOR
Definition: tabvector.h:47

◆ mean_width()

int tesseract::TabVector::mean_width ( ) const
inline

Definition at line 153 of file tabvector.h.

153  {
154  return mean_width_;
155  }

◆ MergeSimilarTabVectors()

void tesseract::TabVector::MergeSimilarTabVectors ( const ICOORD &  vertical,
TabVector_LIST *  vectors,
BlobGrid *  grid 
)
static

Definition at line 352 of file tabvector.cpp.

353  {
354  TabVector_IT it1(vectors);
355  for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
356  TabVector *v1 = it1.data();
357  TabVector_IT it2(it1);
358  for (it2.forward(); !it2.at_first(); it2.forward()) {
359  TabVector *v2 = it2.data();
360  if (v2->SimilarTo(vertical, *v1, grid)) {
361  // Merge into the forward one, in case the combined vector now
362  // overlaps one in between.
363  if (textord_debug_tabfind) {
364  v2->Print("Merging");
365  v1->Print("by deleting");
366  }
367  v2->MergeWith(vertical, it1.extract());
368  if (textord_debug_tabfind) {
369  v2->Print("Producing");
370  }
371  ICOORD merged_vector = v2->endpt();
372  merged_vector -= v2->startpt();
373  if (textord_debug_tabfind && abs(merged_vector.x()) > 100) {
374  v2->Print("Garbage result of merge?");
375  }
376  break;
377  }
378  }
379  }
380 }

◆ MergeWith()

void tesseract::TabVector::MergeWith ( const ICOORD &  vertical,
TabVector *  other 
)

Definition at line 454 of file tabvector.cpp.

454  {
455  extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_);
456  extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_);
457  if (other->IsRagged()) {
458  alignment_ = other->alignment_;
459  }
460  // Merge sort the two lists of boxes.
461  BLOBNBOX_C_IT it1(&boxes_);
462  BLOBNBOX_C_IT it2(&other->boxes_);
463  while (!it2.empty()) {
464  BLOBNBOX *bbox2 = it2.extract();
465  it2.forward();
466  TBOX box2 = bbox2->bounding_box();
467  BLOBNBOX *bbox1 = it1.data();
468  TBOX box1 = bbox1->bounding_box();
469  while (box1.bottom() < box2.bottom() && !it1.at_last()) {
470  it1.forward();
471  bbox1 = it1.data();
472  box1 = bbox1->bounding_box();
473  }
474  if (box1.bottom() < box2.bottom()) {
475  it1.add_to_end(bbox2);
476  } else if (bbox1 != bbox2) {
477  it1.add_before_stay_put(bbox2);
478  }
479  }
480  Fit(vertical, true);
481  other->Delete(this);
482 }

◆ Partnerless()

bool tesseract::TabVector::Partnerless ( )
inline

Definition at line 232 of file tabvector.h.

232  {
233  return partners_.empty();
234  }

◆ partners()

TabVector_CLIST* tesseract::TabVector::partners ( )
inline

Definition at line 162 of file tabvector.h.

162  {
163  return &partners_;
164  }

◆ Print()

void tesseract::TabVector::Print ( const char *  prefix)

Definition at line 518 of file tabvector.cpp.

518  {
519  tprintf(
520  "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
521  " partners=%d\n",
522  prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(),
523  mean_width_, percent_score_, sort_key_, boxes_.length(), partners_.length());
524 }

◆ ReflectInYAxis()

void tesseract::TabVector::ReflectInYAxis ( )
inline

Definition at line 257 of file tabvector.h.

257  {
258  startpt_.set_x(-startpt_.x());
259  endpt_.set_x(-endpt_.x());
260  sort_key_ = -sort_key_;
261  if (alignment_ == TA_LEFT_ALIGNED) {
262  alignment_ = TA_RIGHT_ALIGNED;
263  } else if (alignment_ == TA_RIGHT_ALIGNED) {
264  alignment_ = TA_LEFT_ALIGNED;
265  }
266  if (alignment_ == TA_LEFT_RAGGED) {
267  alignment_ = TA_RIGHT_RAGGED;
268  } else if (alignment_ == TA_RIGHT_RAGGED) {
269  alignment_ = TA_LEFT_RAGGED;
270  }
271  }

◆ Rotate()

void tesseract::TabVector::Rotate ( const FCOORD &  rotation)

Definition at line 274 of file tabvector.cpp.

274  {
275  startpt_.rotate(rotation);
276  endpt_.rotate(rotation);
277  int dx = endpt_.x() - startpt_.x();
278  int dy = endpt_.y() - startpt_.y();
279  if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) {
280  // Need to flip start/end.
281  ICOORD tmp = startpt_;
282  startpt_ = endpt_;
283  endpt_ = tmp;
284  }
285 }
void rotate(const FCOORD &vec)
Definition: points.h:511

◆ set_bottom_constraints()

void tesseract::TabVector::set_bottom_constraints ( TabConstraint_LIST *  constraints)
inline

Definition at line 159 of file tabvector.h.

159  {
160  bottom_constraints_ = constraints;
161  }

◆ set_endpt()

void tesseract::TabVector::set_endpt ( const ICOORD &  end)
inline

Definition at line 168 of file tabvector.h.

168  {
169  endpt_ = end;
170  }

◆ set_intersects_other_lines()

void tesseract::TabVector::set_intersects_other_lines ( bool  value)
inline

Definition at line 174 of file tabvector.h.

174  {
175  intersects_other_lines_ = value;
176  }

◆ set_startpt()

void tesseract::TabVector::set_startpt ( const ICOORD &  start)
inline

Definition at line 165 of file tabvector.h.

165  {
166  startpt_ = start;
167  }

◆ set_top_constraints()

void tesseract::TabVector::set_top_constraints ( TabConstraint_LIST *  constraints)
inline

Definition at line 156 of file tabvector.h.

156  {
157  top_constraints_ = constraints;
158  }

◆ SetupConstraints()

void tesseract::TabVector::SetupConstraints ( )

Definition at line 289 of file tabvector.cpp.

289  {
290  TabConstraint::CreateConstraint(this, false);
291  TabConstraint::CreateConstraint(this, true);
292 }
static void CreateConstraint(TabVector *vector, bool is_top)
Definition: tabvector.cpp:63

◆ SetupPartnerConstraints() [1/2]

void tesseract::TabVector::SetupPartnerConstraints ( )

Definition at line 295 of file tabvector.cpp.

295  {
296  // With the first and last partner, we want a common bottom and top,
297  // respectively, and for each change of partner, we want a common
298  // top of first with bottom of next.
299  TabVector_C_IT it(&partners_);
300  TabVector *prev_partner = nullptr;
301  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
302  TabVector *partner = it.data();
303  if (partner->top_constraints_ == nullptr || partner->bottom_constraints_ == nullptr) {
304  partner->Print("Impossible: has no constraints");
305  Print("This vector has it as a partner");
306  continue;
307  }
308  if (prev_partner == nullptr) {
309  // This is the first partner, so common bottom.
310  if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) {
311  TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_);
312  }
313  } else {
314  // We need prev top to be common with partner bottom.
315  if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_,
316  partner->bottom_constraints_)) {
317  TabConstraint::MergeConstraints(prev_partner->top_constraints_,
318  partner->bottom_constraints_);
319  }
320  }
321  prev_partner = partner;
322  if (it.at_last()) {
323  // This is the last partner, so common top.
324  if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) {
325  TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_);
326  }
327  }
328  }
329 }
static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2)
Definition: tabvector.cpp:95
static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2)
Definition: tabvector.cpp:76

◆ SetupPartnerConstraints() [2/2]

void tesseract::TabVector::SetupPartnerConstraints ( TabVector *  partner)

Definition at line 332 of file tabvector.cpp.

332  {
333  if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) {
334  TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_);
335  }
336  if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) {
337  TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_);
338  }
339 }

◆ SetYEnd()

void tesseract::TabVector::SetYEnd ( int  end_y)

Definition at line 268 of file tabvector.cpp.

268  {
269  endpt_.set_x(XAtY(end_y));
270  endpt_.set_y(end_y);
271 }

◆ SetYStart()

void tesseract::TabVector::SetYStart ( int  start_y)

Definition at line 263 of file tabvector.cpp.

263  {
264  startpt_.set_x(XAtY(start_y));
265  startpt_.set_y(start_y);
266 }

◆ ShallowCopy()

TabVector * tesseract::TabVector::ShallowCopy ( ) const

Definition at line 225 of file tabvector.cpp.

225  {
226  auto *copy = new TabVector();
227  copy->startpt_ = startpt_;
228  copy->endpt_ = endpt_;
229  copy->alignment_ = alignment_;
230  copy->extended_ymax_ = extended_ymax_;
231  copy->extended_ymin_ = extended_ymin_;
232  copy->intersects_other_lines_ = intersects_other_lines_;
233  return copy;
234 }

◆ SimilarTo()

bool tesseract::TabVector::SimilarTo ( const ICOORD &  vertical,
const TabVector &  other,
BlobGrid *  grid 
) const

Definition at line 384 of file tabvector.cpp.

384  {
385  if ((IsRightTab() && other.IsRightTab()) || (IsLeftTab() && other.IsLeftTab())) {
386  // If they don't overlap, at least in extensions, then there is no chance.
387  if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0) {
388  return false;
389  }
390  // A fast approximation to the scale factor of the sort_key_.
391  int v_scale = abs(vertical.y());
392  if (v_scale == 0) {
393  v_scale = 1;
394  }
395  // If they are close enough, then OK.
396  if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
397  sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) {
398  return true;
399  }
400  // Ragged tabs get a bigger threshold.
401  if (!IsRagged() || !other.IsRagged() ||
402  sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
403  sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_) {
404  return false;
405  }
406  if (grid == nullptr) {
407  // There is nothing else to test!
408  return true;
409  }
410  // If there is nothing in the rectangle between the vector that is going to
411  // move, and the place it is moving to, then they can be merged.
412  // Setup a vertical search for any blob.
413  const TabVector *mover = (IsRightTab() && sort_key_ < other.sort_key_) ? this : &other;
414  int top_y = mover->endpt_.y();
415  int bottom_y = mover->startpt_.y();
416  int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y));
417  int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y));
418  int shift = abs(sort_key_ - other.sort_key_) / v_scale;
419  if (IsRightTab()) {
420  right += shift;
421  } else {
422  left -= shift;
423  }
424 
425  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
426  vsearch.StartVerticalSearch(left, right, top_y);
427  BLOBNBOX *blob;
428  while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) {
429  const TBOX &box = blob->bounding_box();
430  if (box.top() > bottom_y) {
431  return true; // Nothing found.
432  }
433  if (box.bottom() < top_y) {
434  continue; // Doesn't overlap.
435  }
436  int left_at_box = XAtY(box.bottom());
437  int right_at_box = left_at_box;
438  if (IsRightTab()) {
439  right_at_box += shift;
440  } else {
441  left_at_box -= shift;
442  }
443  if (std::min(right_at_box, static_cast<int>(box.right())) >
444  std::max(left_at_box, static_cast<int>(box.left()))) {
445  return false;
446  }
447  }
448  return true; // Nothing found.
449  }
450  return false;
451 }
const int kSimilarVectorDist
Definition: tabvector.cpp:40
const int kSimilarRaggedDist
Definition: tabvector.cpp:43
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:200

◆ sort_key()

int tesseract::TabVector::sort_key ( ) const
inline

Definition at line 150 of file tabvector.h.

150  {
151  return sort_key_;
152  }

◆ SortKey()

static int tesseract::TabVector::SortKey ( const ICOORD &vertical,
int  x,
int  y 
)
inline static

Definition at line 274 of file tabvector.h.

274  {
275  ICOORD pt(x, y);
276  return pt * vertical;
277  }

◆ SortVectorsByKey()

static int tesseract::TabVector::SortVectorsByKey ( const void *  v1,
const void *  v2 
)
inline static

Definition at line 289 of file tabvector.h.

289  {
290  const TabVector *tv1 = *static_cast<const TabVector *const *>(v1);
291  const TabVector *tv2 = *static_cast<const TabVector *const *>(v2);
292  return tv1->sort_key_ - tv2->sort_key_;
293  }

◆ startpt()

const ICOORD& tesseract::TabVector::startpt ( ) const
inline

Definition at line 138 of file tabvector.h.

138  {
139  return startpt_;
140  }

◆ VerticalTextlinePartner()

TabVector * tesseract::TabVector::VerticalTextlinePartner ( )

Definition at line 883 of file tabvector.cpp.

883  {
884  if (!partners_.singleton()) {
885  return nullptr;
886  }
887  TabVector_C_IT partner_it(&partners_);
888  TabVector *partner = partner_it.data();
889  BLOBNBOX_C_IT box_it1(&boxes_);
890  BLOBNBOX_C_IT box_it2(&partner->boxes_);
891  // Count how many boxes are also in the other list.
892  // At the same time, gather the mean width and median vertical gap.
893  if (textord_debug_tabfind > 1) {
894  Print("Testing for vertical text");
895  partner->Print(" partner");
896  }
897  int num_matched = 0;
898  int num_unmatched = 0;
899  int total_widths = 0;
900  int width = startpt().x() - partner->startpt().x();
901  if (width < 0) {
902  width = -width;
903  }
904  STATS gaps(0, width * 2);
905  BLOBNBOX *prev_bbox = nullptr;
906  box_it2.mark_cycle_pt();
907  for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
908  BLOBNBOX *bbox = box_it1.data();
909  TBOX box = bbox->bounding_box();
910  if (prev_bbox != nullptr) {
911  gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
912  }
913  while (!box_it2.cycled_list() && box_it2.data() != bbox &&
914  box_it2.data()->bounding_box().bottom() < box.bottom()) {
915  box_it2.forward();
916  }
917  if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN &&
918  (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN)) {
919  ++num_matched;
920  } else {
921  ++num_unmatched;
922  }
923  total_widths += box.width();
924  prev_bbox = bbox;
925  }
926  if (num_unmatched + num_matched == 0) {
927  return nullptr;
928  }
929  double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
930  double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
931  int min_box_match =
932  static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio);
933  bool is_vertical =
934  (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap);
935  if (textord_debug_tabfind > 1) {
936  tprintf(
937  "gaps=%d, matched=%d, unmatched=%d, min_match=%d "
938  "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
939  gaps.get_total(), num_matched, num_unmatched, min_box_match, gaps.median(), avg_width,
940  max_gap, is_vertical ? "Yes" : "No");
941  }
942  return (is_vertical) ? partner : nullptr;
943 }
double textord_tabvector_vertical_gap_fraction
Definition: tabvector.cpp:57
@ BRT_UNKNOWN
Definition: blobbox.h:80
double textord_tabvector_vertical_box_ratio
Definition: tabvector.cpp:60
const ICOORD & startpt() const
Definition: tabvector.h:138

◆ VOverlap() [1/2]

int tesseract::TabVector::VOverlap ( const TabVector &other) const
inline

Definition at line 191 of file tabvector.h.

191  {
192  return std::min(other.endpt_.y(), endpt_.y()) - std::max(other.startpt_.y(), startpt_.y());
193  }

◆ VOverlap() [2/2]

int tesseract::TabVector::VOverlap ( int  top_y,
int  bottom_y 
) const
inline

Definition at line 195 of file tabvector.h.

195  {
196  return std::min(top_y, static_cast<int>(endpt_.y())) -
197  std::max(bottom_y, static_cast<int>(startpt_.y()));
198  }

◆ XAtY() [1/2]

static int tesseract::TabVector::XAtY ( const ICOORD &vertical,
int  sort_key,
int  y 
)
inline static

Definition at line 280 of file tabvector.h.

280  {
281  if (vertical.y() != 0) {
282  return (vertical.x() * y + sort_key) / vertical.y();
283  } else {
284  return sort_key;
285  }
286  }
int sort_key() const
Definition: tabvector.h:150

◆ XAtY() [2/2]

int tesseract::TabVector::XAtY ( int  y) const
inline

Definition at line 181 of file tabvector.h.

181  {
182  int height = endpt_.y() - startpt_.y();
183  if (height != 0) {
184  return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height + startpt_.x();
185  } else {
186  return startpt_.x();
187  }
188  }

◆ XYFlip()

void tesseract::TabVector::XYFlip ( )
inline

Definition at line 247 of file tabvector.h.

247  {
248  int x = startpt_.y();
249  startpt_.set_y(startpt_.x());
250  startpt_.set_x(x);
251  x = endpt_.y();
252  endpt_.set_y(endpt_.x());
253  endpt_.set_x(x);
254  }

The documentation for this class was generated from the following files: