tesseract  5.0.0
tesseract::BaselineBlock Class Reference

#include <baselinedetect.h>

Public Member Functions

 BaselineBlock (int debug_level, bool non_text, TO_BLOCK *block)
 
 ~BaselineBlock ()
 
TO_BLOCK * block () const
 
double skew_angle () const
 
bool FitBaselinesAndFindSkew (bool use_box_bottoms)
 
void ParallelizeBaselines (double default_block_skew)
 
void SetupBlockParameters () const
 
void PrepareForSplineFitting (ICOORD page_tr, bool remove_noise)
 
void FitBaselineSplines (bool enable_splines, bool show_final_rows, Textord *textord)
 
void DrawFinalRows (const ICOORD &page_tr)
 
void DrawPixSpline (Image pix_in)
 

Static Public Member Functions

static double SpacingModelError (double perp_disp, double line_spacing, double line_offset)
 

Detailed Description

Definition at line 125 of file baselinedetect.h.

Constructor & Destructor Documentation

◆ BaselineBlock()

tesseract::BaselineBlock::BaselineBlock ( int  debug_level,
bool  non_text,
TO_BLOCK * block 
)

Definition at line 418 of file baselinedetect.cpp.

419  : block_(block),
420  debug_level_(debug_level),
421  non_text_block_(non_text),
422  good_skew_angle_(false),
423  skew_angle_(0.0),
424  line_spacing_(block->line_spacing),
425  line_offset_(0.0),
426  model_error_(0.0) {
427  TO_ROW_IT row_it(block_->get_rows());
428  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
429  // Sort the blobs on the rows.
430  row_it.data()->blob_list()->sort(blob_x_order);
431  rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
432  }
433 }
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2540
TO_ROW_LIST * get_rows()
Definition: blobbox.h:709
TO_BLOCK * block() const

◆ ~BaselineBlock()

tesseract::BaselineBlock::~BaselineBlock ( )
inline

Definition at line 129 of file baselinedetect.h.

129  {
130  for (auto row : rows_) {
131  delete row;
132  }
133  }

Member Function Documentation

◆ block()

TO_BLOCK* tesseract::BaselineBlock::block ( ) const
inline

Definition at line 135 of file baselinedetect.h.

135  {
136  return block_;
137  }

◆ DrawFinalRows()

void tesseract::BaselineBlock::DrawFinalRows ( const ICOORD & page_tr)

Definition at line 606 of file baselinedetect.cpp.

606  {
607  if (non_text_block_) {
608  return;
609  }
610  double gradient = tan(skew_angle_);
611  FCOORD rotation(1.0f, 0.0f);
612  int left_edge = block_->block->pdblk.bounding_box().left();
613  ScrollView *win = create_to_win(page_tr);
614  ScrollView::Color colour = ScrollView::RED;
615  TO_ROW_IT row_it = block_->get_rows();
616  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
617  plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
618  colour = static_cast<ScrollView::Color>(colour + 1);
619  if (colour > ScrollView::MAGENTA) {
620  colour = ScrollView::RED;
621  }
622  }
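623  plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
624  // Show discarded blobs.
625  plot_blob_list(win, &block_->underlines, ScrollView::YELLOW,
626  ScrollView::CORAL);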
627  if (block_->blobs.length() > 0) {
628  tprintf("%d blobs discarded as noise\n", block_->blobs.length());
629  }
630  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
631 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1071
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:122
void draw_meanlines(TO_BLOCK *block, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:203
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
BLOBNBOX_LIST underlines
Definition: blobbox.h:777
BLOBNBOX_LIST blobs
Definition: blobbox.h:776
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:185
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67

◆ DrawPixSpline()

void tesseract::BaselineBlock::DrawPixSpline ( Image  pix_in)

Definition at line 635 of file baselinedetect.cpp.

635  {
636  if (non_text_block_) {
637  return;
638  }
639  TO_ROW_IT row_it = block_->get_rows();
640  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
641  row_it.data()->baseline.plot(pix_in);
642  }
643 }

◆ FitBaselinesAndFindSkew()

bool tesseract::BaselineBlock::FitBaselinesAndFindSkew ( bool  use_box_bottoms)

Definition at line 449 of file baselinedetect.cpp.

449  {
450  if (non_text_block_) {
451  return false;
452  }
453  std::vector<double> angles;
454  for (auto row : rows_) {
455  if (row->FitBaseline(use_box_bottoms)) {
456  double angle = row->BaselineAngle();
457  angles.push_back(angle);
458  }
459  if (debug_level_ > 1) {
460  row->Print();
461  }
462  }
463 
464  if (!angles.empty()) {
465  skew_angle_ = MedianOfCircularValues(M_PI, angles);
466  good_skew_angle_ = true;
467  } else {
468  skew_angle_ = 0.0f;
469  good_skew_angle_ = false;
470  }
471  if (debug_level_ > 0) {
472  tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_,
473  good_skew_angle_);
474  }
475  return good_skew_angle_;
476 }
T MedianOfCircularValues(T modulus, std::vector< T > &v)
Definition: linlsq.h:117

◆ FitBaselineSplines()

void tesseract::BaselineBlock::FitBaselineSplines ( bool  enable_splines,
bool  show_final_rows,
Textord * textord 
)

Definition at line 575 of file baselinedetect.cpp.

576  {
577  double gradient = tan(skew_angle_);
578  FCOORD rotation(1.0f, 0.0f);
579 
580  if (enable_splines) {
581  textord->make_spline_rows(block_, gradient, show_final_rows);
582  } else {
583  // Make a fake spline from the existing line.
584  TBOX block_box = block_->block->pdblk.bounding_box();
585  TO_ROW_IT row_it = block_->get_rows();
586  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
587  TO_ROW *row = row_it.data();
588  int32_t xstarts[2] = {block_box.left(), block_box.right()};
589  double coeffs[3] = {0.0, row->line_m(), row->line_c()};
590  row->baseline = QSPLINE(1, xstarts, coeffs);
591  textord->compute_row_xheight(row, block_->block->classify_rotation(),
592  row->line_m(), block_->line_size);
593  }
594  }
595  textord->compute_block_xheight(block_, gradient);
596  block_->block->set_xheight(block_->xheight);
597  if (textord_restore_underlines) { // fix underlines
598  restore_underlined_blobs(block_);
599  }
600 }
@ TBOX
bool textord_restore_underlines
Definition: underlin.cpp:24
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:32
FCOORD classify_rotation() const
Definition: ocrblock.h:135
void set_xheight(int32_t height)
set char size
Definition: ocrblock.h:63

◆ ParallelizeBaselines()

void tesseract::BaselineBlock::ParallelizeBaselines ( double  default_block_skew)

Definition at line 480 of file baselinedetect.cpp.

480  {
481  if (non_text_block_) {
482  return;
483  }
484  if (!good_skew_angle_) {
485  skew_angle_ = default_block_skew;
486  }
487  if (debug_level_ > 0) {
488  tprintf("Adjusting block to skew angle %g\n", skew_angle_);
489  }
490  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
491  for (auto row : rows_) {
492  row->AdjustBaselineToParallel(debug_level_, direction);
493  if (debug_level_ > 1) {
494  row->Print();
495  }
496  }
497  if (rows_.size() < 3 || !ComputeLineSpacing()) {
498  return;
499  }
500  // Enforce the line spacing model on all lines that don't yet have a good
501  // baseline.
502  // Start by finding the row that is best fitted to the model.
503  unsigned best_row = 0;
504  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
505  line_spacing_, line_offset_);
506  for (unsigned r = 1; r < rows_.size(); ++r) {
507  double error = SpacingModelError(rows_[r]->PerpDisp(direction),
508  line_spacing_, line_offset_);
509  if (error < best_error) {
510  best_error = error;
511  best_row = r;
512  }
513  }
514  // Starting at the best fitting row, work outwards, syncing the offset.
515  double offset = line_offset_;
516  for (auto r = best_row + 1; r < rows_.size(); ++r) {
517  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
518  line_spacing_, offset);
519  }
520  offset = line_offset_;
521  for (int r = best_row - 1; r >= 0; --r) {
522  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
523  line_spacing_, offset);
524  }
525 }
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)

◆ PrepareForSplineFitting()

void tesseract::BaselineBlock::PrepareForSplineFitting ( ICOORD  page_tr,
bool  remove_noise 
)

Definition at line 557 of file baselinedetect.cpp.

557  {
558  if (non_text_block_) {
559  return;
560  }
561  if (remove_noise) {
562  vigorous_noise_removal(block_);
563  }
564  FCOORD rotation(1.0f, 0.0f);
565  double gradient = tan(skew_angle_);
566  separate_underlines(block_, gradient, rotation, true);
567  pre_associate_blobs(page_tr, block_, rotation, true);
568 }
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1846
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:508
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1781

◆ SetupBlockParameters()

void tesseract::BaselineBlock::SetupBlockParameters ( ) const

Definition at line 528 of file baselinedetect.cpp.

528  {
529  if (line_spacing_ > 0.0) {
530  // Where was block_line_spacing set before?
531  float min_spacing =
532  std::min(block_->line_spacing, static_cast<float>(line_spacing_));
533  if (min_spacing < block_->line_size) {
534  block_->line_size = min_spacing;
535  }
536  block_->line_spacing = line_spacing_;
537  block_->baseline_offset = line_offset_;
538  block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
539  }
540  // Setup the parameters on all the rows.
541  TO_ROW_IT row_it(block_->get_rows());
542  for (unsigned r = 0; r < rows_.size(); ++r, row_it.forward()) {
543  BaselineRow *row = rows_[r];
544  TO_ROW *to_row = row_it.data();
545  row->SetupOldLineParameters(to_row);
546  }
547 }
const double kMaxBlobSizeMultiple
float baseline_offset
Definition: blobbox.h:791

◆ skew_angle()

double tesseract::BaselineBlock::skew_angle ( ) const
inline

Definition at line 138 of file baselinedetect.h.

138  {
139  return skew_angle_;
140  }

◆ SpacingModelError()

double tesseract::BaselineBlock::SpacingModelError ( double  perp_disp,
double  line_spacing,
double  line_offset 
)
static

Definition at line 437 of file baselinedetect.cpp.

438  {
439  // Round to the nearest multiple of line_spacing + line offset.
440  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
441  double model_y = line_spacing * multiple + line_offset;
442  return fabs(perp_disp - model_y);
443 }
int IntCastRounded(double x)
Definition: helpers.h:175

The documentation for this class was generated from the following files: