tesseract  5.0.0
baselinedetect.cpp
Go to the documentation of this file.
1 // File: baselinedetect.cpp
3 // Description: Initial Baseline Determination.
4 // Copyright 2012 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #define _USE_MATH_DEFINES // for M_PI
20 
21 #ifdef HAVE_CONFIG_H
22 # include "config_auto.h"
23 #endif
24 
25 #include "baselinedetect.h"
26 
27 #include <allheaders.h>
28 #include <algorithm>
29 #include <cfloat> // for FLT_MAX
30 #include <cmath> // for M_PI
31 #include "blobbox.h"
32 #include "detlinefit.h"
33 #include "drawtord.h"
34 #include "helpers.h"
35 #include "linlsq.h"
36 #include "makerow.h"
37 #include "textord.h"
38 #include "tprintf.h"
39 #include "underlin.h"
40 
41 // Number of displacement modes kept in displacement_modes_;
42 const int kMaxDisplacementsModes = 3;
43 // Number of points to skip when retrying initial fit.
44 const int kNumSkipPoints = 3;
45 // Max angle deviation (in radians) allowed to keep the independent baseline.
46 const double kMaxSkewDeviation = 1.0 / 64;
47 // Fraction of line spacing estimate for quantization of blob displacements.
48 const double kOffsetQuantizationFactor = 3.0 / 64;
49 // Fraction of line spacing estimate for computing blob fit error.
50 const double kFitHalfrangeFactor = 6.0 / 64;
51 // Max fraction of line spacing allowed before a baseline counts as badly
52 // fitting.
53 const double kMaxBaselineError = 3.0 / 64;
54 // Multiple of linespacing that sets max_blob_size in TO_BLOCK.
55 // Copied from textord_excess_blobsize.
56 const double kMaxBlobSizeMultiple = 1.3;
57 // Min fraction of linespacing gaps that should be close to the model before
58 // we will force the linespacing model on all the lines.
59 const double kMinFittingLinespacings = 0.25;
60 // A y-coordinate within a textline that is to be debugged.
61 //#define kDebugYCoord 1525
62 
63 namespace tesseract {
64 
65 BaselineRow::BaselineRow(double line_spacing, TO_ROW *to_row)
66  : blobs_(to_row->blob_list()),
67  baseline_pt1_(0.0f, 0.0f),
68  baseline_pt2_(0.0f, 0.0f),
69  baseline_error_(0.0),
70  good_baseline_(false) {
71  ComputeBoundingBox();
72  // Compute a scale factor for rounding to ints.
73  disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
74  fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
75  max_baseline_error_ = kMaxBaselineError * line_spacing;
76 }
77 
78 // Sets the TO_ROW with the output straight line.
80  // TODO(rays) get rid of this when m and c are no longer used.
81  double gradient = tan(BaselineAngle());
82  // para_c is the actual intercept of the baseline on the y-axis.
83  float para_c = StraightYAtX(0.0);
84  row->set_line(gradient, para_c, baseline_error_);
85  row->set_parallel_line(gradient, para_c, baseline_error_);
86 }
87 
88 // Outputs diagnostic information.
89 void BaselineRow::Print() const {
90  tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
91  baseline_pt1_.x(), baseline_pt1_.y(), baseline_pt2_.x(),
92  baseline_pt2_.y(), BaselineAngle(), StraightYAtX(0.0));
93  tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_,
94  baseline_error_, good_baseline_);
95  bounding_box_.print();
96 }
97 
98 // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
100  FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
101  double angle = baseline_dir.angle();
102  // Baseline directions are only unique in a range of pi so constrain to
103  // [-pi/2, pi/2].
104  return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
105 }
106 
107 // Computes and returns the linespacing at the middle of the overlap
108 // between this and other.
109 double BaselineRow::SpaceBetween(const BaselineRow &other) const {
110  // Find the x-centre of overlap of the lines.
111  float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) +
112  std::min(bounding_box_.right(), other.bounding_box_.right())) /
113  2.0f;
114  // Find the vertical centre between them.
115  float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
116  // Find the perpendicular distance of (x,y) from each line.
117  FCOORD pt(x, y);
118  return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
119 }
120 
121 // Computes and returns the displacement of the center of the line
122 // perpendicular to the given direction.
123 double BaselineRow::PerpDisp(const FCOORD &direction) const {
124  float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
125  FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
126  return direction * middle_pos / direction.length();
127 }
128 
129 // Computes the y coordinate at the given x using the straight baseline
130 // defined by baseline_pt1_ and baseline_pt2__.
131 double BaselineRow::StraightYAtX(double x) const {
132  double denominator = baseline_pt2_.x() - baseline_pt1_.x();
133  if (denominator == 0.0) {
134  return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
135  }
136  return baseline_pt1_.y() + (x - baseline_pt1_.x()) *
137  (baseline_pt2_.y() - baseline_pt1_.y()) /
138  denominator;
139 }
140 
141 // Fits a straight baseline to the points. Returns true if it had enough
142 // points to be reasonably sure of the fitted baseline.
143 // If use_box_bottoms is false, baselines positions are formed by
144 // considering the outlines of the blobs.
145 bool BaselineRow::FitBaseline(bool use_box_bottoms) {
146  // Deterministic fitting is used wherever possible.
147  fitter_.Clear();
148  // Linear least squares is a backup if the DetLineFit produces a bad line.
149  LLSQ llsq;
150  BLOBNBOX_IT blob_it(blobs_);
151 
152  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
153  BLOBNBOX *blob = blob_it.data();
154  if (!use_box_bottoms) {
155  blob->EstimateBaselinePosition();
156  }
157  const TBOX &box = blob->bounding_box();
158  int x_middle = (box.left() + box.right()) / 2;
159 #ifdef kDebugYCoord
160  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
161  tprintf("Box bottom = %d, baseline pos=%d for box at:", box.bottom(),
162  blob->baseline_position());
163  box.print();
164  }
165 #endif
166  fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
167  llsq.add(x_middle, blob->baseline_position());
168  }
169  // Fit the line.
170  ICOORD pt1, pt2;
171  baseline_error_ = fitter_.Fit(&pt1, &pt2);
172  baseline_pt1_ = pt1;
173  baseline_pt2_ = pt2;
174  if (baseline_error_ > max_baseline_error_ &&
176  // The fit was bad but there were plenty of points, so try skipping
177  // the first and last few, and use the new line if it dramatically improves
178  // the error of fit.
179  double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
180  if (error < baseline_error_ / 2.0) {
181  baseline_error_ = error;
182  baseline_pt1_ = pt1;
183  baseline_pt2_ = pt2;
184  }
185  }
186  int debug = 0;
187 #ifdef kDebugYCoord
188  Print();
189  debug = bounding_box_.bottom() < kDebugYCoord &&
190  bounding_box_.top() > kDebugYCoord
191  ? 3
192  : 2;
193 #endif
194  // Now we obtained a direction from that fit, see if we can improve the
195  // fit using the same direction and some other start point.
196  FCOORD direction(pt2 - pt1);
197  double target_offset = direction * pt1;
198  good_baseline_ = false;
199  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
200  // Wild lines can be produced because DetLineFit allows vertical lines, but
201  // vertical text has been rotated so angles over pi/4 should be disallowed.
202  // Near vertical lines can still be produced by vertically aligned components
203  // on very short lines.
204  double angle = BaselineAngle();
205  if (fabs(angle) > M_PI * 0.25) {
206  // Use the llsq fit as a backup.
207  baseline_pt1_ = llsq.mean_point();
208  baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
209  // TODO(rays) get rid of this when m and c are no longer used.
210  double m = llsq.m();
211  double c = llsq.c(m);
212  baseline_error_ = llsq.rms(m, c);
213  good_baseline_ = false;
214  }
215  return good_baseline_;
216 }
217 
218 // Modifies an existing result of FitBaseline to be parallel to the given
219 // direction vector if that produces a better result.
220 void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) {
221  SetupBlobDisplacements(direction);
222  if (displacement_modes_.empty()) {
223  return;
224  }
225 #ifdef kDebugYCoord
226  if (bounding_box_.bottom() < kDebugYCoord &&
227  bounding_box_.top() > kDebugYCoord && debug < 3)
228  debug = 3;
229 #endif
230  FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
231 }
232 
233 // Modifies the baseline to snap to the textline grid if the existing
234 // result is not good enough.
235 double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction,
236  double line_spacing,
237  double line_offset) {
238  if (blobs_->empty()) {
239  if (debug > 1) {
240  tprintf("Row empty at:");
241  bounding_box_.print();
242  }
243  return line_offset;
244  }
245  // Find the displacement_modes_ entry nearest to the grid.
246  double best_error = 0.0;
247  int best_index = -1;
248  for (unsigned i = 0; i < displacement_modes_.size(); ++i) {
249  double blob_y = displacement_modes_[i];
250  double error =
251  BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset);
252  if (debug > 1) {
253  tprintf("Mode at %g has error %g from model \n", blob_y, error);
254  }
255  if (best_index < 0 || error < best_error) {
256  best_error = error;
257  best_index = i;
258  }
259  }
260  // We will move the baseline only if the chosen mode is close enough to the
261  // model.
262  double model_margin = max_baseline_error_ - best_error;
263  if (best_index >= 0 && model_margin > 0.0) {
264  // But if the current baseline is already close to the mode there is no
265  // point, and only the potential to damage accuracy by changing its angle.
266  double perp_disp = PerpDisp(direction);
267  double shift = displacement_modes_[best_index] - perp_disp;
268  if (fabs(shift) > max_baseline_error_) {
269  if (debug > 1) {
270  tprintf("Attempting linespacing model fit with mode %g to row at:",
271  displacement_modes_[best_index]);
272  bounding_box_.print();
273  }
274  FitConstrainedIfBetter(debug, direction, model_margin,
275  displacement_modes_[best_index]);
276  } else if (debug > 1) {
277  tprintf("Linespacing model only moves current line by %g for row at:",
278  shift);
279  bounding_box_.print();
280  }
281  } else if (debug > 1) {
282  tprintf("Linespacing model not close enough to any mode for row at:");
283  bounding_box_.print();
284  }
285  return fmod(PerpDisp(direction), line_spacing);
286 }
287 
288 // Sets up displacement_modes_ with the top few modes of the perpendicular
289 // distance of each blob from the given direction vector, after rounding.
290 void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) {
291  // Set of perpendicular displacements of the blob bottoms from the required
292  // baseline direction.
293  std::vector<double> perp_blob_dists;
294  displacement_modes_.clear();
295  // Gather the skew-corrected position of every blob.
296  double min_dist = FLT_MAX;
297  double max_dist = -FLT_MAX;
298  BLOBNBOX_IT blob_it(blobs_);
299 #ifdef kDebugYCoord
300  bool debug = false;
301 #endif
302  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
303  BLOBNBOX *blob = blob_it.data();
304  const TBOX &box = blob->bounding_box();
305 #ifdef kDebugYCoord
306  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord)
307  debug = true;
308 #endif
309  FCOORD blob_pos((box.left() + box.right()) / 2.0f,
310  blob->baseline_position());
311  double offset = direction * blob_pos;
312  perp_blob_dists.push_back(offset);
313 #ifdef kDebugYCoord
314  if (debug) {
315  tprintf("Displacement %g for blob at:", offset);
316  box.print();
317  }
318 #endif
319  UpdateRange(offset, &min_dist, &max_dist);
320  }
321  // Set up a histogram using disp_quant_factor_ as the bucket size.
322  STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
323  IntCastRounded(max_dist / disp_quant_factor_) + 1);
324  for (double perp_blob_dist : perp_blob_dists) {
325  dist_stats.add(IntCastRounded(perp_blob_dist / disp_quant_factor_), 1);
326  }
327  std::vector<KDPairInc<float, int>> scaled_modes;
328  dist_stats.top_n_modes(kMaxDisplacementsModes, scaled_modes);
329 #ifdef kDebugYCoord
330  if (debug) {
331  for (int i = 0; i < scaled_modes.size(); ++i) {
332  tprintf("Top mode = %g * %d\n", scaled_modes[i].key * disp_quant_factor_,
333  scaled_modes[i].data());
334  }
335  }
336 #endif
337  for (auto &scaled_mode : scaled_modes) {
338  displacement_modes_.push_back(disp_quant_factor_ * scaled_mode.key());
339  }
340 }
341 
342 // Fits a line in the given direction to blobs that are close to the given
343 // target_offset perpendicular displacement from the direction. The fit
344 // error is allowed to be cheat_allowance worse than the existing fit, and
345 // will still be used.
346 // If cheat_allowance > 0, the new fit will be good and replace the current
347 // fit if it has better fit (with cheat) OR its error is below
348 // max_baseline_error_ and the old fit is marked bad.
349 // Otherwise the new fit will only replace the old if it is really better,
350 // or the old fit is marked bad and the new fit has sufficient points, as
351 // well as being within the max_baseline_error_.
352 void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction,
353  double cheat_allowance,
354  double target_offset) {
355  double halfrange = fit_halfrange_ * direction.length();
356  double min_dist = target_offset - halfrange;
357  double max_dist = target_offset + halfrange;
358  ICOORD line_pt;
359  double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
360  debug > 2, &line_pt);
361  // Allow cheat_allowance off the new error
362  new_error -= cheat_allowance;
363  double old_angle = BaselineAngle();
364  double new_angle = direction.angle();
365  if (debug > 1) {
366  tprintf("Constrained error = %g, original = %g", new_error,
367  baseline_error_);
368  tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle,
369  new_angle, new_angle - old_angle, kMaxSkewDeviation);
370  }
371  bool new_good_baseline =
372  new_error <= max_baseline_error_ &&
373  (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
374  // The new will replace the old if any are true:
375  // 1. the new error is better
376  // 2. the old is NOT good, but the new is
377  // 3. there is a wild angular difference between them (assuming that the new
378  // is a better guess at the angle.)
379  if (new_error <= baseline_error_ || (!good_baseline_ && new_good_baseline) ||
380  fabs(new_angle - old_angle) > kMaxSkewDeviation) {
381  baseline_error_ = new_error;
382  baseline_pt1_ = line_pt;
383  baseline_pt2_ = baseline_pt1_ + direction;
384  good_baseline_ = new_good_baseline;
385  if (debug > 1) {
386  tprintf("Replacing with constrained baseline, good = %d\n",
387  good_baseline_);
388  }
389  } else if (debug > 1) {
390  tprintf("Keeping old baseline\n");
391  }
392 }
393 
394 // Returns the perpendicular distance of the point from the straight
395 // baseline.
396 float BaselineRow::PerpDistanceFromBaseline(const FCOORD &pt) const {
397  FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
398  FCOORD offset_vector(pt - baseline_pt1_);
399  float distance = baseline_vector * offset_vector;
400  float sqlength = baseline_vector.sqlength();
401  if (sqlength == 0.0f) {
402  tprintf("unexpected baseline vector (0,0)\n");
403  return 0.0f;
404  }
405  return std::sqrt(distance * distance / sqlength);
406 }
407 
408 // Computes the bounding box of the row.
409 void BaselineRow::ComputeBoundingBox() {
410  BLOBNBOX_IT it(blobs_);
411  TBOX box;
412  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
413  box += it.data()->bounding_box();
414  }
415  bounding_box_ = box;
416 }
417 
418 BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
419  : block_(block),
420  debug_level_(debug_level),
421  non_text_block_(non_text),
422  good_skew_angle_(false),
423  skew_angle_(0.0),
424  line_spacing_(block->line_spacing),
425  line_offset_(0.0),
426  model_error_(0.0) {
427  TO_ROW_IT row_it(block_->get_rows());
428  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
429  // Sort the blobs on the rows.
430  row_it.data()->blob_list()->sort(blob_x_order);
431  rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
432  }
433 }
434 
435 // Computes and returns the absolute error of the given perp_disp from the
436 // given linespacing model.
437 double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
438  double line_offset) {
439  // Round to the nearest multiple of line_spacing + line offset.
440  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
441  double model_y = line_spacing * multiple + line_offset;
442  return fabs(perp_disp - model_y);
443 }
444 
445 // Fits straight line baselines and computes the skew angle from the
446 // median angle. Returns true if a good angle is found.
447 // If use_box_bottoms is false, baseline positions are formed by
448 // considering the outlines of the blobs.
449 bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
450  if (non_text_block_) {
451  return false;
452  }
453  std::vector<double> angles;
454  for (auto row : rows_) {
455  if (row->FitBaseline(use_box_bottoms)) {
456  double angle = row->BaselineAngle();
457  angles.push_back(angle);
458  }
459  if (debug_level_ > 1) {
460  row->Print();
461  }
462  }
463 
464  if (!angles.empty()) {
465  skew_angle_ = MedianOfCircularValues(M_PI, angles);
466  good_skew_angle_ = true;
467  } else {
468  skew_angle_ = 0.0f;
469  good_skew_angle_ = false;
470  }
471  if (debug_level_ > 0) {
472  tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_,
473  good_skew_angle_);
474  }
475  return good_skew_angle_;
476 }
477 
478 // Refits the baseline to a constrained angle, using the stored block
479 // skew if good enough, otherwise the supplied default skew.
480 void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
481  if (non_text_block_) {
482  return;
483  }
484  if (!good_skew_angle_) {
485  skew_angle_ = default_block_skew;
486  }
487  if (debug_level_ > 0) {
488  tprintf("Adjusting block to skew angle %g\n", skew_angle_);
489  }
490  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
491  for (auto row : rows_) {
492  row->AdjustBaselineToParallel(debug_level_, direction);
493  if (debug_level_ > 1) {
494  row->Print();
495  }
496  }
497  if (rows_.size() < 3 || !ComputeLineSpacing()) {
498  return;
499  }
500  // Enforce the line spacing model on all lines that don't yet have a good
501  // baseline.
502  // Start by finding the row that is best fitted to the model.
503  unsigned best_row = 0;
504  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
505  line_spacing_, line_offset_);
506  for (unsigned r = 1; r < rows_.size(); ++r) {
507  double error = SpacingModelError(rows_[r]->PerpDisp(direction),
508  line_spacing_, line_offset_);
509  if (error < best_error) {
510  best_error = error;
511  best_row = r;
512  }
513  }
514  // Starting at the best fitting row, work outwards, syncing the offset.
515  double offset = line_offset_;
516  for (auto r = best_row + 1; r < rows_.size(); ++r) {
517  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
518  line_spacing_, offset);
519  }
520  offset = line_offset_;
521  for (int r = best_row - 1; r >= 0; --r) {
522  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
523  line_spacing_, offset);
524  }
525 }
526 
527 // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
529  if (line_spacing_ > 0.0) {
530  // Where was block_line_spacing set before?
531  float min_spacing =
532  std::min(block_->line_spacing, static_cast<float>(line_spacing_));
533  if (min_spacing < block_->line_size) {
534  block_->line_size = min_spacing;
535  }
536  block_->line_spacing = line_spacing_;
537  block_->baseline_offset = line_offset_;
538  block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
539  }
540  // Setup the parameters on all the rows.
541  TO_ROW_IT row_it(block_->get_rows());
542  for (unsigned r = 0; r < rows_.size(); ++r, row_it.forward()) {
543  BaselineRow *row = rows_[r];
544  TO_ROW *to_row = row_it.data();
545  row->SetupOldLineParameters(to_row);
546  }
547 }
548 
549 // Processing that is required before fitting baseline splines, but requires
550 // linear baselines in order to be successful:
551 // Removes noise if required
552 // Separates out underlines
553 // Pre-associates blob fragments.
554 // TODO(rays/joeliu) This entire section of code is inherited from the past
555 // and could be improved/eliminated.
556 // page_tr is used to size a debug window.
557 void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
558  if (non_text_block_) {
559  return;
560  }
561  if (remove_noise) {
562  vigorous_noise_removal(block_);
563  }
564  FCOORD rotation(1.0f, 0.0f);
565  double gradient = tan(skew_angle_);
566  separate_underlines(block_, gradient, rotation, true);
567  pre_associate_blobs(page_tr, block_, rotation, true);
568 }
569 
570 // Fits splines to the textlines, or creates fake QSPLINES from the straight
571 // baselines that are already on the TO_ROWs.
572 // As a side-effect, computes the xheights of the rows and the block.
573 // Although x-height estimation is conceptually separate, it is part of
574 // detecting perspective distortion and therefore baseline fitting.
575 void BaselineBlock::FitBaselineSplines(bool enable_splines,
576  bool show_final_rows, Textord *textord) {
577  double gradient = tan(skew_angle_);
578  FCOORD rotation(1.0f, 0.0f);
579 
580  if (enable_splines) {
581  textord->make_spline_rows(block_, gradient, show_final_rows);
582  } else {
583  // Make a fake spline from the existing line.
584  TBOX block_box = block_->block->pdblk.bounding_box();
585  TO_ROW_IT row_it = block_->get_rows();
586  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
587  TO_ROW *row = row_it.data();
588  int32_t xstarts[2] = {block_box.left(), block_box.right()};
589  double coeffs[3] = {0.0, row->line_m(), row->line_c()};
590  row->baseline = QSPLINE(1, xstarts, coeffs);
591  textord->compute_row_xheight(row, block_->block->classify_rotation(),
592  row->line_m(), block_->line_size);
593  }
594  }
595  textord->compute_block_xheight(block_, gradient);
596  block_->block->set_xheight(block_->xheight);
597  if (textord_restore_underlines) { // fix underlines
598  restore_underlined_blobs(block_);
599  }
600 }
601 
602 #ifndef GRAPHICS_DISABLED
603 
604 // Draws the (straight) baselines and final blobs colored according to
605 // what was discarded as noise and what is associated with each row.
606 void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) {
607  if (non_text_block_) {
608  return;
609  }
610  double gradient = tan(skew_angle_);
611  FCOORD rotation(1.0f, 0.0f);
612  int left_edge = block_->block->pdblk.bounding_box().left();
613  ScrollView *win = create_to_win(page_tr);
615  TO_ROW_IT row_it = block_->get_rows();
616  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
617  plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
618  colour = static_cast<ScrollView::Color>(colour + 1);
619  if (colour > ScrollView::MAGENTA) {
620  colour = ScrollView::RED;
621  }
622  }
624  // Show discarded blobs.
627  if (block_->blobs.length() > 0) {
628  tprintf("%d blobs discarded as noise\n", block_->blobs.length());
629  }
630  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
631 }
632 
633 #endif // !GRAPHICS_DISABLED
634 
636  if (non_text_block_) {
637  return;
638  }
639  TO_ROW_IT row_it = block_->get_rows();
640  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
641  row_it.data()->baseline.plot(pix_in);
642  }
643 }
644 
645 // Top-level line-spacing calculation. Computes an estimate of the line-
646 // spacing, using the current baselines in the TO_ROWS of the block, and
647 // then refines it by fitting a regression line to the baseline positions
648 // as a function of their integer index.
649 // Returns true if it seems that the model is a reasonable fit to the
650 // observations.
651 bool BaselineBlock::ComputeLineSpacing() {
652  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
653  std::vector<double> row_positions;
654  ComputeBaselinePositions(direction, &row_positions);
655  if (row_positions.size() < 2) {
656  return false;
657  }
658  EstimateLineSpacing();
659  RefineLineSpacing(row_positions);
660  // Verify that the model is reasonable.
661  double max_baseline_error = kMaxBaselineError * line_spacing_;
662  int non_trivial_gaps = 0;
663  int fitting_gaps = 0;
664  for (unsigned i = 1; i < row_positions.size(); ++i) {
665  double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
666  if (row_gap > max_baseline_error) {
667  ++non_trivial_gaps;
668  if (fabs(row_gap - line_spacing_) <= max_baseline_error) {
669  ++fitting_gaps;
670  }
671  }
672  }
673  if (debug_level_ > 0) {
674  tprintf("Spacing %g, in %zu rows, %d gaps fitted out of %d non-trivial\n",
675  line_spacing_, row_positions.size(), fitting_gaps,
676  non_trivial_gaps);
677  }
678  return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
679 }
680 
681 // Computes the deskewed vertical position of each baseline in the block and
682 // stores them in the given vector.
683 // This is calculated as the perpendicular distance of the middle of each
684 // baseline (in case it has a different skew angle) from the line passing
685 // through the origin parallel to the block baseline angle.
686 // NOTE that "distance" above is a signed quantity so we can tell which side
687 // of the block baseline a line sits, hence the function and argument name
688 // positions not distances.
689 void BaselineBlock::ComputeBaselinePositions(const FCOORD &direction,
690  std::vector<double> *positions) {
691  positions->clear();
692  for (auto row : rows_) {
693  const TBOX &row_box = row->bounding_box();
694  float x_middle = (row_box.left() + row_box.right()) / 2.0f;
695  FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
696  float offset = direction * row_pos;
697  positions->push_back(offset);
698  }
699 }
700 
701 // Computes an estimate of the line spacing of the block from the median
702 // of the spacings between adjacent overlapping textlines.
703 void BaselineBlock::EstimateLineSpacing() {
704  std::vector<float> spacings;
705  for (unsigned r = 0; r < rows_.size(); ++r) {
706  BaselineRow *row = rows_[r];
707  // Exclude silly lines.
708  if (fabs(row->BaselineAngle()) > M_PI * 0.25) {
709  continue;
710  }
711  // Find the first row after row that overlaps it significantly.
712  const TBOX &row_box = row->bounding_box();
713  unsigned r2;
714  for (r2 = r + 1; r2 < rows_.size() &&
715  !row_box.major_x_overlap(rows_[r2]->bounding_box());
716  ++r2) {
717  ;
718  }
719  if (r2 < rows_.size()) {
720  BaselineRow *row2 = rows_[r2];
721  // Exclude silly lines.
722  if (fabs(row2->BaselineAngle()) > M_PI * 0.25) {
723  continue;
724  }
725  float spacing = row->SpaceBetween(*row2);
726  spacings.push_back(spacing);
727  }
728  }
729  // If we have at least one value, use it, otherwise leave the previous
730  // value unchanged.
731  if (!spacings.empty()) {
732  std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2,
733  spacings.end());
734  line_spacing_ = spacings[spacings.size() / 2];
735  if (debug_level_ > 1) {
736  tprintf("Estimate of linespacing = %g\n", line_spacing_);
737  }
738  }
739 }
740 
741 // Refines the line spacing of the block by fitting a regression
742 // line to the deskewed y-position of each baseline as a function of its
743 // estimated line index, allowing for a small error in the initial linespacing
744 // and choosing the best available model.
745 void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) {
746  double spacings[3], offsets[3], errors[3];
747  int index_range;
748  errors[0] = FitLineSpacingModel(positions, line_spacing_, &spacings[0],
749  &offsets[0], &index_range);
750  if (index_range > 1) {
751  double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
752  // Try the hypotheses that there might be index_range +/- 1 line spaces.
753  errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1],
754  &offsets[1], nullptr);
755  double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
756  errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2],
757  &offsets[2], nullptr);
758  for (int i = 1; i <= 2; ++i) {
759  if (errors[i] < errors[0]) {
760  spacings[0] = spacings[i];
761  offsets[0] = offsets[i];
762  errors[0] = errors[i];
763  }
764  }
765  }
766  if (spacings[0] > 0.0) {
767  line_spacing_ = spacings[0];
768  line_offset_ = offsets[0];
769  model_error_ = errors[0];
770  if (debug_level_ > 0) {
771  tprintf("Final linespacing model = %g + offset %g, error %g\n",
772  line_spacing_, line_offset_, model_error_);
773  }
774  }
775 }
776 
777 // Given an initial estimate of line spacing (m_in) and the positions of each
778 // baseline, computes the line spacing of the block more accurately in m_out,
779 // and the corresponding intercept in c_out, and the number of spacings seen
780 // in index_delta. Returns the error of fit to the line spacing model.
781 // Uses a simple linear regression, but optimized the offset using the median.
782 double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
783  double m_in, double *m_out,
784  double *c_out, int *index_delta) {
785  if (m_in == 0.0f || positions.size() < 2) {
786  *m_out = m_in;
787  *c_out = 0.0;
788  if (index_delta != nullptr) {
789  *index_delta = 0;
790  }
791  return 0.0;
792  }
793  std::vector<double> offsets;
794  // Get the offset (remainder) linespacing for each line and choose the median.
795  offsets.reserve(positions.size());
796  for (double position : positions) {
797  offsets.push_back(fmod(position, m_in));
798  }
799  // Get the median offset.
800  double median_offset = MedianOfCircularValues(m_in, offsets);
801  // Now fit a line to quantized line number and offset.
802  LLSQ llsq;
803  int min_index = INT32_MAX;
804  int max_index = -INT32_MAX;
805  for (double y_pos : positions) {
806  int row_index = IntCastRounded((y_pos - median_offset) / m_in);
807  UpdateRange(row_index, &min_index, &max_index);
808  llsq.add(row_index, y_pos);
809  }
810  // Get the refined line spacing.
811  *m_out = llsq.m();
812  // Use the median offset rather than the mean.
813  offsets.clear();
814  if (*m_out != 0.0) {
815  for (double position : positions) {
816  offsets.push_back(fmod(position, *m_out));
817  }
818  // Get the median offset.
819  if (debug_level_ > 2) {
820  for (unsigned i = 0; i < offsets.size(); ++i) {
821  tprintf("%u: %g\n", i, offsets[i]);
822  }
823  }
824  *c_out = MedianOfCircularValues(*m_out, offsets);
825  } else {
826  *c_out = 0.0;
827  }
828  if (debug_level_ > 1) {
829  tprintf("Median offset = %g, compared to mean of %g.\n", *c_out,
830  llsq.c(*m_out));
831  }
832  // Index_delta is the number of hypothesized line gaps present.
833  if (index_delta != nullptr) {
834  *index_delta = max_index - min_index;
835  }
836  // Use the regression model's intercept to compute the error, as it may be
837  // a full line-spacing in disagreement with the median.
838  double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
839  if (debug_level_ > 1) {
840  tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in,
841  median_offset, *m_out, *c_out, rms_error);
842  }
843  return rms_error;
844 }
845 
846 BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew,
847  TO_BLOCK_LIST *blocks)
848  : page_skew_(page_skew), debug_level_(debug_level) {
849  TO_BLOCK_IT it(blocks);
850  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
851  TO_BLOCK *to_block = it.data();
852  BLOCK *block = to_block->block;
853  POLY_BLOCK *pb = block->pdblk.poly_block();
854  // A note about non-text blocks.
855  // On output, non-text blocks are supposed to contain a single empty word
856  // in each incoming text line. These mark out the polygonal bounds of the
857  // block. Ideally no baselines should be required, but currently
858  // make_words crashes if a baseline and xheight are not provided, so we
859  // include non-text blocks here, but flag them for special treatment.
860  bool non_text = pb != nullptr && !pb->IsText();
861  blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));
862  }
863 }
864 
865 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
866 // block-wise and page-wise data to smooth small blocks/rows, and applies
867 // smoothing based on block/page-level skew and block-level linespacing.
868 void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
869  std::vector<double> block_skew_angles;
870  for (auto bl_block : blocks_) {
871  if (debug_level_ > 0) {
872  tprintf("Fitting initial baselines...\n");
873  }
874  if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
875  block_skew_angles.push_back(bl_block->skew_angle());
876  }
877  }
878  // Compute a page-wide default skew for blocks with too little information.
879  double default_block_skew = page_skew_.angle();
880  if (!block_skew_angles.empty()) {
881  default_block_skew = MedianOfCircularValues(M_PI, block_skew_angles);
882  }
883  if (debug_level_ > 0) {
884  tprintf("Page skew angle = %g\n", default_block_skew);
885  }
886  // Set bad lines in each block to the default block skew and then force fit
887  // a linespacing model where it makes sense to do so.
888  for (auto bl_block : blocks_) {
889  bl_block->ParallelizeBaselines(default_block_skew);
890  bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
891  }
892 }
893 
894 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
895 // other associated side-effects, including pre-associating blobs, computing
896 // x-heights and displaying debug information.
897 // NOTE that ComputeStraightBaselines must have been called first as this
898 // sets up data in the TO_ROWs upon which this function depends.
900  bool enable_splines,
901  bool remove_noise,
902  bool show_final_rows,
903  Textord *textord) {
904  for (auto bl_block : blocks_) {
905  if (enable_splines) {
906  bl_block->PrepareForSplineFitting(page_tr, remove_noise);
907  }
908  bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
909 #ifndef GRAPHICS_DISABLED
910  if (show_final_rows) {
911  bl_block->DrawFinalRows(page_tr);
912  }
913 #endif
914  }
915 }
916 
917 } // namespace tesseract.
const double kMaxBaselineError
const double kMaxBlobSizeMultiple
const double kFitHalfrangeFactor
const double kOffsetQuantizationFactor
const int kMaxDisplacementsModes
const double kMaxSkewDeviation
const double kMinFittingLinespacings
const int kNumSkipPoints
@ TBOX
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1846
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int IntCastRounded(double x)
Definition: helpers.h:175
bool textord_restore_underlines
Definition: underlin.cpp:24
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:508
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:32
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1071
const double kMaxBaselineError
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:122
void draw_meanlines(TO_BLOCK *block, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:203
T MedianOfCircularValues(T modulus, std::vector< T > &v)
Definition: linlsq.h:117
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1781
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:122
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2540
const TBOX & bounding_box() const
Definition: blobbox.h:239
void EstimateBaselinePosition()
Definition: blobbox.cpp:360
int baseline_position() const
Definition: blobbox.h:404
QSPLINE baseline
Definition: blobbox.h:676
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:612
float line_m() const
Definition: blobbox.h:580
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:619
float line_c() const
Definition: blobbox.h:583
BLOBNBOX_LIST underlines
Definition: blobbox.h:777
float baseline_offset
Definition: blobbox.h:791
BLOBNBOX_LIST blobs
Definition: blobbox.h:776
TO_ROW_LIST * get_rows()
Definition: blobbox.h:709
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:50
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:133
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:73
bool SufficientPointsForIndependentFit() const
Definition: detlinefit.cpp:164
void add(double x, double y)
Definition: linlsq.cpp:49
double m() const
Definition: linlsq.cpp:100
double c(double m) const
Definition: linlsq.cpp:116
FCOORD mean_point() const
Definition: linlsq.cpp:166
double rms(double m, double c) const
Definition: linlsq.cpp:130
FCOORD classify_rotation() const
Definition: ocrblock.h:135
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:185
void set_xheight(int32_t height)
set char size
Definition: ocrblock.h:63
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
integer coordinate
Definition: points.h:36
float length() const
find length
Definition: points.h:227
float angle() const
find angle
Definition: points.h:246
float y() const
Definition: points.h:209
float x() const
Definition: points.h:206
bool IsText() const
Definition: polyblk.h:52
TDimension left() const
Definition: rect.h:82
TDimension width() const
Definition: rect.h:126
TDimension top() const
Definition: rect.h:68
void print() const
Definition: rect.h:289
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
bool FitBaseline(bool use_box_bottoms)
double PerpDisp(const FCOORD &direction) const
double BaselineAngle() const
void AdjustBaselineToParallel(int debug, const FCOORD &direction)
double SpaceBetween(const BaselineRow &other) const
BaselineRow(double line_size, TO_ROW *to_row)
double StraightYAtX(double x) const
double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, double line_offset)
void SetupOldLineParameters(TO_ROW *row) const
void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord)
bool FitBaselinesAndFindSkew(bool use_box_bottoms)
TO_BLOCK * block() const
BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
void DrawFinalRows(const ICOORD &page_tr)
void ParallelizeBaselines(double default_block_skew)
void DrawPixSpline(Image pix_in)
void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeStraightBaselines(bool use_box_bottoms)
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1384
void make_spline_rows(TO_BLOCK *block, float gradient, bool testing_on)
Definition: makerow.cpp:1998
void compute_block_xheight(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:1278