tesseract  5.0.0
baselinedetect.h
Go to the documentation of this file.
1 // File: baselinedetect.h
3 // Description: Initial Baseline Determination.
4 // Copyright 2012 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_
20 #define TESSERACT_TEXTORD_BASELINEDETECT_H_
21 
22 #include "detlinefit.h"
23 #include "points.h"
24 #include "rect.h"
25 
26 struct Pix;
27 
28 namespace tesseract {
29 
30 class Textord;
31 class BLOBNBOX_LIST;
32 class TO_BLOCK;
33 class TO_BLOCK_LIST;
34 class TO_ROW;
35 
36 // Class to compute and hold baseline data for a TO_ROW.
// The state kept per row is a straight baseline (two points on the line), the
// error of that fit, a good/bad flag, the line fitter, and the thresholds
// used while fitting (see the private members below).
// NOTE(review): blobs_ is a raw pointer and no destructor is declared in this
// class, so the blob list is presumably owned elsewhere - confirm in the .cpp.
37 class BaselineRow {
38 public:
  // Builds the baseline data for the given TO_ROW.
  // NOTE(review): line_size presumably scales the error/quantization
  // thresholds held below - the constructor body is not in this header, so
  // confirm against the implementation.
39  BaselineRow(double line_size, TO_ROW *to_row);
40 
  // Read-only access to the stored bounding box (bounding_box_).
41  const TBOX &bounding_box() const {
42  return bounding_box_;
43  }
44  // Sets the TO_ROW with the output straight line.
45  void SetupOldLineParameters(TO_ROW *row) const;
46 
47  // Outputs diagnostic information.
48  void Print() const;
49 
50  // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
51  double BaselineAngle() const;
52  // Computes and returns the linespacing at the middle of the overlap
53  // between this and other.
54  double SpaceBetween(const BaselineRow &other) const;
55  // Computes and returns the displacement of the center of the line
56  // perpendicular to the given direction.
57  double PerpDisp(const FCOORD &direction) const;
58  // Computes the y coordinate at the given x using the straight baseline
59  // defined by baseline_pt1_ and baseline_pt2_.
60  double StraightYAtX(double x) const;
61 
62  // Fits a straight baseline to the points. Returns true if it had enough
63  // points to be reasonably sure of the fitted baseline.
64  // If use_box_bottoms is false, baseline positions are formed by
65  // considering the outlines of the blobs.
66  bool FitBaseline(bool use_box_bottoms);
67  // Modifies an existing result of FitBaseline to be parallel to the given
68  // vector if that produces a better result.
69  void AdjustBaselineToParallel(int debug, const FCOORD &direction);
70  // Modifies the baseline to snap to the textline grid if the existing
71  // result is not good enough.
  // The grid is described by line_spacing and line_offset.
  // NOTE(review): the meaning of the returned double is not stated in this
  // header - presumably an error/distance measure; confirm in the .cpp.
72  double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing,
73  double line_offset);
74 
75 private:
76  // Sets up displacement_modes_ with the top few modes of the perpendicular
77  // distance of each blob from the given direction vector, after rounding.
78  void SetupBlobDisplacements(const FCOORD &direction);
79 
80  // Fits a line in the given direction to blobs that are close to the given
81  // target_offset perpendicular displacement from the direction. The fit
82  // error is allowed to be cheat_allowance worse than the existing fit, and
83  // will still be used.
84  // If cheat_allowance > 0, the new fit will be good and replace the current
85  // fit if it has better fit (with cheat) OR its error is below
86  // max_baseline_error_ and the old fit is marked bad.
87  // Otherwise the new fit will only replace the old if it is really better,
88  // or the old fit is marked bad and the new fit has sufficient points, as
89  // well as being within the max_baseline_error_.
90  void FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance,
91  double target_offset);
92  // Returns the perpendicular distance of the point from the straight
93  // baseline.
94  float PerpDistanceFromBaseline(const FCOORD &pt) const;
95  // Computes the bounding box of the row.
96  void ComputeBoundingBox();
97 
98  // The blobs of the row to which this BaselineRow adds extra information
99  // during baseline fitting. Note that blobs_ could easily come from either
100  // a TO_ROW or a ColPartition.
101  BLOBNBOX_LIST *blobs_;
102  // Bounding box of all the blobs.
103  TBOX bounding_box_;
104  // Fitter used to fit lines to the blobs.
105  DetLineFit fitter_;
106  // 2 points on the straight baseline.
107  FCOORD baseline_pt1_;
108  FCOORD baseline_pt2_;
109  // Set of modes of displacements. They indicate preferable baseline positions.
  // NOTE(review): <vector> is not among the includes visible in this header;
  // presumably it arrives through one of the project headers - confirm.
110  std::vector<double> displacement_modes_;
111  // Quantization factor used for displacement_modes_.
112  double disp_quant_factor_;
113  // Half the acceptance range of blob displacements for computing the
114  // error during a constrained fit.
115  double fit_halfrange_;
116  // Max baseline error before a line is regarded as fitting badly.
117  double max_baseline_error_;
118  // The error of fit of the baseline.
119  double baseline_error_;
120  // True if this row seems to have a good baseline.
121  bool good_baseline_;
122 };
123 
124 // Class to compute and hold baseline data for a TO_BLOCK.
// NOTE(review): the embedded listing numbers jump from 124 to 126 and from
// 128 to 130, so two lines appear to have been dropped from this listing:
// presumably the class-head line and the line that opens the body containing
// the delete loop below (most likely the destructor). Restore them from the
// original header before compiling.
126 public:
  // Builds the baseline data for the given block.
  // NOTE(review): the parameters presumably initialise debug_level_,
  // non_text_block_ and block_ respectively - the body is not in this header.
127  BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block);
128 
  // Deletes every BaselineRow pointer held in rows_.
  // NOTE(review): no copy constructor or copy assignment is declared in this
  // listing; if the class is left copyable, a copy would delete the same rows
  // twice. Consider deleting the copy operations - confirm intent.
130  for (auto row : rows_) {
131  delete row;
132  }
133  }
134 
  // Returns the TO_BLOCK pointer stored in block_.
135  TO_BLOCK *block() const {
136  return block_;
137  }
  // Returns the stored skew angle. Per the comment on good_skew_angle_, the
  // value is only valid when a good enough baseline was found.
138  double skew_angle() const {
139  return skew_angle_;
140  }
141 
142  // Computes and returns the absolute error of the given perp_disp from the
143  // given linespacing model.
144  static double SpacingModelError(double perp_disp, double line_spacing, double line_offset);
145 
146  // Fits straight line baselines and computes the skew angle from the
147  // median angle. Returns true if a good angle is found.
148  // If use_box_bottoms is false, baseline positions are formed by
149  // considering the outlines of the blobs.
150  bool FitBaselinesAndFindSkew(bool use_box_bottoms);
151 
152  // Refits the baseline to a constrained angle, using the stored block
153  // skew if good enough, otherwise the supplied default skew.
154  void ParallelizeBaselines(double default_block_skew);
155 
156  // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
157  void SetupBlockParameters() const;
158 
159  // Processing that is required before fitting baseline splines, but requires
160  // linear baselines in order to be successful:
161  // Removes noise if required
162  // Separates out underlines
163  // Pre-associates blob fragments.
164  // TODO(rays/joeliu) This entire section of code is inherited from the past
165  // and could be improved/eliminated.
166  // page_tr is used to size a debug window.
167  void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise);
168 
169  // Fits splines to the textlines, or creates fake QSPLINES from the straight
170  // baselines that are already on the TO_ROWs.
171  // As a side-effect, computes the xheights of the rows and the block.
172  // Although x-height estimation is conceptually separate, it is part of
173  // detecting perspective distortion and therefore baseline fitting.
174  void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord);
175 
176  // Draws the (straight) baselines and final blobs colored according to
177  // what was discarded as noise and what is associated with each row.
178  void DrawFinalRows(const ICOORD &page_tr);
179 
180  // Render the generated spline baselines for this block on pix_in.
  // NOTE(review): Image is not declared in the visible part of this header
  // (only struct Pix is forward-declared); presumably it is provided by one
  // of the included project headers - confirm.
181  void DrawPixSpline(Image pix_in);
182 
183 private:
184  // Top-level line-spacing calculation. Computes an estimate of the line-
185  // spacing, using the current baselines in the TO_ROWS of the block, and
186  // then refines it by fitting a regression line to the baseline positions
187  // as a function of their integer index.
188  // Returns true if it seems that the model is a reasonable fit to the
189  // observations.
190  bool ComputeLineSpacing();
191 
192  // Computes the deskewed vertical position of each baseline in the block and
193  // stores them in the given vector.
194  void ComputeBaselinePositions(const FCOORD &direction, std::vector<double> *positions);
195 
196  // Computes an estimate of the line spacing of the block from the median
197  // of the spacings between adjacent overlapping textlines.
198  void EstimateLineSpacing();
199 
200  // Refines the line spacing of the block by fitting a regression
201  // line to the deskewed y-position of each baseline as a function of its
202  // estimated line index, allowing for a small error in the initial linespacing
203  // and choosing the best available model.
204  void RefineLineSpacing(const std::vector<double> &positions);
205 
206  // Given an initial estimate of line spacing (m_in) and the positions of each
207  // baseline, computes the line spacing of the block more accurately in m_out,
208  // and the corresponding intercept in c_out, and the number of spacings seen
209  // in index_delta. Returns the error of fit to the line spacing model.
210  double FitLineSpacingModel(const std::vector<double> &positions, double m_in, double *m_out,
211  double *c_out, int *index_delta);
212 
213  // The block to which this class adds extra information used during baseline
214  // calculation.
215  TO_BLOCK *block_;
216  // The rows in the block that we will be working with.
  // Each element is deleted by the loop near the top of this class, so the
  // pointers are owned by this object.
217  std::vector<BaselineRow *> rows_;
218  // Amount of debugging output to provide.
219  int debug_level_;
220  // True if the block is non-text (graphic).
221  bool non_text_block_;
222  // True if the block has at least one good enough baseline to compute the
223  // skew angle and therefore skew_angle_ is valid.
224  bool good_skew_angle_;
225  // Angle of skew in radians using the conventional anticlockwise from x-axis.
226  double skew_angle_;
227  // Current best estimate line spacing in pixels perpendicular to skew_angle_.
228  double line_spacing_;
229  // Offset for baseline positions, in pixels. Each baseline is at
230  // line_spacing_ * n + line_offset_ for integer n, which represents
231  // [textline] line number in a line numbering system that has line 0 on or
232  // at least near the x-axis. Not equal to the actual line number of a line
233  // within a block as most blocks are not near the x-axis.
234  double line_offset_;
235  // The error of the line spacing model.
236  double model_error_;
237 };
238 
// Page-level holder for baseline detection: it keeps the page skew, a debug
// level and a vector of BaselineBlock pointers (see the private members).
// NOTE(review): the embedded listing numbers jump from 238 to 240 and from
// 242 to 244, so two lines appear to have been dropped from this listing:
// presumably the class-head line and the line that opens the body containing
// the delete loop below (most likely the destructor). Restore them from the
// original header before compiling.
240 public:
  // Builds the detector for the given list of blocks.
  // NOTE(review): debug_level and page_skew presumably initialise the
  // same-named members, and blocks_ is presumably filled from the TO_BLOCK
  // list - the body is not in this header, so confirm in the .cpp.
241  BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks);
242 
  // Deletes every BaselineBlock pointer held in blocks_.
  // NOTE(review): no copy constructor or copy assignment is declared in this
  // listing; if the class is left copyable, a copy would delete the same
  // blocks twice. Consider deleting the copy operations - confirm intent.
244  for (auto block : blocks_) {
245  delete block;
246  }
247  }
248 
249  // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
250  // block-wise and page-wise data to smooth small blocks/rows, and applies
251  // smoothing based on block/page-level skew and block-level linespacing.
252  void ComputeStraightBaselines(bool use_box_bottoms);
253 
254  // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
255  // other associated side-effects, including pre-associating blobs, computing
256  // x-heights and displaying debug information.
257  // NOTE that ComputeStraightBaselines must have been called first as this
258  // sets up data in the TO_ROWs upon which this function depends.
259  void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines,
260  bool remove_noise, bool show_final_rows, Textord *textord);
261 
262 private:
263  // Average (median) skew of the blocks on the page among those that have
264  // a good angle of their own.
265  FCOORD page_skew_;
266  // Amount of debug output to produce.
267  int debug_level_;
268  // The blocks that we are working with.
  // Each element is deleted by the loop near the top of this class, so the
  // pointers are owned by this object.
269  std::vector<BaselineBlock *> blocks_;
270 };
271 
272 } // namespace tesseract
273 
274 #endif // TESSERACT_TEXTORD_BASELINEDETECT_H_
integer coordinate
Definition: points.h:36
bool FitBaseline(bool use_box_bottoms)
double PerpDisp(const FCOORD &direction) const
const TBOX & bounding_box() const
double BaselineAngle() const
void AdjustBaselineToParallel(int debug, const FCOORD &direction)
double SpaceBetween(const BaselineRow &other) const
BaselineRow(double line_size, TO_ROW *to_row)
double StraightYAtX(double x) const
double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, double line_offset)
void SetupOldLineParameters(TO_ROW *row) const
void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord)
bool FitBaselinesAndFindSkew(bool use_box_bottoms)
TO_BLOCK * block() const
BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
void DrawFinalRows(const ICOORD &page_tr)
void ParallelizeBaselines(double default_block_skew)
void DrawPixSpline(Image pix_in)
void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeStraightBaselines(bool use_box_bottoms)