tesseract  5.0.0
normalis.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: normalis.h (Formerly denorm.h)
3  * Description: Code for the DENORM class.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1992, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #ifndef NORMALIS_H
20 #define NORMALIS_H
21 
22 #include "image.h"
23 
24 #include <tesseract/export.h>
25 
26 #include <vector>
27 
28 struct Pix;
29 
30 namespace tesseract {
31 
32 const int kBlnCellHeight = 256; // Full cell height used for baseline normalization.
33 const int kBlnXHeight = 128; // Target x-height used for baseline normalization.
34 const int kBlnBaselineOffset = 64; // Baseline offset used for baseline normalization.
35 
36 class BLOCK;
37 class FCOORD;
38 class TBOX;
39 class UNICHARSET;
40 
41 struct TBLOB;
42 struct TPOINT;
43 
44 // Possible normalization methods. Use NEGATIVE values as these also
45 // double up as markers for the last sub-classifier.
46 enum NormalizationMode {
47  NM_BASELINE = -3, // The original BL normalization mode.
48  NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic.
49  NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode.
50 };
51 
52 class TESS_API DENORM {
53 public:
54  DENORM();
55 
56  // Copying a DENORM is allowed.
57  DENORM(const DENORM &);
58  DENORM &operator=(const DENORM &);
59  ~DENORM();
60 
61  // Setup the normalization transformation parameters.
62  // The normalizations applied to a blob are as follows:
63  // 1. An optional block layout rotation that was applied during layout
64  // analysis to make the textlines horizontal.
65  // 2. A normalization transformation (LocalNormTransform):
66  // Subtract the "origin"
67  // Apply an x,y scaling.
68  // Apply an optional rotation.
69  // Add back a final translation.
70  // The origin is in the block-rotated space, and is usually something like
71  // the x-middle of the word at the baseline.
72  // 3. Zero or more further normalization transformations that are applied
73  // in sequence, with a similar pattern to the first normalization transform.
74  //
75  // A DENORM holds the parameters of a single normalization, and can execute
76  // both the LocalNormTransform (a forwards normalization), and the
77  // LocalDenormTransform which is an inverse transform or de-normalization.
78  // A DENORM may point to a predecessor DENORM, which is actually the earlier
79  // normalization, so the full normalization sequence involves executing all
80  // predecessors first and then the transform in "this".
81  // Let x be image coordinates and suppose we have normalization classes A, B, C
82  // where we first apply A then B then C to get normalized x':
83  // x' = CBAx
84  // Then the backwards (to original coordinates) would be:
85  // x = A^-1 B^-1 C^-1 x'
86  // and A = B->predecessor_ and B = C->predecessor_
87  // NormTransform executes all predecessors recursively, and then this.
88  // NormTransform would be used to transform an image-based feature to
89  // normalized space for use in a classifier
90  // DenormTransform inverts this and then all predecessors. It can be
91  // used to get back to the original image coordinates from normalized space.
92  // The LocalNormTransform member executes just the transformation
93  // in "this" without the layout rotation or any predecessors. It would be
94  // used to run each successive normalization, eg the word normalization,
95  // and later the character normalization.
96 
97  // Arguments:
98  // block: if not nullptr, then this is the first transformation, and
99  // block->re_rotation() needs to be used after the Denorm
100  // transformation to get back to the image coords.
101  // rotation: if not nullptr, apply this rotation after translation to the
102  // origin and scaling. (Usually a classify rotation.)
103  // predecessor: if not nullptr, then predecessor has been applied to the
104  // input space and needs to be undone to complete the inverse.
105  // The above pointers are not owned by this DENORM and are assumed to live
106  // longer than this denorm, except rotation, which is deep copied on input.
107  //
108  // x_origin: The x origin which will be mapped to final_xshift in the result.
109  // y_origin: The y origin which will be mapped to final_yshift in the result.
110  // Added to result of row->baseline(x) if not nullptr. (NOTE(review): stale - there is no row argument in the signature.)
111  //
112  // x_scale: scale factor for the x-coordinate.
113  // y_scale: scale factor for the y-coordinate. Ignored if segs is given. (NOTE(review): stale - there is no segs argument in the signature.)
114  // Note that these scale factors apply to the same x and y system as the
115  // x-origin and y-origin apply, ie after any block rotation, but before
116  // the rotation argument is applied.
117  //
118  // final_xshift: The x component of the final translation.
119  // final_yshift: The y component of the final translation.
120  //
121  // In theory, any of the commonly used normalizations can be setup here:
122  // * Traditional baseline normalization on a word:
123  // SetupNormalization(block, nullptr, nullptr,
124  // box.x_middle(), baseline,
125  // kBlnXHeight / x_height, kBlnXHeight / x_height,
126  // 0, kBlnBaselineOffset);
127  // * "Numeric mode" baseline normalization on a word, in which the blobs
128  // are positioned with the bottom as the baseline, is achieved by making
129  // a separate DENORM for each blob.
130  // SetupNormalization(block, nullptr, nullptr,
131  // box.x_middle(), box.bottom(),
132  // kBlnXHeight / x_height, kBlnXHeight / x_height,
133  // 0, kBlnBaselineOffset);
134  // * Anisotropic character normalization used by IntFx.
135  // SetupNormalization(nullptr, nullptr, denorm,
136  // centroid_x, centroid_y,
137  // 51.2 / ry, 51.2 / rx, 128, 128);
138  // * Normalize blob height to x-height (current OSD):
139  // SetupNormalization(nullptr, &rotation, nullptr,
140  // box.rotational_x_middle(rotation),
141  // box.rotational_y_middle(rotation),
142  // kBlnXHeight / box.rotational_height(rotation),
143  // kBlnXHeight / box.rotational_height(rotation),
144  // 0, kBlnBaselineOffset);
145  // * Secondary normalization for classification rotation (current):
146  // FCOORD rotation = block->classify_rotation();
147  // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
148  // SetupNormalization(nullptr, &rotation, denorm,
149  // box.rotational_x_middle(rotation),
150  // box.rotational_y_middle(rotation),
151  // target_height / box.rotational_height(rotation),
152  // target_height / box.rotational_height(rotation),
153  // 0, kBlnBaselineOffset);
154  // * Proposed new normalizations for CJK: Between them there is then
155  // no need for further normalization at all, and the character fills the cell.
156  // ** Replacement for baseline normalization on a word:
157  // Scales height and width independently so that modal height and pitch
158  // fill the cell respectively.
159  // float cap_height = x_height / CCStruct::kXHeightCapRatio;
160  // SetupNormalization(block, nullptr, nullptr,
161  // box.x_middle(), cap_height / 2.0f,
162  // kBlnCellHeight / fixed_pitch,
163  // kBlnCellHeight / cap_height,
164  // 0, 0);
165  // ** Secondary normalization for classification (with rotation) (proposed):
166  // Requires a simple translation to the center of the appropriate character
167  // cell, no further scaling and a simple rotation (or nothing) about the
168  // cell center.
169  // FCOORD rotation = block->classify_rotation();
170  // SetupNormalization(nullptr, &rotation, denorm,
171  // fixed_pitch_cell_center,
172  // 0.0f,
173  // 1.0f,
174  // 1.0f,
175  // 0, 0);
176  void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
177  float x_origin, float y_origin, float x_scale, float y_scale,
178  float final_xshift, float final_yshift);
179 
180  // Sets up the DENORM to execute a non-linear transformation based on
181  // preserving an even distribution of stroke edges. The transformation
182  // operates only within the given box, scaling input coords within the box
183  // non-linearly to a box of target_width by target_height, with all other
184  // coords being clipped to the box edge. As with SetupNormalization above,
185  // final_xshift and final_yshift are applied after scaling, and the bottom-
186  // left of box is used as a pre-scaling origin.
187  // x_coords is a collection of the x-coords of vertical edges for each
188  // y-coord starting at box.bottom().
189  // y_coords is a collection of the y-coords of horizontal edges for each
190  // x-coord starting at box.left().
191  // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
192  // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
193  // The second-level vectors must all be sorted in ascending order.
194  void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
195  float target_height, float final_xshift, float final_yshift,
196  const std::vector<std::vector<int>> &x_coords,
197  const std::vector<std::vector<int>> &y_coords);
198 
199  // Transforms the given coords one step forward to normalized space, without
200  // using any block rotation or predecessor.
201  void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const;
202  void LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const;
203  // Transforms the given coords forward to normalized space using the
204  // full transformation sequence defined by the block rotation, the
205  // predecessors, deepest first, and finally this. If first_norm is not
206  // nullptr, then the first and deepest transformation used is first_norm,
207  // ending with this, and the block rotation will not be applied.
208  void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const;
209  void NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const;
210  // Transforms the given coords one step back to source space, without
211  // using any block rotation or predecessor.
212  void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const;
213  void LocalDenormTransform(const FCOORD &pt, FCOORD *original) const;
214  // Transforms the given coords all the way back to source image space using
215  // the full transformation sequence defined by this and its predecessors
216  // recursively, shallowest first, and finally any block re_rotation.
217  // If last_denorm is not nullptr, then the last transformation used will
218  // be last_denorm, and the block re_rotation will never be executed.
219  void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const;
220  void DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const;
221 
222  // Normalize a blob using blob transformations. Less accurate, but
223  // more accurately copies the old way.
224  void LocalNormBlob(TBLOB *blob) const;
225 
226  // Fills in the x-height range accepted by the given unichar_id in blob
227  // coordinates, given its bounding box in the usual baseline-normalized
228  // coordinates, with some initial crude x-height estimate (such as word
229  // size) and this denoting the transformation that was used.
230  // Also returns the amount the character must have shifted up or down.
231  void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht,
232  float *max_xht, float *yshift) const;
233 
234  // Prints the content of the DENORM for debug purposes.
235  void Print() const;
236 
237  Image pix() const { // Returns pix_, the best available image held by this DENORM.
238  return pix_;
239  }
240  void set_pix(Image pix) { // Stores pix in pix_ by plain assignment.
241  pix_ = pix;
242  }
243  bool inverse() const { // Returns inverse_: true if the source image is white-on-black.
244  return inverse_;
245  }
246  void set_inverse(bool value) { // Sets inverse_ (the white-on-black flag) to value.
247  inverse_ = value;
248  }
249  const DENORM *RootDenorm() const { // Returns the first DENORM in the predecessor chain.
250  if (predecessor_ != nullptr) { // Not the root yet: keep walking back through predecessors.
251  return predecessor_->RootDenorm();
252  }
253  return this; // No predecessor, so this DENORM is the root.
254  }
255  const DENORM *predecessor() const { // Returns predecessor_, the previous transformation in the chain (may be nullptr).
256  return predecessor_;
257  }
258  // Accessors - perhaps should not be needed.
259  float x_scale() const { // Returns x_scale_, the scale factor for x coords.
260  return x_scale_;
261  }
262  float y_scale() const { // Returns y_scale_, the scale factor for y coords.
263  return y_scale_;
264  }
265  const BLOCK *block() const { // Returns block_, the non-owned block the word came from (may be nullptr).
266  return block_;
267  }
268  void set_block(const BLOCK *block) { // Stores the block pointer in block_; the pointer is only copied.
269  block_ = block;
270  }
271 
272 private:
273  // Free allocated memory and clear pointers.
274  void Clear();
275  // Setup default values.
276  void Init();
277 
278  // Best available image.
279  Image pix_;
280  // True if the source image is white-on-black.
281  bool inverse_;
282  // Block the word came from. If not null, block->re_rotation() takes the
283  // "untransformed" coordinates even further back to the original image.
284  // Used only on the first DENORM in a chain.
285  const BLOCK *block_;
286  // Rotation to apply after translation to the origin and scaling, before the final translation.
287  const FCOORD *rotation_;
288  // Previous transformation in a chain.
289  const DENORM *predecessor_;
290  // Non-linear transformation maps directly from each integer offset from the
291  // origin to the corresponding x-coord. Owned by the DENORM.
292  std::vector<float> *x_map_;
293  // Non-linear transformation maps directly from each integer offset from the
294  // origin to the corresponding y-coord. Owned by the DENORM.
295  std::vector<float> *y_map_;
296  // x-coordinate to be mapped to final_xshift_ in the result.
297  float x_origin_;
298  // y-coordinate to be mapped to final_yshift_ in the result.
299  float y_origin_;
300  // Scale factors for x and y coords. Applied to pre-rotation system.
301  float x_scale_;
302  float y_scale_;
303  // Destination coords of the x_origin_ and y_origin_.
304  float final_xshift_;
305  float final_yshift_;
306 };
307 
308 } // namespace tesseract
309 
310 #endif
const int kBlnXHeight
Definition: normalis.h:33
const int kBlnCellHeight
Definition: normalis.h:32
NormalizationMode
Definition: normalis.h:46
@ NM_BASELINE
Definition: normalis.h:47
@ NM_CHAR_ANISOTROPIC
Definition: normalis.h:49
@ NM_CHAR_ISOTROPIC
Definition: normalis.h:48
const int kBlnBaselineOffset
Definition: normalis.h:34
const BLOCK * block() const
Definition: normalis.h:265
float y_scale() const
Definition: normalis.h:262
const DENORM * predecessor() const
Definition: normalis.h:255
void set_block(const BLOCK *block)
Definition: normalis.h:268
void set_inverse(bool value)
Definition: normalis.h:246
void set_pix(Image pix)
Definition: normalis.h:240
const DENORM * RootDenorm() const
Definition: normalis.h:249
Image pix() const
Definition: normalis.h:237
bool inverse() const
Definition: normalis.h:243
float x_scale() const
Definition: normalis.h:259
#define TESS_API
Definition: export.h:34