tesseract  5.0.0
trainingsample.h
Go to the documentation of this file.
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
15 
16 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_
17 #define TESSERACT_TRAINING_TRAININGSAMPLE_H_
18 
19 #include "elst.h"
20 #include "featdefs.h"
21 #include "intfx.h"
22 #include "intmatcher.h"
23 #include "matrix.h"
24 #include "mf.h"
25 #include "mfdefs.h"
26 #include "picofeat.h"
27 #include "shapetable.h"
28 #include "unicharset.h"
29 
30 struct Pix;
31 
32 namespace tesseract {
33 
34 class IntFeatureMap;
35 class IntFeatureSpace;
36 class ShapeTable;
37 
// Number of elements of cn_feature_.
static const int kNumCNParams = 4;
// Number of ways to shift the features when randomizing.
static const int kSampleYShiftSize = 5;
// Number of ways to scale the features when randomizing.
static const int kSampleScaleSize = 3;
// Total number of different ways to manipulate the features when randomizing.
// The first and last combinations are removed to avoid an excessive
// top movement (first) and an identity transformation (last).
// WARNING: To avoid patterned duplication of samples, be sure to keep
// kSampleRandomSize prime!
// Eg with current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3)
// kSampleRandomSize is 13, which is prime.
static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
// Compile-time trial division helper for the check below. Returns true if no
// integer d with divisor <= d and d * d <= n divides n. Written as a single
// return statement so that it is a valid C++11 constexpr function.
constexpr bool SampleRandomSizeHasNoDivisorFrom(int n, int divisor) {
  return divisor * divisor > n
             ? true
             : (n % divisor != 0 && SampleRandomSizeHasNoDivisorFrom(n, divisor + 1));
}
// Enforces the primality requirement described above at compile time, so that
// changing kSampleYShiftSize or kSampleScaleSize cannot silently break it.
static_assert(kSampleRandomSize > 1 && SampleRandomSizeHasNoDivisorFrom(kSampleRandomSize, 2),
              "kSampleRandomSize must be prime to avoid patterned duplication of samples");
53 
55 public:
57  : class_id_(INVALID_UNICHAR_ID)
58  , font_id_(0)
59  , page_num_(0)
60  , num_features_(0)
61  , num_micro_features_(0)
62  , outline_length_(0)
63  , features_(nullptr)
64  , micro_features_(nullptr)
65  , weight_(1.0)
66  , max_dist_(0.0)
67  , sample_index_(0)
68  , features_are_indexed_(false)
69  , features_are_mapped_(false)
70  , is_error_(false) {}
71  ~TrainingSample();
72 
73  // Saves the given features into a TrainingSample. The features are copied,
74  // so may be deleted afterwards. Delete the return value after use.
75  static TrainingSample *CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info,
76  const TBOX &bounding_box,
77  const INT_FEATURE_STRUCT *features, int num_features);
78  // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
79  FEATURE_STRUCT *GetCNFeature() const;
80  // Constructs and returns a copy "randomized" by the method given by
81  // the randomizer index. If index is out of [0, kSampleRandomSize) then
82  // an exact copy is returned.
83  TrainingSample *RandomizedCopy(int index) const;
84  // Constructs and returns an exact copy.
85  TrainingSample *Copy() const;
86 
87  // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
88  // members, which is mostly the mapped features, and the weight.
89  // It is assumed these can all be reconstructed from what is saved.
90  // Writes to the given file. Returns false in case of error.
91  bool Serialize(FILE *fp) const;
92  // Creates from the given file. Returns nullptr in case of error.
93  // If swap is true, assumes a big/little-endian swap is needed.
94  static TrainingSample *DeSerializeCreate(bool swap, FILE *fp);
95  // Reads from the given file. Returns false in case of error.
96  // If swap is true, assumes a big/little-endian swap is needed.
97  bool DeSerialize(bool swap, FILE *fp);
98 
99  // Extracts the needed information from the CHAR_DESC_STRUCT.
100  void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type,
101  CHAR_DESC_STRUCT *char_desc);
102 
103  // Sets the mapped_features_ from the features_ using the provided
104  // feature_space to the indexed versions of the features.
105  void IndexFeatures(const IntFeatureSpace &feature_space);
106 
107  // Returns a pix representing the sample. (Int features only.)
108  Image RenderToPix(const UNICHARSET *unicharset) const;
109  // Displays the features in the given window with the given color.
110  void DisplayFeatures(ScrollView::Color color, ScrollView *window) const;
111 
112  // Returns a pix of the original sample image. The pix is padded all round
113  // by padding wherever possible.
114  // The returned Pix must be pixDestroyed after use.
115  // If the input page_pix is nullptr, nullptr is returned.
116  Image GetSamplePix(int padding, Image page_pix) const;
117 
118  // Accessors.
120  return class_id_;
121  }
// Sets class_id_, the unichar id that this sample represents.
122  void set_class_id(int id) {
123  class_id_ = id;
124  }
// Returns font_id_, the id of the font in which this sample was printed.
125  int font_id() const {
126  return font_id_;
127  }
// Sets font_id_, the id of the font in which this sample was printed.
128  void set_font_id(int id) {
129  font_id_ = id;
130  }
// Returns page_num_, the number of the page that the sample came from.
131  int page_num() const {
132  return page_num_;
133  }
// Sets page_num_, the number of the page that the sample came from.
134  void set_page_num(int page) {
135  page_num_ = page;
136  }
// Returns a const reference to bounding_box_, the bounding box of the sample
// in the original image.
137  const TBOX &bounding_box() const {
138  return bounding_box_;
139  }
// Copies box into bounding_box_, the bounding box of the sample in the
// original image.
140  void set_bounding_box(const TBOX &box) {
141  bounding_box_ = box;
142  }
// Returns num_features_, the number of INT_FEATURE_STRUCT entries in the
// features_ array.
143  uint32_t num_features() const {
144  return num_features_;
145  }
// Returns the features_ array pointer (read-only pointee). The element count
// is given by num_features(). The constructor initializes features_ to
// nullptr, so the result can be null.
146  const INT_FEATURE_STRUCT *features() const {
147  return features_;
148  }
// Returns num_micro_features_, the number of MicroFeature entries in the
// micro_features_ array.
149  uint32_t num_micro_features() const {
150  return num_micro_features_;
151  }
// Returns the micro_features_ array pointer (read-only pointee). The element
// count is given by num_micro_features(). The constructor initializes
// micro_features_ to nullptr, so the result can be null.
152  const MicroFeature *micro_features() const {
153  return micro_features_;
154  }
// Returns outline_length_, the total length of the outline in the baseline
// normalized coordinate space.
155  int outline_length() const {
156  return outline_length_;
157  }
// Returns element index of cn_feature_, an array of kNumCNParams floats that
// is indexed by the NORM_PARAM_NAME enum. The index is used directly with no
// range check here, so the caller must pass a value in [0, kNumCNParams).
158  float cn_feature(int index) const {
159  return cn_feature_[index];
160  }
// Returns element index of geo_feature_, an array of GeoCount ints that is
// indexed by the GeoParams enum in picofeat.h. The index is used directly
// with no range check here, so the caller must pass a value in [0, GeoCount).
161  int geo_feature(int index) const {
162  return geo_feature_[index];
163  }
// Returns weight_, the weight used for boosting training. This is part of the
// non-serialized cache data.
164  double weight() const {
165  return weight_;
166  }
// Sets weight_, the weight used for boosting training.
167  void set_weight(double value) {
168  weight_ = value;
169  }
// Returns max_dist_, the maximum distance to other samples of the same
// class/font used in computing the canonical sample. This is part of the
// non-serialized cache data.
170  double max_dist() const {
171  return max_dist_;
172  }
// Sets max_dist_, the maximum distance to other samples of the same
// class/font used in computing the canonical sample.
173  void set_max_dist(double value) {
174  max_dist_ = value;
175  }
// Returns sample_index_, the global index of this sample.
176  int sample_index() const {
177  return sample_index_;
178  }
// Sets sample_index_, the global index of this sample.
179  void set_sample_index(int value) {
180  sample_index_ = value;
181  }
// Returns the features_are_mapped_ flag. mapped_features() requires this flag
// to be set before it hands out mapped_features_.
182  bool features_are_mapped() const {
183  return features_are_mapped_;
184  }
// Returns a const reference to mapped_features_, interpreted as mapped
// features. ASSERT_HOST first checks that features_are_mapped_ is set.
185  const std::vector<int> &mapped_features() const {
186  ASSERT_HOST(features_are_mapped_);
187  return mapped_features_;
188  }
// Returns a const reference to mapped_features_, interpreted as indexed
// features. This is the same vector that mapped_features() returns; here
// ASSERT_HOST first checks that features_are_indexed_ is set instead.
189  const std::vector<int> &indexed_features() const {
190  ASSERT_HOST(features_are_indexed_);
191  return mapped_features_;
192  }
// Returns is_error_, which is true if the last classification was an error by
// the current definition.
193  bool is_error() const {
194  return is_error_;
195  }
// Sets is_error_, the flag recording whether the last classification was an
// error by the current definition.
196  void set_is_error(bool value) {
197  is_error_ = value;
198  }
199 
200 private:
201  // Unichar id that this sample represents. There obviously must be a
202  // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
203  UNICHAR_ID class_id_;
204  // Font id in which this sample was printed. Refers to a fontinfo_table_ in
205  // MasterTrainer.
206  int font_id_;
207  // Number of page that the sample came from.
208  int page_num_;
209  // Bounding box of sample in original image.
210  TBOX bounding_box_;
211  // Number of INT_FEATURE_STRUCT in features_ array.
212  uint32_t num_features_;
213  // Number of MicroFeature in micro_features_ array.
214  uint32_t num_micro_features_;
215  // Total length of outline in the baseline normalized coordinate space.
216  // See comment in WERD_RES class definition for a discussion of coordinate
217  // spaces.
218  int outline_length_;
219  // Array of features.
220  INT_FEATURE_STRUCT *features_;
221  // Array of micro-features.
222  MicroFeature *micro_features_;
223  // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
224  float cn_feature_[kNumCNParams];
225  // The one and only geometric feature. (Aims at replacing cn_feature_).
226  // Indexed by GeoParams enum in picofeat.h
227  int geo_feature_[GeoCount];
228 
229  // Non-serialized cache data.
230  // Weight used for boosting training.
231  double weight_;
232  // Maximum distance to other samples of same class/font used in computing
233  // the canonical sample.
234  double max_dist_;
235  // Global index of this sample.
236  int sample_index_;
237 
238 public:
239  // both are used in training tools
240  // hide after refactoring
241 
242  // Indexed/mapped features, as indicated by the bools below.
243  std::vector<int> mapped_features_;
246 
247 private:
248  // True if the last classification was an error by the current definition.
249  bool is_error_;
250 
251  // Randomizing factors.
252  static const int kYShiftValues[kSampleYShiftSize];
253  static const double kScaleValues[kSampleScaleSize];
254 };
255 
257 
258 } // namespace tesseract
259 
260 #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:803
#define ASSERT_HOST(x)
Definition: errcode.h:59
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
Definition: helpers.h:220
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:251
@ GeoCount
Definition: picofeat.h:40
int UNICHAR_ID
Definition: unichar.h:36
std::array< float,(int) MicroFeatureParameter::MFCount > MicroFeature
Definition: mfdefs.h:36
void set_max_dist(double value)
UNICHAR_ID class_id() const
uint32_t num_features() const
const std::vector< int > & mapped_features() const
const TBOX & bounding_box() const
void set_weight(double value)
const std::vector< int > & indexed_features() const
const INT_FEATURE_STRUCT * features() const
uint32_t num_micro_features() const
int geo_feature(int index) const
void set_is_error(bool value)
float cn_feature(int index) const
bool features_are_mapped() const
void set_bounding_box(const TBOX &box)
const MicroFeature * micro_features() const
std::vector< int > mapped_features_
void set_sample_index(int value)
#define TESS_API
Definition: export.h:34