tesseract  5.0.0
picofeat.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: picofeat.cpp
3  ** Purpose: Definition of pico-features.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 #include "picofeat.h"
19 
20 #include "classify.h"
21 #include "featdefs.h"
22 #include "fpoint.h"
23 #include "mfoutline.h"
24 #include "ocrfeatures.h"
25 #include "params.h"
26 #include "trainingsample.h"
27 
28 #include <cmath>
29 #include <cstdio>
30 
31 namespace tesseract {
32 
33 /*---------------------------------------------------------------------------
34  Variables
35 ----------------------------------------------------------------------------*/
36 
/* Tunable length of a single pico-feature (default 0.05).
   ConvertSegmentToPicoFeat divides each segment's length by this value and
   rounds to decide how many pico-features to emit for that segment. */
37 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
38 
39 /*---------------------------------------------------------------------------
40  Private Function Prototypes
41 ----------------------------------------------------------------------------*/
/* Adds one or more pico-features, evenly spaced along the segment from
   Start to End, to FeatureSet. */
42 void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet);
43 
/* Walks Outline edge by edge and converts every edge whose end point is not
   marked hidden into pico-features appended to FeatureSet. */
44 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
45 
/* Subtracts the mean PicoFeatX value of FeatureSet from the PicoFeatX
   parameter of each of its features. */
46 void NormalizePicoX(FEATURE_SET FeatureSet);
47 
48 /*----------------------------------------------------------------------------
49  Public Code
50 ----------------------------------------------------------------------------*/
51 /*---------------------------------------------------------------------------*/
61  auto FeatureSet = new FEATURE_SET_STRUCT(MAX_PICO_FEATURES);
62  auto Outlines = ConvertBlob(Blob);
63  float XScale, YScale;
64  NormalizeOutlines(Outlines, &XScale, &YScale);
65  auto RemainingOutlines = Outlines;
66  iterate(RemainingOutlines) {
67  auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
68  ConvertToPicoFeatures2(Outline, FeatureSet);
69  }
70  if (classify_norm_method == baseline) {
71  NormalizePicoX(FeatureSet);
72  }
73  FreeOutlines(Outlines);
74  return (FeatureSet);
75 
76 } /* ExtractPicoFeatures */
77 
78 /*----------------------------------------------------------------------------
79  Private Code
80 ----------------------------------------------------------------------------*/
81 /*---------------------------------------------------------------------------*/
95 void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet) {
96  float Angle;
97  float Length;
98  int NumFeatures;
99  FPOINT Center;
100  FPOINT Delta;
101  int i;
102 
103  Angle = NormalizedAngleFrom(Start, End, 1.0);
104  Length = DistanceBetween(*Start, *End);
105  NumFeatures = static_cast<int>(floor(Length / classify_pico_feature_length + 0.5));
106  if (NumFeatures < 1) {
107  NumFeatures = 1;
108  }
109 
110  /* compute vector for one pico feature */
111  Delta.x = XDelta(*Start, *End) / NumFeatures;
112  Delta.y = YDelta(*Start, *End) / NumFeatures;
113 
114  /* compute position of first pico feature */
115  Center.x = Start->x + Delta.x / 2.0;
116  Center.y = Start->y + Delta.y / 2.0;
117 
118  /* compute each pico feature in segment and add to feature set */
119  for (i = 0; i < NumFeatures; i++) {
120  auto Feature = new FEATURE_STRUCT(&PicoFeatDesc);
121  Feature->Params[PicoFeatDir] = Angle;
122  Feature->Params[PicoFeatX] = Center.x;
123  Feature->Params[PicoFeatY] = Center.y;
124  AddFeature(FeatureSet, Feature);
125 
126  Center.x += Delta.x;
127  Center.y += Delta.y;
128  }
129 } /* ConvertSegmentToPicoFeat */
130 
131 /*---------------------------------------------------------------------------*/
144 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
145  MFOUTLINE Next;
146  MFOUTLINE First;
147  MFOUTLINE Current;
148 
149  if (DegenerateOutline(Outline)) {
150  return;
151  }
152 
153  First = Outline;
154  Current = First;
155  Next = NextPointAfter(Current);
156  do {
157  /* note that an edge is hidden if the ending point of the edge is
158  marked as hidden. This situation happens because the order of
159  the outlines is reversed when they are converted from the old
160  format. In the old format, a hidden edge is marked by the
161  starting point for that edge. */
162  if (!(PointAt(Next)->Hidden)) {
163  ConvertSegmentToPicoFeat(&(PointAt(Current)->Point), &(PointAt(Next)->Point), FeatureSet);
164  }
165 
166  Current = Next;
167  Next = NextPointAfter(Current);
168  } while (Current != First);
169 
170 } /* ConvertToPicoFeatures2 */
171 
172 /*---------------------------------------------------------------------------*/
181 void NormalizePicoX(FEATURE_SET FeatureSet) {
182  int i;
183  FEATURE Feature;
184  float Origin = 0.0;
185 
186  for (i = 0; i < FeatureSet->NumFeatures; i++) {
187  Feature = FeatureSet->Features[i];
188  Origin += Feature->Params[PicoFeatX];
189  }
190  Origin /= FeatureSet->NumFeatures;
191 
192  for (i = 0; i < FeatureSet->NumFeatures; i++) {
193  Feature = FeatureSet->Features[i];
194  Feature->Params[PicoFeatX] -= Origin;
195  }
196 } /* NormalizePicoX */
197 
198 /*---------------------------------------------------------------------------*/
205  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
206  std::vector<INT_FEATURE_STRUCT> bl_features;
207  tesseract::TrainingSample *sample =
208  tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features);
209  if (sample == nullptr) {
210  return nullptr;
211  }
212 
213  uint32_t num_features = sample->num_features();
214  const INT_FEATURE_STRUCT *features = sample->features();
215  auto feature_set = new FEATURE_SET_STRUCT(num_features);
216  for (uint32_t f = 0; f < num_features; ++f) {
217  auto feature = new FEATURE_STRUCT(&IntFeatDesc);
218  feature->Params[IntX] = features[f].X;
219  feature->Params[IntY] = features[f].Y;
220  feature->Params[IntDir] = features[f].Theta;
221  AddFeature(feature_set, feature);
222  }
223  delete sample;
224 
225  return feature_set;
226 } /* ExtractIntCNFeatures */
227 
228 /*---------------------------------------------------------------------------*/
235  const INT_FX_RESULT_STRUCT &fx_info) {
236  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
237  std::vector<INT_FEATURE_STRUCT> bl_features;
238  tesseract::TrainingSample *sample =
239  tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features);
240  if (sample == nullptr) {
241  return nullptr;
242  }
243 
244  auto feature_set = new FEATURE_SET_STRUCT(1);
245  auto feature = new FEATURE_STRUCT(&IntFeatDesc);
246 
247  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
248  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
249  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
250  AddFeature(feature_set, feature);
251  delete sample;
252 
253  return feature_set;
254 } /* ExtractIntGeoFeatures */
255 
256 } // namespace tesseract.
#define double_VAR(name, val, comment)
Definition: params.h:365
float DistanceBetween(FPOINT A, FPOINT B)
Definition: fpoint.cpp:29
float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale)
Definition: fpoint.cpp:44
#define XDelta(A, B)
Definition: fpoint.h:38
#define YDelta(A, B)
Definition: fpoint.h:39
#define MAX_PICO_FEATURES
Definition: picofeat.h:45
#define iterate(l)
Definition: oldlist.h:91
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:151
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:95
@ PicoFeatDir
Definition: picofeat.h:43
@ PicoFeatX
Definition: picofeat.h:43
@ PicoFeatY
Definition: picofeat.h:43
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, std::vector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:79
TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc
@ baseline
Definition: mfoutline.h:53
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:34
const FEATURE_DESC_STRUCT IntFeatDesc
@ GeoTop
Definition: picofeat.h:37
@ GeoWidth
Definition: picofeat.h:38
@ GeoBottom
Definition: picofeat.h:36
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:39
@ IntDir
Definition: picofeat.h:31
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:144
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:181
double classify_pico_feature_length
Definition: picofeat.cpp:37
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:204
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:60
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
Definition: mfoutline.cpp:249
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:234
Definition: fpoint.h:29
float y
Definition: fpoint.h:30
float x
Definition: fpoint.h:30
std::vector< float > Params
Definition: ocrfeatures.h:66
std::vector< FEATURE_STRUCT * > Features
Definition: ocrfeatures.h:85
uint32_t num_features() const
const INT_FEATURE_STRUCT * features() const
int geo_feature(int index) const