tesseract  5.0.0
intmatcher.h
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.h
3  ** Purpose: Interface to high level generic classifier routines.
4  ** Author: Robert Moss
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 #ifndef INTMATCHER_H
18 #define INTMATCHER_H
19 
20 #include "intproto.h"
21 #include "params.h"
22 
23 namespace tesseract {
24 
25 // Character fragments could be present in the trained templates
26 // but turned on/off on a language-by-language basis or depending
27 // on particular properties of the corpus (e.g. when we expect the
28 // images to have low exposure).
29 extern BOOL_VAR_H(disable_character_fragments);
30 
   // Declares the integer parameter classify_integer_matcher_multiplier; the
   // definition lives outside this header.
   // NOTE(review): presumably the value passed as matcher_multiplier to
   // IntegerMatcher::ApplyCNCorrection -- confirm against the callers.
31 extern INT_VAR_H(classify_integer_matcher_multiplier);
32 
   // Forward declaration: this header only refers to tesseract::UnicharRating
   // through pointers (IntegerMatcher::Match and FindBestMatch), so the full
   // definition is not required here.
33 struct UnicharRating;
34 
34 
// Result record holding a float Rating and a Class id. The default
// constructor initializes Rating to 0.0f and Class to 0.
// NOTE(review): listing lines 35 (struct header) and 39 (Class member) were
// dropped from this page and are restored here. The struct name comes from
// the constructor; the CLASS_ID type is inferred from the CLASS_ID entry in
// this page's symbol index -- confirm against the upstream header.
35 struct CP_RESULT_STRUCT {
36  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
37 
38  float Rating;
39  CLASS_ID Class;
40 };
41 
// The same table dimension expressed as a bit count and as an entry count:
// 512 == 1 << 9, so the two macros must be changed together.
// SE_TABLE_SIZE is the array length of
// IntegerMatcher::similarity_evidence_table_; IntegerMatcher::kEvidenceTableBits
// carries the same value (9) as SE_TABLE_BITS.
46 #define SE_TABLE_BITS 9
47 #define SE_TABLE_SIZE 512
48 
// Evidence buffers used by IntegerMatcher (passed to UpdateTablesForFeature,
// FindBestMatch and the debug helpers), together with the routines that
// clear, accumulate and normalize them. The member functions are defined in
// intmatcher.cpp.
// NOTE(review): listing lines 49-52 were dropped from this page. The three
// data members are restored verbatim from this page's symbol index (which
// records them at intmatcher.h:50-52); the "struct" keyword on line 49 is
// inferred -- confirm against the upstream header.
49 struct ScratchEvidence {
   // One evidence byte per configuration.
50  uint8_t feature_evidence_[MAX_NUM_CONFIGS];
   // One running integer sum per configuration.
51  int sum_feature_evidence_[MAX_NUM_CONFIGS];
   // Evidence bytes indexed as [proto][proto index].
52  uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];
53 
   // The two Clear* routines only read the class template (const pointer);
   // NormalizeSums and UpdateSumOfProtoEvidences take it non-const.
54  void Clear(const INT_CLASS_STRUCT *class_template);
55  void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template);
56  void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures);
57  void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask);
58 };
59 
61 public:
62  // Integer Matcher Theta Fudge (0-255).
63  static const int kIntThetaFudge = 128;
64  // Bits in Similarity to Evidence Lookup (8-9).
65  static const int kEvidenceTableBits = 9;
66  // Integer Evidence Truncation Bits (8-14).
67  static const int kIntEvidenceTruncBits = 14;
68  // Similarity to Evidence Table Exponential Multiplier.
69  static const float kSEExponentialMultiplier;
70  // Center of Similarity Curve.
71  static const float kSimilarityCenter;
72 
73  IntegerMatcher(tesseract::IntParam *classify_debug_level);
74 
75  void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
76  int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
77  tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug,
78  bool SeparateDebugWindows);
79 
80  // Applies the CN normalization factor to the given rating and returns
81  // the modified rating.
82  float ApplyCNCorrection(float rating, int blob_length, int normalization_factor,
83  int matcher_multiplier);
84 
85  int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
86  int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray,
87  int AdaptProtoThreshold, int Debug);
88 
89  int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
90  int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray,
91  int AdaptFeatureThreshold, int Debug);
92 
93 private:
94  int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
95  int FeatureNum, const INT_FEATURE_STRUCT *Feature,
96  ScratchEvidence *evidence, int Debug);
97 
98  int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables,
99  tesseract::UnicharRating *Result);
100 
101 #ifndef GRAPHICS_DISABLED
102  void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
103  const ScratchEvidence &tables, int16_t NumFeatures, int Debug);
104 
105  void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask,
106  const ScratchEvidence &tables, bool SeparateDebugWindows);
107 
108  void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
109  int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
110  int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows);
111 #endif
112 
113 private:
114  tesseract::IntParam *classify_debug_level_;
115  uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
116  uint32_t evidence_table_mask_;
117  uint32_t mult_trunc_shift_bits_;
118  uint32_t table_trunc_shift_bits_;
119  uint32_t evidence_mult_mask_;
120 };
121 
122 } // namespace tesseract
123 
124 #endif
#define SE_TABLE_SIZE
Definition: intmatcher.h:47
#define MAX_NUM_PROTOS
Definition: intproto.h:48
#define MAX_PROTO_INDEX
Definition: intproto.h:44
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
BOOL_VAR_H(wordrec_display_splits)
int16_t PROTO_ID
Definition: matchdefs.h:40
INT_VAR_H(editor_image_xpos)
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:137
uint8_t FEATURE_ID
Definition: matchdefs.h:46
void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures)
void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask)
void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template)
Definition: intmatcher.cpp:702
uint8_t feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:50
uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:52
void Clear(const INT_CLASS_STRUCT *class_template)
Definition: intmatcher.cpp:697
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:51
static const int kIntThetaFudge
Definition: intmatcher.h:63
void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:482
static const int kEvidenceTableBits
Definition: intmatcher.h:65
int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:619
static const float kSEExponentialMultiplier
Definition: intmatcher.h:69
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:555
static const float kSimilarityCenter
Definition: intmatcher.h:71
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:67
IntegerMatcher(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:668