|
| ClassPruner (int max_classes) |
|
| ~ClassPruner () |
|
void | ComputeScores (const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features) |
|
void | AdjustForExpectedNumFeatures (const uint16_t *expected_num_features, int cutoff_strength) |
|
void | DisableDisabledClasses (const UNICHARSET &unicharset) |
|
void | DisableFragments (const UNICHARSET &unicharset) |
|
void | NormalizeForXheight (int norm_multiplier, const uint8_t *normalization_factors) |
|
void | NoNormalization () |
|
void | PruneAndSort (int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset) |
|
void | DebugMatch (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const |
|
void | SummarizeResult (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uint16_t *expected_num_features, int norm_multiplier, const uint8_t *normalization_factors) const |
|
int | SetupResults (std::vector< CP_RESULT_STRUCT > *results) const |
|
Definition at line 132 of file intmatcher.cpp.
◆ ClassPruner()
tesseract::ClassPruner::ClassPruner |
( |
int |
max_classes | ) |
|
|
inline |
Definition at line 134 of file intmatcher.cpp.
141 max_classes_ = max_classes;
144 class_count_ =
new int[rounded_classes_];
145 norm_count_ =
new int[rounded_classes_];
146 sort_key_ =
new int[rounded_classes_ + 1];
147 sort_index_ =
new int[rounded_classes_ + 1];
148 for (
int i = 0; i < rounded_classes_; i++) {
151 pruning_threshold_ = 0;
#define WERDS_PER_CP_VECTOR
#define NUM_BITS_PER_CLASS
int RoundUp(int n, int block_size)
◆ ~ClassPruner()
tesseract::ClassPruner::~ClassPruner |
( |
| ) |
|
|
inline |
Definition at line 156 of file intmatcher.cpp.
157 delete[] class_count_;
158 delete[] norm_count_;
160 delete[] sort_index_;
◆ AdjustForExpectedNumFeatures()
void tesseract::ClassPruner::AdjustForExpectedNumFeatures |
( |
const uint16_t * |
expected_num_features, |
|
|
int |
cutoff_strength |
|
) |
| |
|
inline |
Adjusts the scores according to the number of expected features. Used in lieu of a constant bias, this penalizes classes that expect more features than are present. Thus an actual 'c' will score higher for 'c' than for 'e', even though almost all the features match 'e' as well as 'c', because 'e' expects more features to be present.
Definition at line 235 of file intmatcher.cpp.
236 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
237 if (num_features_ < expected_num_features[class_id]) {
238 int deficit = expected_num_features[class_id] - num_features_;
239 class_count_[class_id] -=
240 class_count_[class_id] * deficit / (num_features_ * cutoff_strength + deficit);
◆ ComputeScores()
Computes the scores for every class in the character set by summing the weights for each feature, and stores the sums internally in class_count_.
Definition at line 165 of file intmatcher.cpp.
167 num_features_ = num_features;
168 auto num_pruners = int_templates->NumClassPruners;
169 for (
int f = 0; f < num_features; ++f) {
170 const INT_FEATURE_STRUCT *feature = &features[f];
178 for (
unsigned pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
181 const uint32_t *pruner_word_ptr = int_templates->ClassPruners[pruner_set]->p[x][y][theta];
183 uint32_t pruner_word = *pruner_word_ptr++;
#define CLASS_PRUNER_CLASS_MASK
◆ DebugMatch()
Prints debug info on the class pruner matches for the pruned classes only.
Definition at line 324 of file intmatcher.cpp.
326 int num_pruners = int_templates->NumClassPruners;
327 int max_num_classes = int_templates->NumClasses;
328 for (
int f = 0; f < num_features_; ++f) {
329 const INT_FEATURE_STRUCT *feature = &features[f];
330 tprintf(
"F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
336 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
339 const uint32_t *pruner_word_ptr = int_templates->ClassPruners[pruner_set]->p[x][y][theta];
341 uint32_t pruner_word = *pruner_word_ptr++;
342 for (
int word_class = 0; word_class < 16 && class_id < max_num_classes;
343 ++word_class, ++class_id) {
344 if (norm_count_[class_id] >= pruning_threshold_) {
345 tprintf(
" %s=%d,", classify.ClassIDToDebugStr(int_templates, class_id, 0).c_str(),
void tprintf(const char *format,...)
◆ DisableDisabledClasses()
void tesseract::ClassPruner::DisableDisabledClasses |
( |
const UNICHARSET & |
unicharset | ) |
|
|
inline |
Zeros the scores for classes disabled in the unicharset. Implements the black-list to recognize a subset of the character set.
Definition at line 247 of file intmatcher.cpp.
248 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
249 if (!unicharset.get_enabled(class_id)) {
250 class_count_[class_id] = 0;
◆ DisableFragments()
void tesseract::ClassPruner::DisableFragments |
( |
const UNICHARSET & |
unicharset | ) |
|
|
inline |
Zeros the scores of fragments.
Definition at line 256 of file intmatcher.cpp.
257 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
260 if (unicharset.get_fragment(class_id)) {
261 class_count_[class_id] = 0;
◆ NoNormalization()
void tesseract::ClassPruner::NoNormalization |
( |
| ) |
|
|
inline |
The no-op normalization: copies the class_count_ array to norm_count_ unchanged.
Definition at line 278 of file intmatcher.cpp.
279 for (
int class_id = 0; class_id < max_classes_; class_id++) {
280 norm_count_[class_id] = class_count_[class_id];
◆ NormalizeForXheight()
void tesseract::ClassPruner::NormalizeForXheight |
( |
int |
norm_multiplier, |
|
|
const uint8_t * |
normalization_factors |
|
) |
| |
|
inline |
Normalizes the counts for xheight, putting the normalized result in norm_count_. Applies a simple subtractive penalty for incorrect vertical position provided by the normalization_factors array, indexed by character class, and scaled by the norm_multiplier.
Definition at line 270 of file intmatcher.cpp.
271 for (
int class_id = 0; class_id < max_classes_; class_id++) {
272 norm_count_[class_id] =
273 class_count_[class_id] - ((norm_multiplier * normalization_factors[class_id]) >> 8);
◆ PruneAndSort()
void tesseract::ClassPruner::PruneAndSort |
( |
int |
pruning_factor, |
|
|
int |
keep_this, |
|
|
bool |
max_of_non_fragments, |
|
|
const UNICHARSET & |
unicharset |
|
) |
| |
|
inline |
Prunes the classes using <the maximum count> * pruning_factor/256 as a threshold for keeping classes. If max_of_non_fragments is true, fragments are ignored when computing the maximum count.
Definition at line 287 of file intmatcher.cpp.
290 for (
int c = 0; c < max_classes_; ++c) {
291 if (norm_count_[c] > max_count &&
297 (!max_of_non_fragments || !unicharset.get_fragment(c))) {
298 max_count = norm_count_[c];
302 pruning_threshold_ = (max_count * pruning_factor) >> 8;
304 if (pruning_threshold_ < 1) {
305 pruning_threshold_ = 1;
308 for (
int class_id = 0; class_id < max_classes_; class_id++) {
309 if (norm_count_[class_id] >= pruning_threshold_ || class_id == keep_this) {
311 sort_index_[num_classes_] = class_id;
312 sort_key_[num_classes_] = norm_count_[class_id];
317 if (num_classes_ > 1) {
318 HeapSort(num_classes_, sort_key_, sort_index_);
◆ SetupResults()
int tesseract::ClassPruner::SetupResults |
( |
std::vector< CP_RESULT_STRUCT > * |
results | ) |
const |
|
inline |
Copies the pruned, sorted classes into the output results and returns the number of classes.
Definition at line 374 of file intmatcher.cpp.
376 results->resize(num_classes_);
377 for (
int c = 0; c < num_classes_; ++c) {
378 (*results)[c].Class = sort_index_[num_classes_ - c];
379 (*results)[c].Rating =
380 1.0f - sort_key_[num_classes_ - c] /
◆ SummarizeResult()
void tesseract::ClassPruner::SummarizeResult |
( |
const Classify & |
classify, |
|
|
const INT_TEMPLATES_STRUCT * |
int_templates, |
|
|
const uint16_t * |
expected_num_features, |
|
|
int |
norm_multiplier, |
|
|
const uint8_t * |
normalization_factors |
|
) |
| const |
|
inline |
Prints a summary of the pruner result.
Definition at line 357 of file intmatcher.cpp.
360 tprintf(
"CP:%d classes, %d features:\n", num_classes_, num_features_);
361 for (
int i = 0; i < num_classes_; ++i) {
362 int class_id = sort_index_[num_classes_ - i];
363 std::string class_string = classify.ClassIDToDebugStr(int_templates, class_id, 0);
365 "%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n", class_string.c_str(),
366 class_count_[class_id], expected_num_features[class_id],
367 (norm_multiplier * normalization_factors[class_id]) >> 8, sort_key_[num_classes_ - i],
The documentation for this class was generated from the following file: