tesseract  5.0.0
errorcounter.cpp
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
15 
16 #ifdef HAVE_CONFIG_H
17 # include "config_auto.h"
18 #endif
19 
20 #include "errorcounter.h"
21 
22 #include "fontinfo.h"
23 #include "sampleiterator.h"
24 #include "shapeclassifier.h"
25 #include "shapetable.h"
26 #include "trainingsample.h"
27 #include "trainingsampleset.h"
28 #include "unicity_table.h"
29 
30 #include <algorithm>
31 #include <ctime>
32 
33 namespace tesseract {
34 
// Two result ratings closer together than this (1/32) are ranked as equal
// choices when errors are counted.
const double kRatingEpsilon = 0.03125;
37 
38 // Tests a classifier, computing its error rate.
39 // See errorcounter.h for description of arguments.
40 // Iterates over the samples, calling the classifier in normal/silent mode.
41 // If the classifier makes a CT_UNICHAR_TOPN_ERR error, and the appropriate
42 // report_level is set (4 or greater), it will then call the classifier again
43 // with a debug flag and a keep_this argument to find out what is going on.
44 double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_level,
45  CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
46  const std::vector<Image > &page_images, SampleIterator *it,
47  double *unichar_error, double *scaled_error,
48  std::string *fonts_report) {
49  const int fontsize = it->sample_set()->NumFonts();
50  ErrorCounter counter(classifier->GetUnicharset(), fontsize);
51  std::vector<UnicharRating> results;
52 
53  clock_t start = clock();
54  unsigned total_samples = 0;
55  double unscaled_error = 0.0;
56  // Set a number of samples on which to run the classify debug mode.
57  int error_samples = report_level > 3 ? report_level * report_level : 0;
58  // Iterate over all the samples, accumulating errors.
59  for (it->Begin(); !it->AtEnd(); it->Next()) {
60  TrainingSample *mutable_sample = it->MutableSample();
61  int page_index = mutable_sample->page_num();
62  Image page_pix =
63  0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
64  // No debug, no keep this.
65  classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results);
66  bool debug_it = false;
67  int correct_id = mutable_sample->class_id();
68  if (counter.unicharset_.has_special_codes() &&
69  (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||
70  correct_id == UNICHAR_BROKEN)) {
71  // This is junk so use the special counter.
72  debug_it = counter.AccumulateJunk(report_level > 3, results, mutable_sample);
73  } else {
74  debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode, fontinfo_table, results,
75  mutable_sample);
76  }
77  if (debug_it && error_samples > 0) {
78  // Running debug, keep the correct answer, and debug the classifier.
79  tprintf("Error on sample %d: %s Classifier debug output:\n", it->GlobalSampleIndex(),
80  it->sample_set()->SampleToString(*mutable_sample).c_str());
81 #ifndef GRAPHICS_DISABLED
82  classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
83 #endif
84  --error_samples;
85  }
86  ++total_samples;
87  }
88  const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
89  // Create the appropriate error report.
90  unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,
91  unichar_error, fonts_report);
92  if (scaled_error != nullptr) {
93  *scaled_error = counter.scaled_error_;
94  }
95  if (report_level > 1 && total_samples > 0) {
96  // It is useful to know the time in microseconds/char.
97  tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time,
98  1000000.0 * total_time / total_samples);
99  }
100  return unscaled_error;
101 }
102 
103 // Tests a pair of classifiers, debugging errors of the new against the old.
104 // See errorcounter.h for description of arguments.
105 // Iterates over the samples, calling the classifiers in normal/silent mode.
106 // If the new_classifier makes a boosting_mode error that the old_classifier
107 // does not, it will then call the new_classifier again with a debug flag
108 // and a keep_this argument to find out what is going on.
109 void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,
110  CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
111  const std::vector<Image > &page_images, SampleIterator *it) {
112  int fontsize = it->sample_set()->NumFonts();
113  ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
114  ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
115  std::vector<UnicharRating> results;
116 
117  int total_samples = 0;
118  int error_samples = 25;
119  int total_new_errors = 0;
120  // Iterate over all the samples, accumulating errors.
121  for (it->Begin(); !it->AtEnd(); it->Next()) {
122  TrainingSample *mutable_sample = it->MutableSample();
123  int page_index = mutable_sample->page_num();
124  Image page_pix =
125  0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
126  // No debug, no keep this.
127  old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
128  &results);
129  int correct_id = mutable_sample->class_id();
130  if (correct_id != 0 && !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
131  results, mutable_sample)) {
132  // old classifier was correct, check the new one.
133  new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
134  &results);
135  if (correct_id != 0 && new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
136  results, mutable_sample)) {
137  tprintf("New Error on sample %d: Classifier debug output:\n", it->GlobalSampleIndex());
138  ++total_new_errors;
139  new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1, correct_id, &results);
140  if (results.size() > 0 && error_samples > 0) {
141 #ifndef GRAPHICS_DISABLED
142  new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
143 #endif
144  --error_samples;
145  }
146  }
147  }
148  ++total_samples;
149  }
150  tprintf("Total new errors = %d\n", total_new_errors);
151 }
152 
153 // Constructor is private. Only anticipated use of ErrorCounter is via
154 // the static ComputeErrorRate.
155 ErrorCounter::ErrorCounter(const UNICHARSET &unicharset, int fontsize)
156  : scaled_error_(0.0)
157  , rating_epsilon_(kRatingEpsilon)
158  , unichar_counts_(unicharset.size(), unicharset.size(), 0)
159  , ok_score_hist_(0, 101)
160  , bad_score_hist_(0, 101)
161  , unicharset_(unicharset) {
162  Counts empty_counts;
163  font_counts_.clear();
164  font_counts_.resize(fontsize, empty_counts);
165  multi_unichar_counts_.clear();
166  multi_unichar_counts_.resize(unicharset.size(), 0);
167 }
168 
169 // Accumulates the errors from the classifier results on a single sample.
170 // Returns true if debug is true and a CT_UNICHAR_TOPN_ERR error occurred.
171 // boosting_mode selects the type of error to be used for boosting and the
172 // is_error_ member of sample is set according to whether the required type
173 // of error occurred. The font_table provides access to font properties
174 // for error counting and shape_table is used to understand the relationship
175 // between unichar_ids and shape_ids in the results
176 bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,
177  const FontInfoTable &font_table,
178  const std::vector<UnicharRating> &results,
179  TrainingSample *sample) {
180  int num_results = results.size();
181  int answer_actual_rank = -1;
182  int font_id = sample->font_id();
183  int unichar_id = sample->class_id();
184  sample->set_is_error(false);
185  if (num_results == 0) {
186  // Reject. We count rejects as a separate category, but still mark the
187  // sample as an error in case any training module wants to use that to
188  // improve the classifier.
189  sample->set_is_error(true);
190  ++font_counts_[font_id].n[CT_REJECT];
191  } else {
192  // Find rank of correct unichar answer, using rating_epsilon_ to allow
193  // different answers to score as equal. (Ignoring the font.)
194  int epsilon_rank = 0;
195  int answer_epsilon_rank = -1;
196  int num_top_answers = 0;
197  double prev_rating = results[0].rating;
198  bool joined = false;
199  bool broken = false;
200  int res_index = 0;
201  while (res_index < num_results) {
202  if (results[res_index].rating < prev_rating - rating_epsilon_) {
203  ++epsilon_rank;
204  prev_rating = results[res_index].rating;
205  }
206  if (results[res_index].unichar_id == unichar_id && answer_epsilon_rank < 0) {
207  answer_epsilon_rank = epsilon_rank;
208  answer_actual_rank = res_index;
209  }
210  if (results[res_index].unichar_id == UNICHAR_JOINED && unicharset_.has_special_codes()) {
211  joined = true;
212  } else if (results[res_index].unichar_id == UNICHAR_BROKEN &&
213  unicharset_.has_special_codes()) {
214  broken = true;
215  } else if (epsilon_rank == 0) {
216  ++num_top_answers;
217  }
218  ++res_index;
219  }
220  if (answer_actual_rank != 0) {
221  // Correct result is not absolute top.
222  ++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR];
223  if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) {
224  sample->set_is_error(true);
225  }
226  }
227  if (answer_epsilon_rank == 0) {
228  ++font_counts_[font_id].n[CT_UNICHAR_TOP_OK];
229  // Unichar OK, but count if multiple unichars.
230  if (num_top_answers > 1) {
231  ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
232  ++multi_unichar_counts_[unichar_id];
233  }
234  // Check to see if any font in the top choice has attributes that match.
235  // TODO(rays) It is easy to add counters for individual font attributes
236  // here if we want them.
237  if (font_table.SetContainsFontProperties(font_id, results[answer_actual_rank].fonts)) {
238  // Font attributes were matched.
239  // Check for multiple properties.
240  if (font_table.SetContainsMultipleFontProperties(results[answer_actual_rank].fonts)) {
241  ++font_counts_[font_id].n[CT_OK_MULTI_FONT];
242  }
243  } else {
244  // Font attributes weren't matched.
245  ++font_counts_[font_id].n[CT_FONT_ATTR_ERR];
246  }
247  } else {
248  // This is a top unichar error.
249  ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR];
250  if (boosting_mode == CT_UNICHAR_TOP1_ERR) {
251  sample->set_is_error(true);
252  }
253  // Count maps from unichar id to wrong unichar id.
254  ++unichar_counts_(unichar_id, results[0].unichar_id);
255  if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) {
256  // It is also a 2nd choice unichar error.
257  ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR];
258  if (boosting_mode == CT_UNICHAR_TOP2_ERR) {
259  sample->set_is_error(true);
260  }
261  }
262  if (answer_epsilon_rank < 0) {
263  // It is also a top-n choice unichar error.
264  ++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR];
265  if (boosting_mode == CT_UNICHAR_TOPN_ERR) {
266  sample->set_is_error(true);
267  }
268  answer_epsilon_rank = epsilon_rank;
269  }
270  }
271  // Compute mean number of return values and mean rank of correct answer.
272  font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;
273  font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank;
274  if (joined) {
275  ++font_counts_[font_id].n[CT_OK_JOINED];
276  }
277  if (broken) {
278  ++font_counts_[font_id].n[CT_OK_BROKEN];
279  }
280  }
281  // If it was an error for boosting then sum the weight.
282  if (sample->is_error()) {
283  scaled_error_ += sample->weight();
284  if (debug) {
285  tprintf("%d results for char %s font %d :", num_results,
286  unicharset_.id_to_unichar(unichar_id), font_id);
287  for (int i = 0; i < num_results; ++i) {
288  tprintf(" %.3f : %s\n", results[i].rating,
289  unicharset_.id_to_unichar(results[i].unichar_id));
290  }
291  return true;
292  }
293  int percent = 0;
294  if (num_results > 0) {
295  percent = IntCastRounded(results[0].rating * 100);
296  }
297  bad_score_hist_.add(percent, 1);
298  } else {
299  int percent = 0;
300  if (answer_actual_rank >= 0) {
301  percent = IntCastRounded(results[answer_actual_rank].rating * 100);
302  }
303  ok_score_hist_.add(percent, 1);
304  }
305  return false;
306 }
307 
308 // Accumulates counts for junk. Counts only whether the junk was correctly
309 // rejected or not.
310 bool ErrorCounter::AccumulateJunk(bool debug, const std::vector<UnicharRating> &results,
311  TrainingSample *sample) {
312  // For junk we accept no answer, or an explicit shape answer matching the
313  // class id of the sample.
314  const int num_results = results.size();
315  const int font_id = sample->font_id();
316  const int unichar_id = sample->class_id();
317  int percent = 0;
318  if (num_results > 0) {
319  percent = IntCastRounded(results[0].rating * 100);
320  }
321  if (num_results > 0 && results[0].unichar_id != unichar_id) {
322  // This is a junk error.
323  ++font_counts_[font_id].n[CT_ACCEPTED_JUNK];
324  sample->set_is_error(true);
325  // It counts as an error for boosting too so sum the weight.
326  scaled_error_ += sample->weight();
327  bad_score_hist_.add(percent, 1);
328  return debug;
329  } else {
330  // Correctly rejected.
331  ++font_counts_[font_id].n[CT_REJECTED_JUNK];
332  sample->set_is_error(false);
333  ok_score_hist_.add(percent, 1);
334  }
335  return false;
336 }
337 
338 // Creates a report of the error rate. The report_level controls the detail
339 // that is reported to stderr via tprintf:
340 // 0 -> no output.
341 // >=1 -> bottom-line error rate.
342 // >=3 -> font-level error rate.
343 // boosting_mode determines the return value. It selects which (un-weighted)
344 // error rate to return.
345 // The fontinfo_table from MasterTrainer provides the names of fonts.
346 // The it determines the current subset of the training samples.
347 // If not nullptr, the top-choice unichar error rate is saved in unichar_error.
348 // If not nullptr, the report string is saved in fonts_report.
349 // (Ignoring report_level).
350 double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
351  const FontInfoTable &fontinfo_table, const SampleIterator &it,
352  double *unichar_error, std::string *fonts_report) {
353  // Compute totals over all the fonts and report individual font results
354  // when required.
355  Counts totals;
356  int fontsize = font_counts_.size();
357  for (int f = 0; f < fontsize; ++f) {
358  // Accumulate counts over fonts.
359  totals += font_counts_[f];
360  std::string font_report;
361  if (ReportString(false, font_counts_[f], font_report)) {
362  if (fonts_report != nullptr) {
363  *fonts_report += fontinfo_table.at(f).name;
364  *fonts_report += ": ";
365  *fonts_report += font_report;
366  *fonts_report += "\n";
367  }
368  if (report_level > 2) {
369  // Report individual font error rates.
370  tprintf("%s: %s\n", fontinfo_table.at(f).name, font_report.c_str());
371  }
372  }
373  }
374  // Report the totals.
375  std::string total_report;
376  bool any_results = ReportString(true, totals, total_report);
377  if (fonts_report != nullptr && fonts_report->empty()) {
378  // Make sure we return something even if there were no samples.
379  *fonts_report = "NoSamplesFound: ";
380  *fonts_report += total_report;
381  *fonts_report += "\n";
382  }
383  if (report_level > 0) {
384  // Report the totals.
385  std::string total_report;
386  if (any_results) {
387  tprintf("TOTAL Scaled Err=%.4g%%, %s\n", scaled_error_ * 100.0, total_report.c_str());
388  }
389  // Report the worst substitution error only for now.
390  if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) {
391  int charsetsize = unicharset_.size();
392  int worst_uni_id = 0;
393  int worst_result_id = 0;
394  int worst_err = 0;
395  for (int u = 0; u < charsetsize; ++u) {
396  for (int v = 0; v < charsetsize; ++v) {
397  if (unichar_counts_(u, v) > worst_err) {
398  worst_err = unichar_counts_(u, v);
399  worst_uni_id = u;
400  worst_result_id = v;
401  }
402  }
403  }
404  if (worst_err > 0) {
405  tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n", worst_uni_id,
406  unicharset_.id_to_unichar(worst_uni_id), unicharset_.id_to_unichar(worst_result_id),
407  worst_err, totals.n[CT_UNICHAR_TOP1_ERR],
408  100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]);
409  }
410  }
411  tprintf("Multi-unichar shape use:\n");
412  for (int u = 0; u < multi_unichar_counts_.size(); ++u) {
413  if (multi_unichar_counts_[u] > 0) {
414  tprintf("%d multiple answers for unichar: %s\n", multi_unichar_counts_[u],
415  unicharset_.id_to_unichar(u));
416  }
417  }
418  tprintf("OK Score histogram:\n");
419  ok_score_hist_.print();
420  tprintf("ERROR Score histogram:\n");
421  bad_score_hist_.print();
422  }
423 
424  double rates[CT_SIZE];
425  if (!ComputeRates(totals, rates)) {
426  return 0.0;
427  }
428  // Set output values if asked for.
429  if (unichar_error != nullptr) {
430  *unichar_error = rates[CT_UNICHAR_TOP1_ERR];
431  }
432  return rates[boosting_mode];
433 }
434 
435 // Sets the report string to a combined human and machine-readable report
436 // string of the error rates.
437 // Returns false if there is no data, leaving report unchanged, unless
438 // even_if_empty is true.
439 bool ErrorCounter::ReportString(bool even_if_empty, const Counts &counts, std::string &report) {
440  // Compute the error rates.
441  double rates[CT_SIZE];
442  if (!ComputeRates(counts, rates) && !even_if_empty) {
443  return false;
444  }
445  // Using %.4g%%, the length of the output string should exactly match the
446  // length of the format string, but in case of overflow, allow for +eddd
447  // on each number.
448  const int kMaxExtraLength = 5; // Length of +eddd.
449  // Keep this format string and the snprintf in sync with the CountTypes enum.
450  const char format_str[] =
451  "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] "
452  "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, "
453  "FontAttr=%.4g%%, Multi=%.4g%%, "
454  "Answers=%.3g, Rank=%.3g, "
455  "OKjunk=%.4g%%, Badjunk=%.4g%%";
456  constexpr size_t max_str_len = sizeof(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1;
457  char formatted_str[max_str_len];
458  snprintf(formatted_str, max_str_len, format_str, rates[CT_UNICHAR_TOP1_ERR] * 100.0,
459  rates[CT_UNICHAR_TOP2_ERR] * 100.0, rates[CT_UNICHAR_TOPN_ERR] * 100.0,
460  rates[CT_UNICHAR_TOPTOP_ERR] * 100.0, rates[CT_OK_MULTI_UNICHAR] * 100.0,
461  rates[CT_OK_JOINED] * 100.0, rates[CT_OK_BROKEN] * 100.0, rates[CT_REJECT] * 100.0,
462  rates[CT_FONT_ATTR_ERR] * 100.0, rates[CT_OK_MULTI_FONT] * 100.0, rates[CT_NUM_RESULTS],
463  rates[CT_RANK], 100.0 * rates[CT_REJECTED_JUNK], 100.0 * rates[CT_ACCEPTED_JUNK]);
464  report = formatted_str;
465  // Now append each field of counts with a tab in front so the result can
466  // be loaded into a spreadsheet.
467  for (int ct : counts.n) {
468  report += "\t" + std::to_string(ct);
469  }
470  return true;
471 }
472 
473 // Computes the error rates and returns in rates which is an array of size
474 // CT_SIZE. Returns false if there is no data, leaving rates unchanged.
475 bool ErrorCounter::ComputeRates(const Counts &counts, double rates[CT_SIZE]) {
476  const int ok_samples =
477  counts.n[CT_UNICHAR_TOP_OK] + counts.n[CT_UNICHAR_TOP1_ERR] + counts.n[CT_REJECT];
478  const int junk_samples = counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK];
479  // Compute rates for normal chars.
480  double denominator = static_cast<double>(std::max(ok_samples, 1));
481  for (int ct = 0; ct <= CT_RANK; ++ct) {
482  rates[ct] = counts.n[ct] / denominator;
483  }
484  // Compute rates for junk.
485  denominator = static_cast<double>(std::max(junk_samples, 1));
486  for (int ct = CT_REJECTED_JUNK; ct <= CT_ACCEPTED_JUNK; ++ct) {
487  rates[ct] = counts.n[ct] / denominator;
488  }
489  return ok_samples != 0 || junk_samples != 0;
490 }
491 
492 ErrorCounter::Counts::Counts() {
493  memset(n, 0, sizeof(n[0]) * CT_SIZE);
494 }
495 // Adds other into this for computing totals.
496 void ErrorCounter::Counts::operator+=(const Counts &other) {
497  for (int ct = 0; ct < CT_SIZE; ++ct) {
498  n[ct] += other.n[ct];
499  }
500 }
501 
502 } // namespace tesseract.
const double kRatingEpsilon
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int IntCastRounded(double x)
Definition: helpers.h:175
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: points.h:372
@ UNICHAR_SPACE
Definition: unicharset.h:36
@ UNICHAR_BROKEN
Definition: unicharset.h:38
@ UNICHAR_JOINED
Definition: unicharset.h:37
@ CT_UNICHAR_TOPN_ERR
Definition: errorcounter.h:76
@ CT_UNICHAR_TOP_OK
Definition: errorcounter.h:70
@ CT_UNICHAR_TOP1_ERR
Definition: errorcounter.h:74
@ CT_UNICHAR_TOP2_ERR
Definition: errorcounter.h:75
@ CT_OK_MULTI_FONT
Definition: errorcounter.h:83
@ CT_REJECTED_JUNK
Definition: errorcounter.h:86
@ CT_UNICHAR_TOPTOP_ERR
Definition: errorcounter.h:77
@ CT_FONT_ATTR_ERR
Definition: errorcounter.h:82
@ CT_ACCEPTED_JUNK
Definition: errorcounter.h:87
@ CT_OK_MULTI_UNICHAR
Definition: errorcounter.h:78
@ CT_NUM_RESULTS
Definition: errorcounter.h:84
void print() const
Definition: statistc.cpp:548
void add(int32_t value, int32_t count)
Definition: statistc.cpp:99
bool has_special_codes() const
Definition: unicharset.h:757
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
size_t size() const
Definition: unicharset.h:355
virtual int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector< UnicharRating > *results)
virtual const UNICHARSET & GetUnicharset() const
void DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id)
UNICHAR_ID class_id() const
static void DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const std::vector< Image > &page_images, SampleIterator *it)
static double ComputeErrorRate(ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const std::vector< Image > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, std::string *fonts_report)
const TrainingSampleSet * sample_set() const
TrainingSample * MutableSample() const
std::string SampleToString(const TrainingSample &sample) const