tesseract  5.0.0
blobclass.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: blobclass.cpp
3  ** Purpose: High level blob classification and training routines.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 #include <cstdio>
19 
20 #include "classify.h"
21 #include "featdefs.h"
22 #include "mf.h"
23 #include "normfeat.h"
24 
25 namespace tesseract {
26 
27 /*---------------------------------------------------------------------------*/
28 
29 // Extracts features from the given blob and saves them in the tr_file_data_
30 // member variable.
31 // fontname: Name of font that this blob was printed in.
32 // cn_denorm: Character normalization transformation to apply to the blob.
33 // fx_info: Character normalization parameters computed with cn_denorm.
34 // blob_text: Ground truth text for the blob.
35 void Classify::LearnBlob(const std::string &fontname, TBLOB *blob, const DENORM &cn_denorm,
36  const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) {
37  std::unique_ptr<CHAR_DESC_STRUCT> CharDesc(new CHAR_DESC_STRUCT(feature_defs_));
38  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
39  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
40  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
41  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
42 
43  if (ValidCharDescription(feature_defs_, CharDesc.get())) {
44  // Label the features with a class name and font name.
45  tr_file_data_ += "\n";
46  tr_file_data_ += fontname;
47  tr_file_data_ += " ";
48  tr_file_data_ += blob_text;
49  tr_file_data_ += "\n";
50 
51  // write micro-features to file and clean up
52  WriteCharDescription(feature_defs_, CharDesc.get(), tr_file_data_);
53  } else {
54  tprintf("Blob learned was invalid!\n");
55  }
56 } // LearnBlob
57 
58 // Writes stored training data to a .tr file based on the given filename.
59 // Returns false on error.
60 bool Classify::WriteTRFile(const char *filename) {
61  bool result = false;
62  std::string tr_filename = filename;
63  tr_filename += ".tr";
64  FILE *fp = fopen(tr_filename.c_str(), "wb");
65  if (fp) {
66  result = tesseract::Serialize(fp, &tr_file_data_[0], tr_file_data_.length());
67  fclose(fp);
68  }
69  tr_file_data_.resize(0);
70  return result;
71 }
72 
73 } // namespace tesseract
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc)
Definition: featdefs.cpp:131
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc, std::string &str)
Definition: featdefs.cpp:109
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &cn_denorm)
Definition: mf.cpp:41
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:251
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
Definition: normfeat.cpp:56
void LearnBlob(const std::string &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:35
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:447
bool WriteTRFile(const char *filename)
Definition: blobclass.cpp:60
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:204
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:234