#include <tessdatamanager.h>
|
| TessdataManager () |
|
| TessdataManager (FileReader reader) |
|
| ~TessdataManager ()=default |
|
bool | swap () const |
|
bool | is_loaded () const |
|
void | LoadFileLater (const char *data_file_name) |
|
bool | Init (const char *data_file_name) |
|
bool | LoadMemBuffer (const char *name, const char *data, int size) |
|
void | OverwriteEntry (TessdataType type, const char *data, int size) |
|
bool | SaveFile (const char *filename, FileWriter writer) const |
|
void | Serialize (std::vector< char > *data) const |
|
void | Clear () |
|
void | Directory () const |
|
bool | IsComponentAvailable (TessdataType type) const |
|
bool | GetComponent (TessdataType type, TFile *fp) |
|
bool | GetComponent (TessdataType type, TFile *fp) const |
|
std::string | VersionString () const |
|
void | SetVersionString (const std::string &v_str) |
|
bool | IsBaseAvailable () const |
|
bool | IsLSTMAvailable () const |
|
const std::string & | GetDataFileName () const |
|
bool | CombineDataFiles (const char *language_data_path_prefix, const char *output_filename) |
|
bool | OverwriteComponents (const char *new_traineddata_filename, char **component_filenames, int num_new_components) |
|
bool | ExtractToFile (const char *filename) |
|
Definition at line 127 of file tessdatamanager.h.
◆ TessdataManager() [1/2]
tesseract::TessdataManager::TessdataManager |
( |
| ) |
|
Definition at line 42 of file tessdatamanager.cpp.
42 : reader_(
nullptr), is_loaded_(
false), swap_(
false) {
#define TESSERACT_VERSION_STR
void SetVersionString(const std::string &v_str)
◆ TessdataManager() [2/2]
tesseract::TessdataManager::TessdataManager |
( |
FileReader |
reader | ) |
|
|
explicit |
Definition at line 46 of file tessdatamanager.cpp.
47 : reader_(reader), is_loaded_(
false), swap_(
false) {
◆ ~TessdataManager()
tesseract::TessdataManager::~TessdataManager |
( |
| ) |
|
|
default |
◆ Clear()
void tesseract::TessdataManager::Clear |
( |
| ) |
|
◆ CombineDataFiles()
bool tesseract::TessdataManager::CombineDataFiles |
( |
const char * |
language_data_path_prefix, |
|
|
const char * |
output_filename |
|
) |
| |
Reads all the standard tesseract config and data files for a language at the given path and bundles them up into one binary data file. Returns true if the combined traineddata file was successfully written.
Definition at line 258 of file tessdatamanager.cpp.
261 for (
auto filesuffix : kTessdataFileSuffixes) {
263 ASSERT_HOST(TessdataTypeFromFileSuffix(filesuffix, &type));
264 std::string filename = language_data_path_prefix;
265 filename += filesuffix;
266 FILE *fp = fopen(filename.c_str(),
"rb");
270 tprintf(
"Load of file %s failed!\n", filename.c_str());
280 "Error: traineddata file must contain at least (a unicharset file"
281 "and inttemp) OR an lstm file.\n");
285 return SaveFile(output_filename,
nullptr);
void tprintf(const char *format,...)
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
bool IsLSTMAvailable() const
bool SaveFile(const char *filename, FileWriter writer) const
bool IsBaseAvailable() const
◆ Directory()
void tesseract::TessdataManager::Directory |
( |
| ) |
const |
Definition at line 213 of file tessdatamanager.cpp.
217 if (!entries_[i].empty()) {
218 tprintf(
"%u:%s:size=%zu, offset=%zu\n", i, kTessdataFileSuffixes[i], entries_[i].size(),
220 offset += entries_[i].size();
std::string VersionString() const
◆ ExtractToFile()
bool tesseract::TessdataManager::ExtractToFile |
( |
const char * |
filename | ) |
|
Extracts the tessdata component implied by the name of the input file from the combined traineddata loaded into TessdataManager. Writes the extracted component to the file indicated by the file name. E.g. if the filename given is somepath/somelang.unicharset, the unicharset component will be extracted from the data loaded into the TessdataManager and will be written to somepath/somelang.unicharset.
Returns:
  true if the component was successfully extracted, false if the component was not present in the traineddata loaded into TessdataManager.
Definition at line 306 of file tessdatamanager.cpp.
308 ASSERT_HOST(tesseract::TessdataManager::TessdataTypeFromFileName(filename, &type));
309 if (entries_[type].empty()) {
bool SaveDataToFile(const GenericVector< char > &data, const char *filename)
◆ GetComponent() [1/2]
Definition at line 227 of file tessdatamanager.cpp.
228 if (!is_loaded_ && !
Init(data_file_name_.c_str())) {
232 return const_this->GetComponent(type, fp);
bool Init(const char *data_file_name)
◆ GetComponent() [2/2]
bool tesseract::TessdataManager::GetComponent |
( |
TessdataType |
type, |
|
|
TFile * |
fp |
|
) |
| const |
Definition at line 237 of file tessdatamanager.cpp.
239 if (entries_[type].empty()) {
242 fp->Open(&entries_[type][0], entries_[type].size());
◆ GetDataFileName()
const std::string& tesseract::TessdataManager::GetDataFileName |
( |
| ) |
const |
|
inline |
◆ Init()
bool tesseract::TessdataManager::Init |
( |
const char * |
data_file_name | ) |
|
Opens and reads the given data file right now.
Returns:
  true on success.
Definition at line 90 of file tessdatamanager.cpp.
91 std::vector<char> data;
92 if (reader_ ==
nullptr) {
93 #if defined(HAVE_LIBARCHIVE)
94 if (LoadArchiveFile(data_file_name)) {
102 if (!(*reader_)(data_file_name, &data)) {
bool LoadMemBuffer(const char *name, const char *data, int size)
◆ is_loaded()
bool tesseract::TessdataManager::is_loaded |
( |
| ) |
const |
|
inline |
◆ IsBaseAvailable()
bool tesseract::TessdataManager::IsBaseAvailable |
( |
| ) |
const |
|
inline |
◆ IsComponentAvailable()
bool tesseract::TessdataManager::IsComponentAvailable |
( |
TessdataType |
type | ) |
const |
|
inline |
◆ IsLSTMAvailable()
bool tesseract::TessdataManager::IsLSTMAvailable |
( |
| ) |
const |
|
inline |
◆ LoadFileLater()
void tesseract::TessdataManager::LoadFileLater |
( |
const char * |
data_file_name | ) |
|
◆ LoadMemBuffer()
bool tesseract::TessdataManager::LoadMemBuffer |
( |
const char * |
name, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
Definition at line 110 of file tessdatamanager.cpp.
113 data_file_name_ = name;
116 uint32_t num_entries;
117 if (!fp.DeSerialize(&num_entries)) {
120 swap_ = num_entries > kMaxNumTessdataEntries;
123 ReverseN(&num_entries,
sizeof(num_entries));
125 if (num_entries > kMaxNumTessdataEntries) {
129 std::vector<int64_t> offset_table(num_entries);
130 if (!fp.DeSerialize(&offset_table[0], num_entries)) {
134 if (offset_table[i] >= 0) {
135 int64_t entry_size = size - offset_table[i];
137 while (j < num_entries && offset_table[j] == -1) {
140 if (j < num_entries) {
141 entry_size = offset_table[j] - offset_table[i];
143 entries_[i].resize(entry_size);
144 if (!fp.DeSerialize(&entries_[i][0], entry_size)) {
void ReverseN(void *ptr, int num_bytes)
◆ OverwriteComponents()
bool tesseract::TessdataManager::OverwriteComponents |
( |
const char * |
new_traineddata_filename, |
|
|
char ** |
component_filenames, |
|
|
int |
num_new_components |
|
) |
| |
Gets the individual components from the data_file_ with which the class was initialized. Overwrites the components specified by component_filenames. Writes the updated traineddata file to new_traineddata_filename.
Definition at line 288 of file tessdatamanager.cpp.
292 for (
int i = 0; i < num_new_components; ++i) {
294 if (TessdataTypeFromFileName(component_filenames[i], &type)) {
296 tprintf(
"Failed to read component file:%s\n", component_filenames[i]);
303 return SaveFile(new_traineddata_filename,
nullptr);
◆ OverwriteEntry()
void tesseract::TessdataManager::OverwriteEntry |
( |
TessdataType |
type, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
Definition at line 157 of file tessdatamanager.cpp.
159 entries_[type].resize(size);
160 memcpy(&entries_[type][0], data, size);
◆ SaveFile()
bool tesseract::TessdataManager::SaveFile |
( |
const char * |
filename, |
|
|
FileWriter |
writer |
|
) |
| const |
Definition at line 164 of file tessdatamanager.cpp.
167 std::vector<char> data;
169 if (writer ==
nullptr) {
172 return (*writer)(data, filename);
void Serialize(std::vector< char > *data) const
◆ Serialize()
void tesseract::TessdataManager::Serialize |
( |
std::vector< char > * |
data | ) |
const |
Definition at line 177 of file tessdatamanager.cpp.
182 int64_t offset =
sizeof(int32_t) +
sizeof(offset_table);
184 if (entries_[i].empty()) {
185 offset_table[i] = -1;
187 offset_table[i] = offset;
188 offset += entries_[i].size();
191 data->resize(offset, 0);
195 fp.Serialize(&num_entries);
196 fp.Serialize(&offset_table[0],
countof(offset_table));
197 for (
const auto &entry : entries_) {
198 if (!entry.empty()) {
199 fp.Serialize(&entry[0], entry.size());
constexpr size_t countof(T const (&)[N]) noexcept
◆ SetVersionString()
void tesseract::TessdataManager::SetVersionString |
( |
const std::string & |
v_str | ) |
|
◆ swap()
bool tesseract::TessdataManager::swap |
( |
| ) |
const |
|
inline |
◆ VersionString()
std::string tesseract::TessdataManager::VersionString |
( |
| ) |
const |
The documentation for this class was generated from the following files: