28 #ifndef DISABLED_LEGACY_ENGINE
96 #ifndef DISABLED_LEGACY_ENGINE
97 const std::vector<ScoredFont> &
fonts()
const {
102 int score1 = 0, score2 = 0;
105 for (
auto &f : fonts_) {
106 if (f.score > score1) {
108 fontinfo_id2_ = fontinfo_id_;
110 fontinfo_id_ = f.fontinfo_id;
111 }
else if (f.score > score2) {
113 fontinfo_id2_ = f.fontinfo_id;
145 unichar_id_ = newunichar_id;
154 script_id_ = newscript_id;
157 matrix_cell_.
col = col;
158 matrix_cell_.
row = row;
173 tprintf(
"r%.2f c%.2f x[%g,%g]: %d %s", rating_, certainty_, min_xheight_, max_xheight_,
174 unichar_id_, (unicharset ==
nullptr) ?
"" : unicharset->
debug_str(unichar_id_).c_str());
178 tprintf(
" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
179 fontinfo_id2_, yshift_, classifier_);
185 return (bc1->rating_ < bc2->rating_) ? -1 : 1;
193 #ifndef DISABLED_LEGACY_ENGINE
// Font data members.
// fonts_ is the list of ScoredFont entries that the font-selection loop in
// this listing iterates over. That loop stores the fontinfo_id of an entry
// whose score beats the running best into fontinfo_id_, after first moving
// the previous fontinfo_id_ into fontinfo_id2_; an entry that only beats the
// second-best score is stored directly into fontinfo_id2_.
// NOTE(review): the lines that update the running scores are not visible in
// this excerpt — confirm the exact tie/zero-score behaviour in the full file.
195 std::vector<ScoredFont> fonts_;
197 int16_t fontinfo_id_;
198 int16_t fontinfo_id2_;
// Declaration only; the definition is not part of this excerpt.
// Static helper taking a permuter code (uint8_t) and returning a C string.
// NOTE(review): presumably the result is a human-readable name for that
// permuter code; string lifetime is not visible here — confirm.
257 static const char *permuter_name(uint8_t permuter);
263 this->init(reserved);
265 WERD_CHOICE(
const char *src_string,
const char *src_lengths,
float src_rating,
266 float src_certainty, uint8_t src_permuter,
const UNICHARSET &unicharset)
267 : unicharset_(&unicharset) {
268 this->init(src_string, src_lengths, src_rating, src_certainty, src_permuter);
272 this->init(word.
length());
273 this->operator=(word);
287 return adjust_factor_;
290 adjust_factor_ = factor;
296 assert(index < length_);
297 return unichar_ids_[index];
299 inline unsigned state(
unsigned index)
const {
300 return state_[index];
303 if (index >= length_) {
306 return script_pos_[index];
315 return certainties_[index];
318 return min_x_height_;
321 return max_x_height_;
324 min_x_height_ = min_height;
325 max_x_height_ = max_height;
// Declaration only; the definition is not part of this excerpt.
// Const, argument-free counterpart of the static permuter_name(uint8_t)
// declared earlier in this listing; returns a C string.
// NOTE(review): presumably names this word's own permuter — confirm.
330 const char *permuter_name()
const;
// Declaration only; the definition is not part of this excerpt.
// Const member taking a position `index` and a MATRIX pointer `ratings`,
// returning a BLOB_CHOICE_LIST pointer.
// NOTE(review): presumably fetches the choice list in `ratings` that backs
// the unichar at `index`; whether the result may be null and who owns it is
// not visible here — confirm against the definition.
334 BLOB_CHOICE_LIST *blob_choices(
unsigned index,
MATRIX *ratings)
const;
341 assert(index < length_);
342 unichar_ids_[index] = unichar_id;
345 return dangerous_ambig_found_;
348 dangerous_ambig_found_ = value;
354 certainty_ = new_val;
374 unichar_ids_.resize(reserved_);
375 script_pos_.resize(reserved_);
376 state_.resize(reserved_);
377 certainties_.resize(reserved_);
382 inline void init(
unsigned reserved) {
383 reserved_ = reserved;
385 unichar_ids_.resize(reserved);
386 script_pos_.resize(reserved);
387 state_.resize(reserved);
388 certainties_.resize(reserved);
390 unichar_ids_.clear();
393 certainties_.clear();
396 adjust_factor_ = 1.0f;
398 certainty_ = FLT_MAX;
399 min_x_height_ = 0.0f;
400 max_x_height_ = FLT_MAX;
402 unichars_in_script_order_ =
false;
403 dangerous_ambig_found_ =
false;
// Declaration only; the definition is not part of this excerpt.
// String-based initializer. The constructor in this listing that takes
// (src_string, src_lengths, src_rating, src_certainty, src_permuter,
// unicharset) forwards its first five arguments to this function unchanged.
// NOTE(review): src_lengths presumably describes the byte length of each
// unichar inside src_string — confirm against the definition.
411 void init(
const char *src_string,
const char *src_lengths,
float src_rating,
float src_certainty,
412 uint8_t src_permuter);
417 rating_ = kBadRating;
418 certainty_ = -FLT_MAX;
426 assert(reserved_ > length_);
428 this->set_unichar_id(unichar_id, blob_count, rating, certainty, length_ - 1);
// Declaration only; the definition is not part of this excerpt.
// Takes a unichar id together with its blob count, rating and certainty.
// NOTE(review): presumably appends one entry to the word, growing storage
// if required (a nearby fragment in this listing instead asserts
// reserved_ > length_ before writing at length_ - 1) — confirm against the
// definition.
431 void append_unichar_id(
UNICHAR_ID unichar_id,
int blob_count,
float rating,
float certainty);
435 assert(index < length_);
436 unichar_ids_[index] = unichar_id;
437 state_[index] = blob_count;
438 certainties_[index] = certainty;
441 if (certainty < certainty_) {
442 certainty_ = certainty;
// Declaration only; the definition is not part of this excerpt.
// Takes a position `index`, a blob count and a read-only BLOB_CHOICE.
// NOTE(review): presumably copies the relevant fields of `blob_choice`
// into the entry at `index` — confirm against the definition.
447 void set_blob_choice(
unsigned index,
int blob_count,
const BLOB_CHOICE *blob_choice);
// Declaration only; the definition is not part of this excerpt.
// Const predicate on a UNICHAR_ID.
// NOTE(review): presumably true when `unichar_id` occurs anywhere in this
// word — confirm against the definition.
449 bool contains_unichar_id(
UNICHAR_ID unichar_id)
const;
// Declaration only; the definition is not part of this excerpt.
// Takes a starting `index` and a count `num`. A single-element caller in
// this listing invokes it as remove_unichar_ids(index, 1).
// NOTE(review): presumably removes `num` consecutive entries beginning at
// `index`; behaviour for out-of-range arguments is not visible — confirm.
450 void remove_unichar_ids(
unsigned index,
int num);
455 this->remove_unichar_ids(index, 1);
// Declaration only; the definition is not part of this excerpt.
// Const predicate with no arguments.
// NOTE(review): presumably true when at least one unichar in the word has
// right-to-left directionality — confirm against the definition.
457 bool has_rtl_unichar_id()
const;
// Declaration only; the definition is not part of this excerpt.
// Non-const, so it may modify the word in place.
// NOTE(review): presumably reverses the unichar order and substitutes
// mirrored characters — confirm against the definition.
458 void reverse_and_mirror_unichar_ids();
// Declaration only; the definition is not part of this excerpt.
// Const member reporting its result through two unsigned output pointers.
// NOTE(review): presumably writes the index range of the word that remains
// after leading/trailing punctuation is ignored; whether *end_core is
// inclusive or exclusive is not visible here — confirm.
463 void punct_stripped(
unsigned *start_core,
unsigned *end_core)
const;
// Declaration only; the definition is not part of this excerpt.
// Const member reporting its result through two int output pointers.
// NOTE(review): presumably writes the index range left after skipping
// leading/trailing super/subscript positions; range convention is not
// visible here — confirm.
468 void GetNonSuperscriptSpan(
int *start,
int *end)
const;
// Declaration only; the definition is not part of this excerpt.
// Const member returning a new WERD_CHOICE by value for the given bounds.
// NOTE(review): presumably copies the entries in [start, end); which fields
// are carried over (the "shallow" part) is not visible here — confirm.
472 WERD_CHOICE shallow_copy(
unsigned start,
unsigned end)
const;
// Declaration only; the definition is not part of this excerpt.
// Const member with two std::string output pointers. Code elsewhere in this
// listing calls it with &unichar_string_ and &unichar_lengths_ immediately
// before returning one of those members.
// NOTE(review): presumably overwrites *word_str with the word text and
// *word_lengths_str with per-unichar lengths; whether either pointer may be
// null is not visible here — confirm.
474 void string_and_lengths(std::string *word_str, std::string *word_lengths_str)
const;
476 std::string word_str;
477 for (
unsigned i = 0; i < length_; ++i) {
478 word_str += unicharset_->debug_str(unichar_ids_[i]);
485 for (
unsigned i = 0; i < length_; ++i) {
486 if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) {
494 for (
unsigned i = 0; i < length_; ++i) {
506 return unichars_in_script_order_ = in_script_order;
510 return unichars_in_script_order_;
516 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
517 return unichar_string_;
523 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
524 return unichar_string_;
530 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
531 return unichar_lengths_;
// Declaration only; the definition is not part of this excerpt.
// Takes a small-caps flag, a TWERD pointer and a debug level that defaults
// to 0 when omitted.
// NOTE(review): presumably derives the per-unichar script_pos_ values from
// the geometry in `word` — confirm against the definition.
539 void SetScriptPositions(
bool small_caps,
TWERD *word,
int debug = 0);
// Declaration only; the definition is not part of this excerpt.
// NOTE(review): presumably assigns `position` to every entry's script
// position — confirm against the definition.
541 void SetAllScriptPositions(
ScriptPos position);
// Declaration only; the definition is not part of this excerpt.
// Const query returning an int.
// NOTE(review): presumably the id of the dominant script among the word's
// unichars; the value returned for an empty word is not visible — confirm.
549 int GetTopScriptID()
const;
// Declaration only; the definition is not part of this excerpt.
// NOTE(review): presumably adjusts the state_ blob counts after the blob at
// `blob_position` has been split in two — confirm against the definition.
552 void UpdateStateForSplit(
int blob_position);
// Declaration only; the definition is not part of this excerpt.
// Const query returning an unsigned value.
// NOTE(review): presumably the sum of all state_ entries, i.e. the total
// blob count covered by the word — confirm against the definition.
555 unsigned TotalOfStates()
const;
// Declaration only; the definition is not part of this excerpt.
// Const member taking a caller-supplied message string.
// NOTE(review): presumably emits `msg` followed by a dump of this word for
// debugging; output destination is not visible here — confirm.
560 void print(
const char *msg)
const;
// Declaration only; the definition is not part of this excerpt.
// Const member taking a caller-supplied message string.
// NOTE(review): presumably emits `msg` followed by the state_ values —
// confirm against the definition.
562 void print_state(
const char *msg)
const;
// Declaration only; the definition is not part of this excerpt.
// Non-const member taking a TWERD pointer.
// NOTE(review): presumably draws the segmentation of `word` implied by this
// choice for visual debugging — confirm against the definition.
566 void DisplaySegmentation(
TWERD *word);
// Per-unichar storage, indexed by position in the word. The sizing code in
// this listing always resizes all four vectors to the same reserved count,
// and the indexed setter writes unichar_ids_[index], state_[index] (from
// its blob_count argument) and certainties_[index] together.
590 std::vector<UNICHAR_ID> unichar_ids_;
591 std::vector<ScriptPos> script_pos_;
592 std::vector<int> state_;
593 std::vector<float> certainties_;
// Scalar state. The init code in this listing sets adjust_factor_ to 1.0f
// and both flags to false; each has a simple getter/setter pair elsewhere
// in the listing (the unichars_in_script_order_ setter returns the value it
// just assigned).
597 float adjust_factor_;
611 bool unichars_in_script_order_;
613 bool dangerous_ambig_found_;
// String members declared mutable. Elsewhere in this listing both are passed
// by address to the const function string_and_lengths() immediately before
// one of them is returned.
// NOTE(review): this looks like an on-demand rebuilt cache of the word text
// and its per-unichar lengths — confirm against the accessor definitions.
617 mutable std::string unichar_string_;
618 mutable std::string unichar_lengths_;
631 BLOB_CHOICE_LIST *ratings,
#define ELISTIZEH(CLASSNAME)
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
void tprintf(const char *format,...)
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
const char * ScriptPosToString(enum ScriptPos script_pos)
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2)
std::vector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
int16_t fontinfo_id2() const
void set_certainty(float newrat)
void set_unichar_id(UNICHAR_ID newunichar_id)
UNICHAR_ID unichar_id() const
void set_classifier(BlobChoiceClassifier classifier)
void print(const UNICHARSET *unicharset) const
void set_matrix_cell(int col, int row)
void set_fonts(const std::vector< ScoredFont > &fonts)
static int SortByRating(const void *p1, const void *p2)
const std::vector< ScoredFont > & fonts() const
void set_script(int newscript_id)
float min_xheight() const
float max_xheight() const
BlobChoiceClassifier classifier() const
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
const MATRIX_COORD & matrix_cell()
bool IsClassified() const
int16_t fontinfo_id() const
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
void set_rating(float newrat)
float max_x_height() const
std::string debug_string() const
bool set_unichars_in_script_order(bool in_script_order)
float certainty(unsigned index) const
void set_dangerous_ambig_found_(bool value)
void remove_unichar_id(unsigned index)
void set_x_heights(float min_height, float max_height)
void set_unichar_id(UNICHAR_ID unichar_id, unsigned index)
WERD_CHOICE(const UNICHARSET *unicharset)
bool unichars_in_script_order() const
const std::vector< UNICHAR_ID > & unichar_ids() const
UNICHAR_ID unichar_id(unsigned index) const
static const float kBadRating
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
void make_bad()
Set the fields in this choice to be default (bad) values.
bool dangerous_ambig_found() const
const std::string & unichar_string() const
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
void set_certainty(float new_val)
void set_length(unsigned len)
WERD_CHOICE(const WERD_CHOICE &word)
bool ContainsAnyNonSpaceDelimited() const
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
void init(unsigned reserved)
unsigned state(unsigned index) const
void double_the_size()
Make more space in unichar_id_ and fragment_lengths_ arrays.
void set_permuter(uint8_t perm)
const std::string & unichar_lengths() const
float min_x_height() const
const UNICHARSET * unicharset() const
void remove_last_unichar_id()
void set_adjust_factor(float factor)
float adjust_factor() const
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, unsigned index)
std::string & unichar_string()
void set_rating(float new_val)
ScriptPos BlobPosition(unsigned index) const
std::string debug_str(UNICHAR_ID id) const