Definition at line 24 of file unicharcompress_test.cc.
◆ CheckCodeExtensions()
void tesseract::UnicharcompressTest::CheckCodeExtensions(const RecodedCharID &code, const std::vector< RecodedCharID > &times_seen)
inline protected
Definition at line 135 of file unicharcompress_test.cc.
137 RecodedCharID extended = code;
138 int length = code.length();
140 if (final_codes !=
nullptr) {
141 for (
int ending : *final_codes) {
142 EXPECT_GT(times_seen[ending](length), 0);
143 extended.Set(length, ending);
145 EXPECT_NE(INVALID_UNICHAR_ID, unichar_id);
149 if (next_codes !=
nullptr) {
150 for (
int extension : *next_codes) {
151 EXPECT_GT(times_seen[extension](length), 0);
152 extended.Set(length, extension);
const std::vector< int > * GetFinalCodes(const RecodedCharID &code) const
const std::vector< int > * GetNextCodes(const RecodedCharID &code) const
int DecodeUnichar(const RecodedCharID &code) const
UnicharCompress compressed_
void CheckCodeExtensions(const RecodedCharID &code, const std::vector< RecodedCharID > &times_seen)
◆ ExpectCorrect()
void tesseract::UnicharcompressTest::ExpectCorrect(const std::string &lang)
inline protected
Definition at line 75 of file unicharcompress_test.cc.
83 std::vector<RecodedCharID> times_seen(code_range, zeros);
94 std::vector<UNICHAR_ID> normed_ids;
102 for (
int i = 0; i < len; ++i) {
103 int code_val = code(i);
104 EXPECT_GE(code_val, 0);
105 EXPECT_LT(code_val, code_range);
106 times_seen[code_val].Set(i, times_seen[code_val](i) + 1);
110 for (
int c = 0; c < code_range; ++c) {
113 if (times_seen[c](i) != 0) {
117 EXPECT_GE(num_used, 1) <<
"c=" << c <<
"/" << code_range;
@ SPECIAL_UNICHAR_CODES_COUNT
static const int kMaxCodeLen
int EncodeUnichar(unsigned unichar_id, RecodedCharID *code) const
bool has_special_codes() const
bool IsIndicLang(const std::string &lang)
bool IsCJKLang(const std::string &lang)
◆ IsCJKLang()
bool tesseract::UnicharcompressTest::IsCJKLang(const std::string &lang)
inline protected
Definition at line 63 of file unicharcompress_test.cc.
64 return lang ==
"chi_sim" || lang ==
"chi_tra" || lang ==
"kor" || lang ==
"jpn";
◆ IsIndicLang()
bool tesseract::UnicharcompressTest::IsIndicLang(const std::string &lang)
inline protected
Definition at line 67 of file unicharcompress_test.cc.
68 return lang ==
"asm" || lang ==
"ben" || lang ==
"bih" || lang ==
"hin" || lang ==
"mar" ||
69 lang ==
"nep" || lang ==
"san" || lang ==
"bod" || lang ==
"dzo" || lang ==
"guj" ||
70 lang ==
"kan" || lang ==
"mal" || lang ==
"ori" || lang ==
"pan" || lang ==
"sin" ||
71 lang ==
"tam" || lang ==
"tel";
◆ LoadUnicharset()
void tesseract::UnicharcompressTest::LoadUnicharset(const std::string &unicharset_name)
inline protected
Definition at line 32 of file unicharcompress_test.cc.
33 std::string radical_stroke_file =
file::JoinPath(LANGDATA_DIR,
"radical-stroke.txt");
34 std::string unicharset_file =
file::JoinPath(TESTDATA_DIR, unicharset_name);
35 std::string radical_data;
38 std::string radical_str(radical_data.c_str());
45 std::string output_name =
46 file::JoinPath(FLAGS_test_tmpdir, unicharset_name) +
".encoding.txt";
48 std::string encoding_str(&encoding[0], encoding.size());
50 LOG(
INFO) <<
"Wrote encoding to:" << output_name;
std::string GetEncodingAsString(const UNICHARSET &unicharset) const
bool ComputeEncoding(const UNICHARSET &unicharset, int null_id, std::string *radical_stroke_table)
bool load_from_file(const char *const filename, bool skip_fragments)
static std::string JoinPath(const std::string &s1, const std::string &s2)
static bool SetContents(const std::string &name, const std::string &contents, bool)
static bool GetContents(const std::string &filename, std::string *out, int)
◆ SerializeAndUndo()
void tesseract::UnicharcompressTest::SerializeAndUndo()
inline protected
Definition at line 53 of file unicharcompress_test.cc.
54 std::vector<char> data;
59 rfp.Open(&data[0], data.size());
bool DeSerialize(TFile *fp)
bool Serialize(TFile *fp) const
◆ SetUp()
void tesseract::UnicharcompressTest::SetUp()
inline override protected
◆ compressed_
◆ encoded_null_char_
int tesseract::UnicharcompressTest::encoded_null_char_
protected
◆ null_char_
int tesseract::UnicharcompressTest::null_char_
protected
◆ unicharset_
UNICHARSET tesseract::UnicharcompressTest::unicharset_
protected
The documentation for this class was generated from the following file: