tesseract  5.0.0
validate_myanmar_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include "include_gunit.h"
13 #include "normstrngs.h"
14 #include "normstrngs_test.h"
15 
16 namespace tesseract {
17 
18 // Test some random Myanmar words.
// Assigns two Myanmar sample strings to `str`, one after the other.
// NOTE(review): the embedded listing numbers jump 20 -> 22 -> 24, so the
// statements from listing lines 21 and 23 are not present in this copy. As it
// stands, nothing visible here asserts anything about either string.
// Presumably each missing line was a check on the string assigned just before
// it (possibly ExpectGraphemeModeResults) - TODO confirm against upstream and
// restore before relying on this test.
19 TEST(ValidateMyanmarTest, GoodMyanmarWords) {
20  std::string str = "လျှာကသိသည် "; // No viramas in this one.
 // NOTE(review): listing line 21 (check for the string above) is missing here.
22  str = "တုန္လႈပ္မႈ ";
 // NOTE(review): listing line 23 (check for the string above) is missing here.
24 }
25 
26 // Test some random Myanmar words with dotted circles.
// Runs two Myanmar strings through the same three-step sequence: a call that
// fills `glyphs`, a call that fills `result`, and a second call that fills
// `result` followed by a check that `result` equals the input string.
// NOTE(review): listing lines 30, 34, 37, 41, 44 and 47 are absent from this
// copy, so the opening of every call (the EXPECT_* macro, the callee name and
// the leading arguments) is missing; only trailing argument fragments remain.
// The fragments ending in `&glyphs` look like the tail of a
// NormalizeCleanAndSegmentUTF8 call and those ending in `&result` like the
// tail of a NormalizeUTF8String call - TODO confirm against upstream and
// restore the missing lines before building.
27 TEST(ValidateMyanmarTest, BadMyanmarWords) {
28  std::string str = "က်န္းမာေရး";
29  std::vector<std::string> glyphs;
 // First string, segmentation step: tail of a call whose opening (listing
 // line 30) is missing. Passes kCombined mode, `true`, the string, and
 // `glyphs` as the output vector.
31  GraphemeNormMode::kCombined, true, str.c_str(),
32  &glyphs));
33  std::string result;
 // Tail of a call whose opening (listing line 34) is missing; writes to
 // `result`. The expected outcome is not visible here.
35  str.c_str(), &result));
36  // It works if the grapheme normalization is turned off.
 // Tail of a call whose opening (listing line 37) is missing; per the comment
 // above it is presumably the variant with grapheme normalization disabled.
38  str.c_str(), &result));
 // The output must match the input string exactly.
39  EXPECT_EQ(str, result);
 // Second string: same sequence, but the segmentation fragment passes
 // kGlyphSplit instead of kCombined.
40  str = "ခုႏွစ္";
 // Opening of this call (listing line 41) is missing.
42  GraphemeNormMode::kGlyphSplit, true, str.c_str(),
43  &glyphs));
 // Opening of this call (listing line 44) is missing.
45  str.c_str(), &result));
46  // It works if the grapheme normalization is turned off.
 // Opening of this call (listing line 47) is missing.
48  str.c_str(), &result));
 // The output must match the input string exactly.
49  EXPECT_EQ(str, result);
50 }
51 
52 } // namespace tesseract
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
Definition: normstrngs.cpp:179
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:152
TEST(TesseractInstanceTest, TestMultipleTessInstances)