tesseract  5.0.0
validate_khmer_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include "include_gunit.h"
13 #include "normstrngs.h"
14 #include "normstrngs_test.h"
15 
16 namespace tesseract {
17 
18 // Test some random Khmer words.
// As shown in this listing, the test body only stores four Khmer sample words
// in `str`, one after another; no assertion on them is visible here.
// NOTE(review): the embedded listing numbers skip 21, 23, 25 and 27, so one
// statement after each assignment appears to be missing from this view.
// Presumably those were the per-word checks (perhaps calls to
// ExpectGraphemeModeResults, whose signature is listed after the namespace) -
// confirm against the upstream file before relying on this test.
19 TEST(ValidateKhmerTest, GoodKhmerWords) {
20  std::string str = "ព័ត៏មានប្លែកៗ";
22  str = "ទំនុកច្រៀង";
24  str = "កាលីហ្វូញ៉ា";
26  str = "ចាប់ពីផ្លូវ";
28 }
29 
30 // Test some random Khmer words with dotted circles.
// Builds three short code-point sequences, each described by the comment above
// it as a combination that is not allowed, and passes each one together with
// `&result` as the last two arguments of a check.
// NOTE(review): the embedded listing numbers skip 35, 39 and 43, so the head
// of each check is missing from this view; only the argument tail
// `str.c_str(), &result));` remains. Presumably the head is
// EXPECT_FALSE(NormalizeUTF8String(...)), whose signature (ending in
// `const char *str8, std::string *normalized`) is listed after the namespace -
// confirm against the upstream file.
// NOTE(review): none of the literals below contains U+25CC (dotted circle);
// the heading presumably refers to what validation would insert - confirm.
31 TEST(ValidateKhmerTest, BadKhmerWords) {
32  std::string result;
33  // Multiple dependent vowels not allowed: U+1796 is followed by both U+17B6
 // and U+17B7.
34  std::string str = "\u1796\u17b6\u17b7";
36  str.c_str(), &result));
37  // Multiple shifters not allowed: U+1798 is followed by both U+17C9 and
 // U+17CA.
38  str = "\u1798\u17c9\u17ca";
40  str.c_str(), &result));
41  // Multiple signs not allowed: U+1780 U+17B6 is followed by both U+17CB and
 // U+17CD.
42  str = "\u1780\u17b6\u17cb\u17cd";
44  str.c_str(), &result));
45 }
46 
47 } // namespace tesseract
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:152
TEST(TesseractInstanceTest, TestMultipleTessInstances)