tesseract  5.0.0
stringrenderer_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include "include_gunit.h"
13 
14 #include "boxchar.h"
15 #include "boxread.h"
16 #include "commandlineflags.h"
17 #include "stringrenderer.h"
18 
19 #include <allheaders.h>
20 
21 #include <memory>
22 #include <string>
23 
24 BOOL_PARAM_FLAG(display, false, "Display image for inspection");
25 
26 namespace tesseract {
27 
// Sample texts in several scripts used as rendering input by the tests below.
const char kEngText[] = "the quick brown fox jumps over the lazy dog";
const char kHinText[] = "पिताने विवाह की | हो गई उद्विग्न वह सोचा";

const char kKorText[] = "이는 것으로 다시 넣을 1234 수는 있지만 선택의 의미는";
const char kArabicText[] =
    "والفكر والصراع ، بالتأمل والفهم والتحليل ، "
    "بالعلم والفن ، وأخيرا بالضحك أوبالبكاء ، ";
// Right-to-left Arabic words mixed with left-to-right digits and Latin letters.
const char kMixedText[] = "والفكر 123 والصراع abc";

const char kEngNonLigatureText[] = "fidelity";
// Same as kEngNonLigatureText, but with "fi" replaced with its ligature
// (U+FB01 LATIN SMALL LIGATURE FI). Written as an escape so that the code
// point cannot be silently normalised back to the two plain letters.
const char kEngLigatureText[] = "\uFB01delity";
40 
41 static PangoFontMap *font_map;
42 
43 class StringRendererTest : public ::testing::Test {
44 protected:
45  void SetUp() override {
46  if (!font_map) {
47  font_map = pango_cairo_font_map_new_for_font_type(CAIRO_FONT_TYPE_FT);
48  }
49  pango_cairo_font_map_set_default(PANGO_CAIRO_FONT_MAP(font_map));
50  }
51 
52  static void SetUpTestCase() {
53  static std::locale system_locale("");
54  std::locale::global(system_locale);
55 
56  l_chooseDisplayProg(L_DISPLAY_WITH_XZGV);
57  FLAGS_fonts_dir = TESTING_DIR;
58  FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
60  PangoFontInfo::SoftInitFontConfig(); // init early
61  }
62 
64  if (!FLAGS_display) {
65  return;
66  }
67  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
68  Boxa *boxes = boxaCreate(0);
69  for (const auto &boxchar : boxchars) {
70  if (boxchar->box()) {
71  boxaAddBox(boxes, const_cast<Box *>(boxchar->box()), L_CLONE);
72  }
73  }
74  Image box_pix = pixDrawBoxaRandom(pix, boxes, 1);
75  boxaDestroy(&boxes);
76  pixDisplay(box_pix, 0, 0);
77  box_pix.destroy();
78  }
79  std::unique_ptr<StringRenderer> renderer_;
80 };
81 
82 TEST_F(StringRendererTest, DoesRenderToImage) {
83  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
84  Image pix = nullptr;
85  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
86  EXPECT_TRUE(pix != nullptr);
87  EXPECT_GT(renderer_->GetBoxes().size(), 0);
88  DisplayClusterBoxes(pix);
89  pix.destroy();
90 
91  renderer_ = std::make_unique<StringRenderer>("UnBatang 10", 600, 600);
92  EXPECT_EQ(strlen(kKorText), renderer_->RenderToImage(kKorText, strlen(kKorText), &pix));
93  EXPECT_GT(renderer_->GetBoxes().size(), 0);
94  DisplayClusterBoxes(pix);
95  pix.destroy();
96 
97  renderer_ = std::make_unique<StringRenderer>("Lohit Hindi 10", 600, 600);
98  EXPECT_EQ(strlen(kHinText), renderer_->RenderToImage(kHinText, strlen(kHinText), &pix));
99  EXPECT_GT(renderer_->GetBoxes().size(), 0);
100  DisplayClusterBoxes(pix);
101  pix.destroy();
102 
103  // RTL text
104  renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
105  EXPECT_EQ(strlen(kArabicText), renderer_->RenderToImage(kArabicText, strlen(kArabicText), &pix));
106  EXPECT_TRUE(pix != nullptr);
107  EXPECT_GT(renderer_->GetBoxes().size(), 0);
108  DisplayClusterBoxes(pix);
109  pix.destroy();
110 
111  // Mixed direction Arabic + english text
112  renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
113  EXPECT_EQ(strlen(kMixedText), renderer_->RenderToImage(kMixedText, strlen(kMixedText), &pix));
114  EXPECT_TRUE(pix != nullptr);
115  EXPECT_GT(renderer_->GetBoxes().size(), 0);
116  DisplayClusterBoxes(pix);
117  pix.destroy();
118 }
119 
120 TEST_F(StringRendererTest, DoesRenderToImageWithUnderline) {
121  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
122  // Underline all words but NOT intervening spaces.
123  renderer_->set_underline_start_prob(1.0);
124  renderer_->set_underline_continuation_prob(0);
125  Image pix = nullptr;
126  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
127  EXPECT_TRUE(pix != nullptr);
128  EXPECT_GT(renderer_->GetBoxes().size(), 0);
129  DisplayClusterBoxes(pix);
130  pix.destroy();
131  renderer_->ClearBoxes();
132 
133  // Underline all words AND intervening spaces.
134  renderer_->set_underline_start_prob(1.0);
135  renderer_->set_underline_continuation_prob(1.0);
136  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
137  EXPECT_TRUE(pix != nullptr);
138  EXPECT_GT(renderer_->GetBoxes().size(), 0);
139  DisplayClusterBoxes(pix);
140  pix.destroy();
141  renderer_->ClearBoxes();
142 
143  // Underline words and intervening spaces with 0.5 prob.
144  renderer_->set_underline_start_prob(0.5);
145  renderer_->set_underline_continuation_prob(0.5);
146  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
147  EXPECT_TRUE(pix != nullptr);
148  EXPECT_GT(renderer_->GetBoxes().size(), 0);
149  DisplayClusterBoxes(pix);
150  pix.destroy();
151 }
152 
153 TEST_F(StringRendererTest, DoesHandleNewlineCharacters) {
154  const char kRawText[] = "\n\n\n A \nB \nC \n\n\n";
155  const char kStrippedText[] = " A B C "; // text with newline chars removed
156  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
157  Image pix = nullptr;
158  EXPECT_EQ(strlen(kRawText), renderer_->RenderToImage(kRawText, strlen(kRawText), &pix));
159  EXPECT_TRUE(pix != nullptr);
160  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
161  // 3 characters + 4 spaces => 7 boxes
162  EXPECT_EQ(7, boxchars.size());
163  if (boxchars.size() == 7) {
164  // Verify the text content of the boxchars
165  for (size_t i = 0; i < boxchars.size(); ++i) {
166  EXPECT_EQ(std::string(1, kStrippedText[i]), boxchars[i]->ch());
167  }
168  }
169  DisplayClusterBoxes(pix);
170  pix.destroy();
171 }
172 
173 TEST_F(StringRendererTest, DoesRenderLigatures) {
174  renderer_ = std::make_unique<StringRenderer>("Arab 12", 600, 250);
175  const char kArabicLigature[] = "لا";
176 
177  Image pix = nullptr;
178  EXPECT_EQ(strlen(kArabicLigature),
179  renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), &pix));
180  EXPECT_TRUE(pix != nullptr);
181  EXPECT_GT(renderer_->GetBoxes().size(), 0);
182  const std::vector<BoxChar *> &boxes = renderer_->GetBoxes();
183  EXPECT_EQ(1, boxes.size());
184  EXPECT_TRUE(boxes[0]->box() != nullptr);
185  EXPECT_STREQ(kArabicLigature, boxes[0]->ch().c_str());
186  DisplayClusterBoxes(pix);
187  pix.destroy();
188 
189  renderer_ = std::make_unique<StringRenderer>("Arab 12", 600, 250);
190  const char kArabicMixedText[] = "والفكر والصراع 1234,\nوالفكر لا والصراع";
191  renderer_->RenderToImage(kArabicMixedText, strlen(kArabicMixedText), &pix);
192  DisplayClusterBoxes(pix);
193  pix.destroy();
194 }
195 
196 static int FindBoxCharXCoord(const std::vector<BoxChar *> &boxchars, const std::string &ch) {
197  for (const auto &boxchar : boxchars) {
198  if (boxchar->ch() == ch) {
199  return boxchar->box()->x;
200  }
201  }
202  return INT_MAX;
203 }
204 
205 TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) {
206  renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
207  Image pix = nullptr;
208  // Arabic letters should be in decreasing x-coordinates
209  const char kArabicWord[] = "\u0644\u0627\u0641\u0643\u0631";
210  const std::string kRevWord = "\u0631\u0643\u0641\u0627\u0644";
211  renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
212  std::string boxes_str = renderer_->GetBoxesStr();
213  // Decode to get the box text strings.
214  EXPECT_FALSE(boxes_str.empty());
215  std::vector<std::string> texts;
216  EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts, nullptr, nullptr));
217  std::string ltr_str;
218  for (auto &text : texts) {
219  ltr_str += text.c_str();
220  }
221  // The string should come out perfectly reversed, despite there being a
222  // ligature.
223  EXPECT_EQ(ltr_str, kRevWord);
224  // Just to prove there was a ligature, the number of texts is less than the
225  // number of unicodes.
226  EXPECT_LT(texts.size(), 5);
227  pix.destroy();
228 }
229 
230 TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) {
231  renderer_ = std::make_unique<StringRenderer>("Arab 10", 600, 600);
232  Image pix = nullptr;
233  // Arabic letters should be in decreasing x-coordinates
234  const char kArabicWord[] = "والفكر";
235  renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
236  EXPECT_GT(renderer_->GetBoxes().size(), 0);
237  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
238  for (size_t i = 1; i < boxchars.size(); ++i) {
239  EXPECT_GT(boxchars[i - 1]->box()->x, boxchars[i]->box()->x) << boxchars[i - 1]->ch();
240  }
241  pix.destroy();
242 
243  // English letters should be in increasing x-coordinates
244  const char kEnglishWord[] = "Google";
245  renderer_->ClearBoxes();
246  renderer_->RenderToImage(kEnglishWord, strlen(kEnglishWord), &pix);
247  EXPECT_EQ(boxchars.size(), strlen(kEnglishWord));
248  for (size_t i = 1; i < boxchars.size(); ++i) {
249  EXPECT_LT(boxchars[i - 1]->box()->x, boxchars[i]->box()->x) << boxchars[i - 1]->ch();
250  }
251  pix.destroy();
252 
253  // Mixed text should satisfy both.
254  renderer_->ClearBoxes();
255  renderer_->RenderToImage(kMixedText, strlen(kMixedText), &pix);
256  EXPECT_LT(FindBoxCharXCoord(boxchars, "a"), FindBoxCharXCoord(boxchars, "b"));
257  EXPECT_LT(FindBoxCharXCoord(boxchars, "1"), FindBoxCharXCoord(boxchars, "2"));
258  EXPECT_GT(FindBoxCharXCoord(boxchars, "و"), FindBoxCharXCoord(boxchars, "ر"));
259  pix.destroy();
260 }
261 
262 TEST_F(StringRendererTest, DoesRenderVerticalText) {
263  Image pix = nullptr;
264  renderer_ = std::make_unique<StringRenderer>("UnBatang 10", 600, 600);
265  renderer_->set_vertical_text(true);
266  EXPECT_EQ(strlen(kKorText), renderer_->RenderToImage(kKorText, strlen(kKorText), &pix));
267  EXPECT_GT(renderer_->GetBoxes().size(), 0);
268  DisplayClusterBoxes(pix);
269  pix.destroy();
270 }
271 
272 // Checks that we preserve charboxes across RenderToImage calls, with
273 // appropriate page numbers.
274 TEST_F(StringRendererTest, DoesKeepAllImageBoxes) {
275  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
276  Image pix = nullptr;
277  int num_boxes_per_page = 0;
278  const int kNumTrials = 2;
279  for (int i = 0; i < kNumTrials; ++i) {
280  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
281  EXPECT_TRUE(pix != nullptr);
282  pix.destroy();
283  EXPECT_GT(renderer_->GetBoxes().size(), 0);
284  if (!num_boxes_per_page) {
285  num_boxes_per_page = renderer_->GetBoxes().size();
286  } else {
287  EXPECT_EQ((i + 1) * num_boxes_per_page, renderer_->GetBoxes().size());
288  }
289  for (int j = i * num_boxes_per_page; j < (i + 1) * num_boxes_per_page; ++j) {
290  EXPECT_EQ(i, renderer_->GetBoxes()[j]->page());
291  }
292  }
293 }
294 
295 TEST_F(StringRendererTest, DoesClearBoxes) {
296  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
297  Image pix = nullptr;
298  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
299  pix.destroy();
300  EXPECT_GT(renderer_->GetBoxes().size(), 0);
301  const int num_boxes_per_page = renderer_->GetBoxes().size();
302 
303  renderer_->ClearBoxes();
304  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
305  pix.destroy();
306  EXPECT_EQ(num_boxes_per_page, renderer_->GetBoxes().size());
307 }
308 
309 TEST_F(StringRendererTest, DoesLigatureTextForRendering) {
310  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
311  renderer_->set_add_ligatures(true);
312  Image pix = nullptr;
313  EXPECT_EQ(strlen(kEngNonLigatureText),
314  renderer_->RenderToImage(kEngNonLigatureText, strlen(kEngNonLigatureText), &pix));
315  pix.destroy();
316 #if 0 // not with NFC normalization
317  // There should be one less box than letters due to the 'fi' ligature.
318  EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size());
319  // The output box text should be ligatured.
320  EXPECT_STREQ("fi", renderer_->GetBoxes()[0]->ch().c_str());
321 #endif
322 }
323 
324 TEST_F(StringRendererTest, DoesRetainInputLigatureForRendering) {
325  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
326  Image pix = nullptr;
327  EXPECT_EQ(strlen(kEngLigatureText),
328  renderer_->RenderToImage(kEngLigatureText, strlen(kEngLigatureText), &pix));
329  pix.destroy();
330  // There should be one less box than letters due to the 'fi' ligature.
331  EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size());
332  // The output box text should be ligatured.
333  EXPECT_STREQ("\uFB01", renderer_->GetBoxes()[0]->ch().c_str());
334 }
335 
336 TEST_F(StringRendererTest, DoesStripUnrenderableWords) {
337  // Verdana should only be able to render the english letters and numbers in
338  // the mixed text.
339  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
340  std::string text(kMixedText);
341  EXPECT_GT(renderer_->StripUnrenderableWords(&text), 0);
342  EXPECT_EQ(" 123 abc", text);
343 }
344 
345 TEST_F(StringRendererTest, DoesRenderWordBoxes) {
346  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
347  renderer_->set_output_word_boxes(true);
348  Image pix = nullptr;
349  EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
350  pix.destroy();
351  // Verify #boxchars = #words + #spaces
352  std::vector<std::string> words = split(kEngText, ' ');
353  const int kNumSpaces = words.size() - 1;
354  const int kExpectedNumBoxes = words.size() + kNumSpaces;
355  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
356  EXPECT_EQ(kExpectedNumBoxes, boxchars.size());
357  // Verify content of words and spaces
358  for (size_t i = 0; i < boxchars.size(); i += 2) {
359  EXPECT_EQ(words[i / 2], boxchars[i]->ch());
360  if (i < boxchars.size() - 1) {
361  EXPECT_EQ(" ", boxchars[i + 1]->ch());
362  EXPECT_TRUE(boxchars[i + 1]->box() == nullptr);
363  }
364  }
365 }
366 
367 TEST_F(StringRendererTest, DoesRenderWordBoxesFromMultiLineText) {
368  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 600, 600);
369  renderer_->set_output_word_boxes(true);
370  Image pix = nullptr;
371  const char kMultlineText[] = "the quick brown fox\njumps over the lazy dog";
372  EXPECT_EQ(strlen(kMultlineText), renderer_->RenderToImage(kMultlineText, strlen(kEngText), &pix));
373  pix.destroy();
374  // Verify #boxchars = #words + #spaces + #newlines
375  std::vector<std::string> words;
376  for (auto &line : split(kMultlineText, '\n')) {
377  for (auto &word : split(line, ' ')) {
378  words.push_back(word);
379  }
380  }
381  const int kNumSeparators = words.size() - 1;
382  const int kExpectedNumBoxes = words.size() + kNumSeparators;
383  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
384  EXPECT_EQ(kExpectedNumBoxes, boxchars.size());
385  // Verify content of words and spaces
386  for (size_t i = 0; i < boxchars.size(); i += 2) {
387  EXPECT_EQ(words[i / 2], boxchars[i]->ch());
388  if (i + 1 < boxchars.size()) {
389  EXPECT_EQ(" ", boxchars[i + 1]->ch());
390  EXPECT_TRUE(boxchars[i + 1]->box() == nullptr);
391  }
392  }
393 }
394 
395 TEST_F(StringRendererTest, DoesRenderAllFontsToImage) {
396  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 1200, 1200);
397  size_t offset = 0;
398  std::string font_used;
399  do {
400  Image pix = nullptr;
401  font_used.clear();
402  offset += renderer_->RenderAllFontsToImage(1.0, kEngText + offset, strlen(kEngText + offset),
403  &font_used, &pix);
404  if (offset < strlen(kEngText)) {
405  EXPECT_TRUE(pix != nullptr);
406  EXPECT_STRNE("", font_used.c_str());
407  }
408  if (FLAGS_display) {
409  pixDisplay(pix, 0, 0);
410  }
411  pix.destroy();
412  } while (offset < strlen(kEngText));
413 }
414 
415 TEST_F(StringRendererTest, DoesNotRenderWordJoiner) {
416  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 500, 200);
417  const std::string word = "A- -B C-D A BC";
418  const std::string joined_word = StringRenderer::InsertWordJoiners(word);
419  Image pix = nullptr;
420  renderer_->RenderToImage(joined_word.c_str(), joined_word.length(), &pix);
421  pix.destroy();
422  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
423  const std::string kWordJoinerUTF8 = "\u2060";
424  ASSERT_EQ(word.length(), boxchars.size());
425  for (size_t i = 0; i < boxchars.size(); ++i) {
426  EXPECT_NE(kWordJoinerUTF8, boxchars[i]->ch());
427  EXPECT_EQ(word.substr(i, 1), boxchars[i]->ch());
428  }
429 }
430 
431 TEST_F(StringRendererTest, DISABLED_DoesDropUncoveredChars) {
432  renderer_ = std::make_unique<StringRenderer>("Verdana 10", 500, 200);
433  renderer_->set_drop_uncovered_chars(true);
434  const std::string kWord = "office";
435  const std::string kCleanWord = "oice";
436  Image pix = nullptr;
437  EXPECT_FALSE(renderer_->font().CanRenderString(kWord.c_str(), kWord.length()));
438  EXPECT_FALSE(renderer_->font().CoversUTF8Text(kWord.c_str(), kWord.length()));
439  int offset = renderer_->RenderToImage(kWord.c_str(), kWord.length(), &pix);
440  pix.destroy();
441  const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
442  EXPECT_EQ(kWord.length(), offset);
443  ASSERT_EQ(kCleanWord.length(), boxchars.size());
444  for (size_t i = 0; i < boxchars.size(); ++i) {
445  EXPECT_EQ(kCleanWord.substr(i, 1), boxchars[i]->ch());
446  }
447 }
448 
449 // ------------ StringRenderer::ConvertBasicLatinToFullwidthLatin() ------------
450 
451 TEST(ConvertBasicLatinToFullwidthLatinTest, DoesConvertBasicLatin) {
452  const std::string kHalfAlpha = "ABCD";
453  const std::string kFullAlpha = "ABCD";
454  EXPECT_EQ(kFullAlpha, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfAlpha));
455 
456  const std::string kHalfDigit = "0123";
457  const std::string kFullDigit = "0123";
458  EXPECT_EQ(kFullDigit, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfDigit));
459 
460  const std::string kHalfSym = "()[]:;!?";
461  const std::string kFullSym = "()[]:;!?";
462  EXPECT_EQ(kFullSym, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfSym));
463 }
464 
465 TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertFullwidthLatin) {
466  const std::string kFullAlpha = "ABCD";
467  EXPECT_EQ(kFullAlpha, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullAlpha));
468 
469  const std::string kFullDigit = "0123";
470  EXPECT_EQ(kFullDigit, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullDigit));
471 
472  const std::string kFullSym = "()[]:;!?";
473  EXPECT_EQ(kFullSym, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullSym));
474 }
475 
476 TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertNonLatin) {
477  const std::string kHalfKana = "アイウエオ";
478  const std::string kFullKana = "アイウエオ";
479  EXPECT_EQ(kHalfKana, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfKana));
480  EXPECT_EQ(kFullKana, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullKana));
481 }
482 
483 TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertSpace) {
484  const std::string kHalfSpace = " ";
485  const std::string kFullSpace = " ";
486  EXPECT_EQ(kHalfSpace, StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfSpace));
487  EXPECT_EQ(kFullSpace, StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullSpace));
488 }
489 
490 // ------------ StringRenderer::ConvertFullwidthLatinToBasicLatin() ------------
491 
492 TEST(ConvertFullwidthLatinToBasicLatinTest, DoesConvertFullwidthLatin) {
493  const std::string kHalfAlpha = "ABCD";
494  const std::string kFullAlpha = "ABCD";
495  EXPECT_EQ(kHalfAlpha, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullAlpha));
496 
497  const std::string kHalfDigit = "0123";
498  const std::string kFullDigit = "0123";
499  EXPECT_EQ(kHalfDigit, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullDigit));
500 
501  const std::string kHalfSym = "()[]:;!?";
502  const std::string kFullSym = "()[]:;!?";
503  EXPECT_EQ(kHalfSym, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullSym));
504 }
505 
506 TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertBasicLatin) {
507  const std::string kHalfAlpha = "ABCD";
508  EXPECT_EQ(kHalfAlpha, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfAlpha));
509 
510  const std::string kHalfDigit = "0123";
511  EXPECT_EQ(kHalfDigit, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfDigit));
512 
513  const std::string kHalfSym = "()[]:;!?";
514  EXPECT_EQ(kHalfSym, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfSym));
515 }
516 
517 TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertNonLatin) {
518  const std::string kHalfKana = "アイウエオ";
519  const std::string kFullKana = "アイウエオ";
520  EXPECT_EQ(kHalfKana, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfKana));
521  EXPECT_EQ(kFullKana, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullKana));
522 }
523 
524 TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertSpace) {
525  const std::string kHalfSpace = " ";
526  const std::string kFullSpace = " ";
527  EXPECT_EQ(kHalfSpace, StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfSpace));
528  EXPECT_EQ(kFullSpace, StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullSpace));
529 }
530 } // namespace tesseract
BOOL_PARAM_FLAG(display, false, "Display image for inspection")
const std::vector< std::string > split(const std::string &s, char c)
Definition: helpers.h:41
const char kArabicText[]
bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure, std::vector< TBOX > *boxes, std::vector< std::string > *texts, std::vector< std::string > *box_texts, std::vector< int > *pages)
Definition: boxread.cpp:90
const char kMixedText[]
const char kHinText[]
const char kEngText[]
const char kEngNonLigatureText[]
const char kKorText[]
const char kEngLigatureText[]
TEST_F(EuroText, FastLatinOCR)
TEST(TesseractInstanceTest, TestMultipleTessInstances)
void destroy()
Definition: image.cpp:32
static std::string InsertWordJoiners(const std::string &text)
static std::string ConvertBasicLatinToFullwidthLatin(const std::string &text)
static std::string ConvertFullwidthLatinToBasicLatin(const std::string &text)
static void MakeTmpdir()
Definition: include_gunit.h:38
std::unique_ptr< StringRenderer > renderer_