tesseract  5.0.0
apiexample_test.cc
Go to the documentation of this file.
1 // File: apiexample_test.cc
3 // Description: Api Test for Tesseract using text fixtures and parameters.
4 // Tests for Devanagari, Latin and Arabic scripts are disabled by default.
5 // Disabled tests can be run when required by using the
6 // --gtest_also_run_disabled_tests argument.
7 // ./unittest/apiexample_test --gtest_also_run_disabled_tests
8 //
9 // Author: ShreeDevi Kumar
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 // http://www.apache.org/licenses/LICENSE-2.0
15 // Unless required by applicable law or agreed to in writing, software
16 // distributed under the License is distributed on an "AS IS" BASIS,
17 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 // See the License for the specific language governing permissions and
19 // limitations under the License.
21 
22 // expects clone of tessdata_fast repo in ../../tessdata_fast
23 
24 //#include "log.h"
25 #include <allheaders.h>
26 #include <tesseract/baseapi.h>
27 #include <time.h>
28 #include <fstream>
29 #include <iostream>
30 #include <locale>
31 #include <memory> // std::unique_ptr
32 #include <string>
33 #include "include_gunit.h"
34 #include "image.h"
35 
36 namespace tesseract {
37 
38 class QuickTest : public testing::Test {
39 protected:
40  void SetUp() override {
41  start_time_ = time(nullptr);
42  }
43  void TearDown() override {
44 #ifndef NDEBUG
45  // Debug builds can be very slow, so allow 4 min for OCR of a test image.
46  // apitest_example including disabled tests takes about 18 min on ARMv7.
47  const time_t MAX_SECONDS_FOR_TEST = 240;
48 #else
49  // Release builds typically need less than 10 s for OCR of a test image,
50  // apitest_example including disabled tests takes about 90 s on ARMv7.
51  const time_t MAX_SECONDS_FOR_TEST = 55;
52 #endif
53  const time_t end_time = time(nullptr);
54  EXPECT_TRUE(end_time - start_time_ <= MAX_SECONDS_FOR_TEST)
55  << "The test took too long - " << ::testing::PrintToString(end_time - start_time_);
56  }
57  time_t start_time_;
58 };
59 
60 void OCRTester(const char *imgname, const char *groundtruth, const char *tessdatadir,
61  const char *lang) {
62  // log.info() << tessdatadir << " for language: " << lang << std::endl;
63  char *outText;
64  std::locale loc("C"); // You can also use "" for the default system locale
65  std::ifstream file(groundtruth);
66  file.imbue(loc); // Use it for file input
67  std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
68  auto api = std::make_unique<tesseract::TessBaseAPI>();
69  ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
70  Image image = pixRead(imgname);
71  ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
72  api->SetImage(image);
73  outText = api->GetUTF8Text();
74  EXPECT_EQ(gtText, outText) << "Phototest.tif OCR does not match ground truth for "
75  << ::testing::PrintToString(lang);
76  api->End();
77  api->ClearPersistentCache();
78  delete[] outText;
79  image.destroy();
80 }
81 
82 class MatchGroundTruth : public QuickTest, public ::testing::WithParamInterface<const char *> {};
83 
84 TEST_P(MatchGroundTruth, FastPhototestOCR) {
85  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR "_fast",
86  GetParam());
87 }
88 
89 TEST_P(MatchGroundTruth, BestPhototestOCR) {
90  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR "_best",
91  GetParam());
92 }
93 
94 TEST_P(MatchGroundTruth, TessPhototestOCR) {
95  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR, GetParam());
96 }
97 
98 INSTANTIATE_TEST_SUITE_P(Eng, MatchGroundTruth, ::testing::Values("eng"));
99 INSTANTIATE_TEST_SUITE_P(DISABLED_Latin, MatchGroundTruth, ::testing::Values("script/Latin"));
100 INSTANTIATE_TEST_SUITE_P(DISABLED_Deva, MatchGroundTruth, ::testing::Values("script/Devanagari"));
101 INSTANTIATE_TEST_SUITE_P(DISABLED_Arabic, MatchGroundTruth, ::testing::Values("script/Arabic"));
102 
103 class EuroText : public QuickTest {};
104 
105 TEST_F(EuroText, FastLatinOCR) {
106  OCRTester(TESTING_DIR "/eurotext.tif", TESTING_DIR "/eurotext.txt", TESSDATA_DIR "_fast",
107  "script/Latin");
108 }
109 
// The script/Latin output for eurotext.tif does not match the ground truth
// with tessdata and tessdata_best, so those two are not tested here.
113 
114 } // namespace tesseract
void OCRTester(const char *imgname, const char *groundtruth, const char *tessdatadir, const char *lang)
INSTANTIATE_TEST_SUITE_P(Eng, MatchGroundTruth, ::testing::Values("eng"))
TEST_P(MatchGroundTruth, FastPhototestOCR)
TEST_F(EuroText, FastLatinOCR)
void destroy()
Definition: image.cpp:32
void TearDown() override
void SetUp() override