tesseract  5.0.0
shapetable_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <string>
13 #include <utility>
14 
15 #include "include_gunit.h"
16 
17 #include "serialis.h"
18 #include "shapetable.h"
19 #include "unicharset.h"
20 
21 namespace tesseract {
22 
23 #ifndef DISABLED_LEGACY_ENGINE
24 
25 static std::string TmpNameToPath(const std::string &name) {
26  return file::JoinPath(FLAGS_test_tmpdir, name);
27 }
28 
29 // Sets up a simple shape with some unichars.
30 static void Setup352(int font_id, Shape *shape) {
31  shape->AddToShape(3, font_id);
32  shape->AddToShape(5, font_id);
33  shape->AddToShape(2, font_id);
34 }
35 
36 // Verifies some properties of the 352 shape.
37 static void Expect352(int font_id, const Shape &shape) {
38  EXPECT_EQ(3, shape.size());
39  EXPECT_TRUE(shape.ContainsUnichar(2));
40  EXPECT_TRUE(shape.ContainsUnichar(3));
41  EXPECT_TRUE(shape.ContainsUnichar(5));
42  EXPECT_FALSE(shape.ContainsUnichar(1));
43  EXPECT_TRUE(shape.ContainsUnicharAndFont(2, font_id));
44  EXPECT_FALSE(shape.ContainsUnicharAndFont(2, font_id - 1));
45  EXPECT_FALSE(shape.ContainsUnicharAndFont(font_id, 2));
46  // It should be a subset of itself.
47  EXPECT_TRUE(shape.IsSubsetOf(shape));
48 }
49 
50 #endif
51 
52 // The fixture for testing Shape.
53 class ShapeTest : public testing::Test {
54 protected:
55  void SetUp() override {
56  std::locale::global(std::locale(""));
58  }
59 };
60 
61 // Tests that a Shape works as expected for all the basic functions.
62 TEST_F(ShapeTest, BasicTest) {
63 #ifdef DISABLED_LEGACY_ENGINE
64  // Skip test because Shape is missing.
65  GTEST_SKIP();
66 #else
67  Shape shape1;
68  EXPECT_EQ(0, shape1.size());
69  Setup352(101, &shape1);
70  Expect352(101, shape1);
71  // It should still work after file I/O.
72  std::string filename = TmpNameToPath("shapefile");
73  FILE *fp = fopen(filename.c_str(), "wb");
74  ASSERT_TRUE(fp != nullptr);
75  EXPECT_TRUE(shape1.Serialize(fp));
76  fclose(fp);
77  TFile tfp;
78  EXPECT_TRUE(tfp.Open(filename.c_str(), nullptr));
79  Shape shape2;
80  EXPECT_TRUE(shape2.DeSerialize(&tfp));
81  Expect352(101, shape2);
82  // They should be subsets of each other.
83  EXPECT_TRUE(shape1.IsSubsetOf(shape2));
84  EXPECT_TRUE(shape2.IsSubsetOf(shape1));
85  // They should be equal unichars.
86  EXPECT_TRUE(shape1.IsEqualUnichars(&shape2));
87  // and still pass afterwards.
88  Expect352(101, shape1);
89  Expect352(101, shape2);
90 #endif
91 }
92 
93 // Tests AddShape separately, as it takes quite a bit of work.
94 TEST_F(ShapeTest, AddShapeTest) {
95 #ifdef DISABLED_LEGACY_ENGINE
96  // Skip test because Shape is missing.
97  GTEST_SKIP();
98 #else
99  Shape shape1;
100  Setup352(101, &shape1);
101  Expect352(101, shape1);
102  // Now setup a different shape with different content.
103  Shape shape2;
104  shape2.AddToShape(3, 101); // Duplicates shape1.
105  shape2.AddToShape(5, 110); // Different font to shape1.
106  shape2.AddToShape(7, 101); // Different unichar to shape1.
107  // They should NOT be subsets of each other.
108  EXPECT_FALSE(shape1.IsSubsetOf(shape2));
109  EXPECT_FALSE(shape2.IsSubsetOf(shape1));
110  // Now add shape2 to shape1.
111  shape1.AddShape(shape2);
112  // Test subsets again.
113  EXPECT_FALSE(shape1.IsSubsetOf(shape2));
114  EXPECT_TRUE(shape2.IsSubsetOf(shape1));
115  EXPECT_EQ(4, shape1.size());
116  EXPECT_FALSE(shape1.ContainsUnichar(1));
117  EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 101));
118  EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 110));
119  EXPECT_FALSE(shape1.ContainsUnicharAndFont(3, 110));
120  EXPECT_FALSE(shape1.ContainsUnicharAndFont(7, 110));
121  EXPECT_FALSE(shape1.IsEqualUnichars(&shape2));
122 #endif
123 }
124 
125 // The fixture for testing ShapeTable; it adds nothing to testing::Test.
126 class ShapeTableTest : public testing::Test {};
127 
128 // Tests that a Shape works as expected for all the basic functions.
129 TEST_F(ShapeTableTest, FullTest) {
130 #ifdef DISABLED_LEGACY_ENGINE
131  // Skip test because Shape is missing.
132  GTEST_SKIP();
133 #else
134  Shape shape1;
135  Setup352(101, &shape1);
136  // Build a shape table with the same data, but in separate shapes.
137  UNICHARSET unicharset;
138  unicharset.unichar_insert(" ");
139  for (int i = 1; i <= 10; ++i) {
140  char class_str[20];
141  snprintf(class_str, sizeof(class_str), "class%d", i);
142  unicharset.unichar_insert(class_str);
143  }
144  ShapeTable st(unicharset);
145  EXPECT_EQ(0, st.AddShape(3, 101));
146  EXPECT_EQ(1, st.AddShape(5, 101));
147  EXPECT_EQ(2, st.AddShape(2, 101));
148  EXPECT_EQ(3, st.NumShapes());
149  Expect352(101, shape1);
150  EXPECT_EQ(3, st.AddShape(shape1));
151  for (int i = 0; i < 3; ++i) {
152  EXPECT_FALSE(st.MutableShape(i)->IsEqualUnichars(&shape1));
153  }
154  EXPECT_TRUE(st.MutableShape(3)->IsEqualUnichars(&shape1));
155  EXPECT_TRUE(st.AnyMultipleUnichars());
156  st.DeleteShape(3);
157  EXPECT_FALSE(st.AnyMultipleUnichars());
158 
159  // Now merge to make a single shape like shape1.
160  EXPECT_EQ(1, st.MasterUnicharCount(0));
161  st.MergeShapes(0, 1);
162  EXPECT_EQ(3, st.MergedUnicharCount(1, 2));
163  st.MergeShapes(1, 2);
164  for (int i = 0; i < 3; ++i) {
165  EXPECT_EQ(3, st.MasterUnicharCount(i));
166  // Master font count is the sum of all the font counts in the shape, not
167  // the actual number of different fonts in the shape.
168  EXPECT_EQ(3, st.MasterFontCount(i));
169  }
170  EXPECT_EQ(0, st.MasterDestinationIndex(1));
171  EXPECT_EQ(0, st.MasterDestinationIndex(2));
172  ShapeTable st2;
173  st2.AppendMasterShapes(st, nullptr);
174  EXPECT_EQ(1, st.NumMasterShapes());
175  EXPECT_EQ(1, st2.NumShapes());
176  EXPECT_TRUE(st2.MutableShape(0)->IsEqualUnichars(&shape1));
177  EXPECT_TRUE(st2.AnyMultipleUnichars());
178 #endif
179 }
180 
181 } // namespace tesseract
TEST_F(EuroText, FastLatinOCR)
bool Open(const char *filename, FileReader reader)
Definition: serialis.cpp:140
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
Definition: unicharset.cpp:654
bool IsSubsetOf(const Shape &other) const
Definition: shapetable.cpp:206
bool ContainsUnicharAndFont(int unichar_id, int font_id) const
Definition: shapetable.cpp:133
void AddToShape(int unichar_id, int font_id)
Definition: shapetable.cpp:103
int size() const
Definition: shapetable.h:169
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:86
void AddShape(const Shape &other)
Definition: shapetable.cpp:123
bool IsEqualUnichars(Shape *other)
Definition: shapetable.cpp:222
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:92
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:150
bool AnyMultipleUnichars() const
Definition: shapetable.cpp:458
int MergedUnicharCount(unsigned shape_id1, unsigned shape_id2) const
Definition: shapetable.cpp:520
unsigned AddShape(int unichar_id, int font_id)
Definition: shapetable.cpp:351
int NumMasterShapes() const
Definition: shapetable.cpp:699
unsigned MasterDestinationIndex(unsigned shape_id) const
Definition: shapetable.cpp:548
int MasterFontCount(unsigned shape_id) const
Definition: shapetable.cpp:509
unsigned NumShapes() const
Definition: shapetable.h:248
void DeleteShape(unsigned shape_id)
Definition: shapetable.cpp:376
Shape * MutableShape(unsigned shape_id)
Definition: shapetable.h:295
void MergeShapes(unsigned shape_id1, unsigned shape_id2)
Definition: shapetable.cpp:530
unsigned MasterUnicharCount(unsigned shape_id) const
Definition: shapetable.cpp:503
void AppendMasterShapes(const ShapeTable &other, std::vector< int > *shape_map)
Definition: shapetable.cpp:683
static void MakeTmpdir()
Definition: include_gunit.h:38
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65
void SetUp() override