tesseract  5.0.0
tesseract::TextlineProjectionTest Class Reference
Inheritance diagram for tesseract::TextlineProjectionTest:

Protected Member Functions

std::string OutputNameToPath (const std::string &name)
 
 TextlineProjectionTest ()
 
 ~TextlineProjectionTest () override
 
void SetImage (const char *filename)
 
void SetupProjection ()
 
void EvaluateBox (const TBOX &box, bool greater_or_equal, int target_value, const char *text, const char *message)
 
void EvaluateDistance (const TBOX &box, const TBOX &true_box, const TBOX &false_box, const char *text, const char *message)
 
void VerifyBoxes (const char *imagefile, int line_height)
 

Protected Attributes

Image src_pix_
 
Image bin_pix_
 
BLOCK_LIST blocks_
 
std::string ocr_text_
 
tesseract::TessBaseAPI api_
 
std::unique_ptr< Tesseract > tesseract_
 
ColumnFinder * finder_
 
const DENORM * denorm_
 
const TextlineProjection * projection_
 

Detailed Description

Definition at line 33 of file textlineprojection_test.cc.

Constructor & Destructor Documentation

◆ TextlineProjectionTest()

tesseract::TextlineProjectionTest::TextlineProjectionTest ( )
inline protected

Definition at line 40 of file textlineprojection_test.cc.

40  {
41  src_pix_ = nullptr;
42  bin_pix_ = nullptr;
43  finder_ = nullptr;
44  denorm_ = nullptr;
45  projection_ = nullptr;
46  }

◆ ~TextlineProjectionTest()

tesseract::TextlineProjectionTest::~TextlineProjectionTest ( )
inline override protected

Definition at line 47 of file textlineprojection_test.cc.

47  {
48  src_pix_.destroy();
49  bin_pix_.destroy();
50  delete finder_;
51  }
void destroy()
Definition: image.cpp:32

Member Function Documentation

◆ EvaluateBox()

void tesseract::TextlineProjectionTest::EvaluateBox ( const TBOX & box,
bool  greater_or_equal,
int  target_value,
const char *  text,
const char *  message 
)
inline protected

Definition at line 112 of file textlineprojection_test.cc.

113  {
114  int value = projection_->EvaluateBox(box, denorm_, false);
115  if (greater_or_equal != (value > target_value)) {
116  LOG(INFO) << "EvaluateBox too " << (greater_or_equal ? "low" : "high")
117  << ":" << value << " vs " << target_value << " for " << message << " word '" << text << "' at:";
118  box.print();
119  value = projection_->EvaluateBox(box, denorm_, true);
120  } else {
121  LOG(INFO) << "EvaluateBox OK(" << value << ") for " << message << " word '" << text << "'";
122  }
123  if (greater_or_equal) {
124  EXPECT_GE(value, target_value);
125  } else {
126  EXPECT_LT(value, target_value);
127  }
128  }
@ LOG
@ INFO
Definition: log.h:28
int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const

◆ EvaluateDistance()

void tesseract::TextlineProjectionTest::EvaluateDistance ( const TBOX & box,
const TBOX & true_box,
const TBOX & false_box,
const char *  text,
const char *  message 
)
inline protected

Definition at line 132 of file textlineprojection_test.cc.

133  {
134  int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
135  int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
136  if (false_dist <= true_dist) {
137  LOG(INFO) << "Distance wrong:" << false_dist << " vs " << true_dist
138  << " for " << message << " word '" << text << "' at:";
139  true_box.print();
140  projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
141  projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
142  } else {
143  LOG(INFO) << "Distance OK(" << false_dist << " vs " << true_dist
144  << ") for " << message << " word '" << text << "'";
145  }
146  }
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const

◆ OutputNameToPath()

std::string tesseract::TextlineProjectionTest::OutputNameToPath ( const std::string &  name)
inline protected

Definition at line 35 of file textlineprojection_test.cc.

35  {
37  return file::JoinPath(FLAGS_test_tmpdir, name);
38  }
static void MakeTmpdir()
Definition: include_gunit.h:38
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65

◆ SetImage()

void tesseract::TextlineProjectionTest::SetImage ( const char *  filename)
inlineprotected

Definition at line 53 of file textlineprojection_test.cc.

53  {
54  src_pix_.destroy();
55  src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str());
56  api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
59  }
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:266
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:508
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:365
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:573

◆ SetupProjection()

void tesseract::TextlineProjectionTest::SetupProjection ( )
inline protected

Definition at line 68 of file textlineprojection_test.cc.

68  {
70  auto osd_tess = std::make_unique<Tesseract>();
71  OSResults osr;
72  EXPECT_EQ(osd_tess->init_tesseract(TESSDATA_DIR, "", "osd", tesseract::OEM_TESSERACT_ONLY,
73  nullptr, 0, nullptr, nullptr, false, &mgr),
74  0);
75  tesseract_ = std::make_unique<Tesseract>();
76  EXPECT_EQ(tesseract_->init_tesseract(TESSDATA_DIR, "", "eng", tesseract::OEM_TESSERACT_ONLY,
77  nullptr, 0, nullptr, nullptr, false, &mgr),
78  0);
80  *tesseract_->mutable_pix_binary() = bin_pix_.clone();
81  osd_tess->set_source_resolution(api_.tesseract()->source_resolution());
82  tesseract_->set_source_resolution(api_.tesseract()->source_resolution());
83  int width = pixGetWidth(bin_pix_);
84  int height = pixGetHeight(bin_pix_);
85  // First make a single block covering the whole image.
86  auto *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
87  block->set_right_to_left(false);
88  BLOCK_LIST src_blocks;
89  BLOCK_IT block_it(&src_blocks);
90  block_it.add_to_end(block);
91  Image photomask_pix = nullptr;
92  // The blocks made by the ColumnFinder. Moved to blocks before return.
93  BLOCK_LIST found_blocks;
94  TO_BLOCK_LIST temp_blocks;
95  finder_ =
96  tesseract_->SetupPageSegAndDetectOrientation(tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess.get(),
97  &osr, &temp_blocks, &photomask_pix, nullptr);
98  TO_BLOCK_IT to_block_it(&temp_blocks);
99  TO_BLOCK *to_block = to_block_it.data();
100  denorm_ = finder_->denorm();
101  TO_BLOCK_LIST to_blocks;
102  BLOBNBOX_LIST diacritic_blobs;
103  EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block, photomask_pix, nullptr,
104  nullptr, nullptr, &found_blocks, &diacritic_blobs, &to_blocks),
105  0);
107  photomask_pix.destroy();
108  }
@ PSM_AUTO
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:164
Tesseract * tesseract() const
Definition: baseapi.h:713
Pix * GetThresholdedImage()
Definition: baseapi.cpp:628
int source_resolution() const
Image clone() const
Definition: image.cpp:24
const TextlineProjection * projection() const
Definition: colfind.h:70
const DENORM * denorm() const
Definition: colfind.h:67
int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block, Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
Definition: colfind.cpp:286
std::unique_ptr< Tesseract > tesseract_

◆ VerifyBoxes()

void tesseract::TextlineProjectionTest::VerifyBoxes ( const char *  imagefile,
int  line_height 
)
inline protected

Definition at line 150 of file textlineprojection_test.cc.

150  {
151  SetImage(imagefile);
152  api_.Recognize(nullptr);
153  SetupProjection();
154  MutableIterator *it = api_.GetMutableIterator();
155  do {
156  char *text = it->GetUTF8Text(tesseract::RIL_WORD);
157  const PAGE_RES_IT *pr_it = it->PageResIt();
158  WERD_RES *word = pr_it->word();
159  // The word_box refers to the internal, possibly rotated, coords.
160  TBOX word_box = word->word->bounding_box();
161  bool small_word = word_box.height() * 1.5 < line_height;
162  bool tall_word = word_box.height() * 1.125 > line_height;
163  // We pad small and tall words differently because ascenders and
164  // descenders affect the position and size of the upper/lower boxes.
165  int padding;
166  if (small_word) {
167  padding = word_box.height();
168  } else if (tall_word) {
169  padding = word_box.height() / 3;
170  } else {
171  padding = word_box.height() / 2;
172  }
173  // Test that the word box gets a good score.
174  EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word");
175 
176  // Now test a displaced box, both above and below the word.
177  TBOX upper_box(word_box);
178  upper_box.set_bottom(word_box.top());
179  upper_box.set_top(word_box.top() + padding);
180  EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word");
181  EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical");
182  TBOX lower_box = word_box;
183  lower_box.set_top(word_box.bottom());
184  lower_box.set_bottom(word_box.bottom() - padding);
185  if (tall_word) {
186  lower_box.move(ICOORD(0, padding / 2));
187  }
188  EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
189  EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");
190 
191  // Since some words have no text below and some words have no text above
192  // check that at least one of the boxes satisfies BoxOutOfTextline.
193  bool upper_or_lower_out_of_textline =
194  projection_->BoxOutOfHTextline(upper_box, denorm_, false) ||
195  projection_->BoxOutOfHTextline(lower_box, denorm_, false);
196  if (!upper_or_lower_out_of_textline) {
197  projection_->BoxOutOfHTextline(upper_box, denorm_, true);
198  projection_->BoxOutOfHTextline(lower_box, denorm_, true);
199  }
200  EXPECT_TRUE(upper_or_lower_out_of_textline);
201 
202  // Now test DistanceOfBoxFromBox by faking a challenger word, and asking
203  // that each pad box be nearer to its true textline than the
204  // challenger. Due to the tight spacing of latin text, getting
205  // the right position and size of these test boxes is quite fiddly.
206  padding = line_height / 4;
207  upper_box.set_top(upper_box.bottom() + padding);
208  TBOX target_box(word_box);
209  if (!small_word) {
210  upper_box.move(ICOORD(0, -padding * 3 / 2));
211  }
212  target_box.set_top(upper_box.bottom());
213  TBOX upper_challenger(upper_box);
214  upper_challenger.set_bottom(upper_box.top());
215  upper_challenger.set_top(upper_box.top() + word_box.height());
216  EvaluateDistance(upper_box, target_box, upper_challenger, text, "Upper Word");
217  if (tall_word) {
218  lower_box.move(ICOORD(0, padding / 2));
219  }
220  lower_box.set_bottom(lower_box.top() - padding);
221  target_box = word_box;
222  target_box.set_bottom(lower_box.top());
223  TBOX lower_challenger(lower_box);
224  lower_challenger.set_top(lower_box.bottom());
225  lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
226  EvaluateDistance(lower_box, target_box, lower_challenger, text, "Lower Word");
227 
228  delete[] text;
229  } while (it->Next(tesseract::RIL_WORD));
230  delete it;
231  }
@ TBOX
const int kMinStrongTextValue
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:831
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1330
virtual char * GetUTF8Text(PageIteratorLevel level) const
bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const
void EvaluateDistance(const TBOX &box, const TBOX &true_box, const TBOX &false_box, const char *text, const char *message)
void EvaluateBox(const TBOX &box, bool greater_or_equal, int target_value, const char *text, const char *message)

Member Data Documentation

◆ api_

tesseract::TessBaseAPI tesseract::TextlineProjectionTest::api_
protected

Definition at line 237 of file textlineprojection_test.cc.

◆ bin_pix_

Image tesseract::TextlineProjectionTest::bin_pix_
protected

Definition at line 234 of file textlineprojection_test.cc.

◆ blocks_

BLOCK_LIST tesseract::TextlineProjectionTest::blocks_
protected

Definition at line 235 of file textlineprojection_test.cc.

◆ denorm_

const DENORM* tesseract::TextlineProjectionTest::denorm_
protected

Definition at line 240 of file textlineprojection_test.cc.

◆ finder_

ColumnFinder* tesseract::TextlineProjectionTest::finder_
protected

Definition at line 239 of file textlineprojection_test.cc.

◆ ocr_text_

std::string tesseract::TextlineProjectionTest::ocr_text_
protected

Definition at line 236 of file textlineprojection_test.cc.

◆ projection_

const TextlineProjection* tesseract::TextlineProjectionTest::projection_
protected

Definition at line 241 of file textlineprojection_test.cc.

◆ src_pix_

Image tesseract::TextlineProjectionTest::src_pix_
protected

Definition at line 233 of file textlineprojection_test.cc.

◆ tesseract_

std::unique_ptr<Tesseract> tesseract::TextlineProjectionTest::tesseract_
protected

Definition at line 238 of file textlineprojection_test.cc.


The documentation for this class was generated from the following file: