tesseract  5.0.0
resultiterator.h
Go to the documentation of this file.
1 // File: resultiterator.h
3 // Description: Iterator for tesseract results that is capable of
4 // iterating in proper reading order over bidirectional
5 // (e.g. mixed Hebrew and English) text.
6 // Author: David Eger
7 //
8 // (C) Copyright 2011, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
22 #define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
23 
24 #include "export.h" // for TESS_API, TESS_LOCAL
25 #include "ltrresultiterator.h" // for LTRResultIterator
26 #include "publictypes.h" // for PageIteratorLevel
27 #include "unichar.h" // for StrongScriptDirection
28 
29 #include <set> // for std::pair (formally declared in <utility>)
30 #include <vector> // for std::vector
31 
32 namespace tesseract {
33 
35 public:
// Static factory: takes an LTRResultIterator by const reference and returns a
// pointer to a ResultIterator.
// NOTE(review): the name suggests the result is positioned at the start of the
// paragraph that `resit` is in, and that the caller owns the returned object;
// neither is visible in this header -- confirm against the implementation.
36  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
37 
42  ~ResultIterator() override = default;
43 
44  // ============= Moving around within the page ============.
49  void Begin() override;
50 
63  bool Next(PageIteratorLevel level) override;
64 
71  bool IsAtBeginningOf(PageIteratorLevel level) const override;
72 
78  bool IsAtFinalElement(PageIteratorLevel level,
79  PageIteratorLevel element) const override;
80 
81  // ============= Functions that refer to words only ============.
82  // Returns the number of blanks before the current word.
83  int BlanksBeforeWord() const;
84 
85  // ============= Accessing data ==============.
86 
// Virtual accessor that returns a raw char* for the requested iterator `level`.
// NOTE(review): presumably a NUL-terminated UTF-8 string that the caller must
// release -- ownership and encoding are not stated in this header; confirm.
91  virtual char *GetUTF8Text(PageIteratorLevel level) const;
92 
96  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
97  *GetRawLSTMTimesteps() const;
98  virtual std::vector<std::vector<std::pair<const char *, float>>>
99  *GetBestLSTMSymbolChoices() const;
100 
105  bool ParagraphIsLtr() const;
106 
107  // ============= Exposed only for testing =============.
108 
// Static overload, public only so that tests can call it (see the
// "Exposed only for testing" banner above). Takes the paragraph direction flag
// and a vector of StrongScriptDirection values, plus a non-const pointer to a
// vector<int>.
// NOTE(review): presumably `word_dirs` holds one direction per word of a text
// line and `reading_order` is an output parameter that receives the computed
// order -- confirm against the implementation.
131  static void CalculateTextlineOrder(
132  bool paragraph_is_ltr,
133  const std::vector<StrongScriptDirection> &word_dirs,
134  std::vector<int> *reading_order);
135 
// Static integer constants, declared here without initialisers; their values
// are defined outside this header.
// NOTE(review): presumably marker values that can appear in the output of
// CalculateTextlineOrder() -- confirm where they are defined and used.
136  static const int kMinorRunStart;
137  static const int kMinorRunEnd;
138  static const int kComplexWord;
139 
140 protected:
// Constructs a ResultIterator from an LTRResultIterator. `explicit` rules out
// implicit conversion, and the constructor is protected, so it is not callable
// from outside the class hierarchy.
// NOTE(review): presumably external code obtains instances through
// StartOfParagraph() instead -- confirm.
147  explicit ResultIterator(const LTRResultIterator &resit);
148 
149 private:
154  bool CurrentParagraphIsLtr() const;
155 
// Private const overloads that take an LTRResultIterator rather than a
// ready-made vector of word directions. The second overload differs from the
// first only by the extra `ssd` pointer to a vector of StrongScriptDirection.
// NOTE(review): presumably `indices` (and `ssd`, when given) are output
// parameters filled for the text line that `resit` points at -- confirm
// against the implementation.
167  void CalculateTextlineOrder(bool paragraph_is_ltr,
168  const LTRResultIterator &resit,
169  std::vector<int> *indices) const;
171  void CalculateTextlineOrder(bool paragraph_is_ltr,
172  const LTRResultIterator &resit,
173  std::vector<StrongScriptDirection> *ssd,
174  std::vector<int> *indices) const;
175 
180  int LTRWordIndex() const;
181 
186  void CalculateBlobOrder(std::vector<int> *blob_indices) const;
187 
189  void MoveToLogicalStartOfTextline();
190 
195  void MoveToLogicalStartOfWord();
196 
198  bool IsAtFinalSymbolOfWord() const;
199 
201  bool IsAtFirstSymbolOfWord() const;
202 
207  void AppendSuffixMarks(std::string *text) const;
208 
210  void AppendUTF8WordText(std::string *text) const;
211 
219  void IterateAndAppendUTF8TextlineText(std::string *text);
220 
227  void AppendUTF8ParagraphText(std::string *text) const;
228 
230  bool BidiDebug(int min_level) const;
231 
232  bool current_paragraph_is_ltr_;
233 
238  bool at_beginning_of_minor_run_;
239 
241  bool in_minor_direction_;
242 
247  bool preserve_interword_spaces_;
248 };
249 
250 } // namespace tesseract.
251 
252 #endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
static const int kMinorRunEnd
static const int kMinorRunStart
~ResultIterator() override=default
static const int kComplexWord
#define TESS_API
Definition: export.h:34