tesseract  5.0.0
renderer.cpp
Go to the documentation of this file.
1 // File: renderer.cpp
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifdef HAVE_CONFIG_H
19 # include "config_auto.h"
20 #endif
21 #include <tesseract/baseapi.h>
22 #include <tesseract/renderer.h>
23 #include <cstring>
24 #include <memory> // std::unique_ptr
25 #include <string> // std::string
26 #include "serialis.h" // Serialize
27 
28 namespace tesseract {
29 
30 /**********************************************************************
31  * Base Renderer interface implementation
32  **********************************************************************/
33 TessResultRenderer::TessResultRenderer(const char *outputbase, const char *extension)
34  : next_(nullptr)
35  , fout_(stdout)
36  , file_extension_(extension)
37  , title_("")
38  , imagenum_(-1)
39  , happy_(true) {
40  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
41  std::string outfile = std::string(outputbase) + "." + extension;
42  fout_ = fopen(outfile.c_str(), "wb");
43  if (fout_ == nullptr) {
44  happy_ = false;
45  }
46  }
47 }
48 
50  if (fout_ != nullptr) {
51  if (fout_ != stdout) {
52  fclose(fout_);
53  } else {
54  clearerr(fout_);
55  }
56  }
57  delete next_;
58 }
59 
61  if (next == nullptr) {
62  return;
63  }
64 
65  TessResultRenderer *remainder = next_;
66  next_ = next;
67  if (remainder) {
68  while (next->next_ != nullptr) {
69  next = next->next_;
70  }
71  next->next_ = remainder;
72  }
73 }
74 
75 bool TessResultRenderer::BeginDocument(const char *title) {
76  if (!happy_) {
77  return false;
78  }
79  title_ = title;
80  imagenum_ = -1;
81  bool ok = BeginDocumentHandler();
82  if (next_) {
83  ok = next_->BeginDocument(title) && ok;
84  }
85  return ok;
86 }
87 
89  if (!happy_) {
90  return false;
91  }
92  ++imagenum_;
93  bool ok = AddImageHandler(api);
94  if (next_) {
95  ok = next_->AddImage(api) && ok;
96  }
97  return ok;
98 }
99 
101  if (!happy_) {
102  return false;
103  }
104  bool ok = EndDocumentHandler();
105  if (next_) {
106  ok = next_->EndDocument() && ok;
107  }
108  return ok;
109 }
110 
111 void TessResultRenderer::AppendString(const char *s) {
112  AppendData(s, strlen(s));
113 }
114 
115 void TessResultRenderer::AppendData(const char *s, int len) {
116  if (!tesseract::Serialize(fout_, s, len)) {
117  happy_ = false;
118  }
119  fflush(fout_);
120 }
121 
123  return happy_;
124 }
125 
127  return happy_;
128 }
129 
130 /**********************************************************************
131  * UTF8 Text Renderer interface implementation
132  **********************************************************************/
133 TessTextRenderer::TessTextRenderer(const char *outputbase)
134  : TessResultRenderer(outputbase, "txt") {}
135 
137  const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
138  if (utf8 == nullptr) {
139  return false;
140  }
141 
142  const char *pageSeparator = api->GetStringVariable("page_separator");
143  if (pageSeparator != nullptr && *pageSeparator != '\0' && imagenum() > 0) {
144  AppendString(pageSeparator);
145  }
146 
147  AppendString(utf8.get());
148 
149  return true;
150 }
151 
152 /**********************************************************************
153  * TSV Text Renderer interface implementation
154  **********************************************************************/
155 TessTsvRenderer::TessTsvRenderer(const char *outputbase) : TessResultRenderer(outputbase, "tsv") {
156  font_info_ = false;
157 }
158 
159 TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info)
160  : TessResultRenderer(outputbase, "tsv") {
161  font_info_ = font_info;
162 }
163 
165  // Output TSV column headings
166  AppendString(
167  "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
168  "num\tleft\ttop\twidth\theight\tconf\ttext\n");
169  return true;
170 }
171 
173  return true;
174 }
175 
177  const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
178  if (tsv == nullptr) {
179  return false;
180  }
181 
182  AppendString(tsv.get());
183 
184  return true;
185 }
186 
187 /**********************************************************************
188  * UNLV Text Renderer interface implementation
189  **********************************************************************/
190 TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
191  : TessResultRenderer(outputbase, "unlv") {}
192 
194  const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
195  if (unlv == nullptr) {
196  return false;
197  }
198 
199  AppendString(unlv.get());
200 
201  return true;
202 }
203 
204 /**********************************************************************
205  * BoxText Renderer interface implementation
206  **********************************************************************/
208  : TessResultRenderer(outputbase, "box") {}
209 
211  const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
212  if (text == nullptr) {
213  return false;
214  }
215 
216  AppendString(text.get());
217 
218  return true;
219 }
220 
221 #ifndef DISABLED_LEGACY_ENGINE
222 
223 /**********************************************************************
224  * Osd Text Renderer interface implementation
225  **********************************************************************/
226 TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(outputbase, "osd") {}
227 
229  const std::unique_ptr<const char[]> osd(api->GetOsdText(imagenum()));
230  if (osd == nullptr) {
231  return false;
232  }
233 
234  AppendString(osd.get());
235 
236  return true;
237 }
238 
239 #endif // ndef DISABLED_LEGACY_ENGINE
240 
241 } // namespace tesseract
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:251
char * GetTSVText(int page_number)
Definition: baseapi.cpp:1372
char * GetOsdText(int page_number)
Definition: baseapi.cpp:1704
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1512
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:311
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:122
virtual bool AddImageHandler(TessBaseAPI *api)=0
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:88
TessResultRenderer * next()
Definition: renderer.h:58
const char * title() const
Definition: renderer.h:88
bool BeginDocument(const char *title)
Definition: renderer.cpp:75
void AppendString(const char *s)
Definition: renderer.cpp:111
virtual bool EndDocumentHandler()
Definition: renderer.cpp:126
void AppendData(const char *s, int len)
Definition: renderer.cpp:115
TessResultRenderer(const char *outputbase, const char *extension)
Definition: renderer.cpp:33
void insert(TessResultRenderer *next)
Definition: renderer.cpp:60
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:136
TessTextRenderer(const char *outputbase)
Definition: renderer.cpp:133
TessTsvRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:159
bool EndDocumentHandler() override
Definition: renderer.cpp:172
bool BeginDocumentHandler() override
Definition: renderer.cpp:164
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:176
TessUnlvRenderer(const char *outputbase)
Definition: renderer.cpp:190
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:193
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:210
TessBoxTextRenderer(const char *outputbase)
Definition: renderer.cpp:207
TessOsdRenderer(const char *outputbase)
Definition: renderer.cpp:226
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:228