tesseract  5.0.0
renderer.h
Go to the documentation of this file.
1 // File: renderer.h
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifndef TESSERACT_API_RENDERER_H_
19 #define TESSERACT_API_RENDERER_H_
20 
21 #include "export.h"
22 
23 // To avoid collisions with other type names, include only the ABSOLUTE
24 // MINIMUM set of headers here. Use forward declarations wherever possible
25 // and hide includes of complex types in baseapi.cpp.
26 #include <string> // for std::string
27 #include <vector> // for std::vector
28 
29 struct Pix;
30 
31 namespace tesseract {
32 
33 class TessBaseAPI;
34 
49 public:
50  virtual ~TessResultRenderer();
51 
52  // Takes ownership of the pointer, so it must be a new'd instance.
53  // Renderers aren't ordered; this joins the sequence given by the next
54  // parameter with the existing next() sequence. Renderers must be unique across both lists.
55  void insert(TessResultRenderer *next);
56 
57  // Returns the next renderer or nullptr.
59  return next_;
60  }
61 
67  bool BeginDocument(const char *title);
68 
77  bool AddImage(TessBaseAPI *api);
78 
83  bool EndDocument();
84 
85  const char *file_extension() const {
86  return file_extension_;
87  }
88  const char *title() const {
89  return title_.c_str();
90  }
91 
92  // Returns true while everything is fine; false means something went wrong.
93  bool happy() const {
94  return happy_;
95  }
96 
106  int imagenum() const {
107  return imagenum_;
108  }
109 
110 protected:
121  TessResultRenderer(const char *outputbase, const char *extension);
122 
123  // Hook for specialized handling in BeginDocument()
124  virtual bool BeginDocumentHandler();
125 
126  // This must be overridden to render the OCR'd results
127  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
128 
129  // Hook for specialized handling in EndDocument()
130  virtual bool EndDocumentHandler();
131 
132  // Renderers can call this to append '\0' terminated strings into
133  // the output string returned by GetOutput.
134  // This method will grow the output buffer if needed.
135  void AppendString(const char *s);
136 
137  // Renderers can call this to append binary byte sequences into
138  // the output string returned by GetOutput. Note that s is not necessarily
139  // '\0' terminated (and can contain '\0' within it).
140  // This method will grow the output buffer if needed.
141  void AppendData(const char *s, int len);
142 
143 private:
144  TessResultRenderer *next_; // Can link multiple renderers together
145  FILE *fout_; // output file pointer
146  const char *file_extension_; // standard extension for generated output
147  std::string title_; // title of document being rendered
148  int imagenum_; // index of last image added
149  bool happy_; // I get grumpy when the disk fills up, etc.
150 };
151 
156 public:
157  explicit TessTextRenderer(const char *outputbase);
158 
159 protected:
160  bool AddImageHandler(TessBaseAPI *api) override;
161 };
162 
167 public:
168  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
169  explicit TessHOcrRenderer(const char *outputbase);
170 
171 protected:
172  bool BeginDocumentHandler() override;
173  bool AddImageHandler(TessBaseAPI *api) override;
174  bool EndDocumentHandler() override;
175 
176 private:
177  bool font_info_; // whether to print font information
178 };
179 
184 public:
185  explicit TessAltoRenderer(const char *outputbase);
186 
187 protected:
188  bool BeginDocumentHandler() override;
189  bool AddImageHandler(TessBaseAPI *api) override;
190  bool EndDocumentHandler() override;
191 
192 private:
193  bool begin_document;
194 };
195 
200 public:
201  explicit TessTsvRenderer(const char *outputbase, bool font_info);
202  explicit TessTsvRenderer(const char *outputbase);
203 
204 protected:
205  bool BeginDocumentHandler() override;
206  bool AddImageHandler(TessBaseAPI *api) override;
207  bool EndDocumentHandler() override;
208 
209 private:
210  bool font_info_; // whether to print font information
211 };
212 
217 public:
218  // datadir is the location of the TESSDATA. We need it because
219  // we load a custom PDF font from this location.
220  TessPDFRenderer(const char *outputbase, const char *datadir,
221  bool textonly = false);
222 
223 protected:
224  bool BeginDocumentHandler() override;
225  bool AddImageHandler(TessBaseAPI *api) override;
226  bool EndDocumentHandler() override;
227 
228 private:
229  // We don't want to have every image in memory at once,
230  // so we store some metadata as we go along producing
231  // PDFs one page at a time. At the end, that metadata is
232  // used to make everything that isn't easily handled in a
233  // streaming fashion.
234  long int obj_; // counter for PDF objects
235  std::vector<long int> offsets_; // offset of every PDF object in bytes
236  std::vector<long int> pages_; // object number for every /Page object
237  std::string datadir_; // where to find the custom font
238  bool textonly_; // skip images if set
239  // Bookkeeping only. DIY = Do It Yourself.
240  void AppendPDFObjectDIY(size_t objectsize);
241  // Bookkeeping + emit data.
242  void AppendPDFObject(const char *data);
243  // Create the /Contents object for an entire page.
244  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
245  // Turn an image into a PDF object. Only transcode if we have to.
246  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
247  char **pdf_object, long int *pdf_object_size,
248  int jpg_quality);
249 };
250 
255 public:
256  explicit TessUnlvRenderer(const char *outputbase);
257 
258 protected:
259  bool AddImageHandler(TessBaseAPI *api) override;
260 };
261 
266 public:
267  explicit TessLSTMBoxRenderer(const char *outputbase);
268 
269 protected:
270  bool AddImageHandler(TessBaseAPI *api) override;
271 };
272 
277 public:
278  explicit TessBoxTextRenderer(const char *outputbase);
279 
280 protected:
281  bool AddImageHandler(TessBaseAPI *api) override;
282 };
283 
288 public:
289  explicit TessWordStrBoxRenderer(const char *outputbase);
290 
291 protected:
292  bool AddImageHandler(TessBaseAPI *api) override;
293 };
294 
295 #ifndef DISABLED_LEGACY_ENGINE
296 
301 public:
302  explicit TessOsdRenderer(const char *outputbase);
303 
304 protected:
305  bool AddImageHandler(TessBaseAPI *api) override;
306 };
307 
308 #endif // ndef DISABLED_LEGACY_ENGINE
309 
310 } // namespace tesseract.
311 
312 #endif // TESSERACT_API_RENDERER_H_
struct TessBaseAPI TessBaseAPI
Definition: capi.h:62
struct TessResultRenderer TessResultRenderer
Definition: capi.h:61
virtual bool AddImageHandler(TessBaseAPI *api)=0
TessResultRenderer * next()
Definition: renderer.h:58
const char * title() const
Definition: renderer.h:88
const char * file_extension() const
Definition: renderer.h:85
#define TESS_API
Definition: export.h:34