tesseract  5.0.0
pango_font_info.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: pango_font_info.h
3  * Description: Font-related objects and helper functions
4  * Author: Ranjith Unnikrishnan
5  * Created: Mon Nov 18 2013
6  *
7  * (C) Copyright 2013, Google Inc.
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  * http://www.apache.org/licenses/LICENSE-2.0
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_
21 #define TESSERACT_TRAINING_PANGO_FONT_INFO_H_
22 
23 #include "export.h"
24 
25 #include "commandlineflags.h"
26 
27 #include "pango/pango-font.h"
28 #include "pango/pango.h"
29 #include "pango/pangocairo.h"
30 
31 #include <string>
32 #include <unordered_map>
33 #include <utility>
34 #include <vector>
35 
// Integer alias for a single character code. It is the key type of the
// ch_map tables taken by FontUtils::BestFonts and FontUtils::FontScore.
36 using char32 = signed int;
37 
38 namespace tesseract {
39 
40 // Data holder class for a font, intended to avoid having to work with Pango or
41 // FontConfig-specific objects directly.
42 class TESS_PANGO_TRAINING_API PangoFontInfo {
43 public:
44  enum FontTypeEnum {
49  };
50  PangoFontInfo();
51  ~PangoFontInfo();
52  // Initialize from parsing a font description name, defined as a string of the
53  // format:
54  // "FamilyName [FaceName] [PointSize]"
55  // where a missing FaceName implies the default regular face.
56  // eg. "Arial Italic 12", "Verdana"
57  //
58  // FaceName is a combination of:
59  // [StyleName] [Variant] [Weight] [Stretch]
60  // with (all optional) Pango-defined values of:
61  // StyleName: Oblique, Italic
62  // Variant : Small-Caps
63  // Weight : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy
64  // Stretch : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed,
65  // Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded.
66  explicit PangoFontInfo(const std::string &name);
67  bool ParseFontDescriptionName(const std::string &name);
68 
69  // Returns true if the font have codepoint coverage for the specified text.
70  bool CoversUTF8Text(const char *utf8_text, int byte_length) const;
71  // Modifies string to remove unicode points that are not covered by the
72  // font. Returns the number of characters dropped.
73  int DropUncoveredChars(std::string *utf8_text) const;
74 
75  // Returns true if the entire string can be rendered by the font with full
76  // character coverage and no unknown glyph or dotted-circle glyph
77  // substitutions on encountering a badly formed unicode sequence.
78  // If true, returns individual graphemes. Any whitespace characters in the
79  // original string are also included in the list.
80  bool CanRenderString(const char *utf8_word, int len, std::vector<std::string> *graphemes) const;
81  bool CanRenderString(const char *utf8_word, int len) const;
82 
83  // Retrieves the x_bearing and x_advance for the given utf8 character in the
84  // font. Returns false if the glyph for the character could not be found in
85  // the font.
86  // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html
87  bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const;
88 
89  // If not already initialized, initializes FontConfig by setting its
90  // environment variable and creating a fonts.conf file that points to the
91  // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
92  static void SoftInitFontConfig();
93  // Re-initializes font config, whether or not already initialized.
94  // If already initialized, any existing cache is deleted, just to be sure.
95  static void HardInitFontConfig(const char *fonts_dir, const char *cache_dir);
96 
97  // Accessors
98  std::string DescriptionName() const;
99  // Font Family name eg. "Arial"
100  const std::string &family_name() const {
101  return family_name_;
102  }
103  // Size in points (1/72"), rounded to the nearest integer.
104  int font_size() const {
105  return font_size_;
106  }
108  return font_type_;
109  }
110 
111  int resolution() const {
112  return resolution_;
113  }
114  void set_resolution(const int resolution) {
115  resolution_ = resolution;
116  }
117 
118 private:
119  friend class FontUtils;
120  void Clear();
121  bool ParseFontDescription(const PangoFontDescription *desc);
122  // Returns the PangoFont structure corresponding to the closest available font
123  // in the font map.
124  PangoFont *ToPangoFont() const;
125 
126  // Font properties set automatically from parsing the font description name.
127  std::string family_name_;
128  int font_size_;
129  FontTypeEnum font_type_;
130  // The Pango description that was used to initialize the instance.
131  PangoFontDescription *desc_;
132  // Default output resolution to assume for GetSpacingProperties() and any
133  // other methods that returns pixel values.
134  int resolution_;
135  // Fontconfig operates through an environment variable, so it intrinsically
136  // cannot be thread-friendly, but you can serialize multiple independent
137  // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
138  // These hold the last initialized values set by HardInitFontConfig or
139  // the first call to SoftInitFontConfig.
140  // Directory to be scanned for font files.
141  static std::string fonts_dir_;
142  // Directory to store the cache of font information. (Can be the same as
143  // fonts_dir_)
144  static std::string cache_dir_;
145 
146 private:
147  PangoFontInfo(const PangoFontInfo &) = delete;
148  void operator=(const PangoFontInfo &) = delete;
149 };
150 
151 // Static utility methods for querying font availability and font-selection
152 // based on codepoint coverage.
153 class TESS_PANGO_TRAINING_API FontUtils {
154 public:
155  // Returns true if the font of the given description name is available in the
156  // target directory specified by --fonts_dir
157  static bool IsAvailableFont(const char *font_desc) {
158  return IsAvailableFont(font_desc, nullptr);
159  }
160  // Returns true if the font of the given description name is available in the
161  // target directory specified by --fonts_dir. If false is returned, and
162  // best_match is not nullptr, the closest matching font is returned there.
163  static bool IsAvailableFont(const char *font_desc, std::string *best_match);
164  // Outputs description names of available fonts.
165  static const std::vector<std::string> &ListAvailableFonts();
166 
167  // Picks font among available fonts that covers and can render the given word,
168  // and returns the font description name and the decomposition of the word to
169  // graphemes. Returns false if no suitable font was found.
170  static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name,
171  std::vector<std::string> *graphemes);
172 
173  // Picks font among all_fonts that covers and can render the given word,
174  // and returns the font description name and the decomposition of the word to
175  // graphemes. Returns false if no suitable font was found.
176  static bool SelectFont(const char *utf8_word, const int utf8_len,
177  const std::vector<std::string> &all_fonts, std::string *font_name,
178  std::vector<std::string> *graphemes);
179 
180  // NOTE: The following utilities were written to be backward compatible with
181  // StringRender.
182 
183  // BestFonts returns a font name and a bit vector of the characters it
184  // can render for the fonts that score within some fraction of the best
185  // font on the characters in the given hash map.
186  // In the flags vector, each flag is set according to whether the
187  // corresponding character (in order of iterating ch_map) can be rendered.
188  // The return string is a list of the acceptable fonts that were used.
189  static std::string BestFonts(const std::unordered_map<char32, int64_t> &ch_map,
190  std::vector<std::pair<const char *, std::vector<bool>>> *font_flag);
191 
192  // FontScore returns the weighted renderability score of the given
193  // hash map character table in the given font. The unweighted score
194  // is also returned in raw_score.
195  // The values in the bool vector ch_flags correspond to whether the
196  // corresponding character (in order of iterating ch_map) can be rendered.
197  static int FontScore(const std::unordered_map<char32, int64_t> &ch_map,
198  const std::string &fontname, int *raw_score, std::vector<bool> *ch_flags);
199 
200  // PangoFontInfo is reinitialized, so clear the static list of fonts.
201  static void ReInit();
202  static void PangoFontTypeInfo();
203 
204 private:
205  static std::vector<std::string> available_fonts_; // cache list
206 };
207 } // namespace tesseract
208 
209 #endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_
signed int char32
void set_resolution(const int resolution)
FontTypeEnum font_type() const
const std::string & family_name() const
static bool IsAvailableFont(const char *font_desc)