tesseract  5.0.0
tesseract::FontUtils Class Reference

#include <pango_font_info.h>

Static Public Member Functions

static bool IsAvailableFont (const char *font_desc)
 
static bool IsAvailableFont (const char *font_desc, std::string *best_match)
 
static const std::vector< std::string > & ListAvailableFonts ()
 
static bool SelectFont (const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)
 
static bool SelectFont (const char *utf8_word, const int utf8_len, const std::vector< std::string > &all_fonts, std::string *font_name, std::vector< std::string > *graphemes)
 
static std::string BestFonts (const std::unordered_map< char32, int64_t > &ch_map, std::vector< std::pair< const char *, std::vector< bool >>> *font_flag)
 
static int FontScore (const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
 
static void ReInit ()
 
static void PangoFontTypeInfo ()
 

Detailed Description

Definition at line 153 of file pango_font_info.h.

Member Function Documentation

◆ BestFonts()

std::string tesseract::FontUtils::BestFonts ( const std::unordered_map< char32, int64_t > &  ch_map,
std::vector< std::pair< const char *, std::vector< bool >>> *  font_flag 
)
static

Definition at line 612 of file pango_font_info.cpp.

613  {
614  const double kMinOKFraction = 0.99;
615  // Weighted fraction of characters that must be renderable in a font to make
616  // it OK even if the raw count is not good.
617  const double kMinWeightedFraction = 0.99995;
618 
619  fonts->clear();
620  std::vector<std::vector<bool>> font_flags;
621  std::vector<int> font_scores;
622  std::vector<int> raw_scores;
623  int most_ok_chars = 0;
624  int best_raw_score = 0;
625  const std::vector<std::string> &font_names = FontUtils::ListAvailableFonts();
626  for (const auto &font_name : font_names) {
627  std::vector<bool> ch_flags;
628  int raw_score = 0;
629  int ok_chars = FontScore(ch_map, font_name, &raw_score, &ch_flags);
630  most_ok_chars = std::max(ok_chars, most_ok_chars);
631  best_raw_score = std::max(raw_score, best_raw_score);
632 
633  font_flags.push_back(ch_flags);
634  font_scores.push_back(ok_chars);
635  raw_scores.push_back(raw_score);
636  }
637 
638  // Now select the fonts with a score above a threshold fraction
639  // of both the raw and weighted best scores. To prevent bogus fonts being
640  // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of
641  // BOTH weighted and raw scores.
642  // In low character-count scripts, the issue is more getting enough fonts,
643  // when only 1 or 2 might have all those rare dingbats etc in them, so we
644  // allow a font with a very high weighted (coverage) score
645  // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor.
646  int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
647  int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
648  int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
649 
650  std::string font_list;
651  for (unsigned i = 0; i < font_names.size(); ++i) {
652  int score = font_scores[i];
653  int raw_score = raw_scores[i];
654  if ((score >= least_good_enough && raw_score >= least_raw_enough) || score >= override_enough) {
655  fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
656  tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(),
657  100.0 * score / most_ok_chars, raw_score, 100.0 * raw_score / best_raw_score);
658  font_list += font_names[i];
659  font_list += "\n";
660  } else if (score >= least_good_enough || raw_score >= least_raw_enough) {
661  tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(),
662  100.0 * score / most_ok_chars, raw_score, 100.0 * raw_score / best_raw_score);
663  }
664  }
665  return font_list;
666 }
#define tlog(level,...)
Definition: tlog.h:36
static int FontScore(const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
static const std::vector< std::string > & ListAvailableFonts()

◆ FontScore()

int tesseract::FontUtils::FontScore ( const std::unordered_map< char32, int64_t > &  ch_map,
const std::string &  fontname,
int *  raw_score,
std::vector< bool > *  ch_flags 
)
static

Definition at line 577 of file pango_font_info.cpp.

578  {
579  PangoFontInfo font_info;
580  if (!font_info.ParseFontDescriptionName(fontname)) {
581  tprintf("ERROR: Could not parse %s\n", fontname.c_str());
582  }
583  PangoFont *font = font_info.ToPangoFont();
584  PangoCoverage *coverage = nullptr;
585  if (font != nullptr) {
586  coverage = pango_font_get_coverage(font, nullptr);
587  }
588  if (ch_flags) {
589  ch_flags->clear();
590  ch_flags->reserve(ch_map.size());
591  }
592  *raw_score = 0;
593  int ok_chars = 0;
594  for (auto it : ch_map) {
595  bool covered =
596  (coverage != nullptr) && (IsWhitespace(it.first) ||
597  (pango_coverage_get(coverage, it.first) == PANGO_COVERAGE_EXACT));
598  if (covered) {
599  ++(*raw_score);
600  ok_chars += it.second;
601  }
602  if (ch_flags) {
603  ch_flags->push_back(covered);
604  }
605  }
606  pango_coverage_unref(coverage);
607  g_object_unref(font);
608  return ok_chars;
609 }
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:228
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool ParseFontDescriptionName(const std::string &name)

◆ IsAvailableFont() [1/2]

static bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc)
inlinestatic

Definition at line 157 of file pango_font_info.h.

157  {
158  return IsAvailableFont(font_desc, nullptr);
159  }
static bool IsAvailableFont(const char *font_desc)

◆ IsAvailableFont() [2/2]

bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc,
std::string *  best_match 
)
static

Definition at line 480 of file pango_font_info.cpp.

// Reports whether the font described by input_query_desc can be loaded from
// the default Pango/Cairo font map AND the description of the font that was
// actually loaded is equal to the queried description.
//
// Returns false if no font could be loaded, or if the loaded font's
// description differs from the query. In the latter case, when best_match is
// non-null, *best_match receives the description string of the font that was
// selected instead (with a trailing " 0" size suffix removed).
//
// NOTE(review): this listing jumps from source line 484 to 486 and from 489
// to 491; the two omitted lines are presumably the DISABLE_HEAP_LEAK_CHECK
// annotations cross-referenced after the listing -- confirm against
// pango_font_info.cpp before editing this function.
480  {
481  std::string query_desc(input_query_desc);
482  PangoFontDescription *desc = pango_font_description_from_string(query_desc.c_str());
483  PangoFont *selected_font = nullptr;
484  {
// Load the font through a temporary context bound to the default font map.
486  PangoFontMap *font_map = pango_cairo_font_map_get_default();
487  PangoContext *context = pango_context_new();
488  pango_context_set_font_map(context, font_map);
489  {
491  selected_font = pango_font_map_load_font(font_map, context, desc);
492  }
// The context is only needed for the load; font_map itself is not released here.
493  g_object_unref(context);
494  }
// Nothing was loaded: free the query description and report failure.
495  if (selected_font == nullptr) {
496  pango_font_description_free(desc);
497  tlog(4, "** Font '%s' failed to load from font map!\n", input_query_desc);
498  return false;
499  }
// Describe what was loaded so it can be compared against the query.
500  PangoFontDescription *selected_desc = pango_font_describe(selected_font);
501 
502  bool equal = pango_font_description_equal(desc, selected_desc);
503  tlog(3, "query weight = %d \t selected weight =%d\n", pango_font_description_get_weight(desc),
504  pango_font_description_get_weight(selected_desc));
505 
506  char *selected_desc_str = pango_font_description_to_string(selected_desc);
507  tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(), selected_desc_str);
// Only a mismatch produces a best_match; on an exact match *best_match is untouched.
508  if (!equal && best_match != nullptr) {
509  *best_match = selected_desc_str;
510  // Clip the ending ' 0' if there is one. It seems that, if there is no
511  // point size on the end of the fontname, then Pango always appends ' 0'.
512  int len = best_match->size();
513  if (len > 2 && best_match->at(len - 1) == '0' && best_match->at(len - 2) == ' ') {
514  *best_match = best_match->substr(0, len - 2);
515  }
516  }
// Release everything acquired above: the string, both descriptions and the font.
517  g_free(selected_desc_str);
518  pango_font_description_free(selected_desc);
519  g_object_unref(selected_font);
520  pango_font_description_free(desc);
521  if (!equal)
522  tlog(4, "** Font '%s' failed pango_font_description_equal!\n", input_query_desc);
523  return equal;
524 }
#define DISABLE_HEAP_LEAK_CHECK

◆ ListAvailableFonts()

const std::vector< std::string > & tesseract::FontUtils::ListAvailableFonts ( )
static

Definition at line 539 of file pango_font_info.cpp.

539  {
540  if (!available_fonts_.empty()) {
541  return available_fonts_;
542  }
543 
544  PangoFontFamily **families = nullptr;
545  int n_families = 0;
546  ListFontFamilies(&families, &n_families);
547  for (int i = 0; i < n_families; ++i) {
548  const char *family_name = pango_font_family_get_name(families[i]);
549  tlog(2, "Listing family %s\n", family_name);
550  if (ShouldIgnoreFontFamilyName(family_name)) {
551  continue;
552  }
553 
554  int n_faces;
555  PangoFontFace **faces = nullptr;
556  pango_font_family_list_faces(families[i], &faces, &n_faces);
557  for (int j = 0; j < n_faces; ++j) {
558  PangoFontDescription *desc = pango_font_face_describe(faces[j]);
559  char *desc_str = pango_font_description_to_string(desc);
560  // "synthesized" font faces that are not truly loadable, so we skip it
561  if (!pango_font_face_is_synthesized(faces[j]) && IsAvailableFont(desc_str)) {
562  available_fonts_.emplace_back(desc_str);
563  }
564  pango_font_description_free(desc);
565  g_free(desc_str);
566  }
567  g_free(faces);
568  }
569  g_free(families);
570  std::sort(available_fonts_.begin(), available_fonts_.end());
571  return available_fonts_;
572 }

◆ PangoFontTypeInfo()

void tesseract::FontUtils::PangoFontTypeInfo ( )
static

Definition at line 710 of file pango_font_info.cpp.

710  {
711  PangoFontMap *font_map = pango_cairo_font_map_get_default();
712  if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
713  CAIRO_FONT_TYPE_TOY) {
714  printf("Using CAIRO_FONT_TYPE_TOY.\n");
715  } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
716  CAIRO_FONT_TYPE_FT) {
717  printf("Using CAIRO_FONT_TYPE_FT.\n");
718  } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
719  CAIRO_FONT_TYPE_WIN32) {
720  printf("Using CAIRO_FONT_TYPE_WIN32.\n");
721  } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
722  CAIRO_FONT_TYPE_QUARTZ) {
723  printf("Using CAIRO_FONT_TYPE_QUARTZ.\n");
724  } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
725  CAIRO_FONT_TYPE_USER) {
726  printf("Using CAIRO_FONT_TYPE_USER.\n");
727  } else if (!font_map) {
728  printf("Can not create pango cairo font map!\n");
729  }
730 }

◆ ReInit()

void tesseract::FontUtils::ReInit ( )
static

Definition at line 704 of file pango_font_info.cpp.

704  {
705  available_fonts_.clear();
706 }

◆ SelectFont() [1/2]

bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
const std::vector< std::string > &  all_fonts,
std::string *  font_name,
std::vector< std::string > *  graphemes 
)
static

Definition at line 675 of file pango_font_info.cpp.

677  {
678  if (font_name) {
679  font_name->clear();
680  }
681  if (graphemes) {
682  graphemes->clear();
683  }
684  for (const auto &all_font : all_fonts) {
685  PangoFontInfo font;
686  std::vector<std::string> found_graphemes;
687  ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_font), "Could not parse font desc name %s\n",
688  all_font.c_str());
689  if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
690  if (graphemes) {
691  graphemes->swap(found_graphemes);
692  }
693  if (font_name) {
694  *font_name = all_font;
695  }
696  return true;
697  }
698  }
699  return false;
700 }
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:62
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const

◆ SelectFont() [2/2]

bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
std::string *  font_name,
std::vector< std::string > *  graphemes 
)
static

Definition at line 669 of file pango_font_info.cpp.

670  {
671  return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name, graphemes);
672 }
static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)

The documentation for this class was generated from the following files: