tesseract  5.0.0
tesseract::FPAnalyzer Class Reference

Public Member Functions

 FPAnalyzer (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
 ~FPAnalyzer ()=default
 
void Pass1Analyze ()
 
void EstimatePitch (bool pass1)
 
bool maybe_fixed_pitch ()
 
void MergeFragments ()
 
void FinalizeLargeChars ()
 
bool Pass2Analyze ()
 
void OutputEstimations ()
 
void DebugOutputResult ()
 
size_t num_rows ()
 
unsigned max_iteration ()
 

Detailed Description

Definition at line 951 of file cjkpitch.cpp.

Constructor & Destructor Documentation

◆ FPAnalyzer()

tesseract::FPAnalyzer::FPAnalyzer ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 1031 of file cjkpitch.cpp.

1032  : page_tr_(page_tr)
1033  , num_tall_rows_(0)
1034  , num_bad_rows_(0)
1035  , num_empty_rows_(0)
1036  , max_chars_per_row_(0) {
1037  TO_BLOCK_IT block_it(port_blocks);
1038 
1039  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1040  TO_BLOCK *block = block_it.data();
1041  if (!block->get_rows()->empty()) {
1042  ASSERT_HOST(block->xheight > 0);
1043  find_repeated_chars(block, false);
1044  }
1045  }
1046 
1047  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1048  TO_ROW_IT row_it = block_it.data()->get_rows();
1049  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1050  FPRow row;
1051  row.Init(row_it.data());
1052  rows_.push_back(row);
1053  size_t num_chars = rows_.back().num_chars();
1054  if (num_chars <= 1) {
1055  num_empty_rows_++;
1056  }
1057  if (num_chars > max_chars_per_row_) {
1058  max_chars_per_row_ = num_chars;
1059  }
1060  }
1061  }
1062 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1661

◆ ~FPAnalyzer()

tesseract::FPAnalyzer::~FPAnalyzer ( )
default

Member Function Documentation

◆ DebugOutputResult()

void tesseract::FPAnalyzer::DebugOutputResult ( )
inline

Definition at line 1003 of file cjkpitch.cpp.

1003  {
1004  tprintf("FPAnalyzer: final result\n");
1005  for (size_t i = 0; i < rows_.size(); i++) {
1006  rows_[i].DebugOutputResult(i);
1007  }
1008  }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

◆ EstimatePitch()

void tesseract::FPAnalyzer::EstimatePitch ( bool  pass1)

Definition at line 1064 of file cjkpitch.cpp.

1064  {
1065  LocalCorrelation pitch_height_stats;
1066 
1067  num_tall_rows_ = 0;
1068  num_bad_rows_ = 0;
1069  pitch_height_stats.Clear();
1070  for (auto &row : rows_) {
1071  row.EstimatePitch(pass1);
1072  if (row.good_pitches()) {
1073  pitch_height_stats.Add(row.height() + row.gap(), row.pitch(), row.good_pitches());
1074  if (row.height_pitch_ratio() > 1.1) {
1075  num_tall_rows_++;
1076  }
1077  } else {
1078  num_bad_rows_++;
1079  }
1080  }
1081 
1082  pitch_height_stats.Finish();
1083  for (auto &row : rows_) {
1084  if (row.good_pitches() >= 5) {
1085  // We have enough evidence. Just use the pitch estimation
1086  // from this row.
1087  row.set_estimated_pitch(row.pitch());
1088  } else if (row.num_chars() > 1) {
1089  float estimated_pitch = pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f);
1090  // CJK characters are more likely to be fragmented than poorly
1091  // chopped. So trust the page-level estimate of the character
1092  // pitch only if it is larger than the row-level estimate, or if
1093  // the row-level estimate is too large (more than 2x the row height).
1094  if (estimated_pitch > row.pitch() || row.pitch() > row.height() * 2.0) {
1095  row.set_estimated_pitch(estimated_pitch);
1096  } else {
1097  row.set_estimated_pitch(row.pitch());
1098  }
1099  }
1100  }
1101 }

◆ FinalizeLargeChars()

void tesseract::FPAnalyzer::FinalizeLargeChars ( )
inline

Definition at line 980 of file cjkpitch.cpp.

980  {
981  for (auto &row : rows_) {
982  row.FinalizeLargeChars();
983  }
984  }

◆ max_iteration()

unsigned tesseract::FPAnalyzer::max_iteration ( )
inline

Definition at line 1015 of file cjkpitch.cpp.

1015  {
1016  // We're fixing at least one character per iteration. So basically
1017  // we shouldn't require more than max_chars_per_row_ iterations.
1018  return max_chars_per_row_ + 100;
1019  }

◆ maybe_fixed_pitch()

bool tesseract::FPAnalyzer::maybe_fixed_pitch ( )
inline

Definition at line 967 of file cjkpitch.cpp.

967  {
968  if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) {
969  return false;
970  }
971  return true;
972  }

◆ MergeFragments()

void tesseract::FPAnalyzer::MergeFragments ( )
inline

Definition at line 974 of file cjkpitch.cpp.

974  {
975  for (auto &row : rows_) {
976  row.MergeFragments();
977  }
978  }

◆ num_rows()

size_t tesseract::FPAnalyzer::num_rows ( )
inline

Definition at line 1010 of file cjkpitch.cpp.

1010  {
1011  return rows_.size();
1012  }

◆ OutputEstimations()

void tesseract::FPAnalyzer::OutputEstimations ( )
inline

Definition at line 996 of file cjkpitch.cpp.

996  {
997  for (auto &row : rows_) {
998  row.OutputEstimations();
999  }
1000  // Don't we need page-level estimation of gaps/spaces?
1001  }

◆ Pass1Analyze()

void tesseract::FPAnalyzer::Pass1Analyze ( )
inline

Definition at line 956 of file cjkpitch.cpp.

956  {
957  for (auto &row : rows_) {
958  row.Pass1Analyze();
959  }
960  }

◆ Pass2Analyze()

bool tesseract::FPAnalyzer::Pass2Analyze ( )
inline

Definition at line 986 of file cjkpitch.cpp.

986  {
987  bool changed = false;
988  for (auto &row : rows_) {
989  if (row.Pass2Analyze()) {
990  changed = true;
991  }
992  }
993  return changed;
994  }

The documentation for this class was generated from the following file: