tesseract
5.0.0
|
#include <textord.h>
Public Member Functions | |
Textord (CCStruct *ccstruct) | |
~Textord ()=default | |
void | TextordPage (PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) |
void | CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res) |
bool | use_cjk_fp_model () const |
void | set_use_cjk_fp_model (bool flag) |
void | to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks) |
ROW * | make_prop_words (TO_ROW *row, FCOORD rotation) |
ROW * | make_blob_words (TO_ROW *row, FCOORD rotation) |
void | find_components (Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) |
void | filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on) |
BOOL_VAR_H (textord_single_height_mode) | |
BOOL_VAR_H (tosp_old_to_method) | |
BOOL_VAR_H (tosp_old_to_constrain_sp_kn) | |
BOOL_VAR_H (tosp_only_use_prop_rows) | |
BOOL_VAR_H (tosp_force_wordbreak_on_punct) | |
BOOL_VAR_H (tosp_use_pre_chopping) | |
BOOL_VAR_H (tosp_old_to_bug_fix) | |
BOOL_VAR_H (tosp_block_use_cert_spaces) | |
BOOL_VAR_H (tosp_row_use_cert_spaces) | |
BOOL_VAR_H (tosp_narrow_blobs_not_cert) | |
BOOL_VAR_H (tosp_row_use_cert_spaces1) | |
BOOL_VAR_H (tosp_recovery_isolated_row_stats) | |
BOOL_VAR_H (tosp_only_small_gaps_for_kern) | |
BOOL_VAR_H (tosp_all_flips_fuzzy) | |
BOOL_VAR_H (tosp_fuzzy_limit_all) | |
BOOL_VAR_H (tosp_stats_use_xht_gaps) | |
BOOL_VAR_H (tosp_use_xht_gaps) | |
BOOL_VAR_H (tosp_only_use_xht_gaps) | |
BOOL_VAR_H (tosp_rule_9_test_punct) | |
BOOL_VAR_H (tosp_flip_fuzz_kn_to_sp) | |
BOOL_VAR_H (tosp_flip_fuzz_sp_to_kn) | |
BOOL_VAR_H (tosp_improve_thresh) | |
INT_VAR_H (tosp_debug_level) | |
INT_VAR_H (tosp_enough_space_samples_for_median) | |
INT_VAR_H (tosp_redo_kern_limit) | |
INT_VAR_H (tosp_few_samples) | |
INT_VAR_H (tosp_short_row) | |
INT_VAR_H (tosp_sanity_method) | |
double_VAR_H (tosp_old_sp_kn_th_factor) | |
double_VAR_H (tosp_threshold_bias1) | |
double_VAR_H (tosp_threshold_bias2) | |
double_VAR_H (tosp_narrow_fraction) | |
double_VAR_H (tosp_narrow_aspect_ratio) | |
double_VAR_H (tosp_wide_fraction) | |
double_VAR_H (tosp_wide_aspect_ratio) | |
double_VAR_H (tosp_fuzzy_space_factor) | |
double_VAR_H (tosp_fuzzy_space_factor1) | |
double_VAR_H (tosp_fuzzy_space_factor2) | |
double_VAR_H (tosp_gap_factor) | |
double_VAR_H (tosp_kern_gap_factor1) | |
double_VAR_H (tosp_kern_gap_factor2) | |
double_VAR_H (tosp_kern_gap_factor3) | |
double_VAR_H (tosp_ignore_big_gaps) | |
double_VAR_H (tosp_ignore_very_big_gaps) | |
double_VAR_H (tosp_rep_space) | |
double_VAR_H (tosp_enough_small_gaps) | |
double_VAR_H (tosp_table_kn_sp_ratio) | |
double_VAR_H (tosp_table_xht_sp_ratio) | |
double_VAR_H (tosp_table_fuzzy_kn_sp_ratio) | |
double_VAR_H (tosp_fuzzy_kn_fraction) | |
double_VAR_H (tosp_fuzzy_sp_fraction) | |
double_VAR_H (tosp_min_sane_kn_sp) | |
double_VAR_H (tosp_init_guess_kn_mult) | |
double_VAR_H (tosp_init_guess_xht_mult) | |
double_VAR_H (tosp_max_sane_kn_thresh) | |
double_VAR_H (tosp_flip_caution) | |
double_VAR_H (tosp_large_kerning) | |
double_VAR_H (tosp_dont_fool_with_small_kerns) | |
double_VAR_H (tosp_near_lh_edge) | |
double_VAR_H (tosp_silly_kn_sp_gap) | |
double_VAR_H (tosp_pass_wide_fuzz_sp_to_context) | |
BOOL_VAR_H (textord_no_rejects) | |
BOOL_VAR_H (textord_show_blobs) | |
BOOL_VAR_H (textord_show_boxes) | |
INT_VAR_H (textord_max_noise_size) | |
INT_VAR_H (textord_baseline_debug) | |
double_VAR_H (textord_noise_area_ratio) | |
double_VAR_H (textord_initialx_ile) | |
double_VAR_H (textord_initialasc_ile) | |
INT_VAR_H (textord_noise_sizefraction) | |
double_VAR_H (textord_noise_sizelimit) | |
INT_VAR_H (textord_noise_translimit) | |
double_VAR_H (textord_noise_normratio) | |
BOOL_VAR_H (textord_noise_rejwords) | |
BOOL_VAR_H (textord_noise_rejrows) | |
double_VAR_H (textord_noise_syfract) | |
double_VAR_H (textord_noise_sxfract) | |
double_VAR_H (textord_noise_hfract) | |
INT_VAR_H (textord_noise_sncount) | |
double_VAR_H (textord_noise_rowratio) | |
BOOL_VAR_H (textord_noise_debug) | |
double_VAR_H (textord_blshift_maxshift) | |
double_VAR_H (textord_blshift_xfraction) | |
compute_block_xheight | |
Compute the xheight of the individual rows, then correlate them and interpret ascenderless lines, correcting xheights. First we compute our best guess of the x-height of each row independently with compute_row_xheight(), which looks for a pair of commonly occurring heights that could be x-height and ascender height. This function also attempts to find descenders of lowercase letters (i.e. not the small descenders that could appear in upper case letters such as Q, J). After this computation each row falls into one of the following categories: (1) ROW_ASCENDERS_FOUND: we found xheight and ascender modes, so this must be a regular row; we'll use its xheight to compute xheight and ascrise estimates for the block. (2) ROW_DESCENDERS_FOUND: no ascenders, so we do not have a high confidence in the xheight of this row (don't use it for estimating block xheight), but this row can't contain all caps. (3) ROW_UNKNOWN: a row with no ascenders/descenders, could be all lowercase (or mostly lowercase for fonts with very few ascenders), all upper case or small caps. (4) ROW_INVALID: no meaningful xheight could be found for this row. We then run correct_row_xheight() and use the computed xheight and ascrise averages to correct xheight values of the rows in ROW_DESCENDERS_FOUND, ROW_UNKNOWN and ROW_INVALID categories. | |
void | compute_block_xheight (TO_BLOCK *block, float gradient) |
compute_row_xheight | |
Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time. Set xheight_evidence to the number of blobs with the chosen xheight that appear in this row. | |
void | compute_row_xheight (TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size) |
make_spline_rows | |
Re-fit the rows in the block to the given gradient. | |
void | make_spline_rows (TO_BLOCK *block, float gradient, bool testing_on) |
|
explicit |
Definition at line 35 of file textord.cpp.
|
default |
tesseract::Textord::BOOL_VAR_H | ( | textord_no_rejects | ) |
tesseract::Textord::BOOL_VAR_H | ( | textord_noise_debug | ) |
tesseract::Textord::BOOL_VAR_H | ( | textord_noise_rejrows | ) |
tesseract::Textord::BOOL_VAR_H | ( | textord_noise_rejwords | ) |
tesseract::Textord::BOOL_VAR_H | ( | textord_show_blobs | ) |
tesseract::Textord::BOOL_VAR_H | ( | textord_show_boxes | ) |
tesseract::Textord::BOOL_VAR_H | ( | textord_single_height_mode | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_all_flips_fuzzy | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_block_use_cert_spaces | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_flip_fuzz_kn_to_sp | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_flip_fuzz_sp_to_kn | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_force_wordbreak_on_punct | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_fuzzy_limit_all | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_improve_thresh | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_narrow_blobs_not_cert | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_old_to_bug_fix | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_old_to_constrain_sp_kn | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_old_to_method | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_only_small_gaps_for_kern | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_only_use_prop_rows | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_only_use_xht_gaps | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_recovery_isolated_row_stats | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_row_use_cert_spaces | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_row_use_cert_spaces1 | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_rule_9_test_punct | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_stats_use_xht_gaps | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_use_pre_chopping | ) |
tesseract::Textord::BOOL_VAR_H | ( | tosp_use_xht_gaps | ) |
void tesseract::Textord::CleanupSingleRowResult | ( | PageSegMode | pageseg_mode, |
PAGE_RES * | page_res | ||
) |
Definition at line 264 of file textord.cpp.
void tesseract::Textord::compute_block_xheight | ( | TO_BLOCK * | block, |
float | gradient | ||
) |
Definition at line 1278 of file makerow.cpp.
void tesseract::Textord::compute_row_xheight | ( | TO_ROW * | row, |
const FCOORD & | rotation, | ||
float | gradient, | ||
int | block_line_size | ||
) |
Definition at line 1384 of file makerow.cpp.
tesseract::Textord::double_VAR_H | ( | textord_blshift_maxshift | ) |
tesseract::Textord::double_VAR_H | ( | textord_blshift_xfraction | ) |
tesseract::Textord::double_VAR_H | ( | textord_initialasc_ile | ) |
tesseract::Textord::double_VAR_H | ( | textord_initialx_ile | ) |
tesseract::Textord::double_VAR_H | ( | textord_noise_area_ratio | ) |
tesseract::Textord::double_VAR_H | ( | textord_noise_hfract | ) |
tesseract::Textord::double_VAR_H | ( | textord_noise_normratio | ) |
tesseract::Textord::double_VAR_H | ( | textord_noise_rowratio | ) |
tesseract::Textord::double_VAR_H | ( | textord_noise_sizelimit | ) |
tesseract::Textord::double_VAR_H | ( | textord_noise_sxfract | ) |
tesseract::Textord::double_VAR_H | ( | textord_noise_syfract | ) |
tesseract::Textord::double_VAR_H | ( | tosp_dont_fool_with_small_kerns | ) |
tesseract::Textord::double_VAR_H | ( | tosp_enough_small_gaps | ) |
tesseract::Textord::double_VAR_H | ( | tosp_flip_caution | ) |
tesseract::Textord::double_VAR_H | ( | tosp_fuzzy_kn_fraction | ) |
tesseract::Textord::double_VAR_H | ( | tosp_fuzzy_sp_fraction | ) |
tesseract::Textord::double_VAR_H | ( | tosp_fuzzy_space_factor | ) |
tesseract::Textord::double_VAR_H | ( | tosp_fuzzy_space_factor1 | ) |
tesseract::Textord::double_VAR_H | ( | tosp_fuzzy_space_factor2 | ) |
tesseract::Textord::double_VAR_H | ( | tosp_gap_factor | ) |
tesseract::Textord::double_VAR_H | ( | tosp_ignore_big_gaps | ) |
tesseract::Textord::double_VAR_H | ( | tosp_ignore_very_big_gaps | ) |
tesseract::Textord::double_VAR_H | ( | tosp_init_guess_kn_mult | ) |
tesseract::Textord::double_VAR_H | ( | tosp_init_guess_xht_mult | ) |
tesseract::Textord::double_VAR_H | ( | tosp_kern_gap_factor1 | ) |
tesseract::Textord::double_VAR_H | ( | tosp_kern_gap_factor2 | ) |
tesseract::Textord::double_VAR_H | ( | tosp_kern_gap_factor3 | ) |
tesseract::Textord::double_VAR_H | ( | tosp_large_kerning | ) |
tesseract::Textord::double_VAR_H | ( | tosp_max_sane_kn_thresh | ) |
tesseract::Textord::double_VAR_H | ( | tosp_min_sane_kn_sp | ) |
tesseract::Textord::double_VAR_H | ( | tosp_narrow_aspect_ratio | ) |
tesseract::Textord::double_VAR_H | ( | tosp_narrow_fraction | ) |
tesseract::Textord::double_VAR_H | ( | tosp_near_lh_edge | ) |
tesseract::Textord::double_VAR_H | ( | tosp_old_sp_kn_th_factor | ) |
tesseract::Textord::double_VAR_H | ( | tosp_pass_wide_fuzz_sp_to_context | ) |
tesseract::Textord::double_VAR_H | ( | tosp_rep_space | ) |
tesseract::Textord::double_VAR_H | ( | tosp_silly_kn_sp_gap | ) |
tesseract::Textord::double_VAR_H | ( | tosp_table_fuzzy_kn_sp_ratio | ) |
tesseract::Textord::double_VAR_H | ( | tosp_table_kn_sp_ratio | ) |
tesseract::Textord::double_VAR_H | ( | tosp_table_xht_sp_ratio | ) |
tesseract::Textord::double_VAR_H | ( | tosp_threshold_bias1 | ) |
tesseract::Textord::double_VAR_H | ( | tosp_threshold_bias2 | ) |
tesseract::Textord::double_VAR_H | ( | tosp_wide_aspect_ratio | ) |
tesseract::Textord::double_VAR_H | ( | tosp_wide_fraction | ) |
void tesseract::Textord::filter_blobs | ( | ICOORD | page_tr, |
TO_BLOCK_LIST * | blocks, | ||
bool | testing_on | ||
) |
Definition at line 238 of file tordmain.cpp.
void tesseract::Textord::find_components | ( | Image | pix, |
BLOCK_LIST * | blocks, | ||
TO_BLOCK_LIST * | to_blocks | ||
) |
Definition at line 211 of file tordmain.cpp.
tesseract::Textord::INT_VAR_H | ( | textord_baseline_debug | ) |
tesseract::Textord::INT_VAR_H | ( | textord_max_noise_size | ) |
tesseract::Textord::INT_VAR_H | ( | textord_noise_sizefraction | ) |
tesseract::Textord::INT_VAR_H | ( | textord_noise_sncount | ) |
tesseract::Textord::INT_VAR_H | ( | textord_noise_translimit | ) |
tesseract::Textord::INT_VAR_H | ( | tosp_debug_level | ) |
tesseract::Textord::INT_VAR_H | ( | tosp_enough_space_samples_for_median | ) |
tesseract::Textord::INT_VAR_H | ( | tosp_few_samples | ) |
tesseract::Textord::INT_VAR_H | ( | tosp_redo_kern_limit | ) |
tesseract::Textord::INT_VAR_H | ( | tosp_sanity_method | ) |
tesseract::Textord::INT_VAR_H | ( | tosp_short_row | ) |
ROW * tesseract::Textord::make_blob_words (TO_ROW *row, FCOORD rotation): Definition at line 1118 of file tospace.cpp.
ROW * tesseract::Textord::make_prop_words (TO_ROW *row, FCOORD rotation): Definition at line 844 of file tospace.cpp.
void tesseract::Textord::make_spline_rows | ( | TO_BLOCK * | block, |
float | gradient, | ||
bool | testing_on | ||
) |
Definition at line 1998 of file makerow.cpp.
|
inline |
void tesseract::Textord::TextordPage | ( | PageSegMode | pageseg_mode, |
const FCOORD & | reskew, | ||
int | width, | ||
int | height, | ||
Image | binary_pix, | ||
Image | thresholds_pix, | ||
Image | grey_pix, | ||
bool | use_box_bottoms, | ||
BLOBNBOX_LIST * | diacritic_blobs, | ||
BLOCK_LIST * | blocks, | ||
TO_BLOCK_LIST * | to_blocks | ||
) |
Definition at line 177 of file textord.cpp.
void tesseract::Textord::to_spacing | ( | ICOORD | page_tr, |
TO_BLOCK_LIST * | blocks | ||
) |
Definition at line 45 of file tospace.cpp.
|
inline |