32 #include <allheaders.h>
37 #ifndef DISABLED_LEGACY_ENGINE
61 static Image RemoveEnclosingCircle(
Image pixs) {
62 Image pixsi = pixInvert(
nullptr, pixs);
63 Image pixc = pixCreateTemplate(pixs);
64 pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
65 pixSeedfillBinary(pixc, pixc, pixsi, 4);
66 pixInvert(pixc, pixc);
68 Image pixt = pixs & pixc;
70 pixCountConnComp(pixt, 8, &max_count);
72 l_int32 min_count = INT32_MAX;
73 Image pixout =
nullptr;
76 pixErodeBrick(pixc, pixc, 3, 3);
79 pixCountConnComp(pixt, 8, &count);
80 if (i == 1 || count > max_count) {
83 }
else if (count < min_count) {
87 }
else if (count >= min_count) {
104 int width = pixGetWidth(pix_binary_);
105 int height = pixGetHeight(pix_binary_);
107 auto pageseg_mode =
static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
110 std::string name = input_file;
111 const char *lastdot = strrchr(name.c_str(),
'.');
112 if (lastdot !=
nullptr) {
113 name[lastdot - name.c_str()] =
'\0';
117 if (blocks->empty()) {
120 BLOCK_IT block_it(blocks);
121 auto *block =
new BLOCK(
"",
true, 0, 0, 0, 0, width, height);
123 block_it.add_to_end(block);
134 BLOBNBOX_LIST diacritic_blobs;
135 int auto_page_seg_ret_val = 0;
136 TO_BLOCK_LIST to_blocks;
139 auto_page_seg_ret_val =
141 enable_noise_removal ? &diacritic_blobs :
nullptr, osd_tess, osr);
143 return auto_page_seg_ret_val;
148 deskew_ =
FCOORD(1.0f, 0.0f);
149 reskew_ =
FCOORD(1.0f, 0.0f);
151 Image pixcleaned = RemoveEnclosingCircle(pix_binary_);
152 if (pixcleaned !=
nullptr) {
154 pix_binary_ = pixcleaned;
159 if (auto_page_seg_ret_val < 0) {
163 if (blocks->empty()) {
170 bool cjk_mode = textord_use_cjk_fp_model;
172 textord_.
TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_,
173 pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks);
174 return auto_page_seg_ret_val;
203 Image photomask_pix =
nullptr;
204 Image musicmask_pix =
nullptr;
206 BLOCK_LIST found_blocks;
207 TO_BLOCK_LIST temp_blocks;
210 pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
211 pageseg_apply_music_mask ? &musicmask_pix :
nullptr);
213 if (finder !=
nullptr) {
214 TO_BLOCK_IT to_block_it(&temp_blocks);
215 TO_BLOCK *to_block = to_block_it.data();
216 if (musicmask_pix !=
nullptr) {
219 photomask_pix |= musicmask_pix;
221 #ifndef DISABLED_LEGACY_ENGINE
226 result = finder->
FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, to_block,
227 photomask_pix, pix_thresholds_, pix_grey_, &pixa_debug_,
228 &found_blocks, diacritic_blobs, to_blocks);
241 BLOCK_IT block_it(blocks);
243 block_it.add_list_after(&found_blocks);
250 std::vector<int> *allowed_ids) {
274 OSResults *osr, TO_BLOCK_LIST *to_blocks,
275 Image *photo_mask_pix,
276 Image *music_mask_pix) {
279 TabVector_LIST v_lines;
280 TabVector_LIST h_lines;
284 if (tessedit_dump_pageseg_images) {
285 pixa_debug_.
AddPix(pix_binary_,
"PageSegInput");
289 &vertical_x, &vertical_y, music_mask_pix, &v_lines, &h_lines);
290 if (tessedit_dump_pageseg_images) {
291 pixa_debug_.
AddPix(pix_binary_,
"NoLines");
295 if (tessedit_dump_pageseg_images) {
296 Image pix_no_image_ =
nullptr;
297 if (*photo_mask_pix !=
nullptr) {
298 pix_no_image_ = pixSubtract(
nullptr, pix_binary_, *photo_mask_pix);
300 pix_no_image_ = pix_binary_.
clone();
302 pixa_debug_.
AddPix(pix_no_image_,
"NoImages");
312 TO_BLOCK_IT to_block_it(to_blocks);
316 TO_BLOCK *to_block = to_block_it.data();
319 int estimated_resolution = source_resolution_;
324 estimated_resolution = res;
325 tprintf(
"Estimating resolution as %d\n", estimated_resolution);
331 blkbox.
topright(), estimated_resolution, textord_use_cjk_fp_model,
332 textord_tabfind_aligned_gap_fraction, &v_lines, &h_lines, vertical_x,
337 #ifndef DISABLED_LEGACY_ENGINE
343 BLOBNBOX_CLIST osd_blobs;
348 int osd_orientation = 0;
355 if (
PSM_OSD_ENABLED(pageseg_mode) && osd_tess !=
nullptr && osr !=
nullptr) {
356 std::vector<int> osd_scripts;
357 if (osd_tess !=
this) {
361 for (
auto &
lang : sub_langs_) {
362 AddAllScriptsConverted(
lang->unicharset, osd_tess->
unicharset, &osd_scripts);
372 double osd_margin = min_orientation_margin * 2;
373 for (
int i = 0; i < 4; ++i) {
374 if (i != osd_orientation && osd_score - osr->
orientations[i] < osd_margin) {
383 strcmp(
"Japanese", best_script_str) == 0 ||
384 strcmp(
"Korean", best_script_str) == 0 || strcmp(
"Hangul", best_script_str) == 0;
388 if (osd_margin < min_orientation_margin) {
390 if (!cjk && !vertical_text && osd_orientation == 2) {
393 "OSD: Weak margin (%.2f), horiz textlines, not CJK: "
399 "OSD: Weak margin (%.2f) for %d blob text block, "
400 "but using orientation anyway: %d\n",
401 osd_margin, osd_blobs.length(), osd_orientation);
405 osd_blobs.shallow_clear();
constexpr int kResolutionEstimationFactor
bool PSM_OSD_ENABLED(int pageseg_mode)
@ PSM_CIRCLE_WORD
Treat the image as a single word in a circle.
@ PSM_OSD_ONLY
Orientation and script detection only.
@ PSM_SINGLE_BLOCK_VERT_TEXT
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
bool PSM_ORIENTATION_ENABLED(int pageseg_mode)
bool read_unlv_file(std::string &name, int32_t xsize, int32_t ysize, BLOCK_LIST *blocks)
void tprintf(const char *format,...)
int IntCastRounded(double x)
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
int os_detect_blobs(const std::vector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess)
constexpr int kMaxCredibleResolution
bool PSM_SPARSE(int pageseg_mode)
int textord_debug_tabfind
const int kMaxCircleErosions
constexpr int kMinCredibleResolution
bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode)
int LabelSpecialText(TO_BLOCK *to_block) override
int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr)
int SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
bool right_to_left() const
ColumnFinder * SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Image *photo_mask_pix, Image *music_mask_pix)
void AddPix(const Image pix, const char *caption)
PDBLK pdblk
Page Description Block.
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
const ICOORD & botleft() const
const ICOORD & topright() const
const char * get_script_from_script_id(int id) const
int get_script_table_size() const
int get_script_id_from_name(const char *script_name) const
void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew)
void set_cjk_script(bool is_cjk)
bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
void SetEquationDetect(EquationDetectBase *detect)
int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block, Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
void SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block)
void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
static Image FindImages(Image pix, DebugPixa *pixa_debug)
static void FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x, int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines, TabVector_LIST *h_lines)
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)