41 #ifndef GRAPHICS_DISABLED
42 static INT_VAR(textord_tabfind_show_strokewidths, 0,
"Show stroke widths (ScrollView)");
44 static INT_VAR(textord_tabfind_show_strokewidths, 0,
"Show stroke widths");
46 static BOOL_VAR(textord_tabfind_only_strokewidths,
false,
"Only run stroke widths");
116 , nontext_map_(nullptr)
117 , projection_(nullptr)
119 , grid_box_(bleft, tright)
120 , rerotation_(1.0f, 0.0f) {
124 #ifndef GRAPHICS_DISABLED
125 if (widths_win_ !=
nullptr) {
127 if (textord_tabfind_only_strokewidths) {
133 delete initial_widths_win_;
135 delete textlines_win_;
136 delete smoothed_win_;
137 delete diacritics_win_;
147 BLOBNBOX_IT blob_it(&block->
blobs);
148 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
149 SetNeighbours(
false,
false, blob_it.data());
161 InsertBlobs(input_block);
163 while (cjk_merge && FixBrokenCJK(input_block)) {
167 FindTextlineFlowDirection(pageseg_mode,
false);
173 static void CollectHorizVertBlobs(BLOBNBOX_LIST *input_blobs,
int *num_vertical_blobs,
174 int *num_horizontal_blobs, BLOBNBOX_CLIST *vertical_blobs,
175 BLOBNBOX_CLIST *horizontal_blobs,
176 BLOBNBOX_CLIST *nondescript_blobs) {
177 BLOBNBOX_C_IT v_it(vertical_blobs);
178 BLOBNBOX_C_IT h_it(horizontal_blobs);
179 BLOBNBOX_C_IT n_it(nondescript_blobs);
180 BLOBNBOX_IT blob_it(input_blobs);
181 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
184 float y_x =
static_cast<float>(box.
height()) / box.
width();
185 float x_y = 1.0f / y_x;
187 float ratio = x_y > y_x ? x_y : y_x;
191 ++*num_vertical_blobs;
193 v_it.add_after_then_move(blob);
196 ++*num_horizontal_blobs;
198 h_it.add_after_then_move(blob);
200 }
else if (ok_blob) {
201 n_it.add_after_then_move(blob);
213 BLOBNBOX_CLIST *osd_blobs) {
214 int vertical_boxes = 0;
215 int horizontal_boxes = 0;
217 BLOBNBOX_CLIST vertical_blobs;
218 BLOBNBOX_CLIST horizontal_blobs;
219 BLOBNBOX_CLIST nondescript_blobs;
220 CollectHorizVertBlobs(&block->
blobs, &vertical_boxes, &horizontal_boxes, &vertical_blobs,
221 &horizontal_blobs, &nondescript_blobs);
222 CollectHorizVertBlobs(&block->
large_blobs, &vertical_boxes, &horizontal_boxes, &vertical_blobs,
223 &horizontal_blobs, &nondescript_blobs);
225 tprintf(
"TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n", horizontal_boxes,
226 vertical_boxes, horizontal_blobs.length(), vertical_blobs.length(),
227 nondescript_blobs.length());
229 if (osd_blobs !=
nullptr && vertical_boxes == 0 && horizontal_boxes == 0) {
231 BLOBNBOX_C_IT osd_it(osd_blobs);
232 osd_it.add_list_after(&nondescript_blobs);
236 static_cast<int>((vertical_boxes + horizontal_boxes) * find_vertical_text_ratio);
237 if (vertical_boxes >= min_vert_boxes) {
238 if (osd_blobs !=
nullptr) {
239 BLOBNBOX_C_IT osd_it(osd_blobs);
240 osd_it.add_list_after(&vertical_blobs);
244 if (osd_blobs !=
nullptr) {
245 BLOBNBOX_C_IT osd_it(osd_blobs);
246 osd_it.add_list_after(&horizontal_blobs);
256 rerotation_.
set_x(rotation.
x());
257 rerotation_.
set_y(-rotation.
y());
264 ColPartition_LIST leader_parts;
265 FindLeadersAndMarkNoise(block, &leader_parts);
269 for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
272 MarkLeaderNeighbours(part,
LR_LEFT);
273 MarkLeaderNeighbours(part,
LR_RIGHT);
295 TBOX search_box = box;
296 search_box.
pad(padding, padding);
308 if (nbox.
height() > max_height) {
309 max_height = nbox.
height();
313 tprintf(
"Max neighbour size=%d for candidate line box at:", max_height);
317 #ifndef GRAPHICS_DISABLED
318 if (leaders_win_ !=
nullptr) {
348 BLOBNBOX_LIST *diacritic_blobs,
350 ColPartition_LIST *big_parts) {
351 nontext_map_ = nontext_pix;
352 projection_ = projection;
363 FindTextlineFlowDirection(pageseg_mode,
false);
365 #ifndef GRAPHICS_DISABLED
366 if (textord_tabfind_show_strokewidths) {
378 FindTextlineFlowDirection(pageseg_mode,
true);
380 diacritic_blobs, part_grid, big_parts, &skew);
382 tprintf(
"Detected %d diacritics\n", diacritic_blobs->length());
386 FindTextlineFlowDirection(pageseg_mode,
true);
387 r = FindInitialPartitions(pageseg_mode, rerotation,
false, block, diacritic_blobs, part_grid,
390 nontext_map_ =
nullptr;
391 projection_ =
nullptr;
// Debug helper: prints one line for a blob. Per the format string the line
// holds the two corners of a box followed by three width values (h-width,
// v-width, p-width), each meant to be shown with one decimal place.
// NOTE(review): the declaration of `nbox` and the remaining tprintf arguments
// are on lines not visible in this excerpt.
395 static void PrintBoxWidths(
BLOBNBOX *neighbour) {
// The last conversion used to be "%1.f" (field width 1, precision 0), which
// rounded p-width to a whole number; it now matches the other two widths.
397 tprintf(
"Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n", nbox.
left(),
410 FCOORD click(
static_cast<float>(x),
static_cast<float>(y));
414 PrintBoxWidths(neighbour);
430 "Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
431 "Good= %d %d %d %d\n",
450 void StrokeWidth::FindLeadersAndMarkNoise(
TO_BLOCK *block, ColPartition_LIST *leader_parts) {
456 gsearch.StartFullSearch();
457 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
458 SetNeighbours(
true,
false, bbox);
460 ColPartition_IT part_it(leader_parts);
461 gsearch.StartFullSearch();
462 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
468 auto *part =
new ColPartition(
BRT_UNKNOWN, ICOORD(0, 1));
470 for (blob = bbox; blob !=
nullptr && blob->flow() ==
BTFT_NONE;
478 if (part->MarkAsLeaderIfMonospaced()) {
479 part_it.add_after_then_move(part);
485 #ifndef GRAPHICS_DISABLED
486 if (textord_tabfind_show_strokewidths) {
487 leaders_win_ = DisplayGoodBlobs(
"LeaderNeighbours", 0, 0);
492 BLOBNBOX_IT blob_it(&block->
blobs);
494 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
495 BLOBNBOX *blob = small_it.data();
500 blob->ClearNeighbours();
501 blob_it.add_to_end(small_it.extract());
508 for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
509 BLOBNBOX *blob = noise_it.data();
510 if (blob->flow() ==
BTFT_LEADER || blob->joined_to_prev()) {
511 small_it.add_to_end(noise_it.extract());
514 blob->ClearNeighbours();
523 void StrokeWidth::InsertBlobs(TO_BLOCK *block) {
531 void StrokeWidth::MarkLeaderNeighbours(
const ColPartition *part,
LeftOrRight side) {
532 const TBOX &part_box = part->bounding_box();
535 BLOBNBOX *best_blob =
nullptr;
537 blobsearch.StartSideSearch(side ==
LR_LEFT ? part_box.left() : part_box.right(),
538 part_box.bottom(), part_box.top());
540 while ((blob = blobsearch.NextSideSearch(side ==
LR_LEFT)) !=
nullptr) {
541 const TBOX &blob_box = blob->bounding_box();
542 if (!blob_box.y_overlap(part_box)) {
545 int x_gap = blob_box.x_gap(part_box);
548 }
else if (best_blob ==
nullptr || x_gap < best_gap) {
553 if (best_blob !=
nullptr) {
555 best_blob->set_leader_on_right(
true);
557 best_blob->set_leader_on_left(
true);
559 #ifndef GRAPHICS_DISABLED
560 if (leaders_win_ !=
nullptr) {
562 const TBOX &blob_box = best_blob->bounding_box();
563 leaders_win_->
Rectangle(blob_box.left(), blob_box.bottom(), blob_box.right(), blob_box.top());
// Returns the 0.75 quantile (upper quartile) of blob heights collected from
// the given blob list, rounded to the nearest integer.
// NOTE(review): the declaration of `sizes` and any filter applied before
// sizes.add() are on lines not visible in this excerpt; `gridsize` is
// presumably used there - TODO confirm.
570 static int UpperQuartileCJKSize(
int gridsize, BLOBNBOX_LIST *blobs) {
572 BLOBNBOX_IT it(blobs);
573 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
574 BLOBNBOX *blob = it.data();
575 int width = blob->bounding_box().width();
576 int height = blob->bounding_box().height();
// Each accepted blob contributes its height once to the statistics.
578 sizes.add(height, 1);
// Adding 0.5 before the cast rounds to nearest instead of truncating.
581 return static_cast<int>(sizes.ile(0.75f) + 0.5);
589 bool StrokeWidth::FixBrokenCJK(TO_BLOCK *block) {
590 BLOBNBOX_LIST *blobs = &block->blobs;
591 int median_height = UpperQuartileCJKSize(
gridsize(), blobs);
595 BLOBNBOX_IT blob_it(blobs);
597 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
598 BLOBNBOX *blob = blob_it.data();
599 if (blob->cblob() ==
nullptr || blob->cblob()->out_list()->empty()) {
605 tprintf(
"Checking for Broken CJK (max size=%d):", max_height);
609 BLOBNBOX_CLIST overlapped_blobs;
610 AccumulateOverlaps(blob, debug, max_height, max_dist, &bbox, &overlapped_blobs);
611 if (!overlapped_blobs.empty()) {
618 tprintf(
"Bad final aspectratio:");
626 tprintf(
"Too many neighbours: %d\n", overlapped_blobs.length());
631 BLOBNBOX_C_IT n_it(&overlapped_blobs);
632 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
633 BLOBNBOX *neighbour =
nullptr;
634 neighbour = n_it.data();
639 if (!n_it.cycled_list()) {
642 PrintBoxWidths(blob);
652 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
653 BLOBNBOX *neighbour = n_it.data();
657 blob->really_merge(neighbour);
658 if (rerotation_.
x() != 1.0f || rerotation_.
y() != 0.0f) {
659 blob->rotate_box(rerotation_);
671 int num_remaining = 0;
672 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
673 BLOBNBOX *blob = blob_it.data();
674 if (blob->cblob() !=
nullptr && !blob->cblob()->out_list()->empty()) {
680 block->DeleteUnownedNoise();
689 static bool AcceptableCJKMerge(
const TBOX &bbox,
const TBOX &nbox,
bool debug,
int max_size,
690 int max_dist,
int *x_gap,
int *y_gap) {
691 *x_gap = bbox.x_gap(nbox);
692 *y_gap = bbox.y_gap(nbox);
696 tprintf(
"gaps = %d, %d, merged_box:", *x_gap, *y_gap);
699 if (*x_gap <= max_dist && *y_gap <= max_dist && merged.width() <= max_size &&
700 merged.height() <= max_size) {
702 double old_ratio =
static_cast<double>(bbox.width()) / bbox.height();
703 if (old_ratio < 1.0) {
704 old_ratio = 1.0 / old_ratio;
706 double new_ratio =
static_cast<double>(merged.width()) / merged.height();
707 if (new_ratio < 1.0) {
708 new_ratio = 1.0 / new_ratio;
721 void StrokeWidth::AccumulateOverlaps(
const BLOBNBOX *not_this,
bool debug,
int max_size,
722 int max_dist,
TBOX *bbox, BLOBNBOX_CLIST *blobs) {
728 for (
auto &nearest : nearests) {
731 int x = (bbox->left() + bbox->right()) / 2;
732 int y = (bbox->bottom() + bbox->top()) / 2;
737 while ((neighbour = radsearch.NextRadSearch()) !=
nullptr) {
738 if (neighbour == not_this) {
741 TBOX nbox = neighbour->bounding_box();
743 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, &x_gap, &y_gap)) {
746 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
752 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
753 if (nearests[dir] ==
nullptr) {
756 nbox = nearests[dir]->bounding_box();
757 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, &x_gap, &y_gap)) {
760 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, nearests[dir]);
765 nearests[dir] =
nullptr;
769 }
else if (x_gap < 0 && x_gap <= y_gap) {
772 if (nearests[dir] ==
nullptr || y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
773 nearests[dir] = neighbour;
775 }
else if (y_gap < 0 && y_gap <= x_gap) {
778 if (nearests[dir] ==
nullptr || x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
779 nearests[dir] = neighbour;
788 for (
auto &nearest : nearests) {
789 if (nearest ==
nullptr) {
792 const TBOX &nbox = nearest->bounding_box();
794 tprintf(
"Testing for overlap with:");
797 if (bbox->overlap(nbox)) {
798 blobs->shallow_clear();
800 tprintf(
"Final box overlaps nearest\n");
814 void StrokeWidth::FindTextlineFlowDirection(
PageSegMode pageseg_mode,
bool display_if_debugging) {
818 gsearch.StartFullSearch();
819 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
820 SetNeighbours(
false, display_if_debugging, bbox);
823 gsearch.StartFullSearch();
824 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
825 SimplifyObviousNeighbours(bbox);
828 gsearch.StartFullSearch();
829 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
830 if (FindingVerticalOnly(pageseg_mode)) {
831 bbox->set_vert_possible(
true);
832 bbox->set_horz_possible(
false);
833 }
else if (FindingHorizontalOnly(pageseg_mode)) {
834 bbox->set_vert_possible(
false);
835 bbox->set_horz_possible(
true);
837 SetNeighbourFlows(bbox);
840 #ifndef GRAPHICS_DISABLED
841 if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
842 textord_tabfind_show_strokewidths > 1) {
843 initial_widths_win_ = DisplayGoodBlobs(
"InitialStrokewidths", 400, 0);
847 gsearch.StartFullSearch();
848 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
849 SmoothNeighbourTypes(pageseg_mode,
false, bbox);
852 gsearch.StartFullSearch();
853 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
854 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
857 gsearch.StartFullSearch();
858 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
859 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
861 #ifndef GRAPHICS_DISABLED
862 if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
863 textord_tabfind_show_strokewidths > 1) {
864 widths_win_ = DisplayGoodBlobs(
"ImprovedStrokewidths", 800, 0);
873 void StrokeWidth::SetNeighbours(
bool leaders,
bool activate_line_trap, BLOBNBOX *blob) {
874 int line_trap_count = 0;
875 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
877 line_trap_count += FindGoodNeighbour(bnd, leaders, blob);
879 if (line_trap_count > 0 && activate_line_trap) {
881 blob->ClearNeighbours();
882 const TBOX &box = blob->bounding_box();
893 int StrokeWidth::FindGoodNeighbour(
BlobNeighbourDir dir,
bool leaders, BLOBNBOX *blob) {
895 TBOX blob_box = blob->bounding_box();
898 tprintf(
"FGN in dir %d for blob:", dir);
901 int top = blob_box.top();
902 int bottom = blob_box.bottom();
903 int left = blob_box.left();
904 int right = blob_box.right();
905 int width = right - left;
906 int height = top - bottom;
914 int line_trap_count = 0;
916 int min_good_overlap = (dir ==
BND_LEFT || dir ==
BND_RIGHT) ? height / 2 : width / 2;
917 int min_decent_overlap = (dir ==
BND_LEFT || dir ==
BND_RIGHT) ? height / 3 : width / 3;
919 min_good_overlap = min_decent_overlap = 1;
927 TBOX search_box = blob_box;
931 search_box.set_left(search_box.left() - search_pad);
934 search_box.set_right(search_box.right() + search_pad);
937 search_box.set_bottom(search_box.bottom() - search_pad);
940 search_box.set_top(search_box.top() + search_pad);
947 rectsearch.StartRectSearch(search_box);
948 BLOBNBOX *best_neighbour =
nullptr;
949 double best_goodness = 0.0;
950 bool best_is_good =
false;
952 while ((neighbour = rectsearch.NextRectSearch()) !=
nullptr) {
953 TBOX nbox = neighbour->bounding_box();
954 if (neighbour == blob) {
957 int mid_x = (nbox.left() + nbox.right()) / 2;
958 if (mid_x < blob->left_rule() || mid_x > blob->right_rule()) {
968 int n_width = nbox.width();
969 int n_height = nbox.height();
970 if (std::min(n_width, n_height) > line_trap_min &&
971 std::max(n_width, n_height) < line_trap_max) {
994 overlap = std::min(
static_cast<int>(nbox.top()), top) -
995 std::max(
static_cast<int>(nbox.bottom()), bottom);
996 if (overlap == nbox.height() && nbox.width() > nbox.height()) {
997 perp_overlap = nbox.width();
999 perp_overlap = overlap;
1001 gap = dir ==
BND_LEFT ? left - nbox.left() : nbox.right() - right;
1010 overlap = std::min(
static_cast<int>(nbox.right()), right) -
1011 std::max(
static_cast<int>(nbox.left()), left);
1012 if (overlap == nbox.width() && nbox.height() > nbox.width()) {
1013 perp_overlap = nbox.height();
1015 perp_overlap = overlap;
1017 gap = dir ==
BND_BELOW ? bottom - nbox.bottom() : nbox.top() - top;
1026 if (-gap > overlap) {
1028 tprintf(
"Overlaps wrong way\n");
1032 if (perp_overlap < min_decent_overlap) {
1034 tprintf(
"Doesn't overlap enough\n");
1041 overlap >= min_good_overlap && !bad_sizes &&
1049 double goodness = (1.0 + is_good) * overlap / gap;
1051 tprintf(
"goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n", goodness, best_goodness,
1052 is_good, overlap, gap);
1054 if (goodness > best_goodness) {
1055 best_neighbour = neighbour;
1056 best_goodness = goodness;
1057 best_is_good = is_good;
1060 blob->set_neighbour(dir, best_neighbour, best_is_good);
1061 return line_trap_count;
// Adds every non-null direct neighbour of blob (one lookup per direction,
// BND_COUNT directions in total) to the neighbours list.
// NOTE(review): `bnd` is defined on a line not visible in this excerpt;
// presumably it is `dir` converted to BlobNeighbourDir - TODO confirm.
1065 static void ListNeighbours(
const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) {
1066 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1068 BLOBNBOX *neighbour = blob->neighbour(bnd);
1069 if (neighbour !=
nullptr) {
// Insertion is ordered by SortByBoxLeft; the `true` flag presumably requests
// uniqueness so a blob is not listed twice - verify against add_sorted.
1070 neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
// Adds the direct neighbours of blob to the neighbours list, and then the
// direct neighbours of each of those non-null neighbours (two hops in total).
// NOTE(review): `bnd` is defined on a line not visible in this excerpt.
1076 static void List2ndNeighbours(
const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) {
// First hop: the blob's own neighbours.
1077 ListNeighbours(blob, neighbours);
1078 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1080 BLOBNBOX *neighbour = blob->neighbour(bnd);
1081 if (neighbour !=
nullptr) {
// Second hop: neighbours of this neighbour.
1082 ListNeighbours(neighbour, neighbours);
// Adds to the neighbours list everything List2ndNeighbours finds for blob,
// and then everything List2ndNeighbours finds for each of blob's non-null
// direct neighbours (three hops in total).
// NOTE(review): `bnd` is defined on a line not visible in this excerpt.
1088 static void List3rdNeighbours(
const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) {
// Hops one and two starting from the blob itself.
1089 List2ndNeighbours(blob, neighbours);
1090 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1092 BLOBNBOX *neighbour = blob->neighbour(bnd);
1093 if (neighbour !=
nullptr) {
// Two further hops starting from this direct neighbour.
1094 List2ndNeighbours(neighbour, neighbours);
1101 static void CountNeighbourGaps(
bool debug, BLOBNBOX_CLIST *neighbours,
int *pure_h_count,
1102 int *pure_v_count) {
1106 BLOBNBOX_C_IT it(neighbours);
1107 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1108 BLOBNBOX *blob = it.data();
1109 int h_min, h_max, v_min, v_max;
1110 blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max);
1112 tprintf(
"Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max);
1114 if (h_max < v_min || blob->leader_on_left() || blob->leader_on_right()) {
1120 }
else if (v_max < h_min) {
1132 blob->bounding_box().print();
1140 void StrokeWidth::SetNeighbourFlows(BLOBNBOX *blob) {
1141 if (blob->DefiniteIndividualFlow()) {
1147 tprintf(
"SetNeighbourFlows (current flow=%d, type=%d) on:", blob->flow(), blob->region_type());
1148 blob->bounding_box().print();
1150 BLOBNBOX_CLIST neighbours;
1151 List3rdNeighbours(blob, &neighbours);
1153 int pure_h_count = 0;
1154 int pure_v_count = 0;
1155 CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count);
1157 HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1);
1158 tprintf(
"SetFlows: h_count=%d, v_count=%d\n", pure_h_count, pure_v_count);
1160 if (!neighbours.empty()) {
1161 blob->set_vert_possible(
true);
1162 blob->set_horz_possible(
true);
1163 if (pure_h_count > 2 * pure_v_count) {
1165 blob->set_vert_possible(
false);
1166 }
else if (pure_v_count > 2 * pure_h_count) {
1168 blob->set_horz_possible(
false);
1172 blob->set_vert_possible(
false);
1173 blob->set_horz_possible(
false);
1178 static void CountNeighbourTypes(BLOBNBOX_CLIST *neighbours,
int *pure_h_count,
int *pure_v_count) {
1179 BLOBNBOX_C_IT it(neighbours);
1180 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1181 BLOBNBOX *blob = it.data();
1182 if (blob->UniquelyHorizontal()) {
1185 if (blob->UniquelyVertical()) {
1194 void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX *blob) {
1197 if ((blob->bounding_box().width() > 3 * blob->area_stroke_width() &&
1198 blob->bounding_box().height() > 3 * blob->area_stroke_width())) {
1200 if (blob->bounding_box().width() > 4 * blob->bounding_box().height()) {
1202 blob->set_neighbour(
BND_ABOVE,
nullptr,
false);
1203 blob->set_neighbour(
BND_BELOW,
nullptr,
false);
1206 if (blob->bounding_box().height() > 4 * blob->bounding_box().width()) {
1208 blob->set_neighbour(
BND_LEFT,
nullptr,
false);
1209 blob->set_neighbour(
BND_RIGHT,
nullptr,
false);
1216 int h_min, h_max, v_min, v_max;
1217 blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max);
1218 if ((h_max + margin < v_min && h_max < margin / 2) || blob->leader_on_left() ||
1219 blob->leader_on_right()) {
1221 blob->set_neighbour(
BND_ABOVE,
nullptr,
false);
1222 blob->set_neighbour(
BND_BELOW,
nullptr,
false);
1223 }
else if (v_max + margin < h_min && v_max < margin / 2) {
1225 blob->set_neighbour(
BND_LEFT,
nullptr,
false);
1226 blob->set_neighbour(
BND_RIGHT,
nullptr,
false);
1233 void StrokeWidth::SmoothNeighbourTypes(
PageSegMode pageseg_mode,
bool reset_all, BLOBNBOX *blob) {
1234 if ((blob->vert_possible() && blob->horz_possible()) || reset_all) {
1236 BLOBNBOX_CLIST neighbours;
1237 List2ndNeighbours(blob, &neighbours);
1239 int pure_h_count = 0;
1240 int pure_v_count = 0;
1241 CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count);
1243 blob->bounding_box().bottom())) {
1244 HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1);
1245 tprintf(
"pure_h=%d, pure_v=%d\n", pure_h_count, pure_v_count);
1247 if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) {
1249 blob->set_vert_possible(
false);
1250 blob->set_horz_possible(
true);
1251 }
else if (pure_v_count > pure_h_count && !FindingHorizontalOnly(pageseg_mode)) {
1253 blob->set_horz_possible(
false);
1254 blob->set_vert_possible(
true);
1257 blob->bounding_box().bottom())) {
1258 HandleClick(blob->bounding_box().left() + 1, blob->bounding_box().bottom() + 1);
1259 tprintf(
"Clean on pass 3!\n");
1276 PageSegMode pageseg_mode,
const FCOORD &rerotation,
bool find_problems, TO_BLOCK *block,
1277 BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts,
1278 FCOORD *skew_angle) {
1279 if (!FindingHorizontalOnly(pageseg_mode)) {
1280 FindVerticalTextChains(part_grid);
1282 if (!FindingVerticalOnly(pageseg_mode)) {
1283 FindHorizontalTextChains(part_grid);
1285 #ifndef GRAPHICS_DISABLED
1286 if (textord_tabfind_show_strokewidths) {
1287 chains_win_ =
MakeWindow(0, 400,
"Initial text chains");
1288 part_grid->DisplayBoxes(chains_win_);
1292 if (find_problems) {
1296 part_grid->SplitOverlappingPartitions(big_parts);
1297 EasyMerges(part_grid);
1298 RemoveLargeUnusedBlobs(block, part_grid, big_parts);
1300 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box, rerotation)) {
1303 while (part_grid->GridSmoothNeighbours(
BTFT_NEIGHBOURS, nontext_map_, grid_box, rerotation)) {
1306 int pre_overlap = part_grid->ComputeTotalOverlap(
nullptr);
1307 TestDiacritics(part_grid, block);
1308 MergeDiacritics(block, part_grid);
1309 if (find_problems && diacritic_blobs !=
nullptr &&
1310 DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid, diacritic_blobs)) {
1313 #ifndef GRAPHICS_DISABLED
1314 if (textord_tabfind_show_strokewidths) {
1315 textlines_win_ =
MakeWindow(400, 400,
"GoodTextline blobs");
1316 part_grid->DisplayBoxes(textlines_win_);
1317 diacritics_win_ = DisplayDiacritics(
"Diacritics", 0, 0, block);
1320 PartitionRemainingBlobs(pageseg_mode, part_grid);
1321 part_grid->SplitOverlappingPartitions(big_parts);
1322 EasyMerges(part_grid);
1323 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box, rerotation)) {
1326 while (part_grid->GridSmoothNeighbours(
BTFT_NEIGHBOURS, nontext_map_, grid_box, rerotation)) {
1330 while (part_grid->GridSmoothNeighbours(
BTFT_STRONG_CHAIN, nontext_map_, grid_box, rerotation)) {
1333 #ifndef GRAPHICS_DISABLED
1334 if (textord_tabfind_show_strokewidths) {
1335 smoothed_win_ =
MakeWindow(800, 400,
"Smoothed blobs");
1336 part_grid->DisplayBoxes(smoothed_win_);
1346 bool StrokeWidth::DetectAndRemoveNoise(
int pre_overlap,
const TBOX &grid_box, TO_BLOCK *block,
1347 ColPartitionGrid *part_grid,
1348 BLOBNBOX_LIST *diacritic_blobs) {
1349 ColPartitionGrid *noise_grid =
nullptr;
1350 int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid);
1351 if (pre_overlap == 0) {
1354 BLOBNBOX_IT diacritic_it(diacritic_blobs);
1355 if (noise_grid !=
nullptr) {
1359 #ifndef GRAPHICS_DISABLED
1360 if (textord_tabfind_show_strokewidths) {
1361 ScrollView *noise_win =
MakeWindow(1000, 500,
"Noise Areas");
1362 noise_grid->DisplayBoxes(noise_win);
1365 part_grid->DeleteNonLeaderParts();
1366 BLOBNBOX_IT blob_it(&block->noise_blobs);
1368 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1369 BLOBNBOX *blob = blob_it.data();
1370 blob->ClearNeighbours();
1371 if (!blob->IsDiacritic() || blob->owner() !=
nullptr) {
1374 TBOX search_box(blob->bounding_box());
1376 rsearch.StartRectSearch(search_box);
1377 ColPartition *part = rsearch.NextRectSearch();
1378 if (part !=
nullptr) {
1380 blob->set_owns_cblob(
true);
1381 blob->compute_bounding_box();
1382 diacritic_it.add_after_then_move(blob_it.extract());
1385 noise_grid->DeleteParts();
1389 noise_grid->DeleteParts();
// Looks up the neighbour of blob in direction dir for use in a vertical chain.
// The candidate is screened out when it is null, already has an owner, or is
// uniquely horizontal; otherwise the code checks that the candidate's
// neighbour in the opposite direction is blob itself (a mutual link).
// NOTE(review): the return statements are on lines not visible in this
// excerpt; presumably nullptr on rejection and next_blob on a mutual link.
1399 static BLOBNBOX *MutualUnusedVNeighbour(
const BLOBNBOX *blob,
BlobNeighbourDir dir) {
1400 BLOBNBOX *next_blob = blob->
neighbour(dir);
// Reject: no neighbour, neighbour already owned, or it can only be horizontal.
1401 if (next_blob ==
nullptr || next_blob->owner() !=
nullptr || next_blob->UniquelyHorizontal()) {
// Mutuality test: the neighbour must point back at blob the other way.
1404 if (next_blob->neighbour(
DirOtherWay(dir)) == blob) {
// Scans every blob in the grid and, for each unowned, uniquely-vertical blob
// that has a mutual unused vertical neighbour above it, follows the chain of
// such neighbours upwards and then downwards before handing the partition to
// CompletePartition.
// NOTE(review): the declarations of gsearch, bbox, blob, part and
// pageseg_mode, and the loop bodies that presumably add each blob to `part`,
// are on lines not visible in this excerpt.
1411 void StrokeWidth::FindVerticalTextChains(ColPartitionGrid *part_grid) {
1417 gsearch.StartFullSearch();
1418 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
// A chain is only started from an unowned, uniquely-vertical blob that has a
// mutual neighbour above it.
1422 if (bbox->owner() ==
nullptr && bbox->UniquelyVertical() &&
1423 (blob = MutualUnusedVNeighbour(bbox,
BND_ABOVE)) !=
nullptr) {
// Walk upwards from the seed.
1427 while (blob !=
nullptr) {
1429 blob = MutualUnusedVNeighbour(blob,
BND_ABOVE);
// Restart from the seed and walk downwards.
1431 blob = MutualUnusedVNeighbour(bbox,
BND_BELOW);
1432 while (blob !=
nullptr) {
1434 blob = MutualUnusedVNeighbour(blob,
BND_BELOW);
1436 CompletePartition(pageseg_mode, part, part_grid);
// Looks up the neighbour of blob in direction dir for use in a horizontal
// chain. The candidate is screened out when it is null, already has an owner,
// or is uniquely vertical; otherwise the code checks that the candidate's
// neighbour in the opposite direction is blob itself (a mutual link).
// NOTE(review): the return statements are on lines not visible in this
// excerpt; presumably nullptr on rejection and next_blob on a mutual link.
1445 static BLOBNBOX *MutualUnusedHNeighbour(
const BLOBNBOX *blob,
BlobNeighbourDir dir) {
1446 BLOBNBOX *next_blob = blob->
neighbour(dir);
// Reject: no neighbour, neighbour already owned, or it can only be vertical.
1447 if (next_blob ==
nullptr || next_blob->owner() !=
nullptr || next_blob->UniquelyVertical()) {
// Mutuality test: the neighbour must point back at blob the other way.
1450 if (next_blob->neighbour(
DirOtherWay(dir)) == blob) {
// Scans every blob in the grid and, for each unowned, uniquely-horizontal
// blob that has a mutual unused horizontal neighbour to its right, follows
// the chain of such neighbours rightwards and then leftwards before handing
// the new BRT_TEXT partition to CompletePartition.
// NOTE(review): the declarations of gsearch, bbox, blob and pageseg_mode, and
// the loop bodies that presumably add each blob to `part`, are on lines not
// visible in this excerpt.
1457 void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid *part_grid) {
1463 gsearch.StartFullSearch();
1464 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
// A chain is only started from an unowned, uniquely-horizontal blob that has
// a mutual neighbour on its right.
1466 if (bbox->owner() ==
nullptr && bbox->UniquelyHorizontal() &&
1467 (blob = MutualUnusedHNeighbour(bbox,
BND_RIGHT)) !=
nullptr) {
1469 auto *part =
new ColPartition(
BRT_TEXT, ICOORD(0, 1));
// Walk rightwards from the seed.
1471 while (blob !=
nullptr) {
1473 blob = MutualUnusedHNeighbour(blob,
BND_RIGHT);
// Restart from the seed and walk leftwards.
1475 blob = MutualUnusedHNeighbour(bbox,
BND_LEFT);
1476 while (blob !=
nullptr) {
// Must be the horizontal helper: the vertical one rejects uniquely-horizontal
// blobs, which would cut a horizontal chain short on its left side.
1478 blob = MutualUnusedHNeighbour(blob,
BND_LEFT);
1480 CompletePartition(pageseg_mode, part, part_grid);
1492 void StrokeWidth::TestDiacritics(ColPartitionGrid *part_grid, TO_BLOCK *block) {
1494 small_grid.InsertBlobList(&block->noise_blobs);
1495 small_grid.InsertBlobList(&block->blobs);
1496 int medium_diacritics = 0;
1497 int small_diacritics = 0;
1498 BLOBNBOX_IT small_it(&block->noise_blobs);
1499 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1500 BLOBNBOX *blob = small_it.data();
1501 if (blob->owner() ==
nullptr && !blob->IsDiacritic() && DiacriticBlob(&small_grid, blob)) {
1505 BLOBNBOX_IT blob_it(&block->blobs);
1506 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1507 BLOBNBOX *blob = blob_it.data();
1508 if (blob->IsDiacritic()) {
1509 small_it.add_to_end(blob_it.extract());
1512 ColPartition *part = blob->owner();
1513 if (part ==
nullptr && DiacriticBlob(&small_grid, blob)) {
1514 ++medium_diacritics;
1516 small_it.add_to_end(blob_it.extract());
1517 }
else if (part !=
nullptr && !part->block_owned() && part->boxes_count() < 3) {
1523 BLOBNBOX_C_IT box_it(part->boxes());
1524 for (box_it.mark_cycle_pt();
1525 !box_it.cycled_list() && DiacriticBlob(&small_grid, box_it.data()); box_it.forward()) {
1528 if (box_it.cycled_list()) {
1530 while (!box_it.empty()) {
1536 BLOBNBOX *box = box_it.extract();
1537 box->set_owner(
nullptr);
1539 ++medium_diacritics;
1546 small_it.add_to_end(blob_it.extract());
1547 part_grid->RemoveBBox(part);
1551 blob->bounding_box().bottom())) {
1552 tprintf(
"Blob not available to be a diacritic at:");
1553 blob->bounding_box().print();
1556 if (textord_tabfind_show_strokewidths) {
1557 tprintf(
"Found %d small diacritics, %d medium\n", small_diacritics, medium_diacritics);
1567 bool StrokeWidth::DiacriticBlob(BlobGrid *small_grid, BLOBNBOX *blob) {
1571 TBOX small_box(blob->bounding_box());
1574 tprintf(
"Testing blob for diacriticness at:");
1577 int x = (small_box.left() + small_box.right()) / 2;
1578 int y = (small_box.bottom() + small_box.top()) / 2;
1581 int height = small_box.height();
1594 BLOBNBOX *best_x_overlap =
nullptr;
1595 BLOBNBOX *best_y_overlap =
nullptr;
1596 int best_total_dist = 0;
1600 TBOX search_box(small_box);
1603 search_box.pad(x_pad, y_pad);
1605 rsearch.SetUniqueMode(
true);
1607 rsearch.StartRectSearch(search_box);
1608 BLOBNBOX *neighbour;
1609 while ((neighbour = rsearch.NextRectSearch()) !=
nullptr) {
1611 neighbour->owner() == blob->owner()) {
1614 TBOX nbox = neighbour->bounding_box();
1615 if (neighbour->owner() ==
nullptr || neighbour->owner()->IsVerticalType() ||
1618 tprintf(
"Neighbour not strong enough:");
1623 if (nbox.height() < min_height) {
1625 tprintf(
"Neighbour not big enough:");
1630 int x_gap = small_box.x_gap(nbox);
1631 int y_gap = small_box.y_gap(nbox);
1634 tprintf(
"xgap=%d, y=%d, total dist=%d\n", x_gap, y_gap, total_distance);
1638 tprintf(
"Neighbour with median size %d too far away:", neighbour->owner()->median_height());
1639 neighbour->bounding_box().print();
1645 tprintf(
"Computing reduced box for :");
1648 int left = small_box.left() - small_box.width();
1649 int right = small_box.right() + small_box.width();
1650 nbox = neighbour->BoundsWithinLimits(left, right);
1651 y_gap = small_box.y_gap(nbox);
1652 if (best_x_overlap ==
nullptr || y_gap < best_y_gap) {
1653 best_x_overlap = neighbour;
1661 tprintf(
"Shrunken box doesn't win:");
1664 }
else if (blob->ConfirmNoTabViolation(*neighbour)) {
1665 if (best_y_overlap ==
nullptr || total_distance < best_total_dist) {
1667 tprintf(
"New best y overlap:");
1670 best_y_overlap = neighbour;
1671 best_total_dist = total_distance;
1673 tprintf(
"New y overlap box doesn't win:");
1677 tprintf(
"Neighbour wrong side of a tab:");
1681 if (best_x_overlap !=
nullptr &&
1682 (best_y_overlap ==
nullptr || best_xbox.major_y_overlap(best_y_overlap->bounding_box()))) {
1683 blob->set_diacritic_box(best_xbox);
1684 blob->set_base_char_blob(best_x_overlap);
1686 tprintf(
"DiacriticBlob OK! (x-overlap:");
1692 if (best_y_overlap !=
nullptr &&
1693 DiacriticXGapFilled(small_grid, small_box, best_y_overlap->bounding_box()) &&
1694 NoNoiseInBetween(small_box, best_y_overlap->bounding_box())) {
1695 blob->set_diacritic_box(best_y_overlap->bounding_box());
1696 blob->set_base_char_blob(best_y_overlap);
1698 tprintf(
"DiacriticBlob OK! (y-overlap:");
1700 best_y_overlap->bounding_box().print();
1705 tprintf(
"DiacriticBlob fails:");
1707 tprintf(
"Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
1708 if (best_y_overlap !=
nullptr) {
1709 tprintf(
"XGapFilled=%d, NoiseBetween=%d\n",
1710 DiacriticXGapFilled(small_grid, small_box, best_y_overlap->bounding_box()),
1711 NoNoiseInBetween(small_box, best_y_overlap->bounding_box()));
1730 bool StrokeWidth::DiacriticXGapFilled(BlobGrid *grid,
const TBOX &diacritic_box,
1731 const TBOX &base_box) {
1734 TBOX occupied_box(base_box);
1736 while ((diacritic_gap = diacritic_box.x_gap(occupied_box)) > max_gap) {
1737 TBOX search_box(occupied_box);
1738 if (diacritic_box.left() > search_box.right()) {
1740 search_box.set_left(search_box.right());
1741 search_box.set_right(search_box.left() + max_gap);
1744 search_box.set_right(search_box.left());
1745 search_box.set_left(search_box.left() - max_gap);
1748 rsearch.StartRectSearch(search_box);
1749 BLOBNBOX *neighbour;
1750 while ((neighbour = rsearch.NextRectSearch()) !=
nullptr) {
1751 const TBOX &nbox = neighbour->bounding_box();
1752 if (nbox.x_gap(diacritic_box) < diacritic_gap) {
1753 if (nbox.left() < occupied_box.left()) {
1754 occupied_box.set_left(nbox.left());
1756 if (nbox.right() > occupied_box.right()) {
1757 occupied_box.set_right(nbox.right());
1762 if (neighbour ==
nullptr) {
1770 void StrokeWidth::MergeDiacritics(TO_BLOCK *block, ColPartitionGrid *part_grid) {
1771 BLOBNBOX_IT small_it(&block->noise_blobs);
1772 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1773 BLOBNBOX *blob = small_it.data();
1774 if (blob->base_char_blob() !=
nullptr) {
1775 ColPartition *part = blob->base_char_blob()->owner();
1778 if (part !=
nullptr && !part->block_owned() && blob->owner() ==
nullptr &&
1779 blob->IsDiacritic()) {
1782 part_grid->RemoveBBox(part);
1784 blob->set_region_type(part->blob_type());
1785 blob->set_flow(part->flow());
1786 blob->set_owner(part);
1787 part_grid->InsertBBox(
true,
true, part);
1790 blob->set_base_char_blob(
nullptr);
1798 void StrokeWidth::RemoveLargeUnusedBlobs(TO_BLOCK *block, ColPartitionGrid *part_grid,
1799 ColPartition_LIST *big_parts) {
1800 BLOBNBOX_IT large_it(&block->large_blobs);
1801 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
1802 BLOBNBOX *blob = large_it.data();
1803 ColPartition *big_part = blob->owner();
1804 if (big_part ==
nullptr) {
1814 void StrokeWidth::PartitionRemainingBlobs(
PageSegMode pageseg_mode, ColPartitionGrid *part_grid) {
1817 int prev_grid_x = -1;
1818 int prev_grid_y = -1;
1819 BLOBNBOX_CLIST cell_list;
1820 BLOBNBOX_C_IT cell_it(&cell_list);
1821 bool cell_all_noise =
true;
1822 gsearch.StartFullSearch();
1823 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1824 int grid_x = gsearch.GridX();
1825 int grid_y = gsearch.GridY();
1826 if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
1828 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid, &cell_list);
1829 cell_it.set_to_list(&cell_list);
1830 prev_grid_x = grid_x;
1831 prev_grid_y = grid_y;
1832 cell_all_noise =
true;
1834 if (bbox->owner() ==
nullptr) {
1835 cell_it.add_to_end(bbox);
1837 cell_all_noise =
false;
1840 cell_all_noise =
false;
1843 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid, &cell_list);
// Turns the blobs in cell_list into ColPartitions and passes each partition
// to CompletePartition. Two paths are visible: one that builds a single
// partition from the first blob and adds every remaining blob to it, and one
// that builds a separate partition per blob.
// NOTE(review): the branch on `combine` that selects between the two paths,
// and the early exit for an empty list, are on lines not visible in this
// excerpt - presumably combine == true selects the single-partition path.
1848 void StrokeWidth::MakePartitionsFromCellList(
PageSegMode pageseg_mode,
bool combine,
1849 ColPartitionGrid *part_grid,
1850 BLOBNBOX_CLIST *cell_list) {
// Nothing to do for an empty cell.
1851 if (cell_list->empty()) {
1854 BLOBNBOX_C_IT cell_it(cell_list);
// Single-partition path: the first blob supplies the region type and flow.
1856 BLOBNBOX *bbox = cell_it.extract();
1857 auto *part =
new ColPartition(bbox->region_type(), ICOORD(0, 1));
1859 part->set_flow(bbox->flow());
// Every remaining blob is extracted from the list and added to that partition.
1860 for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
1861 part->AddBox(cell_it.extract());
1863 CompletePartition(pageseg_mode, part, part_grid);
// Per-blob path: each blob is extracted into a partition of its own.
1865 for (; !cell_it.empty(); cell_it.forward()) {
1866 BLOBNBOX *bbox = cell_it.extract();
1867 auto *part =
new ColPartition(bbox->region_type(), ICOORD(0, 1));
1868 part->set_flow(bbox->flow());
1870 CompletePartition(pageseg_mode, part, part_grid);
// Finalises a freshly built partition and inserts it into part_grid.
// The partition's limits are computed, a projection `value` is adjusted for
// single-direction page segmentation modes, the partition's region and flow
// types are set from that value, and the partition is inserted into the grid.
// NOTE(review): the statements that declare and compute `value` are not
// visible in this listing.
1877 void StrokeWidth::CompletePartition(
PageSegMode pageseg_mode, ColPartition *part,
1878 ColPartitionGrid *part_grid) {
1879 part->ComputeLimits();
1880 TBOX box = part->bounding_box();
// A positive value while only vertical text is being searched for is
// replaced: 0 for a single-box partition, -2 otherwise.
1884 if (value > 0 && FindingVerticalOnly(pageseg_mode)) {
1885 value = part->boxes_count() == 1 ? 0 : -2;
1886 }
// Mirror case: a negative value while only horizontal text is being searched
// for becomes 0 for a single-box partition, 2 otherwise.
else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) {
1887 value = part->boxes_count() == 1 ? 0 : 2;
1889 part->SetRegionAndFlowTypesFromProjectionValue(value);
1891 part_grid->InsertBBox(
true,
true, part);
// Runs part_grid->Merges with two callbacks bound to this object:
// OrientationSearchBox as the first argument and ConfirmEasyMerge as the
// second. Each callback forwards two arguments (_1, _2) to the member
// function.
1896 void StrokeWidth::EasyMerges(ColPartitionGrid *part_grid) {
// Brings _1 and _2 into scope for the std::bind calls below.
1897 using namespace std::placeholders;
1898 part_grid->Merges(std::bind(&StrokeWidth::OrientationSearchBox,
this, _1, _2),
1899 std::bind(&StrokeWidth::ConfirmEasyMerge,
this, _1, _2));
// Enlarges *box in place depending on the orientation of part.
// For a vertical-type partition the box is extended upwards and downwards by
// its own width. The other pair of statements extends it to the left and
// right by its own height.
// NOTE(review): the line separating the two pairs (presumably an `else`) and
// the return statement are not visible in this listing - confirm against the
// full file.
1905 bool StrokeWidth::OrientationSearchBox(ColPartition *part,
TBOX *box) {
1906 if (part->IsVerticalType()) {
// Changing top/bottom leaves width() unchanged, so both edges move by the
// same amount.
1907 box->set_top(box->top() + box->width());
1908 box->set_bottom(box->bottom() - box->width());
// Changing left/right leaves height() unchanged, so both edges move by the
// same amount.
1910 box->set_left(box->left() - box->height());
1911 box->set_right(box->right() + box->height());
// Decides whether partitions p1 and p2 may be merged.
// Three guard conditions are evaluated in turn; if none of them ends the
// function, the result is NoNoiseInBetween applied to the two bounding boxes.
// NOTE(review): the bodies of the three guards are not visible in this
// listing; presumably each one rejects the merge - TODO confirm.
1917 bool StrokeWidth::ConfirmEasyMerge(
const ColPartition *p1,
const ColPartition *p2) {
// Guard 1: at least one partition is vertical, they have no horizontal core
// overlap, and either neither is a singleton or their boxes do not have a
// major overlap.
1924 if ((p1->IsVerticalType() || p2->IsVerticalType()) && p1->HCoreOverlap(*p2) <= 0 &&
1925 ((!p1->IsSingleton() && !p2->IsSingleton()) ||
1926 !p1->bounding_box().major_overlap(p2->bounding_box()))) {
// Guard 2: the horizontal counterpart, using vertical core overlap. For the
// singleton case it additionally requires that neither partition is an OK
// diacritic merge for the other.
1929 if ((p1->IsHorizontalType() || p2->IsHorizontalType()) && p1->VCoreOverlap(*p2) <= 0 &&
1930 ((!p1->IsSingleton() && !p2->IsSingleton()) ||
1931 (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
1932 !p1->OKDiacriticMerge(*p2,
false) && !p2->OKDiacriticMerge(*p1,
false)))) {
// Guard 3: p1 reports a tab violation with respect to p2.
1935 if (!p1->ConfirmNoTabViolation(*p2)) {
1941 return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
// Const predicate over two boxes; its result is what ConfirmEasyMerge returns
// when none of its guards fire.
// NOTE(review): the body is not visible in this listing, so what is actually
// tested between box1 and box2 cannot be stated here.
1945 bool StrokeWidth::NoNoiseInBetween(
const TBOX &box1,
const TBOX &box2)
const {
1949 #ifndef GRAPHICS_DISABLED
// Debug display: draws a rectangle for every blob returned by a full grid
// search.
// For each blob its bounding box and GoodTextBlob() value are read; the
// UniquelyVertical()/UniquelyHorizontal() tests and the goodness value (0, 1
// or other) select between branches before the rectangle is drawn.
// NOTE(review): window creation (presumably from window_name, x, y), the
// statements inside those branches (presumably pen colour choices) and the
// return statement are not visible in this listing.
1954 ScrollView *StrokeWidth::DisplayGoodBlobs(
const char *window_name,
int x,
int y) {
1961 gsearch.StartFullSearch();
1963 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1964 const TBOX &box = bbox->bounding_box();
1965 int left_x = box.left();
1966 int right_x = box.right();
1967 int top_y = box.top();
1968 int bottom_y = box.bottom();
1969 int goodness = bbox->GoodTextBlob();
1971 if (bbox->UniquelyVertical()) {
1974 if (bbox->UniquelyHorizontal()) {
1979 if (goodness == 0) {
1981 }
else if (goodness == 1) {
// Outline of the blob's bounding box.
1988 window->Rectangle(left_x, bottom_y, right_x, top_y);
// Draws a vertical line in window at the horizontal centre of blob's bounding
// box. The line runs from the higher of the box top and base_char_top() down
// to the lower of the box bottom and base_char_bottom().
1994 static void DrawDiacriticJoiner(
const BLOBNBOX *blob, ScrollView *window) {
1995 const TBOX &blob_box(blob->bounding_box());
// The casts make both std::max/std::min arguments the same type (int).
1996 int top = std::max(
static_cast<int>(blob_box.top()), blob->base_char_top());
1997 int bottom = std::min(
static_cast<int>(blob_box.bottom()), blob->base_char_bottom());
// Integer midpoint of the left and right edges.
1998 int x = (blob_box.left() + blob_box.right()) / 2;
1999 window->Line(x, top, x, bottom);
// Debug display: draws the bounding box of every blob in block->blobs and
// then of every blob in block->noise_blobs, and for blobs that report
// IsDiacritic() also draws a joiner line via DrawDiacriticJoiner.
// NOTE(review): window creation (presumably from window_name, x, y), some pen
// selections and the return statement are not visible in this listing.
2003 ScrollView *StrokeWidth::DisplayDiacritics(
const char *window_name,
int x,
int y, TO_BLOCK *block) {
// First pass: the block's main blob list.
2008 BLOBNBOX_IT it(&block->blobs);
2009 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2010 BLOBNBOX *blob = it.data();
2011 if (blob->IsDiacritic()) {
2013 DrawDiacriticJoiner(blob, window);
// NOTE(review): presumably the non-diacritic branch, using the blob's own
// box colour - TODO confirm.
2015 window->Pen(blob->BoxColor());
2017 const TBOX &box = blob->bounding_box();
2018 window->Rectangle(box.left(), box.bottom(), box.right(), box.top());
// Second pass: the same iterator is re-targeted at the noise blobs.
2020 it.set_to_list(&block->noise_blobs);
2021 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2022 BLOBNBOX *blob = it.data();
2023 if (blob->IsDiacritic()) {
2025 DrawDiacriticJoiner(blob, window);
2029 const TBOX &box = blob->bounding_box();
2030 window->Rectangle(box.left(), box.bottom(), box.right(), box.top());
#define BOOL_VAR(name, val, comment)
#define INT_VAR(name, val, comment)
const double kMaxDiacriticDistanceRatio
const int kLineResiduePadRatio
const double kNoiseOverlapAreaFactor
const int kCJKMaxComponents
@ PSM_SINGLE_BLOCK_VERT_TEXT
@ PSM_SINGLE_COLUMN
Assume a single column of text of variable sizes.
const double kMinDiacriticSizeRatio
const double kCJKBrokenDistanceFraction
const int kLineTrapLongest
void tprintf(const char *format,...)
const double kCJKAspectRatio
int IntCastRounded(double x)
const double kStrokeWidthTolerance
const double kNoiseOverlapGrowthFactor
const double kCJKAspectRatioIncrease
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
const double kNeighbourSearchFactor
const double kLineResidueAspectRatio
int textord_debug_tabfind
const double kBrokenCJKIterationFraction
const int kMaxCJKSizeRatio
const double kDiacriticXPadRatio
const double kLineResidueSizeRatio
const double kMaxDiacriticGapToBaseCharHeight
const int kLineTrapShortest
const double kStrokeWidthFractionTolerance
const double kStrokeWidthFractionCJK
const double kStrokeWidthCJK
const double kDiacriticYPadRatio
BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir)
const float kSizeRatioToReject
const int kMostlyOneDirRatio
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
float vert_stroke_width() const
bool good_stroke_neighbour(BlobNeighbourDir n) const
BLOBNBOX * neighbour(BlobNeighbourDir n) const
void NeighbourGaps(int gaps[BND_COUNT]) const
bool UniquelyHorizontal() const
bool UniquelyVertical() const
const TBOX & bounding_box() const
bool vert_possible() const
BlobTextFlowType flow() const
float horz_stroke_width() const
bool horz_possible() const
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
static bool UnMergeableType(BlobRegionType type)
BLOBNBOX_LIST small_blobs
BLOBNBOX_LIST large_blobs
BLOBNBOX_LIST noise_blobs
void set_y(float yin)
rewrite function
void set_x(float xin)
rewrite function
TDimension height() const
TDimension bottom() const
void pad(int xpad, int ypad)
bool contains(const FCOORD pt) const
static bool WithinTestRegion(int detail_level, int x, int y)
void StartRadSearch(int x, int y, int max_radius)
void StartRectSearch(const TBOX &rect)
const ICOORD & bleft() const
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
const ICOORD & tright() const
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
virtual void HandleClick(int x, int y)
ScrollView * MakeWindow(int x, int y, const char *window_name)
void RemoveBBox(BLOBNBOX *bbox)
BlobGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBlobList(BLOBNBOX_LIST *blobs)
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Image pix)
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
void HandleClick(int x, int y) override
StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void RemoveLineResidue(ColPartition_LIST *big_part_list)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Image nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
static bool DifferentSizes(int size1, int size2)
static bool VeryDifferentSizes(int size1, int size2)
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map)
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
void DisplayProjection() const
void Rectangle(int x1, int y1, int x2, int y2)
SVEvent * AwaitEvent(SVEventType type)