// Tunable parameters for the baseline / x-height code in this file. Each one
// is declared through a *_VAR macro taking (name, default value, description).

// When true, find_textlines calls old_first_xheight instead of
// make_first_xheight.
42 static BOOL_VAR(textord_really_old_xheight,
false,
"Use original wiseowl xheight");
// Enables diagnostic tprintf output (e.g. when a row fails to get a baseline
// or a spline is resegmented).
44 static BOOL_VAR(textord_debug_baselines,
false,
"Debug baseline generation");
// Tested when the spline passed in from a neighbouring row is missing or does
// not cover the row well enough.
45 static BOOL_VAR(textord_oldbl_paradef,
true,
"Use para default mechanism");
// Part of the loop condition that keeps refitting the row baseline in
// find_textlines.
46 static BOOL_VAR(textord_oldbl_split_splines,
true,
"Split stepped splines");
// When true, merge_oldbl_parts is called after the blobs have been
// partitioned.
47 static BOOL_VAR(textord_oldbl_merge_parts,
true,
"Merge suspect partitions");
// When true, correlate_with_stats additionally requires xcount >= fullcount
// and desccount >= rowcount / 2 before using the corresponding averages.
48 static BOOL_VAR(oldbl_corrfix,
true,
"Improve correlation of heights");
// When true (or when oldbl_dot_error_size > 1), the mode threshold is derived
// from the weighted mode count instead of the blob count.
49 static BOOL_VAR(oldbl_xhfix,
false,
"Fix bug in modes threshold for xheights");
// Changes blob weighting and adds a horizontal-overlap requirement in the
// x-height selection code.
50 static BOOL_VAR(textord_ocropus_mode,
false,
"Make baselines for ocropus");
// NOTE(review): not referenced in the visible part of this file.
51 static double_VAR(oldbl_xhfract, 0.4,
"Fraction of est allowed in calc");
// A row is flagged as "holed" when its maximum loss count exceeds this value.
52 static INT_VAR(oldbl_holed_losscount, 10,
"Max lost before fallback line used");
// Used symmetrically: height < width * value and width < height * value.
53 static double_VAR(oldbl_dot_error_size, 1.26,
"Max aspect ratio of a dot");
// Multiplied by the estimated line height to obtain the jump limit.
54 static double_VAR(textord_oldbl_jumplimit, 0.15,
"X fraction for new partition");
// Numeric constants for the baseline and x-height code.
// NOTE(review): several of these are not referenced in the visible part of
// this file; their exact use should be confirmed against the full source.
57 #define X_HEIGHT_FRACTION 0.7
58 #define DESCENDER_FRACTION 0.5
59 #define MIN_ASC_FRACTION 0.20
60 #define MIN_DESC_FRACTION 0.25
// Compared against a partition's mean offset from the baseline, both as
// >= MINASCRISE and as <= -MINASCRISE.
61 #define MINASCRISE 2.0
62 #define MAXHEIGHTVARIANCE 0.15
// Fraction of the row width (rightedge - leftedge) used when checking that a
// supplied spline's inner breakpoints lie close enough to the row ends.
64 #define MAXOVERLAP 0.1
66 #define HEIGHTBUCKETS 200
// Function-like macro: the argument is evaluated more than once, so do not
// pass expressions with side effects.
71 #define ABS(x) ((x) < 0 ? (-(x)) : (x))
// Fits a baseline to each row of the block, then correlates the rows and
// copies the resulting x-height onto the underlying block.
// NOTE(review): only part of this function is visible here; the comments
// below cover the visible statements only.
79 void Textord::make_old_baselines(TO_BLOCK *block,
82 QSPLINE *prev_baseline;
84 TO_ROW_IT row_it = block->get_rows();
// No previously fitted baseline exists before the first row.
87 prev_baseline =
nullptr;
88 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
// First attempt: fit the row without a reference baseline.
90 find_textlines(block, row, 2,
nullptr);
// If that left a non-positive xheight, retry using the last good baseline.
91 if (row->xheight <= 0 && prev_baseline !=
nullptr) {
92 find_textlines(block, row, 2, prev_baseline);
// A row with a positive xheight becomes the reference for following rows.
94 if (row->xheight > 0) {
95 prev_baseline = &row->baseline;
97 prev_baseline =
nullptr;
98 blob_it.set_to_list(row->blob_list());
99 if (textord_debug_baselines) {
100 tprintf(
"Row baseline generation failed on row at (%d,%d)\n",
101 blob_it.data()->bounding_box().left(), blob_it.data()->bounding_box().bottom());
// After all rows are processed, reconcile them with each other and publish
// the block-level x-height.
105 correlate_lines(block, gradient);
106 block->block->set_xheight(block->xheight);
// Collects the block's rows into an array, correlates neighbouring rows and
// then derives the block x-height from correlate_with_stats.
// NOTE(review): only part of this function is visible here.
117 void Textord::correlate_lines(TO_BLOCK *block,
float gradient) {
121 TO_ROW_IT row_it = block->get_rows();
123 rowcount = row_it.length();
// xheight is set to the block's line_size here; the enclosing condition is
// not visible (presumably the no-rows case - confirm).
126 block->xheight = block->line_size;
// Random-access copy of the row pointers for the index-based helpers below.
130 std::vector<TO_ROW *> rows(rowcount);
132 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
134 rows[rowindex++] = row_it.data();
// NOTE(review): &rows[0] requires rowcount > 0; any guard is outside the
// visible lines - confirm.
138 correlate_neighbours(block, &rows[0], rowcount);
// correlate_with_stats returns an int, converted to float for the block.
141 block->xheight =
static_cast<float>(correlate_with_stats(&rows[0], rowcount, block));
142 if (block->xheight <= 0) {
// For every row whose xheight is negative, re-runs find_textlines using the
// baseline of a neighbouring row as a reference; afterwards flips remaining
// negative xheights to positive and tracks the largest xheight seen.
// NOTE(review): only part of this function is visible here.
159 void Textord::correlate_neighbours(TO_BLOCK *block,
169 for (rowindex = 0; rowindex < rowcount; rowindex++) {
170 row = rows[rowindex];
171 if (row->xheight < 0) {
// Search backwards for a usable row.
// NOTE(review): this search starts at rowindex - 2 whereas the forward search
// starts at rowindex + 1 - confirm the asymmetry is intended.
173 for (otherrow = rowindex - 2;
174 otherrow >= 0 && (rows[otherrow]->xheight < 0.0 ||
// Search forwards for a usable row.
180 for (otherrow = rowindex + 1;
181 otherrow < rowcount && (rows[otherrow]->xheight < 0.0 ||
// Refit with the third argument 2 against the upper, then the lower, neighbour.
188 find_textlines(block, row, 2, &rows[upperrow]->
baseline);
190 if (row->xheight < 0 && lowerrow < rowcount) {
191 find_textlines(block, row, 2, &rows[lowerrow]->
baseline);
// Still negative: fall back to refitting with the third argument 1.
193 if (row->xheight < 0) {
195 find_textlines(block, row, 1, &rows[upperrow]->
baseline);
196 }
else if (lowerrow < rowcount) {
197 find_textlines(block, row, 1, &rows[lowerrow]->
baseline);
// Second pass over all rows: make negative xheights positive and record the
// maximum.
203 for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
204 row = rows[rowindex];
205 if (row->xheight < 0) {
207 row->xheight = -row->xheight;
209 biggest = std::max(biggest, row->xheight);
// Accumulates x-height, ascender, full-height and descender sums over the
// rows, turns them into averages, then rewrites each row's xheight, ascrise,
// descdrop and all_caps from those averages. Returns the average line height
// truncated to int.
// NOTE(review): only part of this function is visible here; the conditions
// selecting between the per-row branches are not shown.
220 int Textord::correlate_with_stats(TO_ROW **rows,
236 xcount = fullcount = desccount = 0;
237 lineheight = ascheight = fullheight = descheight = 0.0;
238 for (rowindex = 0; rowindex < rowcount; rowindex++) {
239 row = rows[rowindex];
// Rows with a positive ascrise contribute to the x-height and ascender sums.
240 if (row->ascrise > 0.0) {
241 lineheight += row->xheight;
242 ascheight += row->ascrise;
245 fullheight += row->xheight;
// Descender drops are stored as negative values.
248 if (row->descdrop < 0.0) {
250 descheight += row->descdrop;
// With oldbl_corrfix set, the x-height average is only used when at least as
// many rows contributed to it as to the full-height sum.
255 if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
256 lineheight /= xcount;
258 fullheight = lineheight + ascheight / xcount;
264 fullheight /= fullcount;
// With oldbl_corrfix set, the descender average needs at least rowcount / 2
// contributing rows.
268 if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2)) {
269 descheight /= desccount;
// Guard against dividing by a zero line height.
275 if (lineheight > 0.0f) {
276 block->block->set_cell_over_xheight((fullheight - descheight) / lineheight);
// Second pass: normalise every row against the averages computed above.
281 for (rowindex = 0; rowindex < rowcount; rowindex++) {
282 row = rows[rowindex];
283 row->all_caps =
false;
288 row->ascrise = fullheight - lineheight;
290 row->xheight = lineheight;
294 row->ascrise = row->xheight - lineheight;
296 row->xheight = lineheight;
297 row->all_caps =
true;
// Split the row's height between x-height and ascender in the same
// proportion as the averages.
299 row->ascrise = (fullheight - lineheight) * row->xheight / fullheight;
301 row->xheight -= row->ascrise;
302 row->all_caps =
true;
304 if (row->ascrise < minascheight) {
308 if (row->descdrop > mindescheight) {
312 row->descdrop = descheight;
318 return static_cast<int>(lineheight);
// Fits a baseline spline to one row and then estimates its x-height.
// Visible steps: gather blob boxes, make an initial baseline, repeatedly
// partition / segment / refit the spline, look for lesser partitions,
// extrapolate the baseline across the block, and compute the x-height.
// NOTE(review): only part of this function is visible here.
327 void Textord::find_textlines(TO_BLOCK *block,
332 bool holed_line =
false;
343 blobcount = row->blob_list()->length();
// Per-blob work arrays, all sized by the number of blobs in the row.
// NOTE(review): &v[0] is taken on these vectors below, which needs
// blobcount > 0; any guard is outside the visible lines - confirm.
345 std::vector<char> partids(blobcount);
347 std::vector<int> xcoords(blobcount);
349 std::vector<int> ycoords(blobcount);
351 std::vector<TBOX> blobcoords(blobcount);
353 std::vector<float> ydiffs(blobcount);
// get_blob_coords fills blobcoords, may set holed_line, and returns a line
// height estimate.
355 lineheight =
get_blob_coords(row,
static_cast<int>(block->line_size), &blobcoords[0], holed_line,
// The jump limit scales with the estimated line height.
358 jumplimit = lineheight * textord_oldbl_jumplimit;
364 tprintf(
"\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n", block->line_size,
365 lineheight, jumplimit);
370 make_first_baseline(&blobcoords[0], blobcount, &xcoords[0], &ycoords[0], spline, &row->baseline,
373 #ifndef GRAPHICS_DISABLED
// Refinement loop body: partition blobs about the current baseline, keep the
// best partition's points, segment them and rebuild the spline.
379 bestpart =
partition_line(&blobcoords[0], blobcount, &partcount, &partids[0], partsizes,
380 &row->baseline, jumplimit, &ydiffs[0]);
381 pointcount =
partition_coords(&blobcoords[0], blobcount, &partids[0], bestpart, &xcoords[0],
383 segments =
segment_spline(&blobcoords[0], blobcount, &xcoords[0], &ycoords[0], degree,
384 pointcount, xstarts);
387 row->baseline = QSPLINE(xstarts, segments, &xcoords[0], &ycoords[0], pointcount, degree);
388 }
while (textord_oldbl_split_splines &&
391 find_lesser_parts(row, &blobcoords[0], blobcount, &partids[0], partsizes, partcount, bestpart);
// xheight -1 marks the row as having no x-height (callers test xheight <= 0).
394 row->xheight = -1.0f;
395 row->descdrop = 0.0f;
// Extend the baseline to the left and right edges of the block bounding box.
398 row->baseline.extrapolate(row->line_m(), block->block->pdblk.bounding_box().left(),
399 block->block->pdblk.bounding_box().right());
// Choose the x-height estimator according to textord_really_old_xheight.
401 if (textord_really_old_xheight) {
402 old_first_xheight(row, &blobcoords[0], lineheight, blobcount, &row->baseline, jumplimit);
404 make_first_xheight(row, &blobcoords[0], lineheight,
static_cast<int>(block->line_size),
405 blobcount, &row->baseline, jumplimit);
407 compute_row_xheight(row, block->block->classify_rotation(), row->line_m(), block->line_size);
// NOTE(review): function header not visible; this fragment writes holed_line
// and outcount, so it is presumably the body of get_blob_coords - confirm.
// Walks the row's blobs, records heights of sufficiently tall blobs, tracks
// the longest run of "lost" blobs, and returns a line height estimate.
433 if (blob_it.empty()) {
438 blob_it.mark_cycle_pt();
// Only blobs taller than a quarter of the line height feed the height stats.
442 if (blobcoords[blobindex].height() > lineheight * 0.25) {
443 heightstat.
add(blobcoords[blobindex].height(), 1);
445 if (blobindex == 0 || blobcoords[blobindex].height() > lineheight * 0.25 ||
446 blob_it.cycled_list()) {
// Roughly square box: height and width are within a factor of
// oldbl_dot_error_size of each other.
450 if (blobcoords[blobindex].height() < blobcoords[blobindex].width() * oldbl_dot_error_size &&
451 blobcoords[blobindex].width() < blobcoords[blobindex].height() * oldbl_dot_error_size) {
// Remember the largest loss count seen so far.
457 if (losscount > maxlosscount) {
459 maxlosscount = losscount;
463 }
while (!blob_it.cycled_list());
// Report a holed line when the worst loss run exceeds the configured limit.
465 holed_line = maxlosscount > oldbl_holed_losscount;
466 outcount = blobindex;
// Two visible return paths: the 0.25 fraction point of the collected heights,
// or the height of the first blob (selecting condition not visible).
470 return static_cast<int>(heightstat.
ile(0.25));
472 return blobcoords[0].
height();
// NOTE(review): function header not visible; the names used (spline,
// baseline, xcoords, ycoords, jumplimit) match make_first_baseline - confirm.
// Builds an initial baseline: either a fresh linear fit through the blob
// bottoms, split at turning points, or one derived from the supplied spline
// with a vertical shift.
497 float prevy, thisy, nexty;
499 float maxmax, minmin;
510 leftedge = blobcoords[0].
left();
512 rightedge = blobcoords[blobcount - 1].
right();
// The supplied spline is rejected when it is null, has fewer than 3 segments,
// or its inner breakpoints lie more than MAXOVERLAP of the row width inside
// the row's ends.
513 if (spline ==
nullptr
514 || spline->segments < 3
516 || spline->xcoords[1] > leftedge +
MAXOVERLAP * (rightedge - leftedge) ||
517 spline->xcoords[spline->segments - 1] < rightedge -
MAXOVERLAP * (rightedge - leftedge)) {
518 if (textord_oldbl_paradef) {
// Single segment spanning the row, one pixel beyond each end.
521 xstarts[0] = blobcoords[0].
left() - 1;
// Sample points: horizontal centre and bottom of every blob.
522 for (blobindex = 0; blobindex < blobcount; blobindex++) {
523 xcoords[blobindex] = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) / 2;
524 ycoords[blobindex] = blobcoords[blobindex].
bottom();
526 xstarts[1] = blobcoords[blobcount - 1].
right() + 1;
// Degree-1 fit through the sample points.
530 *
baseline =
QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1);
// With at least three blobs, look for turning points in the residuals.
532 if (blobcount >= 3) {
536 maxmax = minmin = 0.0f;
537 thisy = ycoords[0] -
baseline->y(xcoords[0]);
538 nexty = ycoords[1] -
baseline->y(xcoords[1]);
539 for (blobindex = 2; blobindex < blobcount; blobindex++) {
542 nexty = ycoords[blobindex] -
baseline->y(xcoords[blobindex]);
// Only residuals within jumplimit of both neighbours are considered.
544 if (
ABS(thisy - prevy) < jumplimit &&
ABS(thisy - nexty) < jumplimit) {
// y2 is a local maximum or minimum of the last three accepted values.
550 if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
552 || (y1 > y2 && y2 <= y3))) {
555 xturns[segment] = x2;
556 yturns[segment] = y2;
561 maxmax = minmin = y3;
571 x2 = blobcoords[blobindex - 1].
right();
// Only split the baseline when the residual range exceeds jumplimit.
577 if (maxmax - minmin > jumplimit) {
579 for (blobindex = 0, segment = 1; blobindex < ycount; blobindex++) {
580 if (yturns[blobindex] > minmin + jumplimit || yturns[blobindex] < maxmax - jumplimit) {
// Start a new segment when the turn differs enough from the previous one.
582 if (segment == 1 || yturns[blobindex] > prevy + jumplimit ||
583 yturns[blobindex] < prevy - jumplimit) {
585 xstarts[segment] = xturns[blobindex];
587 prevy = yturns[blobindex];
// Otherwise move the previous breakpoint to the more extreme turn.
590 else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
592 || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
593 xstarts[segment - 1] = xturns[blobindex];
595 prevy = yturns[blobindex];
599 xstarts[segment] = blobcoords[blobcount - 1].
right() + 1;
// Refit with the new segmentation.
602 *
baseline =
QSPLINE(xstarts, segments, xcoords, ycoords, blobcount, 1);
// Vertical offset between the first blob's bottom and the supplied spline,
// evaluated at that blob's right edge.
608 ICOORD(0,
static_cast<int16_t
>(blobcoords[0].bottom() - spline->
y(blobcoords[0].
right())));
// NOTE(review): function header not visible; the use of gradient and a
// line-fit accumulator suggests this is make_holed_baseline - confirm.
// Feeds blob bottom-centre points into a line fitter, sets up a single
// segment across the row, and optionally computes a shift from the supplied
// spline.
640 leftedge = blobcoords[0].
left();
642 rightedge = blobcoords[blobcount - 1].
right();
// One point per blob: horizontal centre and bottom.
643 for (blobindex = 0; blobindex < blobcount; blobindex++) {
644 lms.
Add(
ICOORD((blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2,
645 blobcoords[blobindex].bottom()));
// Single segment from the row's left edge to its right edge.
648 xstarts[0] = leftedge;
649 xstarts[1] = rightedge;
// The linear coefficient is the supplied gradient.
651 coeffs[1] = gradient;
// The supplied spline is used only if it exists, has at least 3 segments and
// its inner breakpoints are within MAXOVERLAP of the row width of the ends.
654 if (spline !=
nullptr
655 && spline->segments >= 3
657 && spline->xcoords[1] <= leftedge +
MAXOVERLAP * (rightedge - leftedge) &&
658 spline->xcoords[spline->segments - 1] >= rightedge -
MAXOVERLAP * (rightedge - leftedge)) {
// Vertical shift between the fitted line and the spline at the row's centre.
660 x = (leftedge + rightedge) / 2.0;
661 shift =
ICOORD(0,
static_cast<int16_t
>(gradient * x + c - spline->
y(x)));
// NOTE(review): function header not visible; the names used (numparts,
// partids, partsizes, ydiffs) match partition_line - confirm.
// Assigns each blob to a partition by its offset from the spline, working
// outwards from the start index returned by get_ydiffs, then selects the
// largest partition.
// Clear all partition sizes.
692 for (bestpart = 0; bestpart <
MAXPARTS; bestpart++) {
693 partsizes[bestpart] = 0;
// get_ydiffs fills ydiffs and returns the blob index to start from.
696 startx =
get_ydiffs(blobcoords, blobcount, spline, ydiffs);
700 float last_delta = 0.0f;
// Forward pass: from the start index to the end of the row.
701 for (blobindex = startx; blobindex < blobcount; blobindex++) {
703 diff = ydiffs[blobindex];
705 tprintf(
"%d(%d,%d), ", blobindex, blobcoords[blobindex].left(),
706 blobcoords[blobindex].bottom());
709 choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, &last_delta, numparts);
711 partids[blobindex] = bestpart;
712 partsizes[bestpart]++;
// Backward pass: from the start index down to the first blob.
720 for (blobindex = startx; blobindex >= 0; blobindex--) {
721 diff = ydiffs[blobindex];
723 tprintf(
"%d(%d,%d), ", blobindex, blobcoords[blobindex].left(),
724 blobcoords[blobindex].bottom());
727 choose_partition(diff, partdiffs, bestpart, jumplimit, &drift, &last_delta, numparts);
729 partids[blobindex] = bestpart;
730 partsizes[bestpart]++;
// Pick the largest partition; >= means ties go to the higher index.
733 for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++) {
734 if (partsizes[bestpart] >= partsizes[biggestpart]) {
735 biggestpart = bestpart;
738 if (textord_oldbl_merge_parts) {
739 merge_oldbl_parts(blobcoords, blobcount, partids, partsizes, biggestpart, jumplimit);
// NOTE(review): function header not visible; the parameters used match
// merge_oldbl_parts - confirm.
// Scans for runs of blobs outside the biggest partition. For a long enough
// run, fits a line through the run and, if a nearby blob of the biggest
// partition lies within jumplimit of that line, reassigns the run to the
// biggest partition.
771 prevpart = biggestpart;
774 for (blobindex = 0; blobindex < blobcount; blobindex++) {
// A change of partition id ends the current run.
775 if (partids[blobindex] != prevpart) {
// Only runs outside the biggest partition and longer than MAXBADRUN are
// examined.
780 if (prevpart != biggestpart && runlength >
MAXBADRUN) {
// Accumulate the run's bottom-centre points for a line fit.
782 for (test_blob = startx; test_blob < blobindex; test_blob++) {
783 coord =
FCOORD((blobcoords[test_blob].left() + blobcoords[test_blob].right()) / 2.0,
784 blobcoords[test_blob].bottom());
785 stats.
add(coord.
x(), coord.
y());
791 tprintf(
"Fitted line y=%g x + %g\n", m, c);
796 !found_one && (startx - test_blob >= 0 || blobindex + test_blob <= blobcount);
// Look to the left of the run for a blob of the biggest partition.
798 if (startx - test_blob >= 0 && partids[startx - test_blob] == biggestpart) {
801 (blobcoords[startx - test_blob].left() + blobcoords[startx - test_blob].right()) /
803 blobcoords[startx - test_blob].bottom());
// Vertical distance from that blob to the fitted line.
804 diff = m * coord.
x() + c - coord.
y();
806 tprintf(
"Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.
x(),
// Equivalent to |diff| < jumplimit.
809 if (diff < jumplimit && -diff < jumplimit) {
// Look to the right of the run for a blob of the biggest partition.
813 if (blobindex + test_blob <= blobcount &&
814 partids[blobindex + test_blob - 1] == biggestpart) {
816 coord =
FCOORD((blobcoords[blobindex + test_blob - 1].left() +
817 blobcoords[blobindex + test_blob - 1].right()) /
819 blobcoords[blobindex + test_blob - 1].bottom());
820 diff = m * coord.
x() + c - coord.
y();
822 tprintf(
"Diff of common blob to suspect part=%g at (%g,%g)\n", diff, coord.
x(),
825 if (diff < jumplimit && -diff < jumplimit) {
833 "Merged %d blobs back into part %d from %d starting at "
835 runlength, biggestpart, prevpart, blobcoords[startx].left(),
836 blobcoords[startx].bottom());
// Move the whole run into the biggest partition.
// NOTE(review): no matching increment of partsizes[biggestpart] is visible
// here - confirm it exists in the elided lines.
839 partsizes[prevpart] -= runlength;
840 for (test_blob = startx; test_blob < blobindex; test_blob++) {
841 partids[test_blob] = biggestpart;
845 prevpart = partids[blobindex];
// NOTE(review): function header not visible; the fragment fills ydiffs from
// blob bottoms and a spline, matching get_ydiffs - confirm.
// Computes each blob's vertical offset from the spline and looks for the
// position where a sliding sum of absolute offsets is smallest.
// Start with the largest possible "best" sum so any real sum beats it.
879 bestsum =
static_cast<float>(INT32_MAX);
881 lastx = blobcoords[0].
left();
883 for (blobindex = 0; blobindex < blobcount; blobindex++) {
// Horizontal centre of the blob (integer halving via shift).
885 xcentre = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) >> 1;
// Accumulate the spline's step between the previous and current x.
887 drift += spline->
step(lastx, xcentre);
// Offset of the blob bottom from the spline at its centre.
889 diff = blobcoords[blobindex].
bottom();
890 diff -= spline->
y(xcentre);
892 ydiffs[blobindex] = diff;
// Drop the oldest term from the sliding sum.
// NOTE(review): the guard keeping blobindex - 3 non-negative is not visible -
// confirm.
895 diffsum -=
ABS(ydiffs[blobindex - 3]);
897 diffsum +=
ABS(diff);
// Once enough blobs are in the window, remember its middle blob if the sum
// is the smallest so far.
898 if (blobindex >= 2 && diffsum < bestsum) {
900 bestindex = blobindex - 1;
// NOTE(review): only the tail of the parameter list is visible; the
// parameters (drift, lastdelta, partcount) match choose_partition - confirm.
// Picks the partition whose stored offset is closest to diff (after removing
// drift), creating a new partition when none is within jumplimit and fewer
// than MAXPARTS exist.
917 float *drift,
float *lastdelta,
int *partcount
// Offset of this blob relative to the partition used last time.
931 delta = diff - partdiffs[lastpart] - *drift;
933 tprintf(
"Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
// Only search for another partition when the offset exceeds half jumplimit.
935 if (
ABS(delta) > jumplimit / 2) {
937 bestdelta = diff - partdiffs[0] - *drift;
// Find the existing partition with the smallest absolute offset.
939 for (partition = 1; partition < *partcount; partition++) {
940 delta = diff - partdiffs[partition] - *drift;
941 if (
ABS(delta) <
ABS(bestdelta)) {
943 bestpart = partition;
// Nothing close enough: open a new partition if there is room.
948 if (
ABS(bestdelta) > jumplimit && *partcount <
MAXPARTS) {
949 bestpart = (*partcount)++;
951 partdiffs[bestpart] = diff - *drift;
// Staying in the same partition with a small change: update the drift.
958 if (bestpart == lastpart &&
959 (
ABS(delta - *lastdelta) < jumplimit / 2 ||
ABS(delta) < jumplimit / 2)) {
// NOTE(review): this evaluates to *drift + delta / 3, not a weighted mean
// (which would divide by 4) - confirm this is intended.
961 *drift = (3 * *drift + delta) / 3;
// NOTE(review): function header not visible; presumably partition_coords -
// confirm.
// Copies the horizontal centre and bottom of every blob belonging to
// bestpart into xcoords / ycoords, counting them in pointcount.
991 for (blobindex = 0; blobindex < blobcount; blobindex++) {
992 if (partids[blobindex] == bestpart) {
// Integer halving via shift.
994 xcoords[pointcount] = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) >> 1;
995 ycoords[pointcount++] = blobcoords[blobindex].
bottom();
// NOTE(review): only the tail of the parameter list is visible; the
// parameters (degree, pointcount, xstarts) match segment_spline - confirm.
// Finds turning points in ycoords that differ by more than TURNLIMIT and
// places segment boundaries (xstarts) between consecutive turning points.
1013 int degree,
int pointcount,
1018 int lastmin, lastmax;
// The first segment starts one pixel before the first point; max_x is one
// pixel past the last point.
1023 xstarts[0] = xcoords[0] - 1;
1024 max_x = xcoords[pointcount - 1] + 1;
// Turning points are only searched for when there are more than 3 points.
1029 if (pointcount > 3) {
// lastmax / lastmin are indices into ycoords of the current extremes.
1031 lastmax = lastmin = 0;
1032 while (ptindex < pointcount - 1 && turncount <
SPLINESIZE - 1) {
// Local minimum at ptindex.
1034 if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
1035 if (ycoords[ptindex] < ycoords[lastmax] -
TURNLIMIT) {
// Record the preceding maximum unless it is already the last turn.
1036 if (turncount == 0 || turnpoints[turncount - 1] != lastmax) {
1038 turnpoints[turncount++] = lastmax;
1041 }
else if (ycoords[ptindex] < ycoords[lastmin]) {
// Local maximum at ptindex.
1047 if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
1048 if (ycoords[ptindex] > ycoords[lastmin] +
TURNLIMIT) {
// Record the preceding minimum unless it is already the last turn.
1049 if (turncount == 0 || turnpoints[turncount - 1] != lastmin) {
1051 turnpoints[turncount++] = lastmin;
1054 }
else if (ycoords[ptindex] > ycoords[lastmax]) {
// Handling of the point reached when the scan stops (ptindex).
1061 if (ycoords[ptindex] < ycoords[lastmax] -
TURNLIMIT &&
1062 (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
1065 turnpoints[turncount++] = lastmax;
1068 turnpoints[turncount++] = ptindex;
1070 }
else if (ycoords[ptindex] > ycoords[lastmin] +
TURNLIMIT
1072 && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
1075 turnpoints[turncount++] = lastmin;
1078 turnpoints[turncount++] = ptindex;
1080 }
else if (turncount > 0 && turnpoints[turncount - 1] == lastmin &&
1082 if (ycoords[ptindex] > ycoords[lastmax]) {
1083 turnpoints[turncount++] = ptindex;
1085 turnpoints[turncount++] = lastmax;
1087 }
else if (turncount > 0 && turnpoints[turncount - 1] == lastmax &&
1089 if (ycoords[ptindex] < ycoords[lastmin]) {
1090 turnpoints[turncount++] = ptindex;
1092 turnpoints[turncount++] = lastmin;
1098 tprintf(
"First turn is %d at (%d,%d)\n", turnpoints[0], xcoords[turnpoints[0]],
1099 ycoords[turnpoints[0]]);
// Place one segment boundary between each pair of consecutive turns.
1101 for (segment = 1; segment < turncount; segment++) {
// lastmax is reused here as the mid-height between the two turns.
1103 lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
// Rising section: advance while the next point is still at or below mid-height.
1106 if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) {
1108 for (ptindex = turnpoints[segment - 1] + 1;
1109 ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++) {
// Falling section: advance while the next point is still at or above
// mid-height.
1114 for (ptindex = turnpoints[segment - 1] + 1;
1115 ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++) {
// Boundary combines the crossing neighbourhood and the two turn positions.
1121 xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] + xcoords[turnpoints[segment - 1]] +
1122 xcoords[turnpoints[segment]] + 2) /
1126 tprintf(
"Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", segment,
1127 turnpoints[segment], xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
1128 ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
// The final boundary is one pixel past the last point.
1132 xstarts[segment] = max_x;
// NOTE(review): function header not visible; the use of baseline->step,
// xstarts and segments matches split_stepped_spline - confirm.
// Looks for an interior segment boundary where the baseline steps by more
// than jumplimit and replaces that boundary with two, via
// insert_spline_point.
1152 int startindex, centreindex, endindex;
1153 float leftcoord, rightcoord;
1154 int leftindex, rightindex;
// Only interior boundaries are examined.
1159 for (segment = 1; segment < segments - 1; segment++) {
// Step of the baseline between the midpoints of the two adjacent segments.
1160 step =
baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1161 (xstarts[segment] + xstarts[segment + 1]) / 2.0);
1165 if (step > jumplimit) {
// Locate the first point index at or beyond each of the three boundaries.
1166 while (xcoords[startindex] < xstarts[segment - 1]) {
1169 centreindex = startindex;
1170 while (xcoords[centreindex] < xstarts[segment]) {
1173 endindex = centreindex;
1174 while (xcoords[endindex] < xstarts[segment + 1]) {
1178 if (textord_debug_baselines) {
1179 tprintf(
"Too many segments to resegment spline!!\n");
// Candidate split positions one third of the way in from each side, both as
// point indices and as x coordinates.
1188 leftindex = (startindex + startindex + centreindex) / 3;
1189 rightindex = (centreindex + endindex + endindex) / 3;
1190 leftcoord = (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
1191 rightcoord = (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
// Move leftindex to the point nearest leftcoord.
1192 while (xcoords[leftindex] > leftcoord &&
1196 while (xcoords[leftindex] < leftcoord &&
1200 if (xcoords[leftindex] - leftcoord > leftcoord - xcoords[leftindex - 1]) {
// Move rightindex to the point nearest rightcoord.
1203 while (xcoords[rightindex] > rightcoord &&
1207 while (xcoords[rightindex] < rightcoord &&
1211 if (xcoords[rightindex] - rightcoord > rightcoord - xcoords[rightindex - 1]) {
1214 if (textord_debug_baselines) {
1215 tprintf(
"Splitting spline at %d with step %g at (%d,%d)\n", xstarts[segment],
1216 baseline->step((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1217 (xstarts[segment] + xstarts[segment + 1]) / 2.0),
1218 (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1219 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
1222 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2, segments);
1224 }
else if (textord_debug_baselines) {
1225 tprintf(
"Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", startindex,
// NOTE(review): only the tail of the parameter list is visible; the
// parameters (coord2, segments by reference) match insert_spline_point -
// confirm.
// Replaces the boundary at xstarts[segment] with two boundaries, coord1 and
// coord2, shifting later entries up by one.
1247 int coord2,
int &segments
// Shift entries above 'segment' one slot up, working from the end.
// NOTE(review): this writes xstarts[segments + 1]; the array must have room -
// confirm against the caller.
1251 for (index = segments; index > segment; index--) {
1252 xstarts[index + 1] = xstarts[index];
1255 xstarts[segment] = coord1;
1256 xstarts[segment + 1] = coord2;
// NOTE(review): function header not visible; the use of row, partids,
// partsizes, partcount and bestpart matches find_lesser_parts - confirm.
// Computes each partition's mean offset from the row baseline and picks the
// most populated partitions lying at least MINASCRISE above and below it.
1286 for (partition = 0; partition < partcount; partition++) {
1287 partsteps[partition] = 0.0;
1289 for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
// Horizontal centre of the blob (integer halving via shift).
1290 xcentre = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) >> 1;
// partids holds char; go through unsigned char so the index is non-negative.
1292 int part_id =
static_cast<int>(
static_cast<unsigned char>(partids[blobindex]));
1293 if (part_id != bestpart) {
// Track the longest run.
1295 if (runlength > biggestrun) {
1296 biggestrun = runlength;
// Sum of offsets from the baseline for this blob's partition.
1298 partsteps[part_id] += blobcoords[blobindex].
bottom() - row->
baseline.
y(xcentre);
1308 poscount = negcount = 0;
1310 for (partition = 0; partition < partcount; partition++) {
1311 if (partition != bestpart) {
// Convert sums to means, avoiding division by zero for empty partitions.
1313 if (partsizes[partition] == 0) {
1314 partsteps[partition] = 0;
1316 partsteps[partition] /= partsizes[partition];
// Largest partition at least MINASCRISE above the baseline.
1320 if (partsteps[partition] >=
MINASCRISE && partsizes[partition] > poscount) {
1321 poscount = partsizes[partition];
// Largest partition at least MINASCRISE below the baseline.
1323 if (partsteps[partition] <= -
MINASCRISE && partsizes[partition] > negcount) {
1325 bestneg = partsteps[partition];
1327 negcount = partsizes[partition];
// NOTE(review): divides by blobcount rather than partsizes[bestpart], and a
// guard against blobcount == 0 is not visible - confirm.
1332 partsteps[bestpart] /= blobcount;
// NOTE(review): function header not visible; the use of initialheight
// matches old_first_xheight - confirm.
// Estimates a line height from blob tops relative to the baseline, then
// classifies blobs as ascenders or x-height and sets the row's ascrise.
1364 if (blobcount > 1) {
// Collect each blob's height above the baseline, rounded to nearest int.
1365 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1366 xcentre = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) / 2;
1368 height =
static_cast<int>(blobcoords[blobindex].
top() -
baseline->y(xcentre) + 0.5);
1370 heightstat.
add(height, 1);
// Use the 0.25 fraction point, falling back to the 0.5 point if that is not
// positive.
1374 lineheight =
static_cast<int>(heightstat.
ile(0.25));
1375 if (lineheight <= 0) {
1376 lineheight =
static_cast<int>(heightstat.
ile(0.5));
1379 lineheight = initialheight;
// Height of the first blob above the baseline at its centre, rounded.
1383 static_cast<int>(blobcoords[0].
top() -
1384 baseline->y((blobcoords[0].left() + blobcoords[0].
right()) / 2) + 0.5);
1389 for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1390 xcentre = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) / 2;
1391 diff = blobcoords[blobindex].
top() -
baseline->y(xcentre);
// More than jumplimit above the line height: the ascender branch.
1393 if (diff > lineheight + jumplimit) {
1396 }
// Within jumplimit of the line height: the x-height branch.
else if (diff > lineheight - jumplimit) {
1404 xsum =
static_cast<float>(lineheight);
// NOTE(review): divides by asccount; the guard against zero is not visible -
// confirm.
1408 row->
ascrise = ascenders / asccount - xsum;
// NOTE(review): only part of the parameter list is visible; init_lineheight
// matches make_first_xheight - confirm.
// Builds a weighted histogram of blob heights above the baseline, records
// the horizontal extent of each height, and hands the top modes to
// pick_x_height.
1429 int init_lineheight,
// A blob bottom within kBaselineTouch of the baseline counts as touching it.
1442 const int kBaselineTouch = 2;
// Weight for such blobs when textord_ocropus_mode is on.
1443 const int kGoodStrength = 8;
// Blobs no taller than this fraction of init_lineheight are ignored.
1444 const float kMinHeight = 0.25;
1446 sign_bit = row->
xheight > 0 ? 1 : -1;
1451 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1452 int xcenter = (blobcoords[blobindex].
left() + blobcoords[blobindex].
right()) / 2;
1454 float bottomdiff = std::fabs(base - blobcoords[blobindex].bottom());
// Every blob has weight 1 unless ocropus mode is on and it touches the
// baseline.
1455 int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
// Height of the blob top above the baseline, rounded to nearest int.
1456 int height =
static_cast<int>(blobcoords[blobindex].
top() - base + 0.5);
1457 if (blobcoords[blobindex].height() > init_lineheight * kMinHeight) {
1459 heightstat.
add(height, strength);
// Track the rightmost and leftmost centre seen for this height.
// NOTE(review): height is used as an array index; any range check is not
// visible - confirm.
1461 if (xcenter > rights[height]) {
1462 rights[height] = xcenter;
// 0 in lefts[] means "not set yet".
1464 if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) {
1465 lefts[height] = xcenter;
1469 mode_count += strength;
// Threshold is 10% of the blob count, or of the weighted count when either
// condition below holds.
1473 mode_threshold =
static_cast<int>(blobcount * 0.1);
// NOTE(review): oldbl_dot_error_size defaults to 1.26, so this branch is
// taken by default regardless of oldbl_xhfix - confirm this is intended.
1474 if (oldbl_dot_error_size > 1 || oldbl_xhfix) {
1475 mode_threshold =
static_cast<int>(mode_count * 0.1);
1479 tprintf(
"blobcount=%d, mode_count=%d, mode_t=%d\n", blobcount, mode_count, mode_threshold);
1483 for (blobindex = 0; blobindex <
MODENUM; blobindex++) {
1484 tprintf(
"mode[%d]=%d ", blobindex, modelist[blobindex]);
1488 pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
// NOTE(review): only the tail of the parameter list is visible; modelist and
// modenum match find_top_modes - confirm.
// Fills modelist with up to modenum histogram values taken from stats.
1515 int modelist[],
int modenum
// Starts at the largest int so the first pass has no upper bound on the
// pile count.
1519 int last_max = INT32_MAX;
1525 for (mode_count = 0; mode_count < modenum; mode_count++) {
1527 for (i = 0; i < statnum; i++) {
// Tie-break term: an equal pile count at a higher index than the last pick.
1530 ((stats->
pile_count(i) == last_max) && (i > last_i))) {
1537 total_max += last_max;
// Tests whether the latest mode is small relative to the running total.
1538 if (last_max <= total_max / mode_factor) {
1541 modelist[mode_count] = mode;
// NOTE(review): the start of the parameter list is not visible; the
// parameters match pick_x_height - confirm.
// Searches the mode list for a pair (x, y) whose height ratio y/x lies in
// (1.2, 1.8), treating x as the x-height and y as the ascender height, and
// writes the result to row->xheight / row->ascrise. A visible fallback uses
// modelist[0] as the x-height.
1552 int modelist[],
int lefts[],
int rights[],
STATS *heightstat,
1553 int mode_threshold) {
// Declared int but used as a boolean flag.
1558 int found_one_bigger =
false;
1559 int best_x_height = 0;
1563 for (x = 0; x <
MODENUM; x++) {
1564 for (y = 0; y <
MODENUM; y++) {
// Both modes must be non-zero and x must be populated above the threshold; in
// ocropus mode the two modes' horizontal extents must also overlap.
1566 if (modelist[x] && modelist[y] && heightstat->
pile_count(modelist[x]) > mode_threshold &&
1567 (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
1568 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1569 ratio =
static_cast<float>(modelist[y]) /
static_cast<float>(modelist[x]);
1570 if (1.2 < ratio && ratio < 1.8) {
1572 best_x_height = modelist[x];
1573 num_in_best = heightstat->
pile_count(modelist[x]);
// Repeatedly look for a mode exactly one pixel taller than the current best
// x-height.
1577 found_one_bigger =
false;
1578 for (z = 0; z <
MODENUM; z++) {
// NOTE(review): the overlap test inside this z loop uses indices x and y, not
// z - confirm this is intended.
1579 if (modelist[z] == best_x_height + 1 &&
1580 (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
1581 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1582 ratio =
static_cast<float>(modelist[y]) /
static_cast<float>(modelist[z]);
// Accept it if the ratio still fits and it has more than half the count.
1583 if ((1.2 < ratio && ratio < 1.8) &&
1585 heightstat->
pile_count(modelist[z]) > num_in_best * 0.5) {
1587 found_one_bigger =
true;
1592 }
while (found_one_bigger);
// Same idea for the ascender height: look for taller modes that still fit.
1596 best_asc = modelist[y];
1597 num_in_best = heightstat->
pile_count(modelist[y]);
1601 found_one_bigger =
false;
1602 for (z = 0; z <
MODENUM; z++) {
// NOTE(review): as above, the overlap test uses x and y rather than z.
1603 if (modelist[z] > best_asc &&
1604 (!textord_ocropus_mode || std::min(rights[modelist[x]], rights[modelist[y]]) >
1605 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1606 ratio =
static_cast<float>(modelist[z]) /
static_cast<float>(best_x_height);
1607 if ((1.2 < ratio && ratio < 1.8) &&
1609 heightstat->
pile_count(modelist[z]) > num_in_best * 0.5) {
1610 best_asc = modelist[z];
1611 found_one_bigger =
true;
1616 }
while (found_one_bigger);
// Publish the x-height, and the ascender rise as the difference between the
// two heights.
1618 row->
xheight =
static_cast<float>(best_x_height);
1619 row->
ascrise =
static_cast<float>(best_asc) - best_x_height;
// Fallback path: start from the first mode and grow it one pixel at a time
// while a well-populated neighbour exists.
1626 best_x_height = modelist[0];
1627 num_in_best = heightstat->
pile_count(best_x_height);
1630 found_one_bigger =
false;
1631 for (z = 1; z <
MODENUM; z++) {
1633 if ((modelist[z] == best_x_height + 1) &&
1634 (heightstat->
pile_count(modelist[z]) > num_in_best * 0.5)) {
1636 found_one_bigger =
true;
1640 }
while (found_one_bigger);
1643 row->
xheight =
static_cast<float>(best_x_height);
// NOTE(review): the lines below are bare macro and function signatures with
// no bodies or terminators. They appear to be a reference listing of symbols
// used by the code above rather than compilable declarations - confirm
// before relying on them.
#define BOOL_VAR(name, val, comment)
#define INT_VAR(name, val, comment)
#define double_VAR(name, val, comment)
#define DESCENDER_FRACTION
#define X_HEIGHT_FRACTION
#define MIN_DESC_FRACTION
#define MAXHEIGHTVARIANCE
int segment_spline(TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[])
int get_ydiffs(TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
bool textord_show_final_rows
void make_first_baseline(TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit)
void find_top_modes(STATS *stats, int statnum, int modelist[], int modenum)
void tprintf(const char *format,...)
void pick_x_height(TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)
void insert_spline_point(int xstarts[], int segment, int coord1, int coord2, int &segments)
int textord_spline_medianwin
void old_first_xheight(TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit)
int partition_coords(TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[])
int choose_partition(float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount)
void make_holed_baseline(TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient)
const int kMinModeFactorOcropus
bool split_stepped_spline(QSPLINE *baseline, float jumplimit, int *xcoords, int *xstarts, int &segments)
int partition_line(TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[])
void find_lesser_parts(TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart)
void make_first_xheight(TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit)
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
void merge_oldbl_parts(TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
int get_blob_coords(TO_ROW *row, int32_t lineheight, TBOX *blobcoords, bool &holed_line, int &outcount)
BLOBNBOX_LIST * blob_list()
static const double kXHeightFraction
void Add(const ICOORD &pt)
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
void add(double x, double y)
double step(double x1, double x2)
TDimension height() const
TDimension bottom() const
void add(int32_t value, int32_t count)
int32_t pile_count(int32_t value) const
int32_t get_total() const
double ile(double frac) const
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
void compute_block_xheight(TO_BLOCK *block, float gradient)