tesseract  5.0.0
cjkpitch.cpp
Go to the documentation of this file.
1 // File: cjkpitch.cpp
3 // Description: Code to determine fixed pitchness and the pitch if fixed,
4 // for CJK text.
5 // Author: takenaka@google.com (Hiroshi Takenaka)
6 //
7 // Copyright 2011 Google Inc. All Rights Reserved.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "cjkpitch.h"
21 #include "topitch.h"
22 #include "tovars.h"
23 
24 #include <algorithm> // for std::sort
25 #include <cmath>
26 #include <vector> // for std::vector
27 
28 namespace tesseract {
29 
30 static BOOL_VAR(textord_space_size_is_variable, false,
31  "If true, word delimiter spaces are assumed to have "
32  "variable width, even though characters have fixed pitch.");
33 
34 // Allow +/-10% error for character pitch / body size.
35 static const float kFPTolerance = 0.1f;
36 
37 // Minimum ratio of "good" character pitch for a row to be considered
38 // to be fixed-pitch.
39 static const float kFixedPitchThreshold = 0.35f;
40 
41 // rank statistics for a small collection of float values.
42 class SimpleStats {
43 public:
44  SimpleStats() = default;
45  ~SimpleStats() = default;
46 
47  void Clear() {
48  values_.clear();
49  finalized_ = false;
50  }
51 
52  void Add(float value) {
53  values_.push_back(value);
54  finalized_ = false;
55  }
56 
57  void Finish() {
58  std::sort(values_.begin(), values_.end());
59  finalized_ = true;
60  }
61 
62  float ile(double frac) {
63  if (!finalized_) {
64  Finish();
65  }
66  if (values_.empty()) {
67  return 0.0f;
68  }
69  if (frac >= 1.0) {
70  return values_.back();
71  }
72  if (frac <= 0.0 || values_.size() == 1) {
73  return values_[0];
74  }
75  int index = static_cast<int>((values_.size() - 1) * frac);
76  float reminder = (values_.size() - 1) * frac - index;
77 
78  return values_[index] * (1.0f - reminder) + values_[index + 1] * reminder;
79  }
80 
81  float median() {
82  return ile(0.5);
83  }
84 
85  float minimum() {
86  if (!finalized_) {
87  Finish();
88  }
89  if (values_.empty()) {
90  return 0.0f;
91  }
92  return values_[0];
93  }
94 
95  bool empty() const {
96  return values_.empty();
97  }
98 
99  int size() const {
100  return values_.size();
101  }
102 
103 private:
104  bool finalized_ = false;
105  std::vector<float> values_;
106 };
107 
108 // statistics for a small collection of float pairs (x, y).
109 // EstimateYFor(x, r) returns the estimated y at x, based on
110 // existing samples between x*(1-r) ~ x*(1+r).
112 public:
113  struct float_pair {
114  float x, y;
115  int vote;
116  };
117 
118  LocalCorrelation() : finalized_(false) {}
119  ~LocalCorrelation() = default;
120 
121  void Finish() {
122  std::sort(values_.begin(), values_.end(), float_pair_compare);
123  finalized_ = true;
124  }
125 
126  void Clear() {
127  finalized_ = false;
128  }
129 
130  void Add(float x, float y, int v) {
131  struct float_pair value;
132  value.x = x;
133  value.y = y;
134  value.vote = v;
135  values_.push_back(value);
136  finalized_ = false;
137  }
138 
139  float EstimateYFor(float x, float r) {
140  ASSERT_HOST(finalized_);
141  unsigned start = 0, end = values_.size();
142  // Because the number of samples (used_) is assumed to be small,
143  // just use linear search to find values within the range.
144  while (start < values_.size() && values_[start].x < x * (1 - r)) {
145  start++;
146  }
147  while (end > 0 && values_[end - 1].x > x * (1 + r)) {
148  end--;
149  }
150 
151  // Fall back to the global average if there are no data within r
152  // of x.
153  if (start >= end) {
154  start = 0;
155  end = values_.size();
156  }
157 
158  // Compute weighted average of the values.
159  float rc = 0;
160  int vote = 0;
161  for (auto i = start; i < end; i++) {
162  rc += values_[i].vote * x * values_[i].y / values_[i].x;
163  vote += values_[i].vote;
164  }
165 
166  return vote == 0 ? 0.0f : rc / vote;
167  }
168 
169 private:
170  static bool float_pair_compare(const float_pair f_a, const float_pair f_b) {
171  return f_a.x < f_b.x;
172  }
173 
174  bool finalized_;
175  std::vector<struct float_pair> values_;
176 };
177 
178 // Class to represent a character on a fixed pitch row. A FPChar may
179 // consist of multiple blobs (BLOBNBOX's).
180 class FPChar {
181 public:
183 
185  : box_()
186  , real_body_()
187  , from_(nullptr)
188  , to_(nullptr)
189  , num_blobs_(0)
190  , max_gap_(0)
191  , final_(false)
192  , alignment_(ALIGN_UNKNOWN)
193  , merge_to_prev_(false)
194  , delete_flag_(false) {}
195 
196  // Initialize from blob.
197  void Init(BLOBNBOX *blob) {
198  box_ = blob->bounding_box();
199  real_body_ = box_;
200  from_ = to_ = blob;
201  num_blobs_ = 1;
202  }
203 
204  // Merge this character with "next". The "next" character should
205  // consist of succeeding blobs on the same row.
206  void Merge(const FPChar &next) {
207  int gap = real_body_.x_gap(next.real_body_);
208  if (gap > max_gap_) {
209  max_gap_ = gap;
210  }
211 
212  box_ += next.box_;
213  real_body_ += next.real_body_;
214  to_ = next.to_;
215  num_blobs_ += next.num_blobs_;
216  }
217 
218  // Accessors.
219  const TBOX &box() const {
220  return box_;
221  }
222  void set_box(const TBOX &box) {
223  box_ = box;
224  }
225  const TBOX &real_body() const {
226  return real_body_;
227  }
228 
229  bool is_final() const {
230  return final_;
231  }
232  void set_final(bool flag) {
233  final_ = flag;
234  }
235 
236  const Alignment &alignment() const {
237  return alignment_;
238  }
240  alignment_ = alignment;
241  }
242 
243  bool merge_to_prev() const {
244  return merge_to_prev_;
245  }
246  void set_merge_to_prev(bool flag) {
247  merge_to_prev_ = flag;
248  }
249 
250  bool delete_flag() const {
251  return delete_flag_;
252  }
253  void set_delete_flag(bool flag) {
254  delete_flag_ = flag;
255  }
256 
257  int max_gap() const {
258  return max_gap_;
259  }
260 
261  int num_blobs() const {
262  return num_blobs_;
263  }
264 
265 private:
266  TBOX box_; // Rectangle region considered to be occupied by this
267  // character. It could be bigger than the bounding box.
268  TBOX real_body_; // Real bounding box of this character.
269  BLOBNBOX *from_; // The first blob of this character.
270  BLOBNBOX *to_; // The last blob of this character.
271  int num_blobs_; // Number of blobs that belong to this character.
272  int max_gap_; // Maximum x gap between the blobs.
273 
274  bool final_; // True if alignment/fragmentation decision for this
275  // character is finalized.
276 
277  Alignment alignment_; // Alignment status.
278  bool merge_to_prev_; // True if this is a fragmented blob that
279  // needs to be merged to the previous
280  // character.
281 
282  int delete_flag_; // True if this character is merged to another
283  // one and needs to be deleted.
284 };
285 
286 // Class to represent a fixed pitch row, as a linear collection of
287 // FPChar's.
288 class FPRow {
289 public:
290  FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), heights_(), characters_() {}
291 
292  ~FPRow() = default;
293 
294  // Initialize from TD_ROW.
295  void Init(TO_ROW *row);
296 
297  // Estimate character pitch of this row, based on current alignment
298  // status of underlying FPChar's. The argument pass1 can be set to
299  // true if the function is called after Pass1Analyze(), to eliminate
300  // some redundant computation.
301  void EstimatePitch(bool pass1);
302 
303  // Check each character if it has good character pitches between its
304  // predecessor and its successor and set its alignment status. If
305  // we already calculated the estimated pitch for this row, the value
306  // is used. If we didn't, a character is considered to be good, if
307  // the pitches between its predecessor and its successor are almost
308  // equal.
309  void Pass1Analyze();
310 
311  // Find characters that fit nicely into one imaginary body next to a
312  // character which is already finalized. Then mark them as character
313  // fragments.
314  bool Pass2Analyze();
315 
316  // Merge FPChars marked as character fragments into one.
317  void MergeFragments();
318 
319  // Finalize characters that are already large enough and cannot be
320  // merged with others any more.
321  void FinalizeLargeChars();
322 
323  // Output pitch estimation results to attributes of TD_ROW.
324  void OutputEstimations();
325 
326  void DebugOutputResult(int row_index);
327 
328  int good_pitches() {
329  return good_pitches_.size();
330  }
331 
332  float pitch() {
333  return pitch_;
334  }
335 
336  float estimated_pitch() {
337  return estimated_pitch_;
338  }
339 
340  void set_estimated_pitch(float v) {
341  estimated_pitch_ = v;
342  }
343 
344  float height() {
345  return height_;
346  }
347 
349  if (good_pitches_.size() < 2) {
350  return -1.0;
351  }
352  return height_ / good_pitches_.median();
353  }
354 
355  float gap() {
356  return gap_;
357  }
358 
359  size_t num_chars() {
360  return characters_.size();
361  }
362  FPChar *character(int i) {
363  return &characters_[i];
364  }
365 
366  const TBOX &box(int i) {
367  return characters_[i].box();
368  }
369 
370  const TBOX &real_body(int i) {
371  return characters_[i].real_body();
372  }
373 
374  bool is_box_modified(int i) {
375  return !(characters_[i].box() == characters_[i].real_body());
376  }
377 
378  float center_x(int i) {
379  return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
380  }
381 
382  bool is_final(int i) {
383  return characters_[i].is_final();
384  }
385 
386  void finalize(int i) {
387  characters_[i].set_final(true);
388  }
389 
390  bool is_good(int i) {
391  return characters_[i].alignment() == FPChar::ALIGN_GOOD;
392  }
393 
394  void mark_good(int i) {
395  characters_[i].set_alignment(FPChar::ALIGN_GOOD);
396  }
397 
398  void mark_bad(int i) {
399  characters_[i].set_alignment(FPChar::ALIGN_BAD);
400  }
401 
402  void clear_alignment(int i) {
403  characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
404  }
405 
406 private:
407  static float x_overlap_fraction(const TBOX &box1, const TBOX &box2) {
408  if (std::min(box1.width(), box2.width()) == 0) {
409  return 0.0;
410  }
411  return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width()));
412  }
413 
414  static bool mostly_overlap(const TBOX &box1, const TBOX &box2) {
415  return x_overlap_fraction(box1, box2) > 0.9;
416  }
417 
418  static bool significant_overlap(const TBOX &box1, const TBOX &box2) {
419  if (std::min(box1.width(), box2.width()) == 0) {
420  return false;
421  }
422  int overlap = -box1.x_gap(box2);
423  return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
424  }
425 
426  static float box_pitch(const TBOX &ref, const TBOX &box) {
427  return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
428  }
429 
430  // Check if two neighboring characters satisfy the fixed pitch model.
431  static bool is_good_pitch(float pitch, const TBOX &box1, const TBOX &box2) {
432  // Character box shouldn't exceed pitch.
433  if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
434  box2.width() >= pitch * (1.0 + kFPTolerance) ||
435  box1.height() >= pitch * (1.0 + kFPTolerance) ||
436  box2.height() >= pitch * (1.0 + kFPTolerance)) {
437  return false;
438  }
439 
440  const float real_pitch = box_pitch(box1, box2);
441  if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) {
442  return true;
443  }
444 
445  if (textord_space_size_is_variable) {
446  // Hangul characters usually have fixed pitch, but words are
447  // delimited by space which can be narrower than characters.
448  if (real_pitch > pitch && real_pitch < pitch * 2.0 && real_pitch - box1.x_gap(box2) < pitch) {
449  return true;
450  }
451  }
452  return false;
453  }
454 
455  static bool is_interesting_blob(const BLOBNBOX *blob) {
456  return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
457  }
458 
459  // Cleanup chars that are already merged to others.
460  void DeleteChars() {
461  unsigned index = 0;
462  for (unsigned i = 0; i < characters_.size(); ++i) {
463  if (!characters_[i].delete_flag()) {
464  if (index != i) {
465  characters_[index] = characters_[i];
466  }
467  index++;
468  }
469  }
470  characters_.resize(index);
471  }
472 
473  float pitch_ = 0.0f; // Character pitch.
474  float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered
475  // to be good enough.
476  float height_ = 0.0f; // Character height.
477  float gap_ = 0.0f; // Minimum gap between characters.
478 
479  // Pitches between any two successive characters.
480  SimpleStats all_pitches_;
481  // Gaps between any two successive characters.
482  SimpleStats all_gaps_;
483  // Pitches between any two successive characters that are consistent
484  // with the fixed pitch model.
485  SimpleStats good_pitches_;
486  // Gaps between any two successive characters that are consistent
487  // with the fixed pitch model.
488  SimpleStats good_gaps_;
489 
490  SimpleStats heights_;
491 
492  std::vector<FPChar> characters_;
493  TO_ROW *real_row_ = nullptr; // Underlying TD_ROW for this row.
494 };
495 
496 void FPRow::Init(TO_ROW *row) {
497  ASSERT_HOST(row != nullptr);
498  ASSERT_HOST(row->xheight > 0);
499  real_row_ = row;
500  real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
501 
502  BLOBNBOX_IT blob_it = row->blob_list();
503  // Initialize characters_ and compute the initial estimation of
504  // character height.
505  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
506  if (is_interesting_blob(blob_it.data())) {
507  FPChar fp_char;
508  fp_char.Init(blob_it.data());
509  // Merge unconditionally if two blobs overlap.
510  if (!characters_.empty() && significant_overlap(fp_char.box(), characters_.back().box())) {
511  characters_.back().Merge(fp_char);
512  } else {
513  characters_.push_back(fp_char);
514  }
515  TBOX bound = blob_it.data()->bounding_box();
516  if (bound.height() * 3.0 > bound.width()) {
517  heights_.Add(bound.height());
518  }
519  }
520  }
521  heights_.Finish();
522  height_ = heights_.ile(0.875);
523 }
524 
526  if (good_pitches_.empty()) {
527  pitch_ = 0.0f;
528  real_row_->pitch_decision = PITCH_CORR_PROP;
529  return;
530  }
531 
532  pitch_ = good_pitches_.median();
533  real_row_->fixed_pitch = pitch_;
534  // good_gaps_.ile(0.125) can be large if most characters on the row
535  // are skinny. Use pitch_ - height_ instead if it's smaller, but
536  // positive.
537  real_row_->kern_size = real_row_->pr_nonsp =
538  std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
539  real_row_->body_size = pitch_ - real_row_->kern_size;
540 
541  if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
542  // If more than half of the characters of a line don't fit to the
543  // fixed pitch model, consider the line to be proportional. 50%
544  // seems to be a good threshold in practice as well.
545  // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
546  // real_row_ as a partial estimation result and try to use them in the
547  // normalization process.
548  real_row_->pitch_decision = PITCH_CORR_PROP;
549  return;
550  } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
551  real_row_->pitch_decision = PITCH_DEF_FIXED;
552  } else {
553  real_row_->pitch_decision = PITCH_CORR_FIXED;
554  }
555 
556  real_row_->space_size = real_row_->pr_space = pitch_;
557  // Set min_space to 50% of character pitch so that we can break CJK
558  // text at a half-width space after punctuation.
559  real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
560 
561  // Don't consider a quarter space as a real space, because it's used
562  // for line justification in traditional Japanese books.
563  real_row_->max_nonspace =
564  std::max(pitch_ * 0.25 + good_gaps_.minimum(), static_cast<double>(good_gaps_.ile(0.875)));
565 
566  int space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
567  static_cast<int>(real_row_->xheight));
568 
569  // Make max_nonspace larger than any intra-character gap so that
570  // make_prop_words() won't break a row at the middle of a character.
571  for (size_t i = 0; i < num_chars(); ++i) {
572  if (characters_[i].max_gap() > real_row_->max_nonspace) {
573  real_row_->max_nonspace = characters_[i].max_gap();
574  }
575  }
576  real_row_->space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
577  static_cast<int>(real_row_->xheight));
578  real_row_->used_dm_model = false;
579 
580  // Setup char_cells.
581  ICOORDELT_IT cell_it = &real_row_->char_cells;
582  auto *cell = new ICOORDELT(real_body(0).left(), 0);
583  cell_it.add_after_then_move(cell);
584 
585  int right = real_body(0).right();
586  for (size_t i = 1; i < num_chars(); ++i) {
587  // Put a word break if gap between two characters is bigger than
588  // space_threshold. Don't break if none of two characters
589  // couldn't be "finalized", because maybe they need to be merged
590  // to one character.
591  if ((is_final(i - 1) || is_final(i)) &&
592  real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
593  cell = new ICOORDELT(right + 1, 0);
594  cell_it.add_after_then_move(cell);
595  while (right + pitch_ < box(i).left()) {
596  right += pitch_;
597  cell = new ICOORDELT(right + 1, 0);
598  cell_it.add_after_then_move(cell);
599  }
600  right = box(i).left();
601  }
602  cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
603  cell_it.add_after_then_move(cell);
604  right = real_body(i).right();
605  }
606 
607  cell = new ICOORDELT(right + 1, 0);
608  cell_it.add_after_then_move(cell);
609 
610  // TODO(takenaka): add code to store alignment/fragmentation
611  // information to blobs so that it can be reused later, e.g. in
612  // recognition phase.
613 }
614 
615 void FPRow::EstimatePitch(bool pass1) {
616  good_pitches_.Clear();
617  all_pitches_.Clear();
618  good_gaps_.Clear();
619  all_gaps_.Clear();
620  heights_.Clear();
621  if (num_chars() == 0) {
622  return;
623  }
624 
625  int32_t cx0, cx1;
626  bool prev_was_good = is_good(0);
627  cx0 = center_x(0);
628 
629  heights_.Add(box(0).height());
630  for (size_t i = 1; i < num_chars(); i++) {
631  cx1 = center_x(i);
632  int32_t pitch = cx1 - cx0;
633  int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
634 
635  heights_.Add(box(i).height());
636  // Ignore if the pitch is too close. But don't ignore wide pitch
637  // may be the result of large tracking.
638  if (pitch > height_ * 0.5) {
639  all_pitches_.Add(pitch);
640  all_gaps_.Add(gap);
641  if (is_good(i)) {
642  // In pass1 (after Pass1Analyze()), all characters marked as
643  // "good" have a good consistent pitch with their previous
644  // characters. However, it's not true in pass2 and a good
645  // character may have a good pitch only between its successor.
646  // So we collect only pitch values between two good
647  // characters. and within tolerance in pass2.
648  if (pass1 ||
649  (prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
650  good_pitches_.Add(pitch);
651  if (!is_box_modified(i - 1) && !is_box_modified(i)) {
652  good_gaps_.Add(gap);
653  }
654  }
655  prev_was_good = true;
656  } else {
657  prev_was_good = false;
658  }
659  }
660  cx0 = cx1;
661  }
662 
663  good_pitches_.Finish();
664  all_pitches_.Finish();
665  good_gaps_.Finish();
666  all_gaps_.Finish();
667  heights_.Finish();
668 
669  height_ = heights_.ile(0.875);
670  if (all_pitches_.empty()) {
671  pitch_ = 0.0f;
672  gap_ = 0.0f;
673  } else if (good_pitches_.size() < 2) {
674  // We don't have enough data to estimate the pitch of this row yet.
675  // Use median of all pitches as the initial guess.
676  pitch_ = all_pitches_.median();
677  ASSERT_HOST(pitch_ > 0.0f);
678  gap_ = all_gaps_.ile(0.125);
679  } else {
680  pitch_ = good_pitches_.median();
681  ASSERT_HOST(pitch_ > 0.0f);
682  gap_ = good_gaps_.ile(0.125);
683  }
684 }
685 
686 void FPRow::DebugOutputResult(int row_index) {
687  if (num_chars() > 0) {
688  tprintf(
689  "Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
690  "space_size=%f, space_threshold=%d, xheight=%f\n",
691  row_index, static_cast<int>(real_row_->pitch_decision), real_row_->fixed_pitch,
692  real_row_->max_nonspace, real_row_->space_size, real_row_->space_threshold,
693  real_row_->xheight);
694 
695  for (unsigned i = 0; i < num_chars(); i++) {
696  tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ", i, is_final(i), is_good(i),
697  character(i)->num_blobs());
698  box(i).print();
699  }
700  }
701 }
702 
704  if (num_chars() < 2) {
705  return;
706  }
707 
708  if (estimated_pitch_ > 0.0f) {
709  for (size_t i = 2; i < num_chars(); i++) {
710  if (is_good_pitch(estimated_pitch_, box(i - 2), box(i - 1)) &&
711  is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
712  mark_good(i - 1);
713  }
714  }
715  } else {
716  for (size_t i = 2; i < num_chars(); i++) {
717  if (is_good_pitch(box_pitch(box(i - 2), box(i - 1)), box(i - 1), box(i))) {
718  mark_good(i - 1);
719  }
720  }
721  }
722  character(0)->set_alignment(character(1)->alignment());
723  character(num_chars() - 1)->set_alignment(character(num_chars() - 2)->alignment());
724 }
725 
727  bool changed = false;
728  if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
729  return false;
730  }
731  for (size_t i = 0; i < num_chars(); i++) {
732  if (is_final(i)) {
733  continue;
734  }
735 
736  FPChar::Alignment alignment = character(i)->alignment();
737  bool intersecting = false;
738  bool not_intersecting = false;
739 
740  if (i < num_chars() - 1 && is_final(i + 1)) {
741  // Next character is already finalized. Estimate the imaginary
742  // body including this character based on the character. Skip
743  // whitespace if necessary.
744  bool skipped_whitespaces = false;
745  float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
746  while (c1 > box(i).right()) {
747  skipped_whitespaces = true;
748  c1 -= estimated_pitch_;
749  }
750  TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
751 
752  // Collect all characters that mostly fit in the region.
753  // Also, their union height shouldn't be too big.
754  int j = i;
755  TBOX merged;
756  while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
757  merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) {
758  merged += box(j);
759  j--;
760  }
761 
762  if (j >= 0 && significant_overlap(ibody, box(j))) {
763  // character(j) lies on the character boundary and doesn't fit
764  // well into the imaginary body.
765  if (!is_final(j)) {
766  intersecting = true;
767  }
768  } else {
769  not_intersecting = true;
770  if (i - j > 0) {
771  // Merge character(j+1) ... character(i) because they fit
772  // into the body nicely.
773  if (i - j == 1) {
774  // Only one char in the imaginary body.
775  if (!skipped_whitespaces) {
776  mark_good(i);
777  }
778  // set ibody as bounding box of this character to get
779  // better pitch analysis result for halfwidth glyphs
780  // followed by a halfwidth space.
781  if (box(i).width() <= estimated_pitch_ * 0.5) {
782  ibody += box(i);
783  character(i)->set_box(ibody);
784  }
785  character(i)->set_merge_to_prev(false);
786  finalize(i);
787  } else {
788  for (int k = i; k > j + 1; k--) {
789  character(k)->set_merge_to_prev(true);
790  }
791  }
792  }
793  }
794  }
795  if (i > 0 && is_final(i - 1)) {
796  // Now we repeat everything from the opposite side. Previous
797  // character is already finalized. Estimate the imaginary body
798  // including this character based on the character.
799  bool skipped_whitespaces = false;
800  float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
801  while (c1 < box(i).left()) {
802  skipped_whitespaces = true;
803  c1 += estimated_pitch_;
804  }
805  TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
806 
807  size_t j = i;
808  TBOX merged;
809  while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
810  merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) {
811  merged += box(j);
812  j++;
813  }
814 
815  if (j < num_chars() && significant_overlap(ibody, box(j))) {
816  if (!is_final(j)) {
817  intersecting = true;
818  }
819  } else {
820  not_intersecting = true;
821  if (j - i > 0) {
822  if (j - i == 1) {
823  if (!skipped_whitespaces) {
824  mark_good(i);
825  }
826  if (box(i).width() <= estimated_pitch_ * 0.5) {
827  ibody += box(i);
828  character(i)->set_box(ibody);
829  }
830  character(i)->set_merge_to_prev(false);
831  finalize(i);
832  } else {
833  for (size_t k = i + 1; k < j; k++) {
834  character(k)->set_merge_to_prev(true);
835  }
836  }
837  }
838  }
839  }
840 
841  // This character doesn't fit well into the estimated imaginary
842  // bodies. Mark it as bad.
843  if (intersecting && !not_intersecting) {
844  mark_bad(i);
845  }
846  if (character(i)->alignment() != alignment || character(i)->merge_to_prev()) {
847  changed = true;
848  }
849  }
850 
851  return changed;
852 }
853 
855  int last_char = 0;
856 
857  for (size_t j = 0; j < num_chars(); ++j) {
858  if (character(j)->merge_to_prev()) {
859  character(last_char)->Merge(*character(j));
860  character(j)->set_delete_flag(true);
861  clear_alignment(last_char);
862  character(j - 1)->set_merge_to_prev(false);
863  } else {
864  last_char = j;
865  }
866  }
867  DeleteChars();
868 }
869 
871  float row_pitch = estimated_pitch();
872  for (size_t i = 0; i < num_chars(); i++) {
873  if (is_final(i)) {
874  continue;
875  }
876 
877  // Finalize if both neighbors are finalized. We have no other choice.
878  if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
879  finalize(i);
880  continue;
881  }
882 
883  float cx = center_x(i);
884  TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
885  if (i > 0) {
886  // The preceding character significantly intersects with the
887  // imaginary body of this character. Let Pass2Analyze() handle
888  // this case.
889  if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) {
890  continue;
891  }
892  if (!is_final(i - 1)) {
893  TBOX merged = box(i);
894  merged += box(i - 1);
895  if (merged.width() < row_pitch) {
896  continue;
897  }
898  // This character cannot be finalized yet because it can be
899  // merged with the previous one. Again, let Pass2Analyze()
900  // handle this case.
901  }
902  }
903  if (i < num_chars() - 1) {
904  if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) {
905  continue;
906  }
907  if (!is_final(i + 1)) {
908  TBOX merged = box(i);
909  merged += box(i + 1);
910  if (merged.width() < row_pitch) {
911  continue;
912  }
913  }
914  }
915  finalize(i);
916  }
917 
918  // Update alignment decision. We only consider finalized characters
919  // in pass2. E.g. if a finalized character C has another finalized
920  // character L on its left and a not-finalized character R on its
921  // right, we mark C as good if the pitch between C and L is good,
922  // regardless of the pitch between C and R.
923  for (size_t i = 0; i < num_chars(); i++) {
924  if (!is_final(i)) {
925  continue;
926  }
927  bool good_pitch = false;
928  bool bad_pitch = false;
929  if (i > 0 && is_final(i - 1)) {
930  if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
931  good_pitch = true;
932  } else {
933  bad_pitch = true;
934  }
935  }
936  if (i < num_chars() - 1 && is_final(i + 1)) {
937  if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
938  good_pitch = true;
939  } else {
940  bad_pitch = true;
941  }
942  }
943  if (good_pitch && !bad_pitch) {
944  mark_good(i);
945  } else if (!good_pitch && bad_pitch) {
946  mark_bad(i);
947  }
948  }
949 }
950 
951 class FPAnalyzer {
952 public:
953  FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
954  ~FPAnalyzer() = default;
955 
956  void Pass1Analyze() {
957  for (auto &row : rows_) {
958  row.Pass1Analyze();
959  }
960  }
961 
962  // Estimate character pitch for each row. The argument pass1 can be
963  // set to true if the function is called after Pass1Analyze(), to
964  // eliminate some redundant computation.
965  void EstimatePitch(bool pass1);
966 
968  if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) {
969  return false;
970  }
971  return true;
972  }
973 
974  void MergeFragments() {
975  for (auto &row : rows_) {
976  row.MergeFragments();
977  }
978  }
979 
981  for (auto &row : rows_) {
982  row.FinalizeLargeChars();
983  }
984  }
985 
986  bool Pass2Analyze() {
987  bool changed = false;
988  for (auto &row : rows_) {
989  if (row.Pass2Analyze()) {
990  changed = true;
991  }
992  }
993  return changed;
994  }
995 
997  for (auto &row : rows_) {
998  row.OutputEstimations();
999  }
1000  // Don't we need page-level estimation of gaps/spaces?
1001  }
1002 
1004  tprintf("FPAnalyzer: final result\n");
1005  for (size_t i = 0; i < rows_.size(); i++) {
1006  rows_[i].DebugOutputResult(i);
1007  }
1008  }
1009 
1010  size_t num_rows() {
1011  return rows_.size();
1012  }
1013 
1014  // Returns the upper limit for pass2 loop iteration.
1015  unsigned max_iteration() {
1016  // We're fixing at least one character per iteration. So basically
1017  // we shouldn't require more than max_chars_per_row_ iterations.
1018  return max_chars_per_row_ + 100;
1019  }
1020 
1021 private:
1022  ICOORD page_tr_;
1023  std::vector<FPRow> rows_;
1024  unsigned num_tall_rows_;
1025  unsigned num_bad_rows_;
1026  // TODO: num_empty_rows_ is incremented, but never used otherwise.
1027  unsigned num_empty_rows_;
1028  unsigned max_chars_per_row_;
1029 };
1030 
1031 FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
1032  : page_tr_(page_tr)
1033  , num_tall_rows_(0)
1034  , num_bad_rows_(0)
1035  , num_empty_rows_(0)
1036  , max_chars_per_row_(0) {
1037  TO_BLOCK_IT block_it(port_blocks);
1038 
1039  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1040  TO_BLOCK *block = block_it.data();
1041  if (!block->get_rows()->empty()) {
1042  ASSERT_HOST(block->xheight > 0);
1043  find_repeated_chars(block, false);
1044  }
1045  }
1046 
1047  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1048  TO_ROW_IT row_it = block_it.data()->get_rows();
1049  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1050  FPRow row;
1051  row.Init(row_it.data());
1052  rows_.push_back(row);
1053  size_t num_chars = rows_.back().num_chars();
1054  if (num_chars <= 1) {
1055  num_empty_rows_++;
1056  }
1057  if (num_chars > max_chars_per_row_) {
1058  max_chars_per_row_ = num_chars;
1059  }
1060  }
1061  }
1062 }
1063 
1064 void FPAnalyzer::EstimatePitch(bool pass1) {
1065  LocalCorrelation pitch_height_stats;
1066 
1067  num_tall_rows_ = 0;
1068  num_bad_rows_ = 0;
1069  pitch_height_stats.Clear();
1070  for (auto &row : rows_) {
1071  row.EstimatePitch(pass1);
1072  if (row.good_pitches()) {
1073  pitch_height_stats.Add(row.height() + row.gap(), row.pitch(), row.good_pitches());
1074  if (row.height_pitch_ratio() > 1.1) {
1075  num_tall_rows_++;
1076  }
1077  } else {
1078  num_bad_rows_++;
1079  }
1080  }
1081 
1082  pitch_height_stats.Finish();
1083  for (auto &row : rows_) {
1084  if (row.good_pitches() >= 5) {
1085  // We have enough evidences. Just use the pitch estimation
1086  // from this row.
1087  row.set_estimated_pitch(row.pitch());
1088  } else if (row.num_chars() > 1) {
1089  float estimated_pitch = pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f);
1090  // CJK characters are more likely to be fragmented than poorly
1091  // chopped. So trust the page-level estimation of character
1092  // pitch only if it's larger than row-level estimation or
1093  // row-level estimation is too large (2x bigger than row height).
1094  if (estimated_pitch > row.pitch() || row.pitch() > row.height() * 2.0) {
1095  row.set_estimated_pitch(estimated_pitch);
1096  } else {
1097  row.set_estimated_pitch(row.pitch());
1098  }
1099  }
1100  }
1101 }
1102 
1103 void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
1104  FPAnalyzer analyzer(page_tr, port_blocks);
1105  if (analyzer.num_rows() == 0) {
1106  return;
1107  }
1108 
1109  analyzer.Pass1Analyze();
1110  analyzer.EstimatePitch(true);
1111 
1112  // Perform pass1 analysis again with the initial estimation of row
1113  // pitches, for better estimation.
1114  analyzer.Pass1Analyze();
1115  analyzer.EstimatePitch(true);
1116 
1117  // Early exit if the page doesn't seem to contain fixed pitch rows.
1118  if (!analyzer.maybe_fixed_pitch()) {
1120  tprintf("Page doesn't seem to contain fixed pitch rows\n");
1121  }
1122  return;
1123  }
1124 
1125  unsigned iteration = 0;
1126  do {
1127  analyzer.MergeFragments();
1128  analyzer.FinalizeLargeChars();
1129  analyzer.EstimatePitch(false);
1130  iteration++;
1131  } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1132 
1134  tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", iteration,
1135  analyzer.max_iteration());
1136  }
1137 
1138  analyzer.OutputEstimations();
1140  analyzer.DebugOutputResult();
1141  }
1142 }
1143 
1144 } // namespace tesseract
#define ASSERT_HOST(x)
Definition: errcode.h:59
#define BOOL_VAR(name, val, comment)
Definition: params.h:359
@ TBOX
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1661
@ PITCH_DEF_FIXED
Definition: blobbox.h:49
@ PITCH_CORR_FIXED
Definition: blobbox.h:53
@ PITCH_CORR_PROP
Definition: blobbox.h:54
bool textord_debug_pitch_test
Definition: topitch.cpp:42
@ BTFT_LEADER
Definition: blobbox.h:117
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1103
const TBOX & bounding_box() const
Definition: blobbox.h:239
int32_t min_space
Definition: blobbox.h:669
ICOORDELT_LIST char_cells
Definition: blobbox.h:675
int32_t max_nonspace
Definition: blobbox.h:670
bool used_dm_model
Definition: blobbox.h:653
float space_size
Definition: blobbox.h:673
float fixed_pitch
Definition: blobbox.h:657
int32_t space_threshold
Definition: blobbox.h:671
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:608
PITCH_TYPE pitch_decision
Definition: blobbox.h:656
TO_ROW_LIST * get_rows()
Definition: blobbox.h:709
integer coordinate
Definition: points.h:36
TDimension left() const
Definition: rect.h:82
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
int x_gap(const TBOX &box) const
Definition: rect.h:238
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:128
void print() const
Definition: rect.h:289
TDimension right() const
Definition: rect.h:89
float ile(double frac)
Definition: cjkpitch.cpp:62
bool empty() const
Definition: cjkpitch.cpp:95
void Add(float value)
Definition: cjkpitch.cpp:52
void Add(float x, float y, int v)
Definition: cjkpitch.cpp:130
float EstimateYFor(float x, float r)
Definition: cjkpitch.cpp:139
bool merge_to_prev() const
Definition: cjkpitch.cpp:243
const Alignment & alignment() const
Definition: cjkpitch.cpp:236
bool delete_flag() const
Definition: cjkpitch.cpp:250
void Merge(const FPChar &next)
Definition: cjkpitch.cpp:206
const TBOX & real_body() const
Definition: cjkpitch.cpp:225
int max_gap() const
Definition: cjkpitch.cpp:257
void set_delete_flag(bool flag)
Definition: cjkpitch.cpp:253
void set_merge_to_prev(bool flag)
Definition: cjkpitch.cpp:246
int num_blobs() const
Definition: cjkpitch.cpp:261
const TBOX & box() const
Definition: cjkpitch.cpp:219
void set_box(const TBOX &box)
Definition: cjkpitch.cpp:222
void set_alignment(Alignment alignment)
Definition: cjkpitch.cpp:239
bool is_final() const
Definition: cjkpitch.cpp:229
void Init(BLOBNBOX *blob)
Definition: cjkpitch.cpp:197
void set_final(bool flag)
Definition: cjkpitch.cpp:232
void FinalizeLargeChars()
Definition: cjkpitch.cpp:870
void Init(TO_ROW *row)
Definition: cjkpitch.cpp:496
void MergeFragments()
Definition: cjkpitch.cpp:854
bool is_good(int i)
Definition: cjkpitch.cpp:390
void Pass1Analyze()
Definition: cjkpitch.cpp:703
void mark_bad(int i)
Definition: cjkpitch.cpp:398
bool is_box_modified(int i)
Definition: cjkpitch.cpp:374
const TBOX & real_body(int i)
Definition: cjkpitch.cpp:370
void mark_good(int i)
Definition: cjkpitch.cpp:394
const TBOX & box(int i)
Definition: cjkpitch.cpp:366
void DebugOutputResult(int row_index)
Definition: cjkpitch.cpp:686
bool is_final(int i)
Definition: cjkpitch.cpp:382
void finalize(int i)
Definition: cjkpitch.cpp:386
void clear_alignment(int i)
Definition: cjkpitch.cpp:402
FPChar * character(int i)
Definition: cjkpitch.cpp:362
void EstimatePitch(bool pass1)
Definition: cjkpitch.cpp:615
void OutputEstimations()
Definition: cjkpitch.cpp:525
float center_x(int i)
Definition: cjkpitch.cpp:378
float estimated_pitch()
Definition: cjkpitch.cpp:336
~FPRow()=default
bool Pass2Analyze()
Definition: cjkpitch.cpp:726
float height_pitch_ratio()
Definition: cjkpitch.cpp:348
size_t num_chars()
Definition: cjkpitch.cpp:359
void set_estimated_pitch(float v)
Definition: cjkpitch.cpp:340
unsigned max_iteration()
Definition: cjkpitch.cpp:1015
void EstimatePitch(bool pass1)
Definition: cjkpitch.cpp:1064
FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1031