tesseract  5.0.0
tesseract::GAPMAP Class Reference

#include <gap_map.h>

Public Member Functions

 GAPMAP (TO_BLOCK *block)
 
 ~GAPMAP ()
 
bool table_gap (int16_t left, int16_t right)
 

Detailed Description

Definition at line 18 of file gap_map.h.

Constructor & Destructor Documentation

◆ GAPMAP()

tesseract::GAPMAP::GAPMAP ( TO_BLOCK block)

Definition at line 36 of file gap_map.cpp.

38  {
39  TO_ROW *row; // current row
40  BLOBNBOX_IT blob_it; // iterator
41  TBOX blob_box;
42  TBOX prev_blob_box;
43  int16_t gap_width;
44  int16_t start_of_row;
45  int16_t end_of_row;
46  STATS xht_stats(0, 128);
47  int16_t min_quantum;
48  int16_t max_quantum;
49  int16_t i;
50 
51  /*
52  Find left and right extremes and bucket size
53 */
54  map = nullptr;
55  min_left = INT16_MAX;
56  max_right = -INT16_MAX;
57  total_rows = 0;
58  any_tabs = false;
59 
60  // row iterator
61  TO_ROW_IT row_it(block->get_rows());
62  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
63  row = row_it.data();
64  if (!row->blob_list()->empty()) {
65  total_rows++;
66  xht_stats.add(static_cast<int16_t>(floor(row->xheight + 0.5)), 1);
67  blob_it.set_to_list(row->blob_list());
68  start_of_row = blob_it.data()->bounding_box().left();
69  end_of_row = blob_it.data_relative(-1)->bounding_box().right();
70  if (min_left > start_of_row) {
71  min_left = start_of_row;
72  }
73  if (max_right < end_of_row) {
74  max_right = end_of_row;
75  }
76  }
77  }
78  if ((total_rows < 3) || (min_left >= max_right)) {
79  bucket_size = 0;
80  map_max = 0;
81  total_rows = 0;
82  min_left = max_right = 0;
83  return;
84  }
85  bucket_size = static_cast<int16_t>(floor(xht_stats.median() + 0.5)) / 2;
86  map_max = (max_right - min_left) / bucket_size;
87  map = new int16_t[map_max + 1];
88  for (i = 0; i <= map_max; i++) {
89  map[i] = 0;
90  }
91 
92  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
93  row = row_it.data();
94  if (!row->blob_list()->empty()) {
95  blob_it.set_to_list(row->blob_list());
96  blob_it.mark_cycle_pt();
97  blob_box = box_next(&blob_it);
98  prev_blob_box = blob_box;
99  if (gapmap_use_ends) {
100  /* Leading space */
101  gap_width = blob_box.left() - min_left;
102  if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) {
103  max_quantum = (blob_box.left() - min_left) / bucket_size;
104  if (max_quantum > map_max) {
105  max_quantum = map_max;
106  }
107  for (i = 0; i <= max_quantum; i++) {
108  map[i]++;
109  }
110  }
111  }
112  while (!blob_it.cycled_list()) {
113  blob_box = box_next(&blob_it);
114  gap_width = blob_box.left() - prev_blob_box.right();
115  if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) {
116  min_quantum = (prev_blob_box.right() - min_left) / bucket_size;
117  max_quantum = (blob_box.left() - min_left) / bucket_size;
118  if (max_quantum > map_max) {
119  max_quantum = map_max;
120  }
121  for (i = min_quantum; i <= max_quantum; i++) {
122  map[i]++;
123  }
124  }
125  prev_blob_box = blob_box;
126  }
127  if (gapmap_use_ends) {
128  /* Trailing space */
129  gap_width = max_right - prev_blob_box.right();
130  if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) {
131  min_quantum = (prev_blob_box.right() - min_left) / bucket_size;
132  if (min_quantum < 0) {
133  min_quantum = 0;
134  }
135  for (i = min_quantum; i <= map_max; i++) {
136  map[i]++;
137  }
138  }
139  }
140  }
141  }
142  for (i = 0; i <= map_max; i++) {
143  if (map[i] > total_rows / 2) {
145  (((i == 0) && (map[i + 1] <= total_rows / 2)) ||
146  ((i == map_max) && (map[i - 1] <= total_rows / 2)) ||
147  ((i > 0) && (i < map_max) && (map[i - 1] <= total_rows / 2) &&
148  (map[i + 1] <= total_rows / 2)))) {
149  map[i] = 0; // prevent isolated quantum
150  } else {
151  any_tabs = true;
152  }
153  }
154  }
155  if (gapmap_debug && any_tabs) {
156  tprintf("Table found\n");
157  }
158 }
@ TBOX
bool gapmap_no_isolated_quanta
Definition: gap_map.cpp:19
double gapmap_big_gaps
Definition: gap_map.cpp:20
bool gapmap_use_ends
Definition: gap_map.cpp:18
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool gapmap_debug
Definition: gap_map.cpp:17
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:638

◆ ~GAPMAP()

tesseract::GAPMAP::~GAPMAP ( )
inline

Definition at line 23 of file gap_map.h.

23  { // destructor
24  delete[] map;
25  }

Member Function Documentation

◆ table_gap()

bool tesseract::GAPMAP::table_gap ( int16_t  left,
int16_t  right 
)

Definition at line 166 of file gap_map.cpp.

169  {
170  int16_t min_quantum;
171  int16_t max_quantum;
172  int16_t i;
173  bool tab_found = false;
174 
175  if (!any_tabs) {
176  return false;
177  }
178 
179  min_quantum = (left - min_left) / bucket_size;
180  max_quantum = (right - min_left) / bucket_size;
181  // Clip to the bounds of the array. In some circumstances (big blob followed
182  // by small blob) max_quantum can exceed the map_max bounds, but we clip
183  // here instead, as it provides better long-term safety.
184  if (min_quantum < 0) {
185  min_quantum = 0;
186  }
187  if (max_quantum > map_max) {
188  max_quantum = map_max;
189  }
190  for (i = min_quantum; (!tab_found && (i <= max_quantum)); i++) {
191  if (map[i] > total_rows / 2) {
192  tab_found = true;
193  }
194  }
195  return tab_found;
196 }

The documentation for this class was generated from the following files: