tesseract  5.0.0
tesseract::BLOCK Class Reference

#include <ocrblock.h>

Inheritance diagram for tesseract::BLOCK:
tesseract::ELIST_LINK

Public Member Functions

 BLOCK ()
 
 BLOCK (const char *name, bool prop, int16_t kern, int16_t space, TDimension xmin, TDimension ymin, TDimension xmax, TDimension ymax)
 
 ~BLOCK ()=default
 
void set_stats (bool prop, int16_t kern, int16_t space, int16_t ch_pitch)
 
void set_xheight (int32_t height)
 set char size More...
 
void set_font_class (int16_t font)
 set font class More...
 
bool prop () const
 return proportional More...
 
bool right_to_left () const
 
void set_right_to_left (bool value)
 
int32_t fixed_pitch () const
 return pitch More...
 
int16_t kern () const
 return kerning More...
 
int16_t font () const
 return font class More...
 
int16_t space () const
 return spacing More...
 
const char * name () const
 return filename More...
 
int32_t x_height () const
 return xheight More...
 
float cell_over_xheight () const
 
void set_cell_over_xheight (float ratio)
 
ROW_LIST * row_list ()
 get rows More...
 
void compute_row_margins ()
 
PARA_LIST * para_list ()
 
C_BLOB_LIST * blob_list ()
 get blobs More...
 
C_BLOB_LIST * reject_blobs ()
 
FCOORD re_rotation () const
 
void set_re_rotation (const FCOORD &rotation)
 
FCOORD classify_rotation () const
 
void set_classify_rotation (const FCOORD &rotation)
 
FCOORD skew () const
 
void set_skew (const FCOORD &skew)
 
const ICOORD & median_size () const
 
void set_median_size (int x, int y)
 
Image render_mask (TBOX *mask_box)
 
TBOX restricted_bounding_box (bool upper_dots, bool lower_dots) const
 
void reflect_polygon_in_y_axis ()
 
void rotate (const FCOORD &rotation)
 
void sort_rows ()
 decreasing y order More...
 
void compress ()
 shrink white space More...
 
void check_pitch ()
 check proportional More...
 
void compress (const ICOORD vec)
 shrink white space and move by vector More...
 
void print (FILE *fp, bool dump)
 dump whole table More...
 
BLOCK & operator= (const BLOCK &source)
 
- Public Member Functions inherited from tesseract::ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Public Attributes

PDBLK pdblk
 Page Description Block. More...
 

Friends

class BLOCK_RECT_IT
 

Detailed Description

Definition at line 32 of file ocrblock.h.

Constructor & Destructor Documentation

◆ BLOCK() [1/2]

tesseract::BLOCK::BLOCK ( )
inline

Definition at line 37 of file ocrblock.h.

37 : re_rotation_(1.0f, 0.0f), classify_rotation_(1.0f, 0.0f), skew_(1.0f, 0.0f) {}

◆ BLOCK() [2/2]

tesseract::BLOCK::BLOCK ( const char *  name,
bool  prop,
int16_t  kern,
int16_t  space,
TDimension  xmin,
TDimension  ymin,
TDimension  xmax,
TDimension  ymax 
)

BLOCK::BLOCK

Constructor for a simple rectangular block.

Parameters
name: filename
prop: proportional
kern: kerning
space: spacing
xmin: bottom left
xmax: top right

Definition at line 34 of file ocrblock.cpp.

42  : pdblk(xmin, ymin, xmax, ymax)
43  , filename(name)
44  , re_rotation_(1.0f, 0.0f)
45  , classify_rotation_(1.0f, 0.0f)
46  , skew_(1.0f, 0.0f) {
47  ICOORDELT_IT left_it = &pdblk.leftside;
48  ICOORDELT_IT right_it = &pdblk.rightside;
49 
50  proportional = prop;
51  kerning = kern;
52  spacing = space;
53  font_class = -1; // not assigned
54  cell_over_xheight_ = 2.0f;
55  pdblk.hand_poly = nullptr;
56  left_it.set_to_list(&pdblk.leftside);
57  right_it.set_to_list(&pdblk.rightside);
58  // make default box
59  left_it.add_to_end(new ICOORDELT(xmin, ymin));
60  left_it.add_to_end(new ICOORDELT(xmin, ymax));
61  right_it.add_to_end(new ICOORDELT(xmax, ymin));
62  right_it.add_to_end(new ICOORDELT(xmax, ymax));
63 }
const char * name() const
return filename
Definition: ocrblock.h:97
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:185
bool prop() const
return proportional
Definition: ocrblock.h:71
int16_t kern() const
return kerning
Definition: ocrblock.h:85
int16_t space() const
return spacing
Definition: ocrblock.h:93
ICOORDELT_LIST rightside
right side vertices
Definition: pdblock.h:111
POLY_BLOCK * hand_poly
weird as well
Definition: pdblock.h:109
ICOORDELT_LIST leftside
left side vertices
Definition: pdblock.h:110

◆ ~BLOCK()

tesseract::BLOCK::~BLOCK ( )
default

Member Function Documentation

◆ blob_list()

C_BLOB_LIST* tesseract::BLOCK::blob_list ( )
inline

get blobs

Definition at line 123 of file ocrblock.h.

123  {
124  return &c_blobs;
125  }

◆ cell_over_xheight()

float tesseract::BLOCK::cell_over_xheight ( ) const
inline

Definition at line 104 of file ocrblock.h.

104  {
105  return cell_over_xheight_;
106  }

◆ check_pitch()

void tesseract::BLOCK::check_pitch ( )

check proportional

BLOCK::check_pitch

Check whether the block is fixed or prop, set the flag, and set the pitch if it is fixed.

Definition at line 164 of file ocrblock.cpp.

164  { // check prop
165  // tprintf("Missing FFT fixed pitch stuff!\n");
166  pitch = -1;
167 }

◆ classify_rotation()

FCOORD tesseract::BLOCK::classify_rotation ( ) const
inline

Definition at line 135 of file ocrblock.h.

135  {
136  return classify_rotation_; // Apply this before classifying.
137  }

◆ compress() [1/2]

void tesseract::BLOCK::compress ( )

shrink white space

BLOCK::compress

Delete space between the rows. (And maybe one day, compress the rows) Fill space of block from top down, left aligning rows.

Definition at line 128 of file ocrblock.cpp.

128  { // squash it up
129 #define ROW_SPACING 5
130 
131  ROW_IT row_it(&rows);
132  ROW *row;
133  ICOORD row_spacing(0, ROW_SPACING);
134 
135  ICOORDELT_IT icoordelt_it;
136 
137  sort_rows();
138 
141  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
142  row = row_it.data();
143  row->move(pdblk.box.botleft() - row_spacing - row->bounding_box().topleft());
144  pdblk.box += row->bounding_box();
145  }
146 
147  pdblk.leftside.clear();
148  icoordelt_it.set_to_list(&pdblk.leftside);
149  icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.bottom()));
150  icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.top()));
151  pdblk.rightside.clear();
152  icoordelt_it.set_to_list(&pdblk.rightside);
153  icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.bottom()));
154  icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.top()));
155 }
#define ROW_SPACING
@ TBOX
void sort_rows()
decreasing y order
Definition: ocrblock.cpp:115
TBOX box
bounding box
Definition: pdblock.h:112
TDimension left() const
Definition: rect.h:82
const ICOORD & botleft() const
Definition: rect.h:102
void move_bottom_edge(const TDimension y)
Definition: rect.h:150
TDimension top() const
Definition: rect.h:68
ICOORD topleft() const
Definition: rect.h:110
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75

◆ compress() [2/2]

void tesseract::BLOCK::compress ( const ICOORD  vec)

shrink white space and move by vector

BLOCK::compress

Compress and move in a single operation.

Definition at line 175 of file ocrblock.cpp.

177  {
178  pdblk.box.move(vec);
179  compress();
180 }
void compress()
shrink white space
Definition: ocrblock.cpp:128
void move(const ICOORD vec)
Definition: rect.h:170

◆ compute_row_margins()

void tesseract::BLOCK::compute_row_margins ( )

Definition at line 330 of file ocrblock.cpp.

330  {
331  if (row_list()->empty() || row_list()->singleton()) {
332  return;
333  }
334 
335  // If Layout analysis was not called, default to this.
336  POLY_BLOCK rect_block(pdblk.bounding_box(), PT_FLOWING_TEXT);
337  POLY_BLOCK *pblock = &rect_block;
338  if (pdblk.poly_block() != nullptr) {
339  pblock = pdblk.poly_block();
340  }
341 
342  // Step One: Determine if there is a drop-cap.
343  // TODO(eger): Fix up drop cap code for RTL languages.
344  ROW_IT r_it(row_list());
345  ROW *first_row = r_it.data();
346  ROW *second_row = r_it.data_relative(1);
347 
348  // initialize the bottom of a fictitious drop cap far above the first line.
349  int drop_cap_bottom = first_row->bounding_box().top() + first_row->bounding_box().height();
350  int drop_cap_right = first_row->bounding_box().left();
351  int mid_second_line = second_row->bounding_box().top() - second_row->bounding_box().height() / 2;
352  WERD_IT werd_it(r_it.data()->word_list()); // words of line one
353  if (!werd_it.empty()) {
354  C_BLOB_IT cblob_it(werd_it.data()->cblob_list());
355  for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
356  TBOX bbox = cblob_it.data()->bounding_box();
357  if (bbox.bottom() <= mid_second_line) {
358  // we found a real drop cap
359  first_row->set_has_drop_cap(true);
360  if (drop_cap_bottom > bbox.bottom()) {
361  drop_cap_bottom = bbox.bottom();
362  }
363  if (drop_cap_right < bbox.right()) {
364  drop_cap_right = bbox.right();
365  }
366  }
367  }
368  }
369 
370  // Step Two: Calculate the margin from the text of each row to the block
371  // (or drop-cap) boundaries.
372  PB_LINE_IT lines(pblock);
373  r_it.set_to_list(row_list());
374  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
375  ROW *row = r_it.data();
376  TBOX row_box = row->bounding_box();
377  int left_y = row->base_line(row_box.left()) + row->x_height();
378  int left_margin;
379  const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_left(lines.get_line(left_y));
380  LeftMargin(segments_left.get(), row_box.left(), &left_margin);
381 
382  if (row_box.top() >= drop_cap_bottom) {
383  int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
384  if (drop_cap_distance < 0) {
385  drop_cap_distance = 0;
386  }
387  if (drop_cap_distance < left_margin) {
388  left_margin = drop_cap_distance;
389  }
390  }
391 
392  int right_y = row->base_line(row_box.right()) + row->x_height();
393  int right_margin;
394  const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_right(lines.get_line(right_y));
395  RightMargin(segments_right.get(), row_box.right(), &right_margin);
396  row->set_lmargin(left_margin);
397  row->set_rmargin(right_margin);
398  }
399 }
@ PT_FLOWING_TEXT
Definition: publictypes.h:55
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:111
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67

◆ fixed_pitch()

int32_t tesseract::BLOCK::fixed_pitch ( ) const
inline

return pitch

Definition at line 81 of file ocrblock.h.

81  {
82  return pitch;
83  }

◆ font()

int16_t tesseract::BLOCK::font ( ) const
inline

return font class

Definition at line 89 of file ocrblock.h.

89  {
90  return font_class;
91  }

◆ kern()

int16_t tesseract::BLOCK::kern ( ) const
inline

return kerning

Definition at line 85 of file ocrblock.h.

85  {
86  return kerning;
87  }

◆ median_size()

const ICOORD& tesseract::BLOCK::median_size ( ) const
inline

Definition at line 147 of file ocrblock.h.

147  {
148  return median_size_;
149  }

◆ name()

const char* tesseract::BLOCK::name ( ) const
inline

return filename

Definition at line 97 of file ocrblock.h.

97  {
98  return filename.c_str();
99  }

◆ operator=()

BLOCK & tesseract::BLOCK::operator= ( const BLOCK &  source)

BLOCK::operator=

Assignment - duplicate the block structure, but with an EMPTY row list.

Definition at line 222 of file ocrblock.cpp.

224  {
225  this->ELIST_LINK::operator=(source);
226  pdblk = source.pdblk;
227  proportional = source.proportional;
228  kerning = source.kerning;
229  spacing = source.spacing;
230  filename = source.filename; // STRINGs assign ok
231  if (!rows.empty()) {
232  rows.clear();
233  }
234  re_rotation_ = source.re_rotation_;
235  classify_rotation_ = source.classify_rotation_;
236  skew_ = source.skew_;
237  return *this;
238 }
void operator=(const ELIST_LINK &)
Definition: elst.h:100

◆ para_list()

PARA_LIST* tesseract::BLOCK::para_list ( )
inline

Definition at line 119 of file ocrblock.h.

119  {
120  return &paras_;
121  }

◆ print()

void tesseract::BLOCK::print ( FILE *  fp,
bool  dump 
)

dump whole table

BLOCK::print

Print the info on a block

Parameters
fp: file to print on
dump: print full detail

Definition at line 188 of file ocrblock.cpp.

191  {
192  ICOORDELT_IT it = &pdblk.leftside; // iterator
193 
194  pdblk.box.print();
195  tprintf("Proportional= %s\n", proportional ? "TRUE" : "FALSE");
196  tprintf("Kerning= %d\n", kerning);
197  tprintf("Spacing= %d\n", spacing);
198  tprintf("Fixed_pitch=%d\n", pitch);
199  tprintf("Filename= %s\n", filename.c_str());
200 
201  if (dump) {
202  tprintf("Left side coords are:\n");
203  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
204  tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
205  }
206  tprintf("\n");
207  tprintf("Right side coords are:\n");
208  it.set_to_list(&pdblk.rightside);
209  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
210  tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
211  }
212  tprintf("\n");
213  }
214 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void print() const
Definition: rect.h:289

◆ prop()

bool tesseract::BLOCK::prop ( ) const
inline

return proportional

Definition at line 71 of file ocrblock.h.

71  {
72  return proportional;
73  }

◆ re_rotation()

FCOORD tesseract::BLOCK::re_rotation ( ) const
inline

Definition at line 129 of file ocrblock.h.

129  {
130  return re_rotation_; // How to transform coords back to image.
131  }

◆ reflect_polygon_in_y_axis()

void tesseract::BLOCK::reflect_polygon_in_y_axis ( )

BLOCK::reflect_polygon_in_y_axis

Reflects the polygon in the y-axis and recomputes the bounding_box. Does nothing to any contained rows/words/blobs etc.

Definition at line 104 of file ocrblock.cpp.

104  {
107 }
TBOX * bounding_box()
Definition: polyblk.h:38

◆ reject_blobs()

C_BLOB_LIST* tesseract::BLOCK::reject_blobs ( )
inline

Definition at line 126 of file ocrblock.h.

126  {
127  return &rej_blobs;
128  }

◆ render_mask()

Image tesseract::BLOCK::render_mask ( TBOX *  mask_box)
inline

Definition at line 155 of file ocrblock.h.

155  {
156  return pdblk.render_mask(re_rotation_, mask_box);
157  }
Image render_mask(const FCOORD &rerotation, TBOX *mask_box)
Definition: pdblock.cpp:137

◆ restricted_bounding_box()

TBOX tesseract::BLOCK::restricted_bounding_box ( bool  upper_dots,
bool  lower_dots 
) const

Definition at line 88 of file ocrblock.cpp.

88  {
89  TBOX box;
90  // This is a read-only iteration of the rows in the block.
91  ROW_IT it(const_cast<ROW_LIST *>(&rows));
92  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
93  box += it.data()->restricted_bounding_box(upper_dots, lower_dots);
94  }
95  return box;
96 }

◆ right_to_left()

bool tesseract::BLOCK::right_to_left ( ) const
inline

Definition at line 74 of file ocrblock.h.

74  {
75  return right_to_left_;
76  }

◆ rotate()

void tesseract::BLOCK::rotate ( const FCOORD &  rotation)

BLOCK::rotate

Rotate the polygon by the given rotation and recompute the bounding_box.

Definition at line 81 of file ocrblock.cpp.

81  {
82  pdblk.poly_block()->rotate(rotation);
84 }
void rotate(FCOORD rotation)
Definition: polyblk.cpp:191

◆ row_list()

ROW_LIST* tesseract::BLOCK::row_list ( )
inline

get rows

Definition at line 111 of file ocrblock.h.

111  {
112  return &rows;
113  }

◆ set_cell_over_xheight()

void tesseract::BLOCK::set_cell_over_xheight ( float  ratio)
inline

Definition at line 107 of file ocrblock.h.

107  {
108  cell_over_xheight_ = ratio;
109  }

◆ set_classify_rotation()

void tesseract::BLOCK::set_classify_rotation ( const FCOORD &  rotation)
inline

Definition at line 138 of file ocrblock.h.

138  {
139  classify_rotation_ = rotation;
140  }

◆ set_font_class()

void tesseract::BLOCK::set_font_class ( int16_t  font)
inline

set font class

Definition at line 67 of file ocrblock.h.

67  {
68  font_class = font;
69  }
int16_t font() const
return font class
Definition: ocrblock.h:89

◆ set_median_size()

void tesseract::BLOCK::set_median_size ( int  x,
int  y 
)
inline

Definition at line 150 of file ocrblock.h.

150  {
151  median_size_.set_x(x);
152  median_size_.set_y(y);
153  }
void set_x(TDimension xin)
rewrite function
Definition: points.h:67
void set_y(TDimension yin)
rewrite function
Definition: points.h:71

◆ set_re_rotation()

void tesseract::BLOCK::set_re_rotation ( const FCOORD &  rotation)
inline

Definition at line 132 of file ocrblock.h.

132  {
133  re_rotation_ = rotation;
134  }

◆ set_right_to_left()

void tesseract::BLOCK::set_right_to_left ( bool  value)
inline

Definition at line 77 of file ocrblock.h.

77  {
78  right_to_left_ = value;
79  }

◆ set_skew()

void tesseract::BLOCK::set_skew ( const FCOORD &  skew)
inline

Definition at line 144 of file ocrblock.h.

144  {
145  skew_ = skew;
146  }
FCOORD skew() const
Definition: ocrblock.h:141

◆ set_stats()

void tesseract::BLOCK::set_stats ( bool  prop,
int16_t  kern,
int16_t  space,
int16_t  ch_pitch 
)
inline

set space size etc.

Parameters
prop: proportional
kern: inter char size
space: inter word size
ch_pitch: pitch if fixed

Definition at line 56 of file ocrblock.h.

56  {
57  proportional = prop;
58  kerning = static_cast<int8_t>(kern);
59  spacing = space;
60  pitch = ch_pitch;
61  }

◆ set_xheight()

void tesseract::BLOCK::set_xheight ( int32_t  height)
inline

set char size

Definition at line 63 of file ocrblock.h.

63  {
64  xheight = height;
65  }

◆ skew()

FCOORD tesseract::BLOCK::skew ( ) const
inline

Definition at line 141 of file ocrblock.h.

141  {
142  return skew_; // Direction of true horizontal.
143  }

◆ sort_rows()

void tesseract::BLOCK::sort_rows ( )

decreasing y order

BLOCK::sort_rows

Order rows so that they are in order of decreasing Y coordinate

Definition at line 115 of file ocrblock.cpp.

115  { // order on "top"
116  ROW_IT row_it(&rows);
117 
118  row_it.sort(decreasing_top_order);
119 }

◆ space()

int16_t tesseract::BLOCK::space ( ) const
inline

return spacing

Definition at line 93 of file ocrblock.h.

93  {
94  return spacing;
95  }

◆ x_height()

int32_t tesseract::BLOCK::x_height ( ) const
inline

return xheight

Definition at line 101 of file ocrblock.h.

101  {
102  return xheight;
103  }

Friends And Related Function Documentation

◆ BLOCK_RECT_IT

friend class BLOCK_RECT_IT
friend

Definition at line 35 of file ocrblock.h.

Member Data Documentation

◆ pdblk

PDBLK tesseract::BLOCK::pdblk

Page Description Block.

Definition at line 185 of file ocrblock.h.


The documentation for this class was generated from the following files: