tesseract  5.0.0
seam.h
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: seam.h
4  * Author: Mark Seaman, SW Productivity
5  *
6  * (c) Copyright 1987, Hewlett-Packard Company.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  *****************************************************************************/
18 #ifndef SEAM_H
19 #define SEAM_H
20 
21 // Include automatically generated configuration file if running autoconf.
22 #ifdef HAVE_CONFIG_H
23 # include "config_auto.h"
24 #endif
25 
26 #include "blobs.h"
27 #include "split.h"
28 
29 namespace tesseract {
30 
31 using PRIORITY = float; // Floating-point alias, presumably for seam/split priority scores; SEAM itself spells the type as plain float.
32 
33 class SEAM {
34 public:
35  // A seam with no splits
36  SEAM(float priority, const TPOINT &location)
37  : priority_(priority), location_(location), num_splits_(0) {}
38  // A seam with a single split point.
39  SEAM(float priority, const TPOINT &location, const SPLIT &split)
40  : priority_(priority), location_(location), num_splits_(1) {
41  splits_[0] = split;
42  }
43  // Default copy constructor, operator= and destructor are OK!
44 
45  // Accessors.
46  float priority() const {
47  return priority_;
48  }
49  void set_priority(float priority) {
50  priority_ = priority;
51  }
52  bool HasAnySplits() const {
53  return num_splits_ > 0;
54  }
55 
56  // Returns the bounding box of all the points in the seam.
57  TBOX bounding_box() const;
58 
59  // Returns true if other can be combined into *this.
60  bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const {
61  int dist = location_.x - other.location_.x;
62  return -max_x_dist < dist && dist < max_x_dist &&
63  num_splits_ + other.num_splits_ <= kMaxNumSplits &&
64  priority_ + other.priority_ < max_total_priority && !OverlappingSplits(other) &&
65  !SharesPosition(other);
66  }
67 
68  // Combines other into *this. Only works if CombinableWith returned true.
69  void CombineWith(const SEAM &other) {
70  priority_ += other.priority_;
71  location_ += other.location_;
72  location_ /= 2;
73 
74  for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s) {
75  splits_[num_splits_++] = other.splits_[s];
76  }
77  }
78 
79  // Returns true if the given blob contains all splits of *this SEAM.
80  bool ContainedByBlob(const TBLOB &blob) const {
81  for (int s = 0; s < num_splits_; ++s) {
82  if (!splits_[s].ContainedByBlob(blob)) {
83  return false;
84  }
85  }
86  return true;
87  }
88 
89  // Returns true if the given EDGEPT is used by this SEAM, checking only
90  // the EDGEPT pointer, not the coordinates.
91  bool UsesPoint(const EDGEPT *point) const {
92  for (int s = 0; s < num_splits_; ++s) {
93  if (splits_[s].UsesPoint(point)) {
94  return true;
95  }
96  }
97  return false;
98  }
99  // Returns true if *this and other share any common point, by coordinates.
100  bool SharesPosition(const SEAM &other) const {
101  for (int s = 0; s < num_splits_; ++s) {
102  for (int t = 0; t < other.num_splits_; ++t) {
103  if (splits_[s].SharesPosition(other.splits_[t])) {
104  return true;
105  }
106  }
107  }
108  return false;
109  }
110  // Returns true if *this and other have any vertically overlapping splits.
111  bool OverlappingSplits(const SEAM &other) const {
112  for (int s = 0; s < num_splits_; ++s) {
113  TBOX split1_box = splits_[s].bounding_box();
114  for (int t = 0; t < other.num_splits_; ++t) {
115  TBOX split2_box = other.splits_[t].bounding_box();
116  if (split1_box.y_overlap(split2_box)) {
117  return true;
118  }
119  }
120  }
121  return false;
122  }
123 
124  // Marks the edgepts used by the seam so the segments made by the cut
125  // never get split further by another seam in the future.
126  void Finalize() {
127  for (int s = 0; s < num_splits_; ++s) {
128  splits_[s].point1->MarkChop();
129  splits_[s].point2->MarkChop();
130  }
131  }
132 
133  // Returns true if the splits in *this SEAM appear OK in the sense that they
134  // do not cross any outlines and do not chop off any ridiculously small
135  // pieces.
136  bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const;
137 
138  // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
139  // seam, which is about to be inserted at insert_index. Returns false if
140  // any of the computations fails, as this indicates an invalid chop.
141  // widthn_/widthp_ are only changed if modify is true.
142  bool PrepareToInsertSeam(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
143  int insert_index, bool modify);
144  // Computes the widthp_/widthn_ range. Returns false if not all the splits
145  // are accounted for. widthn_/widthp_ are only changed if modify is true.
146  bool FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify);
147 
148  // Splits this blob into two blobs by applying the splits included in
149  // *this SEAM
150  void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const;
151  // Undoes ApplySeam by removing the seam between these two blobs.
152  // Produces one blob as a result, and deletes other_blob.
153  void UndoSeam(TBLOB *blob, TBLOB *other_blob) const;
154 
155  // Prints everything in *this SEAM.
156  void Print(const char *label) const;
157  // Prints a collection of SEAMs.
158  static void PrintSeams(const char *label, const std::vector<SEAM *> &seams);
159 #ifndef GRAPHICS_DISABLED
160  // Draws the seam in the given window.
161  void Mark(ScrollView *window) const;
162 #endif
163 
164  // Break up the blobs in this chain so that they are all independent.
165  // This operation should undo the affect of join_pieces.
166  static void BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
167  int first, int last);
168  // Join a group of base level pieces into a single blob that can then
169  // be classified.
170  static void JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
171  int first, int last);
172 
173  // Hides the seam so the outlines appear not to be cut by it.
174  void Hide() const;
175  // Undoes hide, so the outlines are cut by the seam.
176  void Reveal() const;
177 
178  // Computes and returns, but does not set, the full priority of *this SEAM.
179  // The arguments here are config parameters defined in Wordrec. Add chop_
180  // to the beginning of the name.
181  float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
182  double center_knob, double width_change_knob) const;
183 
184 private:
185  // Maximum number of splits that a SEAM can hold.
186  static const uint8_t kMaxNumSplits = 3;
187  // Priority of this split. Lower is better.
188  float priority_;
189  // Position of the middle of the seam.
190  TPOINT location_;
191  // A range such that all splits in *this SEAM are contained within blobs in
192  // the range [index - widthn_,index + widthp_] where index is the index of
193  // this SEAM in the seams vector.
194  uint8_t widthp_ = 0;
195  uint8_t widthn_ = 0;
196  // Number of splits_ that are used.
197  uint8_t num_splits_;
198  // Set of pairs of points that are the ends of each split in the SEAM.
199  SPLIT splits_[kMaxNumSplits];
200 };
201 
// Declared here; the definition lives in seam.cpp.
// NOTE(review): judging only by the name and signature, this presumably fills
// *seam_array with the starting seams for word - confirm against the definition.
202 void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array);
203 
204 } // namespace tesseract
205 
206 #endif
const std::vector< std::string > split(const std::string &s, char c)
Definition: helpers.h:41
LIST last(LIST var_list)
Definition: oldlist.cpp:153
float PRIORITY
Definition: seam.h:31
void start_seam_list(TWERD *word, std::vector< SEAM * > *seam_array)
Definition: seam.cpp:262
TDimension x
Definition: blobs.h:89
void MarkChop()
Definition: blobs.h:187
bool y_overlap(const TBOX &box) const
Definition: rect.h:435
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:106
void CombineWith(const SEAM &other)
Definition: seam.h:69
bool UsesPoint(const EDGEPT *point) const
Definition: seam.h:91
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:238
TBOX bounding_box() const
Definition: seam.cpp:33
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:122
void set_priority(float priority)
Definition: seam.h:49
static void JoinPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:204
static void PrintSeams(const char *label, const std::vector< SEAM * > &seams)
Definition: seam.cpp:158
bool HasAnySplits() const
Definition: seam.h:52
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.h:60
float priority() const
Definition: seam.h:46
bool SharesPosition(const SEAM &other) const
Definition: seam.h:100
bool FindBlobWidth(const std::vector< TBLOB * > &blobs, int index, bool modify)
Definition: seam.cpp:74
bool PrepareToInsertSeam(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:54
void Mark(ScrollView *window) const
Definition: seam.cpp:171
SEAM(float priority, const TPOINT &location)
Definition: seam.h:36
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:44
void Reveal() const
Definition: seam.cpp:231
void Finalize()
Definition: seam.h:126
void Hide() const
Definition: seam.cpp:224
static void BreakPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:181
void Print(const char *label) const
Definition: seam.cpp:144
bool OverlappingSplits(const SEAM &other) const
Definition: seam.h:111
bool ContainedByBlob(const TBLOB &blob) const
Definition: seam.h:80
SEAM(float priority, const TPOINT &location, const SPLIT &split)
Definition: seam.h:39
TBOX bounding_box() const
Definition: split.h:38
EDGEPT * point2
Definition: split.h:107
EDGEPT * point1
Definition: split.h:106