tesseract  5.0.0
indexmapbidi.cpp
Go to the documentation of this file.
1 // File: indexmapbidi.cpp
3 // Description: Bi-directional mapping between a sparse and compact space.
4 // Author: rays@google.com (Ray Smith)
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "helpers.h"
20 #include "indexmapbidi.h"
21 #include "serialis.h"
22 
23 namespace tesseract {
24 
25 // Destructor.
26 // It is defined here, so the compiler can create a single vtable
27 // instead of weak vtables in every compilation unit.
28 IndexMap::~IndexMap() = default;
29 
30 // SparseToCompact takes a sparse index to an index in the compact space.
31 // Uses a binary search to find the result. For faster speed use
32 // IndexMapBiDi, but that takes more memory.
33 int IndexMap::SparseToCompact(int sparse_index) const {
34  auto pos = std::upper_bound(compact_map_.begin(), compact_map_.end(), sparse_index);
35  if (pos > compact_map_.begin()) {
36  --pos;
37  }
38  auto result = pos - compact_map_.begin();
39  return compact_map_[result] == sparse_index ? result : -1;
40 }
41 
42 // Copy from the input.
43 void IndexMap::CopyFrom(const IndexMap &src) {
46 }
47 void IndexMap::CopyFrom(const IndexMapBiDi &src) {
48  sparse_size_ = src.SparseSize();
50 }
51 
52 // Writes to the given file. Returns false in case of error.
53 bool IndexMap::Serialize(FILE *fp) const {
55 }
56 
57 // Reads from the given file. Returns false in case of error.
58 // If swap is true, assumes a big/little-endian swap is needed.
59 bool IndexMap::DeSerialize(bool swap, FILE *fp) {
60  uint32_t sparse_size;
61  if (!tesseract::DeSerialize(fp, &sparse_size)) {
62  return false;
63  }
64  if (swap) {
65  ReverseN(&sparse_size, sizeof(sparse_size));
66  }
67  // Arbitrarily limit the number of elements to protect against bad data.
68  if (sparse_size > UINT16_MAX) {
69  return false;
70  }
71  sparse_size_ = sparse_size;
72  return tesseract::DeSerialize(swap, fp, compact_map_);
73 }
74 
75 // Destructor.
76 // It is defined here, so the compiler can create a single vtable
77 // instead of weak vtables in every compilation unit.
78 IndexMapBiDi::~IndexMapBiDi() = default;
79 
80 // Top-level init function in a single call to initialize a map to select
81 // a single contiguous subrange [start, end) of the sparse space to be mapped
82 // 1 to 1 to the compact space, with all other elements of the sparse space
83 // left unmapped.
84 // No need to call Setup after this.
85 void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
86  Init(sparse_size, false);
87  for (int i = start; i < end; ++i) {
88  SetMap(i, true);
89  }
90  Setup();
91 }
92 
93 // Initializes just the sparse_map_ to the given size with either all
94 // forward indices mapped (all_mapped = true) or none (all_mapped = false).
95 // Call Setup immediately after, or make calls to SetMap first to adjust the
96 // mapping and then call Setup before using the map.
97 void IndexMapBiDi::Init(int size, bool all_mapped) {
98  if (!all_mapped) {
99  sparse_map_.clear();
100  }
101  sparse_map_.resize(size, -1);
102  if (all_mapped) {
103  for (int i = 0; i < size; ++i) {
104  sparse_map_[i] = i;
105  }
106  }
107 }
108 
109 // Sets a given index in the sparse_map_ to be mapped or not.
110 void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
111  sparse_map_[sparse_index] = mapped ? 0 : -1;
112 }
113 
114 // Sets up the sparse_map_ and compact_map_ properly after Init and
115 // some calls to SetMap. Assumes an ordered 1-1 map from set indices
116 // in the forward map to the compact space.
118  int compact_size = 0;
119  for (int &i : sparse_map_) {
120  if (i >= 0) {
121  i = compact_size++;
122  }
123  }
124  compact_map_.clear();
125  compact_map_.resize(compact_size, -1);
126  for (size_t i = 0; i < sparse_map_.size(); ++i) {
127  if (sparse_map_[i] >= 0) {
128  compact_map_[sparse_map_[i]] = i;
129  }
130  }
131  sparse_size_ = sparse_map_.size();
132 }
133 
134 // Copy from the input.
136  sparse_map_ = src.sparse_map_;
138  sparse_size_ = sparse_map_.size();
139 }
140 
141 // Merges the two compact space indices. May be called many times, but
142 // the merges must be concluded by a call to CompleteMerges.
143 // Returns true if a merge was actually performed.
144 bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
145  // Find the current master index for index1 and index2.
146  compact_index1 = MasterCompactIndex(compact_index1);
147  compact_index2 = MasterCompactIndex(compact_index2);
148  // Be sure that index1 < index2.
149  if (compact_index1 > compact_index2) {
150  int tmp = compact_index1;
151  compact_index1 = compact_index2;
152  compact_index2 = tmp;
153  } else if (compact_index1 == compact_index2) {
154  return false;
155  }
156  // To save iterating over all sparse_map_ entries, simply make the master
157  // entry for index2 point to index1.
158  // This leaves behind a potential chain of parents that needs to be chased,
159  // as above.
160  sparse_map_[compact_map_[compact_index2]] = compact_index1;
161  if (compact_index1 >= 0) {
162  compact_map_[compact_index2] = compact_map_[compact_index1];
163  }
164  return true;
165 }
166 
167 // Completes one or more Merge operations by further compacting the
168 // compact space. Unused compact space indices are removed, and the used
169 // ones above shuffled down to fill the gaps.
170 // Example:
171 // Input sparse_map_: (x indicates -1)
172 // x x 0 x 2 x x 4 x 0 x 2 x
173 // Output sparse_map_:
174 // x x 0 x 1 x x 2 x 0 x 1 x
175 // Output compact_map_:
176 // 2 4 7.
178  // Ensure each sparse_map_entry contains a master compact_map_ index.
179  int compact_size = 0;
180  for (int &i : sparse_map_) {
181  int compact_index = MasterCompactIndex(i);
182  i = compact_index;
183  if (compact_index >= compact_size) {
184  compact_size = compact_index + 1;
185  }
186  }
187  // Re-generate the compact_map leaving holes for unused indices.
188  compact_map_.clear();
189  compact_map_.resize(compact_size, -1);
190  for (size_t i = 0; i < sparse_map_.size(); ++i) {
191  if (sparse_map_[i] >= 0) {
192  if (compact_map_[sparse_map_[i]] == -1) {
193  compact_map_[sparse_map_[i]] = i;
194  }
195  }
196  }
197  // Compact the compact_map, leaving tmp_compact_map saying where each
198  // index went to in the compacted map.
199  std::vector<int32_t> tmp_compact_map(compact_size, -1);
200  compact_size = 0;
201  for (size_t i = 0; i < compact_map_.size(); ++i) {
202  if (compact_map_[i] >= 0) {
203  tmp_compact_map[i] = compact_size;
204  compact_map_[compact_size++] = compact_map_[i];
205  }
206  }
207  compact_map_.resize(compact_size);
208  // Now modify the entries in the sparse map to point to the new locations.
209  for (int &i : sparse_map_) {
210  if (i >= 0) {
211  i = tmp_compact_map[i];
212  }
213  }
214 }
215 
216 // Writes to the given file. Returns false in case of error.
217 bool IndexMapBiDi::Serialize(FILE *fp) const {
218  if (!IndexMap::Serialize(fp)) {
219  return false;
220  }
221  // Make a vector containing the rest of the map. If the map is many-to-one
222  // then each additional sparse entry needs to be stored.
223  // Normally we store only the compact map to save space.
224  std::vector<int32_t> remaining_pairs;
225  for (unsigned i = 0; i < sparse_map_.size(); ++i) {
226  if (sparse_map_[i] >= 0 && static_cast<unsigned>(compact_map_[sparse_map_[i]]) != i) {
227  remaining_pairs.push_back(i);
228  remaining_pairs.push_back(sparse_map_[i]);
229  }
230  }
231  return tesseract::Serialize(fp, remaining_pairs);
232 }
233 
234 // Reads from the given file. Returns false in case of error.
235 // If swap is true, assumes a big/little-endian swap is needed.
236 bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) {
237  if (!IndexMap::DeSerialize(swap, fp)) {
238  return false;
239  }
240  std::vector<int32_t> remaining_pairs;
241  if (!tesseract::DeSerialize(swap, fp, remaining_pairs)) {
242  return false;
243  }
244  sparse_map_.clear();
245  sparse_map_.resize(sparse_size_, -1);
246  for (unsigned i = 0; i < compact_map_.size(); ++i) {
247  sparse_map_[compact_map_[i]] = i;
248  }
249  for (size_t i = 0; i < remaining_pairs.size(); ++i) {
250  int sparse_index = remaining_pairs[i++];
251  sparse_map_[sparse_index] = remaining_pairs[i];
252  }
253  return true;
254 }
255 
256 // Bulk calls to SparseToCompact.
257 // Maps the given array of sparse indices to an array of compact indices.
258 // Assumes the input is sorted. The output indices are sorted and uniqued.
259 // Return value is the number of "missed" features, being features that
260 // don't map to the compact feature space.
261 int IndexMapBiDi::MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const {
262  compact->clear();
263  int num_features = sparse.size();
264  int missed_features = 0;
265  int prev_good_feature = -1;
266  for (int f = 0; f < num_features; ++f) {
267  int feature = sparse_map_[sparse[f]];
268  if (feature >= 0) {
269  if (feature != prev_good_feature) {
270  compact->push_back(feature);
271  prev_good_feature = feature;
272  }
273  } else {
274  ++missed_features;
275  }
276  }
277  return missed_features;
278 }
279 
280 } // namespace tesseract.
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:189
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
Definition: helpers.h:220
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:251
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)
void CopyFrom(const IndexMap &src)
virtual int SparseToCompact(int sparse_index) const
std::vector< int32_t > compact_map_
Definition: indexmapbidi.h:82
void Init(int size, bool all_mapped)
int MapFeatures(const std::vector< int > &sparse, std::vector< int > *compact) const
bool Merge(int compact_index1, int compact_index2)
void CopyFrom(const IndexMapBiDi &src)
void SetMap(int sparse_index, bool mapped)
int SparseSize() const override
Definition: indexmapbidi.h:144
void InitAndSetupRange(int sparse_size, int start, int end)
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)