tesseract  5.0.0
networkio.h
Go to the documentation of this file.
1 // File: networkio.h
3 // Description: Network input/output data, allowing float/int implementations.
4 // Author: Ray Smith
5 // Created: Tue Jun 17 08:43:11 PST 2014
6 //
7 // (C) Copyright 2014, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #ifndef TESSERACT_LSTM_NETWORKIO_H_
20 #define TESSERACT_LSTM_NETWORKIO_H_
21 
22 #include "helpers.h"
23 #include "image.h"
24 #include "static_shape.h"
25 #include "stridemap.h"
26 #include "weightmatrix.h"
27 
28 #include <cmath>
29 #include <cstdio>
30 #include <vector>
31 
32 struct Pix;
33 
34 namespace tesseract {
35 
36 // Class to contain all the input/output of a network, allowing for fixed or
37 // variable-strided 2d to 1d mapping, and float or int8_t values. Provides
38 // enough calculating functions to hide the detail of the implementation.
40 public:
41  NetworkIO() : int_mode_(false) {} // Starts in float mode (int_mode_ == false).
42  // Resizes the array (and stride), avoiding realloc if possible, to the given
43  // size from various size specs:
44  // Resizes using the int mode and stride map of src, with num_features features.
45  void Resize(const NetworkIO &src, int num_features) {
46  ResizeToMap(src.int_mode(), src.stride_map(), num_features);
47  }
48  // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
49  void Resize2d(bool int_mode, int width, int num_features);
50  // Resizes to the stride map of src and the given number of features, always
51  // using the float representation, whatever the int mode of src is.
52  void ResizeFloat(const NetworkIO &src, int num_features) {
53  ResizeToMap(/*int_mode=*/false, src.stride_map(), num_features);
54  }
55  // Resizes to a specific stride_map.
56  void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features);
57  // Shrinks image size by x_scale,y_scale, and use given number of features.
58  void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features);
59  // Resizes to just 1 x-coord, whatever the input.
60  void ResizeXTo1(const NetworkIO &src, int num_features);
61  // Initializes the whole array to zero.
62  void Zero();
63  // Initializes to zero all elements of the array that do not correspond to
64  // valid image positions. (If a batch of different-sized images are packed
65  // together, then there will be padding pixels.)
66  void ZeroInvalidElements();
67  // Sets up the array from the given image, using the currently set int_mode_.
68  // If the image width doesn't match the shape, the image is truncated or
69  // padded with noise to match.
70  void FromPix(const StaticShape &shape, const Image pix, TRand *randomizer);
71  // Sets up the array from the given set of images, using the currently set
72  // int_mode_. If the image width doesn't match the shape, the images are
73  // truncated or padded with noise to match.
74  void FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,
75  TRand *randomizer);
76  // Copies the given pix to *this at the given batch index, stretching and
77  // clipping the pixel values so that [black, black + 2*contrast] maps to the
78  // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
79  // This is a 2-d operation in the sense that the output depth is the number
80  // of input channels, the height is the height of the image, and the width
81  // is the width of the image, or truncated/padded with noise if the width
82  // is a fixed size.
83  void Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer);
84  // Copies the given pix to *this at the given batch index, as Copy2DImage
85  // above, except that the output depth is the height of the input image, the
86  // output height is 1, and the output width as for Copy2DImage.
87  // The image is thus treated as a 1-d set of vertical pixel strips.
88  void Copy1DGreyImage(int batch, Image pix, float black, float contrast, TRand *randomizer);
89  // Helper stores the pixel value in i_ or f_ according to int_mode_.
90  // t: is the index from the StrideMap corresponding to the current
91  // [batch,y,x] position
92  // f: is the index into the depth/channel
93  // pixel: the value of the pixel from the image (in one channel)
94  // black: the pixel value to map to the lowest of the range of *this
95  // contrast: the range of pixel values to stretch to half the range of *this.
96  void SetPixel(int t, int f, int pixel, float black, float contrast);
97  // Converts the array to a Pix. Must be pixDestroyed after use.
98  Image ToPix() const;
99  // Prints the first and last num timesteps of the array for each feature.
100  void Print(int num) const;
101 
102  // Returns the number of timesteps: dim1 of the array selected by int_mode_.
103  int Width() const {
104  return !int_mode_ ? f_.dim1() : i_.dim1();
105  }
106  // Returns the number of features: dim2 of the array selected by int_mode_.
107  int NumFeatures() const {
108  return !int_mode_ ? f_.dim2() : i_.dim2();
109  }
110  // Mutable accessor to timestep t of the float matrix. Asserts float mode.
111  float *f(int t) {
112  ASSERT_HOST(!int_mode_);
113  return f_[t];
114  }
115  const float *f(int t) const { // Read-only access to timestep t of the float matrix.
116  ASSERT_HOST(!int_mode_); // Only valid in float mode.
117  return f_[t];
118  }
119  const int8_t *i(int t) const { // Read-only access to timestep t of the int8_t matrix.
120  ASSERT_HOST(int_mode_); // Only valid in int mode.
121  return i_[t];
122  }
123  bool int_mode() const { // True when i_ (int8_t) rather than f_ (float) is in use.
124  return int_mode_;
125  }
126  void set_int_mode(bool is_quantized) { // Sets the flag only; no data is touched here.
127  int_mode_ = is_quantized;
128  }
129  const StrideMap &stride_map() const { // Returns the stride map by const reference.
130  return stride_map_;
131  }
132  void set_stride_map(const StrideMap &map) { // Assigns map to stride_map_; arrays untouched.
133  stride_map_ = map;
134  }
136  return f_;
137  }
139  return &f_;
140  }
141 
142  // Copies a single time step from src.
143  void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t);
144  // Copies a part of single time step from src.
145  void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src,
146  int src_t, int src_offset);
147  // Zeroes every feature of timestep t (offset 0, NumFeatures() values).
148  void ZeroTimeStep(int t) {
149  ZeroTimeStepGeneral(t, /*offset=*/0, /*num_features=*/NumFeatures());
150  }
151  void ZeroTimeStepGeneral(int t, int offset, int num_features);
152  // Sets the given range to random values.
153  void Randomize(int t, int offset, int num_features, TRand *randomizer);
154 
155  // Helper returns the label and score of the best choice over a range.
156  int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating,
157  float *certainty) const;
158  // Helper returns the rating and certainty of the choice over a range in t.
159  void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating,
160  float *certainty) const;
161  // Returns the index (label) of the best value at timestep t and, if score is
162  // not null, sets *score to the log of that value. Passes -1 for both exclusions.
163  int BestLabel(int t, float *score) const {
164  return BestLabel(t, /*not_this=*/-1, /*not_that=*/-1, score);
165  }
166  // Returns the index (label) of the best value at the given timestep,
167  // excluding not_this and not_that, and if not null, sets the score to the
168  // log of the corresponding value.
169  int BestLabel(int t, int not_this, int not_that, float *score) const;
170  // Returns the best start position out of range (into which both start and end
171  // must fit) to obtain the highest cumulative score for the given labels.
172  int PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const;
173  // Returns the cumulative score of the given labels starting at start, and
174  // using one label per time-step.
175  TFloat ScoreOfLabels(const std::vector<int> &labels, int start) const;
176  // Helper function sets all the outputs for a single timestep, such that
177  // label has value ok_score, and the other labels share 1 - ok_score.
178  // Assumes float mode.
179  void SetActivations(int t, int label, float ok_score);
180  // Modifies the values, only if needed, so that the given label is
181  // the winner at the given time step t.
182  // Assumes float mode.
183  void EnsureBestLabel(int t, int label);
184  // Helper function converts prob to certainty taking the minimum into account.
185  static float ProbToCertainty(float prob);
186  // Returns true if there is any bad value that is suspiciously like a GT
187  // error. Assuming that *this is the difference(gradient) between target
188  // and forward output, returns true if there is a large negative value
189  // (correcting a very confident output) for which there is no corresponding
190  // positive value in an adjacent timestep for the same feature index. This
191  // allows the box-truthed samples to make fine adjustments to position while
192  // stopping other disagreements of confident output with ground truth.
193  bool AnySuspiciousTruth(float confidence_thr) const;
194 
195  // Reads a single timestep to floats in the range [-1, 1].
196  void ReadTimeStep(int t, TFloat *output) const;
197  // Adds a single timestep to floats.
198  void AddTimeStep(int t, TFloat *inout) const;
199  // Adds part of a single timestep to floats.
200  void AddTimeStepPart(int t, int offset, int num_features, float *inout) const;
201  // Writes a single timestep from floats in the range [-1, 1].
202  void WriteTimeStep(int t, const TFloat *input);
203  // Writes a single timestep from floats in the range [-1, 1] writing only
204  // num_features elements of input to (*this)[t], starting at offset.
205  void WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input);
206  // Maxpools a single time step from src.
207  void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line);
208  // Runs maxpool backward, using maxes to index timesteps in *this.
209  void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY<int> &maxes);
210  // Returns the min over time of the maxes over features of the outputs.
211  float MinOfMaxes() const;
212  // Returns the maximum value of the array in use, via f_.Max() or i_.Max().
213  float Max() const {
214  return !int_mode_ ? f_.Max() : i_.Max();
215  }
216  // Computes combined results for a combiner that chooses between an existing
217  // input and itself, with an additional output to indicate the choice.
218  void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output);
219  // Computes deltas for a combiner that chooses between 2 sets of inputs.
220  void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output);
221 
222  // Copies the array checking that the types match.
223  void CopyAll(const NetworkIO &src);
224  // Adds the array to a float array, with scaling to [-1, 1] if the src is int.
225  void AddAllToFloat(const NetworkIO &src);
226  // Subtracts the array from a float array. src must also be float.
227  void SubtractAllFromFloat(const NetworkIO &src);
228 
229  // Copies src to *this, with maxabs normalization to match scale.
230  void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale);
231  // Multiplies f_ (the float data) by factor in place; int_mode_ is not checked.
232  void ScaleFloatBy(float factor) {
233  f_ *= factor;
234  }
235  // Copies src to *this with independent reversal of the y dimension.
236  void CopyWithYReversal(const NetworkIO &src);
237  // Copies src to *this with independent reversal of the x dimension.
238  void CopyWithXReversal(const NetworkIO &src);
239  // Copies src to *this with independent transpose of the x and y dimensions.
240  void CopyWithXYTranspose(const NetworkIO &src);
241  // Copies src to *this, at the given feature_offset, returning the total
242  // feature offset after the copy. Multiple calls will stack outputs from
243  // multiple sources in feature space.
244  int CopyPacking(const NetworkIO &src, int feature_offset);
245  // Opposite of CopyPacking, fills *this with a part of src, starting at
246  // feature_offset, and picking num_features. Resizes *this to match.
247  void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features);
248  // Transposes the float part of *this into dest.
249  void Transpose(TransposedArray *dest) const;
250 
251  // Clips the content of a single time-step to +/-range.
252  void ClipVector(int t, float range);
253 
254  // Applies Func to timestep t of *this (u) and multiplies the result by
255  // timestep t of v_io (v) component-wise, putting the product in *product.
256  // Both *this and v_io must be in float mode (asserted). The outputs are TFloat.
257  template <class Func>
258  void FuncMultiply(const NetworkIO &v_io, int t, TFloat *product) {
259  Func f;
260  ASSERT_HOST(!int_mode_);
261  ASSERT_HOST(!v_io.int_mode_);
262  int dim = f_.dim2(); // Float mode is asserted above, so only f_ is read.
270  const float *u = f_[t];
271  const float *v = v_io.f_[t];
272  for (int i = 0; i < dim; ++i) {
273  product[i] = f(u[i]) * v[i];
274  }
276  }
277  // Sets product[k] = Func(u[k]) * v[k] * w[k] for each feature k, where u is
278  // timestep u_t of *this and v is timestep v_t of v_io. product is overwritten.
279  // Both NetworkIOs must be in float mode (asserted).
280  template <class Func>
281  void FuncMultiply3(int u_t, const NetworkIO &v_io, int v_t, const TFloat *w,
282  TFloat *product) const {
283  ASSERT_HOST(!int_mode_);
284  ASSERT_HOST(!v_io.int_mode_);
285  Func func;
286  const int num_values = f_.dim2();
287  const float *lhs = f_[u_t];
288  const float *rhs = v_io.f_[v_t];
289  for (int k = 0; k < num_values; ++k) {
290  product[k] = func(lhs[k]) * rhs[k] * w[k];
291  }
292  }
293  // Adds Func(u[k]) * v[k] * w[k] to product[k] for each feature k, where u and
294  // v are timestep t of *this and of v_io respectively (the same t for both).
295  // Both NetworkIOs must be in float mode (asserted).
296  template <class Func>
297  void FuncMultiply3Add(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const {
298  ASSERT_HOST(!int_mode_);
299  ASSERT_HOST(!v_io.int_mode_);
300  Func func;
301  const int num_values = f_.dim2();
302  const float *lhs = f_[t];
303  const float *rhs = v_io.f_[t];
304  for (int k = 0; k < num_values; ++k) {
305  product[k] += func(lhs[k]) * rhs[k] * w[k];
306  }
307  }
308  // Sets product[k] = Func1(u[k]) * Func2(v[k]) * w[k] for each feature k, where
309  // u and v are timestep t of *this and of v_io, and w is a plain array.
310  // Both NetworkIOs must be in float mode (asserted).
311  template <class Func1, class Func2>
312  void Func2Multiply3(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const {
313  ASSERT_HOST(!int_mode_);
314  ASSERT_HOST(!v_io.int_mode_);
315  Func1 func_u;
316  Func2 func_v;
317  const int num_values = f_.dim2();
318  const float *lhs = f_[t];
319  const float *rhs = v_io.f_[t];
320  for (int k = 0; k < num_values; ++k) {
321  product[k] = func_u(lhs[k]) * func_v(rhs[k]) * w[k];
322  }
323  }
324 
325 private:
326  // Returns the padding required for the given number of features in order
327  // for the SIMD operations to be safe.
328  static int GetPadding(int num_features);
329 
330  // Choice of float vs 8 bit int for data.
333  // Which of f_ and i_ are we actually using.
334  bool int_mode_;
335  // Stride for 2d input data.
336  StrideMap stride_map_;
337 };
338 
339 } // namespace tesseract.
340 
341 #endif // TESSERACT_LSTM_NETWORKIO_H_
#define ASSERT_HOST(x)
Definition: errcode.h:59
double TFloat
Definition: tesstypes.h:39
void ClipVector(int n, T lower, T upper, T *vec)
Definition: functions.h:251
void Resize(const NetworkIO &src, int num_features)
Definition: networkio.h:45
void FuncMultiply(const NetworkIO &v_io, int t, TFloat *product)
Definition: networkio.h:258
const StrideMap & stride_map() const
Definition: networkio.h:129
bool int_mode() const
Definition: networkio.h:123
void ResizeFloat(const NetworkIO &src, int num_features)
Definition: networkio.h:52
float Max() const
Definition: networkio.h:213
GENERIC_2D_ARRAY< float > * mutable_float_array()
Definition: networkio.h:138
const int8_t * i(int t) const
Definition: networkio.h:119
void FuncMultiply3Add(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const
Definition: networkio.h:297
void set_stride_map(const StrideMap &map)
Definition: networkio.h:132
void ScaleFloatBy(float factor)
Definition: networkio.h:232
float * f(int t)
Definition: networkio.h:111
const GENERIC_2D_ARRAY< float > & float_array() const
Definition: networkio.h:135
int Width() const
Definition: networkio.h:103
void FuncMultiply3(int u_t, const NetworkIO &v_io, int v_t, const TFloat *w, TFloat *product) const
Definition: networkio.h:281
void ZeroTimeStep(int t)
Definition: networkio.h:148
const float * f(int t) const
Definition: networkio.h:115
void Func2Multiply3(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const
Definition: networkio.h:312
void set_int_mode(bool is_quantized)
Definition: networkio.h:126
int NumFeatures() const
Definition: networkio.h:107
int BestLabel(int t, float *score) const
Definition: networkio.h:163
#define TESS_API
Definition: export.h:34