tesseract  5.0.0
sorthelper.h
Go to the documentation of this file.
1 // File: sorthelper.h
3 // Description: Generic sort and maxfinding class.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_CCUTIL_SORTHELPER_H_
20 #define TESSERACT_CCUTIL_SORTHELPER_H_
21 
22 #include <cstdlib>
23 #include <vector>
24 
25 namespace tesseract {
26 
27 // Generic class to provide functions based on a <value,count> pair.
28 // T is the value type.
29 // The class keeps a count of each value and can return the most frequent
30 // value or a sorted array of the values with counts.
31 // Note that this class uses linear search for adding. It is better
32 // to use the STATS class to get the mode of a large number of values
33 // in a small space. SortHelper is better to get the mode of a small number
34 // of values from a large space.
35 // T must have a copy constructor.
36 template <typename T>
37 class SortHelper {
38 public:
39  // Simple pair class to hold the values and counts.
40  template <typename PairT>
41  struct SortPair {
42  PairT value;
43  int count;
44  };
45  // qsort function to sort by decreasing count.
46  static int SortPairsByCount(const void *v1, const void *v2) {
47  const auto *p1 = static_cast<const SortPair<T> *>(v1);
48  const auto *p2 = static_cast<const SortPair<T> *>(v2);
49  return p2->count - p1->count;
50  }
51  // qsort function to sort by decreasing value.
52  static int SortPairsByValue(const void *v1, const void *v2) {
53  const auto *p1 = static_cast<const SortPair<T> *>(v1);
54  const auto *p2 = static_cast<const SortPair<T> *>(v2);
55  if (p2->value - p1->value < 0) {
56  return -1;
57  }
58  if (p2->value - p1->value > 0) {
59  return 1;
60  }
61  return 0;
62  }
63 
64  // Constructor takes a hint of the array size, but it need not be accurate.
65  explicit SortHelper(int sizehint) {
66  counts_.reserve(sizehint);
67  }
68 
69  // Add a value that may be a duplicate of an existing value.
70  // Uses a linear search.
71  void Add(T value, int count) {
72  // Linear search for value.
73  for (auto &it : counts_) {
74  if (it.value == value) {
75  it.count += count;
76  return;
77  }
78  }
79  SortPair<T> new_pair = {value, count};
80  counts_.push_back(SortPair<T>(new_pair));
81  }
82 
83  // Returns the frequency of the most frequent value.
84  // If max_value is not nullptr, returns the most frequent value.
85  // If the array is empty, returns -INT32_MAX and max_value is unchanged.
86  int MaxCount(T *max_value) const {
87  int best_count = -INT32_MAX;
88  for (auto &it : counts_) {
89  if (it.count > best_count) {
90  best_count = it.count;
91  if (max_value != nullptr) {
92  *max_value = it.value;
93  }
94  }
95  }
96  return best_count;
97  }
98 
99  // Returns the data array sorted by decreasing frequency.
100  const std::vector<SortPair<T>> &SortByCount() {
101  counts_.sort(&SortPairsByCount);
102  return counts_;
103  }
104  // Returns the data array sorted by decreasing value.
105  const std::vector<SortPair<T>> &SortByValue() {
106  counts_.sort(&SortPairsByValue);
107  return counts_;
108  }
109 
110 private:
111  std::vector<SortPair<T>> counts_;
112 };
113 
114 } // namespace tesseract
115 
116 #endif // TESSERACT_CCUTIL_SORTHELPER_H_.
SortHelper(int sizehint)
Definition: sorthelper.h:65
int MaxCount(T *max_value) const
Definition: sorthelper.h:86
void Add(T value, int count)
Definition: sorthelper.h:71
const std::vector< SortPair< T > > & SortByCount()
Definition: sorthelper.h:100
const std::vector< SortPair< T > > & SortByValue()
Definition: sorthelper.h:105
static int SortPairsByCount(const void *v1, const void *v2)
Definition: sorthelper.h:46
static int SortPairsByValue(const void *v1, const void *v2)
Definition: sorthelper.h:52