43 STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
44 if (max_bucket_value_plus_1 <= min_bucket_value) {
46 max_bucket_value_plus_1 = 1;
48 rangemin_ = min_bucket_value;
49 rangemax_ = max_bucket_value_plus_1;
50 buckets_ =
new int32_t[rangemax_ - rangemin_];
60 if (max_bucket_value_plus_1 <= min_bucket_value) {
63 if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
65 buckets_ =
new int32_t[max_bucket_value_plus_1 - min_bucket_value];
67 rangemin_ = min_bucket_value;
68 rangemax_ = max_bucket_value_plus_1;
80 if (buckets_ !=
nullptr) {
81 memset(buckets_, 0, (rangemax_ - rangemin_) *
sizeof(buckets_[0]));
100 if (buckets_ ==
nullptr) {
103 value =
ClipToRange(value, rangemin_, rangemax_ - 1);
104 buckets_[value - rangemin_] += count;
105 total_count_ += count;
114 if (buckets_ ==
nullptr) {
117 int32_t max = buckets_[0];
118 int32_t maxindex = 0;
119 for (
int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
120 if (buckets_[index] > max) {
121 max = buckets_[index];
125 return maxindex + rangemin_;
134 if (buckets_ ==
nullptr || total_count_ <= 0) {
135 return static_cast<double>(rangemin_);
138 for (
int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
139 sum +=
static_cast<int64_t
>(index) * buckets_[index];
141 return static_cast<double>(sum) / total_count_ + rangemin_;
150 if (buckets_ ==
nullptr || total_count_ <= 0) {
155 for (
int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
156 sum +=
static_cast<int64_t
>(index) * buckets_[index];
157 sqsum +=
static_cast<double>(index) * index * buckets_[index];
159 double variance =
static_cast<double>(sum) / total_count_;
160 variance = sqsum / total_count_ - variance * variance;
161 if (variance > 0.0) {
162 return sqrt(variance);
174 if (buckets_ ==
nullptr || total_count_ == 0) {
175 return static_cast<double>(rangemin_);
184 double target = frac * total_count_;
185 target =
ClipToRange(target, 1.0,
static_cast<double>(total_count_));
189 for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
194 return rangemin_ + index -
static_cast<double>(sum - target) / buckets_[index - 1];
196 return static_cast<double>(rangemin_);
206 if (buckets_ ==
nullptr || total_count_ == 0) {
210 for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
213 return rangemin_ + min;
223 if (buckets_ ==
nullptr || total_count_ == 0) {
227 for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) {
230 return rangemin_ + max;
243 if (buckets_ ==
nullptr) {
244 return static_cast<double>(rangemin_);
247 int median_pile =
static_cast<int>(floor(
median));
248 if ((total_count_ > 1) && (
pile_count(median_pile) == 0)) {
252 for (min_pile = median_pile;
pile_count(min_pile) == 0; min_pile--) {
256 for (max_pile = median_pile;
pile_count(max_pile) == 0; max_pile++) {
259 median = (min_pile + max_pile) / 2.0;
270 if (buckets_ ==
nullptr) {
273 x =
ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
274 if (buckets_[x] == 0) {
278 for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index) {
281 if (index >= 0 && buckets_[index] < buckets_[x]) {
284 for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
287 if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
303 if (buckets_ ==
nullptr || factor < 2) {
306 STATS result(rangemin_, rangemax_);
307 int entrycount = rangemax_ - rangemin_;
308 for (
int entry = 0; entry < entrycount; entry++) {
310 int count = buckets_[entry] * factor;
311 for (
int offset = 1; offset < factor; offset++) {
312 if (entry - offset >= 0) {
313 count += buckets_[entry - offset] * (factor - offset);
315 if (entry + offset < entrycount) {
316 count += buckets_[entry + offset] * (factor - offset);
319 result.
add(entry + rangemin_, count);
321 total_count_ = result.total_count_;
322 memcpy(buckets_, result.buckets_, entrycount *
sizeof(buckets_[0]));
338 int32_t max_clusters,
344 int32_t best_cluster;
345 int32_t new_centre = 0;
350 int32_t cluster_count;
352 if (buckets_ ==
nullptr || max_clusters < 1) {
355 centres =
new float[max_clusters + 1];
356 for (cluster_count = 1;
357 cluster_count <= max_clusters && clusters[cluster_count].buckets_ !=
nullptr &&
358 clusters[cluster_count].total_count_ > 0;
360 centres[cluster_count] =
static_cast<float>(clusters[cluster_count].
ile(0.5));
361 new_centre = clusters[cluster_count].
mode();
362 for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
367 clusters[cluster_count].
add(entry, count);
368 clusters[0].
add(entry, count);
371 for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
376 clusters[cluster_count].
add(entry, count);
377 clusters[0].
add(entry, count);
383 if (cluster_count == 0) {
384 clusters[0].
set_range(rangemin_, rangemax_);
389 for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
390 count = buckets_[entry] - clusters[0].buckets_[entry];
393 min_dist =
static_cast<float>(INT32_MAX);
396 dist = entry + rangemin_ - centres[
cluster];
401 if (dist < min_dist) {
407 && (best_cluster == 0 || entry + rangemin_ > centres[best_cluster] * multiple ||
408 entry + rangemin_ < centres[best_cluster] / multiple)) {
409 if (count > new_mode) {
411 new_centre = entry + rangemin_;
417 if (new_mode > 0 && cluster_count < max_clusters) {
420 if (!clusters[cluster_count].
set_range(rangemin_, rangemax_)) {
424 centres[cluster_count] =
static_cast<float>(new_centre);
425 clusters[cluster_count].
add(new_centre, new_mode);
426 clusters[0].
add(new_centre, new_mode);
427 for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
432 clusters[cluster_count].
add(entry, count);
433 clusters[0].
add(entry, count);
436 for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
441 clusters[cluster_count].
add(entry, count);
442 clusters[0].
add(entry, count);
445 centres[cluster_count] =
static_cast<float>(clusters[cluster_count].
ile(0.5));
447 }
while (new_cluster && cluster_count < max_clusters);
449 return cluster_count;
458 static bool GatherPeak(
int index,
const int *src_buckets,
int *used_buckets,
int *prev_count,
459 int *total_count,
double *total_value) {
460 int pile_count = src_buckets[index] - used_buckets[index];
461 if (pile_count <= *prev_count && pile_count > 0) {
463 *total_count += pile_count;
464 *total_value += index * pile_count;
466 used_buckets[index] = src_buckets[index];
467 *prev_count = pile_count;
482 if (max_modes <= 0) {
485 int src_count = rangemax_ - rangemin_;
487 STATS used(rangemin_, rangemax_);
497 for (
int src_index = 0; src_index < src_count; src_index++) {
498 int pile_count = buckets_[src_index] - used.buckets_[src_index];
501 max_index = src_index;
506 used.buckets_[max_index] = max_count;
508 double total_value = max_index * max_count;
509 int total_count = max_count;
510 int prev_pile = max_count;
511 for (
int offset = 1; max_index + offset < src_count; ++offset) {
512 if (!GatherPeak(max_index + offset, buckets_, used.buckets_, &prev_pile, &total_count,
517 prev_pile = buckets_[max_index];
518 for (
int offset = 1; max_index - offset >= 0; ++offset) {
519 if (!GatherPeak(max_index - offset, buckets_, used.buckets_, &prev_pile, &total_count,
524 if (total_count > least_count || modes.size() <
static_cast<size_t>(max_modes)) {
526 if (modes.size() ==
static_cast<size_t>(max_modes)) {
527 modes.resize(max_modes - 1);
529 size_t target_index = 0;
531 while (target_index < modes.size() && modes[target_index].data() >= total_count) {
534 auto peak_mean =
static_cast<float>(total_value / total_count + rangemin_);
536 least_count = modes.back().data();
539 }
while (max_count > 0);
549 if (buckets_ ==
nullptr) {
556 for (
int index = min; index <= max; index++) {
557 if (buckets_[index] != 0) {
558 tprintf(
"%4d:%-3d ", rangemin_ + index, buckets_[index]);
559 if (++num_printed % 8 == 0) {
574 if (buckets_ ==
nullptr) {
579 tprintf(
"Total count=%d\n", total_count_);
580 tprintf(
"Min=%.2f Really=%d\n",
ile(0.0), min);
584 tprintf(
"Max=%.2f Really=%d\n",
ile(1.0), max);
585 tprintf(
"Range=%d\n", max + 1 - min);
596 #ifndef GRAPHICS_DISABLED
603 if (buckets_ ==
nullptr) {
608 for (
int index = 0; index < rangemax_ - rangemin_; index++) {
609 window->
Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1),
610 yorigin + yscale * buckets_[index]);
621 #ifndef GRAPHICS_DISABLED
628 if (buckets_ ==
nullptr) {
632 window->
SetCursor(xorigin, yorigin + yscale * buckets_[0]);
633 for (
int index = 0; index < rangemax_ - rangemin_; index++) {
634 window->
DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]);
void tprintf(const char *format,...)
int IntCastRounded(double x)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void print_summary() const
void add(int32_t value, int32_t count)
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
int32_t pile_count(int32_t value) const
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
int32_t min_bucket() const
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
bool local_min(int32_t x) const
void smooth(int32_t factor)
int32_t max_bucket() const
int top_n_modes(int max_modes, std::vector< KDPairInc< float, int >> &modes) const
double ile(double frac) const
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
void Rectangle(int x1, int y1, int x2, int y2)
void SetCursor(int x, int y)
void DrawTo(int x, int y)