Convert palette from double to float.
This reduces the time spent in vp10_k_means() during encoding by about 20%.
Change-Id: I5cf7605cde869a269776197bace70de353b07d83
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index d6b1563..2e8af98 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -64,7 +64,7 @@
typedef struct {
uint8_t best_palette_color_map[MAX_SB_SQUARE];
- double kmeans_data_buf[2 * MAX_SB_SQUARE];
+ float kmeans_data_buf[2 * MAX_SB_SQUARE];
uint8_t kmeans_indices_buf[MAX_SB_SQUARE];
uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE];
} PALETTE_BUFFER;
diff --git a/vp10/encoder/palette.c b/vp10/encoder/palette.c
index d413935..cbc3582 100644
--- a/vp10/encoder/palette.c
+++ b/vp10/encoder/palette.c
@@ -11,20 +11,21 @@
#include <math.h>
#include "vp10/encoder/palette.h"
-static double calc_dist(const double *p1, const double *p2, int dim) {
- double dist = 0;
+static float calc_dist(const float *p1, const float *p2, int dim) {
+ float dist = 0;
int i = 0;
for (i = 0; i < dim; ++i) {
- dist = dist + (p1[i] - round(p2[i])) * (p1[i] - round(p2[i]));
+ float diff = p1[i] - roundf(p2[i]);
+ dist += diff * diff;
}
return dist;
}
-void vp10_calc_indices(const double *data, const double *centroids,
+void vp10_calc_indices(const float *data, const float *centroids,
uint8_t *indices, int n, int k, int dim) {
int i, j;
- double min_dist, this_dist;
+ float min_dist, this_dist;
for (i = 0; i < n; ++i) {
min_dist = calc_dist(data + i * dim, centroids, dim);
@@ -45,7 +46,7 @@
return *state / 65536 % 32768;
}
-static void calc_centroids(const double *data, double *centroids,
+static void calc_centroids(const float *data, float *centroids,
const uint8_t *indices, int n, int k, int dim) {
int i, j, index;
int count[PALETTE_MAX_SIZE];
@@ -70,16 +71,16 @@
memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim,
sizeof(centroids[0]) * dim);
} else {
- const double norm = 1.0 / count[i];
+ const float norm = 1.0f / count[i];
for (j = 0; j < dim; ++j)
centroids[i * dim + j] *= norm;
}
}
}
-static double calc_total_dist(const double *data, const double *centroids,
+static float calc_total_dist(const float *data, const float *centroids,
const uint8_t *indices, int n, int k, int dim) {
- double dist = 0;
+ float dist = 0;
int i;
(void) k;
@@ -89,11 +90,11 @@
return dist;
}
-int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
+int vp10_k_means(const float *data, float *centroids, uint8_t *indices,
uint8_t *pre_indices, int n, int k, int dim, int max_itr) {
int i = 0;
- double pre_dist, this_dist;
- double pre_centroids[2 * PALETTE_MAX_SIZE];
+ float pre_dist, this_dist;
+ float pre_centroids[2 * PALETTE_MAX_SIZE];
vp10_calc_indices(data, centroids, indices, n, k, dim);
pre_dist = calc_total_dist(data, centroids, indices, n, k, dim);
@@ -121,9 +122,9 @@
return i;
}
-void vp10_insertion_sort(double *data, int n) {
+void vp10_insertion_sort(float *data, int n) {
int i, j, k;
- double val;
+ float val;
if (n <= 1)
return;
diff --git a/vp10/encoder/palette.h b/vp10/encoder/palette.h
index 124cf74..40d9ef9 100644
--- a/vp10/encoder/palette.h
+++ b/vp10/encoder/palette.h
@@ -17,10 +17,10 @@
extern "C" {
#endif
-void vp10_insertion_sort(double *data, int n);
-void vp10_calc_indices(const double *data, const double *centroids,
+void vp10_insertion_sort(float *data, int n);
+void vp10_calc_indices(const float *data, const float *centroids,
uint8_t *indices, int n, int k, int dim);
-int vp10_k_means(const double *data, double *centroids, uint8_t *indices,
+int vp10_k_means(const float *data, float *centroids, uint8_t *indices,
uint8_t *pre_indices, int n, int k, int dim, int max_itr);
int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols);
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 918ad3e..87836cb 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1774,12 +1774,12 @@
const int max_itr = 50;
int color_ctx, color_idx = 0;
int color_order[PALETTE_MAX_SIZE];
- double *const data = x->palette_buffer->kmeans_data_buf;
+ float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
- double centroids[PALETTE_MAX_SIZE];
+ float centroids[PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[0].color_index_map;
- double lb, ub, val;
+ float lb, ub, val;
MB_MODE_INFO *const mbmi = &mic->mbmi;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -1834,7 +1834,7 @@
n, 1, max_itr);
vp10_insertion_sort(centroids, n);
for (i = 0; i < n; ++i)
- centroids[i] = round(centroids[i]);
+ centroids[i] = roundf(centroids[i]);
// remove duplicates
i = 1;
k = n;
@@ -1854,12 +1854,12 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->common.use_highbitdepth)
for (i = 0; i < k; ++i)
- pmi->palette_colors[i] = clip_pixel_highbd((int)round(centroids[i]),
+ pmi->palette_colors[i] = clip_pixel_highbd((int)lroundf(centroids[i]),
cpi->common.bit_depth);
else
#endif // CONFIG_VP9_HIGHBITDEPTH
for (i = 0; i < k; ++i)
- pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
+ pmi->palette_colors[i] = clip_pixel((int)lroundf(centroids[i]));
pmi->palette_size[0] = k;
vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
@@ -3583,12 +3583,12 @@
int color_ctx, color_idx = 0;
int color_order[PALETTE_MAX_SIZE];
int64_t this_sse;
- double lb_u, ub_u, val_u;
- double lb_v, ub_v, val_v;
- double *const data = x->palette_buffer->kmeans_data_buf;
+ float lb_u, ub_u, val_u;
+ float lb_v, ub_v, val_v;
+ float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
- double centroids[2 * PALETTE_MAX_SIZE];
+ float centroids[2 * PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[1].color_index_map;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
@@ -3657,12 +3657,12 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->common.use_highbitdepth)
pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
- clip_pixel_highbd(round(centroids[j * 2 + i - 1]),
+ clip_pixel_highbd(roundf(centroids[j * 2 + i - 1]),
cpi->common.bit_depth);
else
#endif // CONFIG_VP9_HIGHBITDEPTH
pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
- clip_pixel(round(centroids[j * 2 + i - 1]));
+ clip_pixel(roundf(centroids[j * 2 + i - 1]));
}
}
for (r = 0; r < rows; ++r)
@@ -7475,9 +7475,9 @@
int src_stride = x->plane[1].src.stride;
const uint8_t *const src_u = x->plane[1].src.buf;
const uint8_t *const src_v = x->plane[2].src.buf;
- double *const data = x->palette_buffer->kmeans_data_buf;
+ float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
- double centroids[2 * PALETTE_MAX_SIZE];
+ float centroids[2 * PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[1].color_index_map;
int r, c;
#if CONFIG_VP9_HIGHBITDEPTH