Convert palette k-means code from double to float. About 20% less time is spent in vp10_k_means() during encoding. Change-Id: I5cf7605cde869a269776197bace70de353b07d83
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h index d6b1563..2e8af98 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h
@@ -64,7 +64,7 @@ typedef struct { uint8_t best_palette_color_map[MAX_SB_SQUARE]; - double kmeans_data_buf[2 * MAX_SB_SQUARE]; + float kmeans_data_buf[2 * MAX_SB_SQUARE]; uint8_t kmeans_indices_buf[MAX_SB_SQUARE]; uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE]; } PALETTE_BUFFER;
diff --git a/vp10/encoder/palette.c b/vp10/encoder/palette.c index d413935..cbc3582 100644 --- a/vp10/encoder/palette.c +++ b/vp10/encoder/palette.c
@@ -11,20 +11,21 @@ #include <math.h> #include "vp10/encoder/palette.h" -static double calc_dist(const double *p1, const double *p2, int dim) { - double dist = 0; +static float calc_dist(const float *p1, const float *p2, int dim) { + float dist = 0; int i = 0; for (i = 0; i < dim; ++i) { - dist = dist + (p1[i] - round(p2[i])) * (p1[i] - round(p2[i])); + float diff = p1[i] - roundf(p2[i]); + dist += diff * diff; } return dist; } -void vp10_calc_indices(const double *data, const double *centroids, +void vp10_calc_indices(const float *data, const float *centroids, uint8_t *indices, int n, int k, int dim) { int i, j; - double min_dist, this_dist; + float min_dist, this_dist; for (i = 0; i < n; ++i) { min_dist = calc_dist(data + i * dim, centroids, dim); @@ -45,7 +46,7 @@ return *state / 65536 % 32768; } -static void calc_centroids(const double *data, double *centroids, +static void calc_centroids(const float *data, float *centroids, const uint8_t *indices, int n, int k, int dim) { int i, j, index; int count[PALETTE_MAX_SIZE]; @@ -70,16 +71,16 @@ memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim, sizeof(centroids[0]) * dim); } else { - const double norm = 1.0 / count[i]; + const float norm = 1.0f / count[i]; for (j = 0; j < dim; ++j) centroids[i * dim + j] *= norm; } } } -static double calc_total_dist(const double *data, const double *centroids, +static float calc_total_dist(const float *data, const float *centroids, const uint8_t *indices, int n, int k, int dim) { - double dist = 0; + float dist = 0; int i; (void) k; @@ -89,11 +90,11 @@ return dist; } -int vp10_k_means(const double *data, double *centroids, uint8_t *indices, +int vp10_k_means(const float *data, float *centroids, uint8_t *indices, uint8_t *pre_indices, int n, int k, int dim, int max_itr) { int i = 0; - double pre_dist, this_dist; - double pre_centroids[2 * PALETTE_MAX_SIZE]; + float pre_dist, this_dist; + float pre_centroids[2 * PALETTE_MAX_SIZE]; vp10_calc_indices(data, centroids, 
indices, n, k, dim); pre_dist = calc_total_dist(data, centroids, indices, n, k, dim); @@ -121,9 +122,9 @@ return i; } -void vp10_insertion_sort(double *data, int n) { +void vp10_insertion_sort(float *data, int n) { int i, j, k; - double val; + float val; if (n <= 1) return;
diff --git a/vp10/encoder/palette.h b/vp10/encoder/palette.h index 124cf74..40d9ef9 100644 --- a/vp10/encoder/palette.h +++ b/vp10/encoder/palette.h
@@ -17,10 +17,10 @@ extern "C" { #endif -void vp10_insertion_sort(double *data, int n); -void vp10_calc_indices(const double *data, const double *centroids, +void vp10_insertion_sort(float *data, int n); +void vp10_calc_indices(const float *data, const float *centroids, uint8_t *indices, int n, int k, int dim); -int vp10_k_means(const double *data, double *centroids, uint8_t *indices, +int vp10_k_means(const float *data, float *centroids, uint8_t *indices, uint8_t *pre_indices, int n, int k, int dim, int max_itr); int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols); #if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 918ad3e..87836cb 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c
@@ -1774,12 +1774,12 @@ const int max_itr = 50; int color_ctx, color_idx = 0; int color_order[PALETTE_MAX_SIZE]; - double *const data = x->palette_buffer->kmeans_data_buf; + float *const data = x->palette_buffer->kmeans_data_buf; uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf; - double centroids[PALETTE_MAX_SIZE]; + float centroids[PALETTE_MAX_SIZE]; uint8_t *const color_map = xd->plane[0].color_index_map; - double lb, ub, val; + float lb, ub, val; MB_MODE_INFO *const mbmi = &mic->mbmi; PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; #if CONFIG_VP9_HIGHBITDEPTH @@ -1834,7 +1834,7 @@ n, 1, max_itr); vp10_insertion_sort(centroids, n); for (i = 0; i < n; ++i) - centroids[i] = round(centroids[i]); + centroids[i] = roundf(centroids[i]); // remove duplicates i = 1; k = n; @@ -1854,12 +1854,12 @@ #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) for (i = 0; i < k; ++i) - pmi->palette_colors[i] = clip_pixel_highbd((int)round(centroids[i]), + pmi->palette_colors[i] = clip_pixel_highbd((int)lroundf(centroids[i]), cpi->common.bit_depth); else #endif // CONFIG_VP9_HIGHBITDEPTH for (i = 0; i < k; ++i) - pmi->palette_colors[i] = clip_pixel((int)round(centroids[i])); + pmi->palette_colors[i] = clip_pixel((int)lroundf(centroids[i])); pmi->palette_size[0] = k; vp10_calc_indices(data, centroids, indices, rows * cols, k, 1); @@ -3583,12 +3583,12 @@ int color_ctx, color_idx = 0; int color_order[PALETTE_MAX_SIZE]; int64_t this_sse; - double lb_u, ub_u, val_u; - double lb_v, ub_v, val_v; - double *const data = x->palette_buffer->kmeans_data_buf; + float lb_u, ub_u, val_u; + float lb_v, ub_v, val_v; + float *const data = x->palette_buffer->kmeans_data_buf; uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf; - double centroids[2 * PALETTE_MAX_SIZE]; + float centroids[2 * PALETTE_MAX_SIZE]; uint8_t 
*const color_map = xd->plane[1].color_index_map; PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; @@ -3657,12 +3657,12 @@ #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = - clip_pixel_highbd(round(centroids[j * 2 + i - 1]), + clip_pixel_highbd(roundf(centroids[j * 2 + i - 1]), cpi->common.bit_depth); else #endif // CONFIG_VP9_HIGHBITDEPTH pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = - clip_pixel(round(centroids[j * 2 + i - 1])); + clip_pixel(roundf(centroids[j * 2 + i - 1])); } } for (r = 0; r < rows; ++r) @@ -7475,9 +7475,9 @@ int src_stride = x->plane[1].src.stride; const uint8_t *const src_u = x->plane[1].src.buf; const uint8_t *const src_v = x->plane[2].src.buf; - double *const data = x->palette_buffer->kmeans_data_buf; + float *const data = x->palette_buffer->kmeans_data_buf; uint8_t *const indices = x->palette_buffer->kmeans_indices_buf; - double centroids[2 * PALETTE_MAX_SIZE]; + float centroids[2 * PALETTE_MAX_SIZE]; uint8_t *const color_map = xd->plane[1].color_index_map; int r, c; #if CONFIG_VP9_HIGHBITDEPTH