Palette: replace floating point numbers with integers
Performance change is within noise range.
Change-Id: I9ca2ea6dfdc629513836637283ae3f964a1dd497
diff --git a/aom_ports/mem.h b/aom_ports/mem.h
index 500e397..937b79f 100644
--- a/aom_ports/mem.h
+++ b/aom_ports/mem.h
@@ -61,6 +61,8 @@
#define ALIGN_POWER_OF_TWO(value, n) \
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
+#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
+
#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
diff --git a/av1/common/av1_txfm.h b/av1/common/av1_txfm.h
index ec74a02..66e9753 100644
--- a/av1/common/av1_txfm.h
+++ b/av1/common/av1_txfm.h
@@ -26,7 +26,6 @@
#endif
#define MAX_TXFM_STAGE_NUM 12
-#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
static const int cos_bit_min = 10;
static const int cos_bit_max = 16;
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index f02b9e0..4489913 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -117,7 +117,7 @@
typedef struct {
uint8_t best_palette_color_map[MAX_PALETTE_SQUARE];
- float kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
+ int kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
} PALETTE_BUFFER;
typedef struct {
diff --git a/av1/encoder/k_means_template.h b/av1/encoder/k_means_template.h
index 3a433d9..9e526b8 100644
--- a/av1/encoder/k_means_template.h
+++ b/av1/encoder/k_means_template.h
@@ -23,25 +23,23 @@
#define RENAME_(x, y) AV1_K_MEANS_RENAME(x, y)
#define RENAME(x) RENAME_(x, AV1_K_MEANS_DIM)
-static float RENAME(calc_dist)(const float *p1, const float *p2) {
- float dist = 0;
- int i;
- for (i = 0; i < AV1_K_MEANS_DIM; ++i) {
- const float diff = p1[i] - p2[i];
+static int RENAME(calc_dist)(const int *p1, const int *p2) {
+ int dist = 0;
+ for (int i = 0; i < AV1_K_MEANS_DIM; ++i) {
+ const int diff = p1[i] - p2[i];
dist += diff * diff;
}
return dist;
}
-void RENAME(av1_calc_indices)(const float *data, const float *centroids,
+void RENAME(av1_calc_indices)(const int *data, const int *centroids,
uint8_t *indices, int n, int k) {
- int i, j;
- for (i = 0; i < n; ++i) {
- float min_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, centroids);
+ for (int i = 0; i < n; ++i) {
+ int min_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, centroids);
indices[i] = 0;
- for (j = 1; j < k; ++j) {
- const float this_dist = RENAME(calc_dist)(
- data + i * AV1_K_MEANS_DIM, centroids + j * AV1_K_MEANS_DIM);
+ for (int j = 1; j < k; ++j) {
+ const int this_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
+ centroids + j * AV1_K_MEANS_DIM);
if (this_dist < min_dist) {
min_dist = this_dist;
indices[i] = j;
@@ -50,19 +48,16 @@
}
}
-static void RENAME(calc_centroids)(const float *data, float *centroids,
+static void RENAME(calc_centroids)(const int *data, int *centroids,
const uint8_t *indices, int n, int k) {
- int i, j, index;
- int count[PALETTE_MAX_SIZE];
+ int i, j;
+ int count[PALETTE_MAX_SIZE] = { 0 };
unsigned int rand_state = (unsigned int)data[0];
-
assert(n <= 32768);
-
- memset(count, 0, sizeof(count[0]) * k);
memset(centroids, 0, sizeof(centroids[0]) * k * AV1_K_MEANS_DIM);
for (i = 0; i < n; ++i) {
- index = indices[i];
+ const int index = indices[i];
assert(index < k);
++count[index];
for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
@@ -76,43 +71,35 @@
data + (lcg_rand16(&rand_state) % n) * AV1_K_MEANS_DIM,
sizeof(centroids[0]) * AV1_K_MEANS_DIM);
} else {
- const float norm = 1.0f / count[i];
- for (j = 0; j < AV1_K_MEANS_DIM; ++j)
- centroids[i * AV1_K_MEANS_DIM + j] *= norm;
+ for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
+ centroids[i * AV1_K_MEANS_DIM + j] =
+ DIVIDE_AND_ROUND(centroids[i * AV1_K_MEANS_DIM + j], count[i]);
+ }
}
}
-
- // Round to nearest integers.
- for (i = 0; i < k * AV1_K_MEANS_DIM; ++i) {
- centroids[i] = roundf(centroids[i]);
- }
}
-static float RENAME(calc_total_dist)(const float *data, const float *centroids,
- const uint8_t *indices, int n, int k) {
- float dist = 0;
- int i;
+static int64_t RENAME(calc_total_dist)(const int *data, const int *centroids,
+ const uint8_t *indices, int n, int k) {
+ int64_t dist = 0;
(void)k;
-
- for (i = 0; i < n; ++i)
+ for (int i = 0; i < n; ++i) {
dist += RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
centroids + indices[i] * AV1_K_MEANS_DIM);
-
+ }
return dist;
}
-void RENAME(av1_k_means)(const float *data, float *centroids, uint8_t *indices,
+void RENAME(av1_k_means)(const int *data, int *centroids, uint8_t *indices,
int n, int k, int max_itr) {
- int i;
- float this_dist;
- float pre_centroids[2 * PALETTE_MAX_SIZE];
+ int pre_centroids[2 * PALETTE_MAX_SIZE];
uint8_t pre_indices[MAX_SB_SQUARE];
RENAME(av1_calc_indices)(data, centroids, indices, n, k);
- this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
+ int64_t this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
- for (i = 0; i < max_itr; ++i) {
- const float pre_dist = this_dist;
+ for (int i = 0; i < max_itr; ++i) {
+ const int64_t pre_dist = this_dist;
memcpy(pre_centroids, centroids,
sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM);
memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
@@ -132,6 +119,5 @@
break;
}
}
-
#undef RENAME_
#undef RENAME
diff --git a/av1/encoder/palette.c b/av1/encoder/palette.c
index 94e866f..5fcbf9a 100644
--- a/av1/encoder/palette.c
+++ b/av1/encoder/palette.c
@@ -23,16 +23,14 @@
#include "av1/encoder/k_means_template.h"
#undef AV1_K_MEANS_DIM
-static int float_comparer(const void *a, const void *b) {
- const float fa = *(const float *)a;
- const float fb = *(const float *)b;
- return (fa > fb) - (fa < fb);
+static int int_comparer(const void *a, const void *b) {
+ return (*(int *)a - *(int *)b);
}
-int av1_remove_duplicates(float *centroids, int num_centroids) {
+int av1_remove_duplicates(int *centroids, int num_centroids) {
int num_unique; // number of unique centroids
int i;
- qsort(centroids, num_centroids, sizeof(*centroids), float_comparer);
+ qsort(centroids, num_centroids, sizeof(*centroids), int_comparer);
// Remove duplicates.
num_unique = 1;
for (i = 1; i < num_centroids; ++i) {
diff --git a/av1/encoder/palette.h b/av1/encoder/palette.h
index faf2229..bbdd507 100644
--- a/av1/encoder/palette.h
+++ b/av1/encoder/palette.h
@@ -20,22 +20,22 @@
#define AV1_K_MEANS_RENAME(func, dim) func##_dim##dim
-void AV1_K_MEANS_RENAME(av1_calc_indices, 1)(const float *data,
- const float *centroids,
+void AV1_K_MEANS_RENAME(av1_calc_indices, 1)(const int *data,
+ const int *centroids,
uint8_t *indices, int n, int k);
-void AV1_K_MEANS_RENAME(av1_calc_indices, 2)(const float *data,
- const float *centroids,
+void AV1_K_MEANS_RENAME(av1_calc_indices, 2)(const int *data,
+ const int *centroids,
uint8_t *indices, int n, int k);
-void AV1_K_MEANS_RENAME(av1_k_means, 1)(const float *data, float *centroids,
+void AV1_K_MEANS_RENAME(av1_k_means, 1)(const int *data, int *centroids,
uint8_t *indices, int n, int k,
int max_itr);
-void AV1_K_MEANS_RENAME(av1_k_means, 2)(const float *data, float *centroids,
+void AV1_K_MEANS_RENAME(av1_k_means, 2)(const int *data, int *centroids,
uint8_t *indices, int n, int k,
int max_itr);
// Given 'n' 'data' points and 'k' 'centroids' each of dimension 'dim',
// calculate the centroid 'indices' for the data points.
-static INLINE void av1_calc_indices(const float *data, const float *centroids,
+static INLINE void av1_calc_indices(const int *data, const int *centroids,
uint8_t *indices, int n, int k, int dim) {
if (dim == 1) {
AV1_K_MEANS_RENAME(av1_calc_indices, 1)(data, centroids, indices, n, k);
@@ -50,7 +50,7 @@
// dimension 'dim', runs up to 'max_itr' iterations of k-means algorithm to get
// updated 'centroids' and the centroid 'indices' for elements in 'data'.
// Note: the output centroids are rounded off to nearest integers.
-static INLINE void av1_k_means(const float *data, float *centroids,
+static INLINE void av1_k_means(const int *data, int *centroids,
uint8_t *indices, int n, int k, int dim,
int max_itr) {
if (dim == 1) {
@@ -66,7 +66,7 @@
// puts these unique centroids in first 'k' indices of 'centroids' array.
// Ideally, the centroids should be rounded to integers before calling this
// method.
-int av1_remove_duplicates(float *centroids, int num_centroids);
+int av1_remove_duplicates(int *centroids, int num_centroids);
// Given a color cache and a set of base colors, find if each cache color is
// present in the base colors, record the binary results in "cache_color_found".
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 2f63807..7bd299d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2797,20 +2797,19 @@
// Bias toward using colors in the cache.
// TODO(huisu): Try other schemes to improve compression.
static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
- int n_colors, int stride,
- float *centroids) {
+ int n_colors, int stride, int *centroids) {
if (n_cache <= 0) return;
for (int i = 0; i < n_colors * stride; i += stride) {
- float min_diff = fabsf(centroids[i] - color_cache[0]);
+ int min_diff = abs(centroids[i] - (int)color_cache[0]);
int idx = 0;
for (int j = 1; j < n_cache; ++j) {
- float this_diff = fabsf(centroids[i] - color_cache[j]);
+ const int this_diff = abs(centroids[i] - color_cache[j]);
if (this_diff < min_diff) {
min_diff = this_diff;
idx = j;
}
}
- if (min_diff < 1.5) centroids[i] = color_cache[idx];
+ if (min_diff <= 1) centroids[i] = color_cache[idx];
}
}
@@ -2818,19 +2817,13 @@
// of palette mode.
static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x,
MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int palette_ctx,
- int dc_mode_cost, const float *data, float *centroids,
+ int dc_mode_cost, const int *data, int *centroids,
int n, uint16_t *color_cache, int n_cache,
MB_MODE_INFO *best_mbmi,
uint8_t *best_palette_color_map, int64_t *best_rd,
int64_t *best_model_rd, int *rate, int *rate_tokenonly,
int *rate_overhead, int64_t *distortion,
int *skippable) {
- aom_clear_system_state();
-#ifndef NDEBUG
- for (int i = 0; i < n; ++i) {
- assert(!isnan(centroids[i]));
- }
-#endif // NDEBUG
optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
int k = av1_remove_duplicates(centroids, n);
if (k < PALETTE_MIN_SIZE) {
@@ -2847,7 +2840,7 @@
else
#endif // CONFIG_HIGHBITDEPTH
for (int i = 0; i < k; ++i)
- pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
+ pmi->palette_colors[i] = clip_pixel(centroids[i]);
pmi->palette_size[0] = k;
MACROBLOCKD *const xd = &x->e_mbd;
uint8_t *const color_map = xd->plane[0].color_index_map;
@@ -2928,12 +2921,11 @@
#endif // CONFIG_FILTER_INTRA
if (colors > 1 && colors <= 64) {
- aom_clear_system_state();
int r, c, i;
const int max_itr = 50;
- float *const data = x->palette_buffer->kmeans_data_buf;
- float centroids[PALETTE_MAX_SIZE];
- float lb, ub, val;
+ int *const data = x->palette_buffer->kmeans_data_buf;
+ int centroids[PALETTE_MAX_SIZE];
+ int lb, ub, val;
#if CONFIG_HIGHBITDEPTH
uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
if (cpi->common.use_highbitdepth)
@@ -2998,8 +2990,7 @@
// TODO(huisu@google.com): Try to avoid duplicate computation in cases
// where the dominant colors and the k-means results are similar.
for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
- aom_clear_system_state();
- for (i = 0; i < n; ++i) centroids[i] = (float)(top_colors[i]);
+ for (i = 0; i < n; ++i) centroids[i] = top_colors[i];
palette_rd_y(cpi, x, mbmi, bsize, palette_ctx, dc_mode_cost, data,
centroids, n, color_cache, n_cache, best_mbmi,
best_palette_color_map, best_rd, best_model_rd, rate,
@@ -3008,7 +2999,6 @@
// K-means clustering.
for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
- aom_clear_system_state();
if (colors == PALETTE_MIN_SIZE) {
// Special case: These colors automatically become the centroids.
assert(colors == n);
@@ -5174,13 +5164,12 @@
colors = colors_u > colors_v ? colors_u : colors_v;
if (colors > 1 && colors <= 64) {
- aom_clear_system_state();
int r, c, n, i, j;
const int max_itr = 50;
- float lb_u, ub_u, val_u;
- float lb_v, ub_v, val_v;
- float *const data = x->palette_buffer->kmeans_data_buf;
- float centroids[2 * PALETTE_MAX_SIZE];
+ int lb_u, ub_u, val_u;
+ int lb_v, ub_v, val_v;
+ int *const data = x->palette_buffer->kmeans_data_buf;
+ int centroids[2 * PALETTE_MAX_SIZE];
#if CONFIG_HIGHBITDEPTH
uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
@@ -5230,26 +5219,20 @@
for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
--n) {
- aom_clear_system_state();
for (i = 0; i < n; ++i) {
centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
}
av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
-#ifndef NDEBUG
- for (i = 0; i < 2 * n; ++i) {
- assert(!isnan(centroids[i]));
- }
-#endif // NDEBUG
optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
// Sort the U channel colors in ascending order.
for (i = 0; i < 2 * (n - 1); i += 2) {
int min_idx = i;
- float min_val = centroids[i];
+ int min_val = centroids[i];
for (j = i + 2; j < 2 * n; j += 2)
if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
if (min_idx != i) {
- float temp_u = centroids[i], temp_v = centroids[i + 1];
+ int temp_u = centroids[i], temp_v = centroids[i + 1];
centroids[i] = centroids[min_idx];
centroids[i + 1] = centroids[min_idx + 1];
centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
@@ -9031,8 +9014,8 @@
int src_stride = x->plane[1].src.stride;
const uint8_t *const src_u = x->plane[1].src.buf;
const uint8_t *const src_v = x->plane[2].src.buf;
- float *const data = x->palette_buffer->kmeans_data_buf;
- float centroids[2 * PALETTE_MAX_SIZE];
+ int *const data = x->palette_buffer->kmeans_data_buf;
+ int centroids[2 * PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[1].color_index_map;
int r, c;
#if CONFIG_HIGHBITDEPTH