palette-delta-encoding: replace sorting with merging

Replace sorting with merging on decoder side.

Change-Id: I77d63c1b2b0569a2e0d0fc239f77d7a90b1275df
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 429cce3..ba15a8c 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -711,21 +711,36 @@
 }
 
 #if CONFIG_PALETTE_DELTA_ENCODING
-static int uint16_compare(const void *a, const void *b) {
-  const uint16_t va = *(const uint16_t *)a;
-  const uint16_t vb = *(const uint16_t *)b;
-  return va - vb;
+// Merge the sorted list of cached colors(cached_colors[0...n_cached_colors-1])
+// and the sorted list of transmitted colors(colors[n_cached_colors...n-1]) into
+// one single sorted list(colors[...]).
+static void merge_colors(uint16_t *colors, uint16_t *cached_colors,
+                         int n_colors, int n_cached_colors) {
+  if (n_cached_colors == 0) return;
+  int cache_idx = 0, trans_idx = n_cached_colors;
+  for (int i = 0; i < n_colors; ++i) {
+    if (cache_idx < n_cached_colors &&
+        (trans_idx >= n_colors ||
+         cached_colors[cache_idx] <= colors[trans_idx])) {
+      colors[i] = cached_colors[cache_idx++];
+    } else {
+      assert(trans_idx < n_colors);
+      colors[i] = colors[trans_idx++];
+    }
+  }
 }
 
 static void read_palette_colors_y(MACROBLOCKD *const xd, int bit_depth,
                                   PALETTE_MODE_INFO *const pmi, aom_reader *r) {
   uint16_t color_cache[2 * PALETTE_MAX_SIZE];
+  uint16_t cached_colors[PALETTE_MAX_SIZE];
   const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
   const int n = pmi->palette_size[0];
   int idx = 0;
   for (int i = 0; i < n_cache && idx < n; ++i)
-    if (aom_read_bit(r, ACCT_STR)) pmi->palette_colors[idx++] = color_cache[i];
+    if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i];
   if (idx < n) {
+    const int n_cached_colors = idx;
     pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
     if (idx < n) {
       const int min_bits = bit_depth - 3;
@@ -740,8 +755,10 @@
         bits = AOMMIN(bits, av1_ceil_log2(range));
       }
     }
+    merge_colors(pmi->palette_colors, cached_colors, n, n_cached_colors);
+  } else {
+    memcpy(pmi->palette_colors, cached_colors, n * sizeof(cached_colors[0]));
   }
-  qsort(pmi->palette_colors, n, sizeof(pmi->palette_colors[0]), uint16_compare);
 }
 
 static void read_palette_colors_uv(MACROBLOCKD *const xd, int bit_depth,
@@ -750,11 +767,14 @@
   const int n = pmi->palette_size[1];
   // U channel colors.
   uint16_t color_cache[2 * PALETTE_MAX_SIZE];
+  uint16_t cached_colors[PALETTE_MAX_SIZE];
   const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
-  int idx = PALETTE_MAX_SIZE;
-  for (int i = 0; i < n_cache && idx < PALETTE_MAX_SIZE + n; ++i)
-    if (aom_read_bit(r, ACCT_STR)) pmi->palette_colors[idx++] = color_cache[i];
-  if (idx < PALETTE_MAX_SIZE + n) {
+  int idx = 0;
+  for (int i = 0; i < n_cache && idx < n; ++i)
+    if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i];
+  if (idx < n) {
+    const int n_cached_colors = idx;
+    idx += PALETTE_MAX_SIZE;
     pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
     if (idx < PALETTE_MAX_SIZE + n) {
       const int min_bits = bit_depth - 3;
@@ -769,9 +789,12 @@
         bits = AOMMIN(bits, av1_ceil_log2(range));
       }
     }
+    merge_colors(pmi->palette_colors + PALETTE_MAX_SIZE, cached_colors, n,
+                 n_cached_colors);
+  } else {
+    memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, cached_colors,
+           n * sizeof(cached_colors[0]));
   }
-  qsort(pmi->palette_colors + PALETTE_MAX_SIZE, n,
-        sizeof(pmi->palette_colors[0]), uint16_compare);
 
   // V channel colors.
   if (aom_read_bit(r, ACCT_STR)) {  // Delta encoding.