Add update_scan_order

augment_prob: embed r + c and coeff_idx info with nonzero probabilities.
When sorting the nonzero probabilities, if there is a tie, the coefficient
with smaller r + c will be scanned first

sort_prob: quick sort

dfs_scan: topological sort

update_sort_order: apply quick sort on nonzero probabilities to obtain
a sort order

update_scan_order: apply topological sort on the nonzero
probabilities sorting order to guarantee each to-be-scanned
coefficient's upper and left coefficient will be scanned before the
to-be-scanned coefficient.

Change-Id: I719b24dc704e9652a7665af93816bacea7078fb0
diff --git a/av1/common/scan.c b/av1/common/scan.c
index e13d134..23fa3e9 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -738,6 +738,12 @@
 };
 
 #if CONFIG_ADAPT_SCAN
+// TX_32X32 will has 1024 coefficients whose indexes can be represented in 10
+// bits
+#define COEFF_IDX_BITS 10
+#define COEFF_IDX_SIZE (1 << COEFF_IDX_BITS)
+#define COEFF_IDX_MASK (COEFF_IDX_SIZE - 1)
+
 int get_tx1d_size(TX_SIZE tx_size) { return 1 << (tx_size + 2); }
 
 int get_tx2d_size(TX_SIZE tx_size) { return 1 << ((tx_size + 2) * 2); }
@@ -793,8 +799,9 @@
   }
 }
 
-void update_scan_count(int16_t *scan, int max_scan, tran_low_t *dqcoeffs,
-                       uint32_t *non_zero_count) {
+static void update_scan_count(int16_t *scan, int max_scan,
+                              const tran_low_t *dqcoeffs,
+                              uint32_t *non_zero_count) {
   int i;
   for (i = 0; i < max_scan; ++i) {
     int coeff_idx = scan[i];
@@ -803,9 +810,109 @@
 }
 
 void update_scan_count_facade(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
-                              tran_low_t *dqcoeffs, int max_scan) {
+                              const tran_low_t *dqcoeffs, int max_scan) {
   int16_t *scan = get_adapt_scan(cm->fc, tx_size, tx_type);
   uint32_t *non_zero_count = get_non_zero_counts(&cm->counts, tx_size, tx_type);
   update_scan_count(scan, max_scan, dqcoeffs, non_zero_count);
 }
+
+static int cmp_prob(const void *a, const void *b) {
+  return *(const uint32_t *)b > *(const uint32_t *)a ? 1 : -1;
+}
+
+void augment_prob(uint32_t *prob, int size, int tx1d_size) {
+  int r, c;
+  for (r = 0; r < size; r++) {
+    for (c = 0; c < size; c++) {
+      const int coeff_idx = r * tx1d_size + c;
+      const int idx = r * size + c;
+      const uint32_t mask_16 = ((1 << 16) - 1);
+      const uint32_t tie_breaker = ~(((r + c) << COEFF_IDX_BITS) | coeff_idx);
+      // prob[idx]: 16 bits  r+c: 6 bits  coeff_idx: 10 bits
+      prob[idx] = (prob[idx] << 16) | (mask_16 & tie_breaker);
+    }
+  }
+}
+
+// topological sort
+static void dfs_scan(int tx1d_size, int *scan_idx, int coeff_idx, int16_t *scan,
+                     int16_t *iscan) {
+  const int r = coeff_idx / tx1d_size;
+  const int c = coeff_idx % tx1d_size;
+
+  if (iscan[coeff_idx] != -1) return;
+
+  if (r > 0) dfs_scan(tx1d_size, scan_idx, coeff_idx - tx1d_size, scan, iscan);
+
+  if (c > 0) dfs_scan(tx1d_size, scan_idx, coeff_idx - 1, scan, iscan);
+
+  scan[*scan_idx] = coeff_idx;
+  iscan[coeff_idx] = *scan_idx;
+  ++(*scan_idx);
+}
+
+void update_neighbors(int tx_size, const int16_t *scan, const int16_t *iscan,
+                      int16_t *neighbors) {
+  const int tx1d_size = get_tx1d_size(tx_size);
+  const int tx2d_size = get_tx2d_size(tx_size);
+  int scan_idx;
+  for (scan_idx = 0; scan_idx < tx2d_size; ++scan_idx) {
+    const int coeff_idx = scan[scan_idx];
+    const int r = coeff_idx / tx1d_size;
+    const int c = coeff_idx % tx1d_size;
+    const int has_left = c > 0 && iscan[coeff_idx - 1] < scan_idx;
+    const int has_above = r > 0 && iscan[coeff_idx - tx1d_size] < scan_idx;
+
+    if (has_left && has_above) {
+      neighbors[scan_idx * MAX_NEIGHBORS + 0] = coeff_idx - 1;
+      neighbors[scan_idx * MAX_NEIGHBORS + 1] = coeff_idx - tx1d_size;
+    } else if (has_left) {
+      neighbors[scan_idx * MAX_NEIGHBORS + 0] = coeff_idx - 1;
+      neighbors[scan_idx * MAX_NEIGHBORS + 1] = coeff_idx - 1;
+    } else if (has_above) {
+      neighbors[scan_idx * MAX_NEIGHBORS + 0] = coeff_idx - tx1d_size;
+      neighbors[scan_idx * MAX_NEIGHBORS + 1] = coeff_idx - tx1d_size;
+    } else {
+      neighbors[scan_idx * MAX_NEIGHBORS + 0] = scan[0];
+      neighbors[scan_idx * MAX_NEIGHBORS + 1] = scan[0];
+    }
+  }
+  neighbors[tx2d_size * MAX_NEIGHBORS + 0] = scan[0];
+  neighbors[tx2d_size * MAX_NEIGHBORS + 1] = scan[0];
+}
+
+void update_sort_order(TX_SIZE tx_size, const uint32_t *non_zero_prob,
+                       int16_t *sort_order) {
+  uint32_t temp[COEFF_IDX_SIZE];
+  const int tx1d_size = get_tx1d_size(tx_size);
+  const int tx2d_size = get_tx2d_size(tx_size);
+  int sort_idx;
+  assert(tx2d_size <= COEFF_IDX_SIZE);
+  memcpy(temp, non_zero_prob, tx2d_size * sizeof(*non_zero_prob));
+  augment_prob(temp, tx1d_size, tx1d_size);
+  qsort(temp, tx2d_size, sizeof(*temp), cmp_prob);
+  for (sort_idx = 0; sort_idx < tx2d_size; ++sort_idx) {
+    const int coeff_idx = (temp[sort_idx] & COEFF_IDX_MASK) ^ COEFF_IDX_MASK;
+    sort_order[sort_idx] = coeff_idx;
+  }
+}
+
+void update_scan_order(TX_SIZE tx_size, int16_t *sort_order, int16_t *scan,
+                       int16_t *iscan) {
+  int coeff_idx;
+  int scan_idx;
+  int sort_idx;
+  const int tx1d_size = get_tx1d_size(tx_size);
+  const int tx2d_size = get_tx2d_size(tx_size);
+
+  for (coeff_idx = 0; coeff_idx < tx2d_size; ++coeff_idx) {
+    iscan[coeff_idx] = -1;
+  }
+
+  scan_idx = 0;
+  for (sort_idx = 0; sort_idx < tx2d_size; ++sort_idx) {
+    coeff_idx = sort_order[sort_idx];
+    dfs_scan(tx1d_size, &scan_idx, coeff_idx, scan, iscan);
+  }
+}
 #endif
diff --git a/av1/common/scan.h b/av1/common/scan.h
index 8f0ecb4..5419955 100644
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@@ -38,7 +38,27 @@
 void update_scan_prob(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
                       int rate_16);
 void update_scan_count_facade(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
-                              tran_low_t *dqcoeffs, int max_scan);
+                              const tran_low_t *dqcoeffs, int max_scan);
+
+// embed r + c and coeff_idx info with nonzero probabilities. When sorting the
+// nonzero probabilities, if there is a tie, the coefficient with smaller r + c
+// will be scanned first
+void augment_prob(uint32_t *prob, int size, int tx1d_size);
+
+// apply quick sort on nonzero probabilities to obtain a sort order
+void update_sort_order(TX_SIZE tx_size, const uint32_t *non_zero_prob,
+                       int16_t *sort_order);
+
+// apply topological sort on the nonzero probabilities sorting order to
+// guarantee each to-be-scanned coefficient's upper and left coefficient will be
+// scanned before the to-be-scanned coefficient.
+void update_scan_order(TX_SIZE tx_size, int16_t *sort_order, int16_t *scan,
+                       int16_t *iscan);
+
+// For each coeff_idx in scan[], update its above and left neighbors in
+// neighbors[] accordingly.
+void update_neighbors(int tx_size, const int16_t *scan, const int16_t *iscan,
+                      int16_t *neighbors);
 #endif
 
 static INLINE int get_coef_context(const int16_t *neighbors,
diff --git a/test/scan_test.cc b/test/scan_test.cc
new file mode 100644
index 0000000..e51b6bf
--- /dev/null
+++ b/test/scan_test.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "av1/common/scan.h"
+#include "test/acm_random.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+TEST(scan_test, augment_prob) {
+  int tx1d_size = 4;
+  uint32_t prob[16] = { 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
+  uint32_t ref_prob[16] = { 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
+  augment_prob(prob, tx1d_size, tx1d_size);
+  for (int r = 0; r < tx1d_size; ++r) {
+    for (int c = 0; c < tx1d_size; ++c) {
+      int idx = r * tx1d_size + c;
+      EXPECT_EQ(ref_prob[idx], prob[idx] >> 16);
+    }
+  }
+
+  int mask = (1 << 10) - 1;
+  for (int r = 0; r < tx1d_size; ++r) {
+    for (int c = 0; c < tx1d_size; ++c) {
+      int idx = r * tx1d_size + c;
+      EXPECT_EQ(idx, mask ^ (prob[r * tx1d_size + c] & mask));
+    }
+  }
+}
+
+TEST(scan_test, update_sort_order) {
+  int tx_size = TX_4X4;
+  uint32_t prob[16] = { 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
+  int16_t ref_sort_order[16] = { 0, 1,  4, 5,  2,  3,  6,  8,
+                                 9, 12, 7, 10, 13, 11, 14, 15 };
+  int16_t sort_order[16];
+  update_sort_order(tx_size, prob, sort_order);
+  for (int i = 0; i < 16; ++i) EXPECT_EQ(ref_sort_order[i], sort_order[i]);
+}
+
+TEST(scan_test, update_scan_order) {
+  int tx_size = TX_4X4;
+  uint32_t prob[16] = { 4, 5, 7, 4, 5, 6, 8, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
+  int16_t sort_order[16];
+  int16_t scan[16];
+  int16_t iscan[16];
+  int16_t ref_iscan[16] = {
+    0, 1, 2, 6, 3, 4, 5, 10, 7, 8, 11, 13, 9, 12, 14, 15
+  };
+
+  update_sort_order(tx_size, prob, sort_order);
+  update_scan_order(tx_size, sort_order, scan, iscan);
+
+  for (int i = 0; i < 16; ++i) EXPECT_EQ(ref_iscan[i], iscan[i]);
+
+  for (int i = 0; i < 16; ++i) EXPECT_EQ(i, scan[ref_iscan[i]]);
+}
+
+TEST(scan_test, update_neighbors) {
+  int tx_size = TX_4X4;
+  // raster order
+  int16_t scan[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+  int16_t nb[(16 + 1) * 2];
+  int16_t ref_nb[(16 + 1) * 2] = { 0, 0, 0,  0, 1,  1,  2,  2,  0, 0, 4,  1,
+                                   5, 2, 6,  3, 4,  4,  8,  5,  9, 6, 10, 7,
+                                   8, 8, 12, 9, 13, 10, 14, 11, 0, 0 };
+
+  // raster order's scan and iscan are the same
+  update_neighbors(tx_size, scan, scan, nb);
+  for (int i = 0; i < (16 + 1) * 2; ++i) {
+    EXPECT_EQ(ref_nb[i], nb[i]);
+  }
+}
+
+}  // namespace
diff --git a/test/test.mk b/test/test.mk
index 09e2d9e..5ccb5f2 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -103,6 +103,7 @@
 LIBAOM_TEST_SRCS-yes                   += encoder_parms_get_to_decoder.cc
 endif
 
+LIBAOM_TEST_SRCS-$(CONFIG_ADAPT_SCAN)  += scan_test.cc
 LIBAOM_TEST_SRCS-yes                   += convolve_test.cc
 LIBAOM_TEST_SRCS-yes                   += convolve_test.cc
 LIBAOM_TEST_SRCS-yes                   += av1_convolve_test.cc