Add av1_down_sample_scan_count
This is to reduce memory usage for adapt_scan
The whole change will be under the flag USE_2X2_PROB
Change-Id: If7839d6396dad7618155ef2f36896d17743696ce
diff --git a/av1/common/scan.c b/av1/common/scan.c
index fc8562e..97491e8 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -6604,6 +6604,39 @@
return value < low ? low : (value > high ? high : (int)value);
}
+#if USE_2X2_PROB
+// Decides whether update_scan_prob() works on 2x2 down-sampled counts for
+// this transform size.
+// Returns 1 if the transform is wider or taller than 8, and 0 otherwise.
+static int do_down_sample(TX_SIZE tx_size) {
+  const int width = tx_size_wide[tx_size];
+  const int height = tx_size_high[tx_size];
+  // A logical OR already evaluates to exactly 0 or 1.
+  return (width > 8) || (height > 8);
+}
+
+// Sums every 2x2 cell of the tx_w x tx_h grid in non_zero_count into a single
+// entry of non_zero_count_ds, which receives (tx_w / 2) * (tx_h / 2) values.
+void av1_down_sample_scan_count(uint32_t *non_zero_count_ds,
+                                const uint32_t *non_zero_count,
+                                TX_SIZE tx_size) {
+  const int tx_w = tx_size_wide[tx_size];
+  const int tx_h = tx_size_high[tx_size];
+  const int tx_w_ds = tx_w >> 1;
+  for (int r = 0; r < tx_h; r += 2) {
+    for (int c = 0; c < tx_w; c += 2) {
+      // The cell's last sample, (r + 1, c + 1), must lie inside the block.
+      assert(r + 1 < tx_h && c + 1 < tx_w);
+      const int ci = r * tx_w + c;
+      const int ci_ds = (r >> 1) * tx_w_ds + (c >> 1);
+      non_zero_count_ds[ci_ds] = non_zero_count[ci] + non_zero_count[ci + 1] +
+                                 non_zero_count[ci + tx_w] +
+                                 non_zero_count[ci + 1 + tx_w];
+    }
+  }
+}
+#endif
+
static void update_scan_prob(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
int rate) {
FRAME_CONTEXT *pre_fc = cm->pre_fc;
@@ -6612,12 +6645,29 @@
uint32_t *non_zero_count = get_non_zero_counts(&cm->counts, tx_size, tx_type);
const int tx2d_size = tx_size_2d[tx_size];
unsigned int block_num = cm->counts.txb_count[tx_size][tx_type];
+ uint32_t *non_zero_count_new = non_zero_count;
+ int count_size = tx2d_size;
+#if USE_2X2_PROB
+#if CONFIG_TX64X64
+ DECLARE_ALIGNED(16, uint32_t, non_zero_count_ds[1024]);
+ assert((tx2d_size >> 2) <= 1024);
+#else // CONFIG_TX64X64
+ DECLARE_ALIGNED(16, uint32_t, non_zero_count_ds[256]);
+ assert((tx2d_size >> 2) <= 256);
+#endif // CONFIG_TX64X64
+ if (do_down_sample(tx_size)) {
+ av1_down_sample_scan_count(non_zero_count_ds, non_zero_count, tx_size);
+ non_zero_count_new = non_zero_count_ds;
+ count_size = tx2d_size >> 2;
+ block_num <<= 2;
+ }
+#endif
int i;
- for (i = 0; i < tx2d_size; i++) {
+ for (i = 0; i < count_size; i++) {
int64_t curr_prob =
block_num == 0
? 0
- : (non_zero_count[i] << ADAPT_SCAN_PROB_PRECISION) / block_num;
+ : (non_zero_count_new[i] << ADAPT_SCAN_PROB_PRECISION) / block_num;
int64_t prev_prob = prev_non_zero_prob[i];
int64_t pred_prob =
(curr_prob * rate +
diff --git a/av1/common/scan.h b/av1/common/scan.h
index c9911de..5246f1f 100644
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@@ -30,6 +30,7 @@
extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
#if CONFIG_ADAPT_SCAN
+#define USE_2X2_PROB 0
void av1_update_scan_count_facade(AV1_COMMON *cm, FRAME_COUNTS *counts,
TX_SIZE tx_size, TX_TYPE tx_type,
const tran_low_t *dqcoeffs, int max_scan);
@@ -55,7 +56,12 @@
const int16_t *iscan, int16_t *neighbors);
void av1_init_scan_order(AV1_COMMON *cm);
void av1_adapt_scan_order(AV1_COMMON *cm);
-#endif
+#if USE_2X2_PROB
+void av1_down_sample_scan_count(uint32_t *non_zero_count_ds,
+ const uint32_t *non_zero_count,
+ TX_SIZE tx_size);
+#endif // USE_2X2_PROB
+#endif // CONFIG_ADAPT_SCAN
void av1_deliver_eob_threshold(const AV1_COMMON *cm, MACROBLOCKD *xd);
static INLINE int get_coef_context(const int16_t *neighbors,
diff --git a/test/scan_test.cc b/test/scan_test.cc
index 16c831c..a2ca724 100644
--- a/test/scan_test.cc
+++ b/test/scan_test.cc
@@ -94,4 +94,17 @@
}
}
+#if USE_2X2_PROB
+// A 4x4 grid of counts must collapse to the sums of its four 2x2 cells.
+// Each expected value is the sum of one 2x2 cell, listed in raster order.
+TEST(ScanTest, av1_down_sample_scan_count) {
+  const uint32_t counts[16] = { 13, 12, 11, 10, 13, 9, 10, 8,
+                                11, 12, 9,  8,  13, 9, 9,  10 };
+  const uint32_t expected[4] = { 47, 39, 45, 36 };
+  uint32_t actual[4];
+  av1_down_sample_scan_count(actual, counts, TX_4X4);
+  for (int k = 0; k != 4; ++k) EXPECT_EQ(expected[k], actual[k]);
+}
+#endif
+
} // namespace