Use ML-based tx_split search for high bit-depth. This speed feature was originally enabled only for 8-bit. Turn this on for high bit-depth for consistency. STATS_CHANGED for high bit-depth encodes. On the av2ctc A2 set, the average BDRATE increase is about 0.02%, with the loss happening only for 10-bit videos, in exchange for about a 1-2% speed-up. Change-Id: Ic4ec202466129a5abdc966626c8154732c46a88b

commit: 4c48ed919513e452b86334ae83462b85ac3ad818 [log] [tgz]
author: Debargha Mukherjee <debargha@google.com> Thu Mar 18 23:47:21 2021 -0700
committer: Urvang Joshi <urvang@google.com> Tue Mar 23 19:02:11 2021 +0000
tree: 77ee62a777f5c761261af6c3f63b7a39f1fd54d1
parent: 44059c67e97e67d6507892089bc0d91769ceddfb [diff]
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index a040c6b..a405c39 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c

@@ -1873,8 +1873,9 @@
 
 // Feature used by the model to predict tx split: the mean and standard
 // deviation values of the block and sub-blocks.
-static AOM_INLINE void get_mean_dev_features(const int16_t *data, int stride,
-                                             int bw, int bh, float *feature) {
+static AOM_INLINE void get_mean_dev_features(int bd, const int16_t *data,
+                                             int stride, int bw, int bh,
+                                             float *feature) {
   const int16_t *const data_ptr = &data[0];
   const int subh = (bh >= bw) ? (bh >> 1) : bh;
   const int subw = (bw >= bh) ? (bw >> 1) : bw;
@@ -1894,6 +1895,8 @@
       // TODO(any): Write a SIMD version. Clear registers.
       aom_get_blk_sse_sum(data_ptr + row * stride + col, stride, subw, subh,
                           &x_sum, &x2_sum);
+      x_sum >>= (bd - 8);
+      x2_sum >>= (bd - 8) * 2;
       total_x_sum += x_sum;
       total_x2_sum += x2_sum;
 
@@ -1933,7 +1936,7 @@
   aom_clear_system_state();
 
   float features[64] = { 0.0f };
-  get_mean_dev_features(diff, diff_stride, bw, bh, features);
+  get_mean_dev_features(x->e_mbd.bd, diff, diff_stride, bw, bh, features);
 
   float score = 0.0f;
   av1_nn_predict(features, nn_config, 1, &score);
@@ -2785,8 +2788,7 @@
   }
 
   // ML based speed feature to skip searching for split transform blocks.
-  if (x->e_mbd.bd == 8 && try_split &&
-      !(ref_best_rd == INT64_MAX && no_split.rd == INT64_MAX)) {
+  if (try_split && !(ref_best_rd == INT64_MAX && no_split.rd == INT64_MAX)) {
     const int threshold = cpi->sf.tx_sf.tx_type_search.ml_tx_split_thresh;
     if (threshold >= 0) {
       const int split_score =
commit	4c48ed919513e452b86334ae83462b85ac3ad818	[log] [tgz]
author	Debargha Mukherjee <debargha@google.com>	Thu Mar 18 23:47:21 2021 -0700
committer	Urvang Joshi <urvang@google.com>	Tue Mar 23 19:02:11 2021 +0000
tree	77ee62a777f5c761261af6c3f63b7a39f1fd54d1
parent	44059c67e97e67d6507892089bc0d91769ceddfb [diff]