Use ML-based tx_split search for high bit-depth. This speed feature was originally enabled only for 8-bit. Turn this on for high bit-depth for consistency. STATS_CHANGED for high bit-depth encodes. On the av2ctc A2 set, the average BDRATE increase is about 0.02%, with the loss happening only for 10-bit videos, for about 1-2% speed-up. Change-Id: Ic4ec202466129a5abdc966626c8154732c46a88b
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c index a040c6b..a405c39 100644 --- a/av1/encoder/tx_search.c +++ b/av1/encoder/tx_search.c
@@ -1873,8 +1873,9 @@ // Feature used by the model to predict tx split: the mean and standard // deviation values of the block and sub-blocks. -static AOM_INLINE void get_mean_dev_features(const int16_t *data, int stride, - int bw, int bh, float *feature) { +static AOM_INLINE void get_mean_dev_features(int bd, const int16_t *data, + int stride, int bw, int bh, + float *feature) { const int16_t *const data_ptr = &data[0]; const int subh = (bh >= bw) ? (bh >> 1) : bh; const int subw = (bw >= bh) ? (bw >> 1) : bw; @@ -1894,6 +1895,8 @@ // TODO(any): Write a SIMD version. Clear registers. aom_get_blk_sse_sum(data_ptr + row * stride + col, stride, subw, subh, &x_sum, &x2_sum); + x_sum >>= (bd - 8); + x2_sum >>= (bd - 8) * 2; total_x_sum += x_sum; total_x2_sum += x2_sum; @@ -1933,7 +1936,7 @@ aom_clear_system_state(); float features[64] = { 0.0f }; - get_mean_dev_features(diff, diff_stride, bw, bh, features); + get_mean_dev_features(x->e_mbd.bd, diff, diff_stride, bw, bh, features); float score = 0.0f; av1_nn_predict(features, nn_config, 1, &score); @@ -2785,8 +2788,7 @@ } // ML based speed feature to skip searching for split transform blocks. - if (x->e_mbd.bd == 8 && try_split && - !(ref_best_rd == INT64_MAX && no_split.rd == INT64_MAX)) { + if (try_split && !(ref_best_rd == INT64_MAX && no_split.rd == INT64_MAX)) { const int threshold = cpi->sf.tx_sf.tx_type_search.ml_tx_split_thresh; if (threshold >= 0) { const int split_score =