Use ml based tx_split search for high bit-depth
This speed feature was originally enabled only for 8-bit.
Turn this on for high bit-depth for consistency.
STATS_CHANGED for high bit depth encodes.
On the av2ctc A2 set, the average BDRATE increase is about 0.02%
with the loss happening only for 10-bit videos, for about 1-2%
speed-up.
Change-Id: Ic4ec202466129a5abdc966626c8154732c46a88b
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index a040c6b..a405c39 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -1873,8 +1873,9 @@
// Feature used by the model to predict tx split: the mean and standard
// deviation values of the block and sub-blocks.
-static AOM_INLINE void get_mean_dev_features(const int16_t *data, int stride,
- int bw, int bh, float *feature) {
+static AOM_INLINE void get_mean_dev_features(int bd, const int16_t *data,
+ int stride, int bw, int bh,
+ float *feature) {
const int16_t *const data_ptr = &data[0];
const int subh = (bh >= bw) ? (bh >> 1) : bh;
const int subw = (bw >= bh) ? (bw >> 1) : bw;
@@ -1894,6 +1895,8 @@
// TODO(any): Write a SIMD version. Clear registers.
aom_get_blk_sse_sum(data_ptr + row * stride + col, stride, subw, subh,
&x_sum, &x2_sum);
+ x_sum >>= (bd - 8);
+ x2_sum >>= (bd - 8) * 2;
total_x_sum += x_sum;
total_x2_sum += x2_sum;
@@ -1933,7 +1936,7 @@
aom_clear_system_state();
float features[64] = { 0.0f };
- get_mean_dev_features(diff, diff_stride, bw, bh, features);
+ get_mean_dev_features(x->e_mbd.bd, diff, diff_stride, bw, bh, features);
float score = 0.0f;
av1_nn_predict(features, nn_config, 1, &score);
@@ -2785,8 +2788,7 @@
}
// ML based speed feature to skip searching for split transform blocks.
- if (x->e_mbd.bd == 8 && try_split &&
- !(ref_best_rd == INT64_MAX && no_split.rd == INT64_MAX)) {
+ if (try_split && !(ref_best_rd == INT64_MAX && no_split.rd == INT64_MAX)) {
const int threshold = cpi->sf.tx_sf.tx_type_search.ml_tx_split_thresh;
if (threshold >= 0) {
const int split_score =