Use SIMD-optimized model in HOG pruning
Change-Id: Ie78541418d62b462b16e03d454c0ef15277b8844
diff --git a/av1/encoder/intra_mode_search_utils.h b/av1/encoder/intra_mode_search_utils.h
index 8ec7cb3..411aa5a 100644
--- a/av1/encoder/intra_mode_search_utils.h
+++ b/av1/encoder/intra_mode_search_utils.h
@@ -17,6 +17,7 @@
#ifndef AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_
#define AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_
+#include "av1/common/enums.h"
#include "av1/common/pred_common.h"
#include "av1/common/reconintra.h"
@@ -30,12 +31,12 @@
/*!\cond */
#define BINS 32
-static const float intra_hog_model_bias[DIRECTIONAL_MODES] = {
+static const float av1_intra_hog_model_bias[DIRECTIONAL_MODES] = {
0.450578f, 0.695518f, -0.717944f, -0.639894f,
-0.602019f, -0.453454f, 0.055857f, -0.465480f,
};
-static const float intra_hog_model_weights[BINS * DIRECTIONAL_MODES] = {
+static const float av1_intra_hog_model_weights[BINS * DIRECTIONAL_MODES] = {
-3.076402f, -3.757063f, -3.275266f, -3.180665f, -3.452105f, -3.216593f,
-2.871212f, -3.134296f, -1.822324f, -2.401411f, -1.541016f, -1.195322f,
-0.434156f, 0.322868f, 2.260546f, 3.368715f, 3.989290f, 3.308487f,
@@ -81,6 +82,19 @@
-1.222860f, -1.502437f, -1.900969f, -3.206816f,
};
+static const NN_CONFIG av1_intra_hog_model_nnconfig = {
+  BINS,               // num_inputs: one per HOG histogram bin
+  DIRECTIONAL_MODES,  // num_outputs: one score per directional mode
+  0,                  // num_hidden_layers
+  { 0 },              // zero-filled; an empty `{}` is not valid before C23
+  {
+    av1_intra_hog_model_weights,  // BINS * DIRECTIONAL_MODES weights
+  },
+  {
+    av1_intra_hog_model_bias,  // DIRECTIONAL_MODES biases
+  },
+};
+
#define FIX_PREC_BITS (16)
static AOM_INLINE int get_hist_bin_idx(int dx, int dy) {
const int32_t ratio = (dy * (1 << FIX_PREC_BITS)) / dx;
@@ -189,13 +203,9 @@
for (int i = 0; i < BINS; ++i) hist[i] /= total;
}
-static AOM_INLINE void prune_intra_mode_with_hog(
- const MACROBLOCK *x, BLOCK_SIZE bsize, float th,
- uint8_t *directional_mode_skip_mask, int is_chroma) {
- aom_clear_system_state();
-
+static INLINE void collect_hog_data(const MACROBLOCK *x, BLOCK_SIZE bsize,
+ int plane, float *hog) {
const MACROBLOCKD *xd = &x->e_mbd;
- const int plane = is_chroma ? AOM_PLANE_U : AOM_PLANE_Y;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int ss_x = pd->subsampling_x;
const int ss_y = pd->subsampling_y;
@@ -209,24 +219,36 @@
ss_x;
const int src_stride = x->plane[plane].src.stride;
const uint8_t *src = x->plane[plane].src.buf;
- float hist[BINS] = { 0.0f };
if (is_cur_buf_hbd(xd)) {
- generate_hog_hbd(src, src_stride, rows, cols, hist);
+ generate_hog_hbd(src, src_stride, rows, cols, hog);
} else {
- generate_hog(src, src_stride, rows, cols, hist);
+ generate_hog(src, src_stride, rows, cols, hog);
}
+ // Scale the hog so the luma and chroma are on the same scale
for (int b = 0; b < BINS; ++b) {
- hist[b] *= (1 + ss_x) * (1 + ss_y);
+ hog[b] *= (1 + ss_x) * (1 + ss_y);
}
+}
- for (int i = 0; i < DIRECTIONAL_MODES; ++i) {
- float this_score = intra_hog_model_bias[i];
- const float *weights = &intra_hog_model_weights[i * BINS];
- for (int j = 0; j < BINS; ++j) {
- this_score += weights[j] * hist[j];
+static AOM_INLINE void prune_intra_mode_with_hog(
+ const MACROBLOCK *x, BLOCK_SIZE bsize, float th,
+ uint8_t *directional_mode_skip_mask, int is_chroma) {
+ aom_clear_system_state();
+
+ const int plane = is_chroma ? AOM_PLANE_U : AOM_PLANE_Y;
+ float hist[BINS] = { 0.0f };
+ collect_hog_data(x, bsize, plane, hist);
+
+ // Make a prediction for each of the directional modes
+ float scores[DIRECTIONAL_MODES] = { 0.0f };
+ aom_clear_system_state();
+ av1_nn_predict(hist, &av1_intra_hog_model_nnconfig, 1, scores);
+ for (UV_PREDICTION_MODE uv_mode = UV_V_PRED; uv_mode <= UV_D67_PRED;
+ uv_mode++) {
+ if (scores[uv_mode - UV_V_PRED] <= th) {
+ directional_mode_skip_mask[uv_mode] = 1;
}
- if (this_score < th) directional_mode_skip_mask[i + 1] = 1;
}
aom_clear_system_state();