Add a precision reduction step after nn_predict
The precision reduction step is a temporary fix to reduce the
probability of mismatches between the C and SIMD implementations
of the floating-point av1_nn_predict(). Ideally, the SIMD
implementation should be redone.
This patch fixes the issue reported in bug 2415, but there is no
guarantee that mismatches will never happen, since the error between
the two implementations is often larger than the reduced precision.
STATS_CHANGED in the noise range.
lowres (33 frames end-usage q cpu-used 0): +0.003
midres (33 frames end-usage q cpu-used 0): -0.011
BUG=aomedia:2415
Change-Id: I70298e0e35abfe86cb65ad12b7ee506f9b736e74
diff --git a/av1/encoder/ml.c b/av1/encoder/ml.c
index b5d8a16..57228ec 100644
--- a/av1/encoder/ml.c
+++ b/av1/encoder/ml.c
@@ -15,11 +15,21 @@
#include "aom_dsp/aom_dsp_common.h"
#include "av1/encoder/ml.h"
+void av1_nn_output_prec_reduce(float *const output, int num_output) {  // Snaps output[0..num_output) in place to multiples of 1 / 2^prec_bits.
+ const int prec_bits = 11;  // Number of fractional bits kept.
+ const int prec = 1 << prec_bits;  // Scale factor: 2^11 = 2048.
+ const float inv_prec = (float)(1.0 / prec);  // Reciprocal of prec, used to scale back.
+ for (int i = 0; i < num_output; i++) {  // (int) truncates toward zero: nearest for values >= 0, not for negatives.
+ output[i] = ((int)(output[i] * prec + 0.5)) * inv_prec;  // NOTE(review): the cast assumes the scaled value fits in int - confirm.
+ }
+}
+
// Calculate prediction based on the given input features and neural net config.
// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
// layer.
void av1_nn_predict_c(const float *input_nodes,
- const NN_CONFIG *const nn_config, float *const output) {
+ const NN_CONFIG *const nn_config, int reduce_prec,
+ float *const output) {
int num_input_nodes = nn_config->num_inputs;
int buf_index = 0;
float buf[2][NN_MAX_NODES_PER_LAYER];
@@ -55,6 +65,7 @@
val += layer_weights[node * num_input_nodes + i] * input_nodes[i];
output[node] = val;
}
+ if (reduce_prec) av1_nn_output_prec_reduce(output, nn_config->num_outputs);
}
#if CONFIG_NN_V2
@@ -107,7 +118,7 @@
}
void av1_nn_predict_v2(const float *feature, NN_CONFIG_V2 *nn_config,
- float *output) {
+ int reduce_prec, float *output) {
const float *input_nodes = feature;
// Propagate the layers.
@@ -124,6 +135,7 @@
assert(nn_config->layer[num_layers].num_outputs == nn_config->num_logits);
// Copy the final layer output
memcpy(output, input_nodes, sizeof(*input_nodes) * nn_config->num_logits);
+ if (reduce_prec) av1_nn_output_prec_reduce(output, nn_config->num_logits);
}
#endif // CONFIG_NN_V2