Add a precision reduction step after nn_predict

The precision reduction step is a temporary fix to reduce the probability of mismatches between C and SIMD implementations for floating point av1_nn_predict(). Ideally the SIMD implementation needs to be redone.

The patch fixes the issue in bug 2415, but there is no guarantee that mismatches will never happen, since the error is often larger than the reduced precision.

STATS_CHANGED in the noise range.
lowres (33 frames end-usage q cpu-used 0): +0.003
midres (33 frames end-usage q cpu-used 0): -0.011

BUG=aomedia:2415

Change-Id: I70298e0e35abfe86cb65ad12b7ee506f9b736e74

commit: d44f5d12c2c6dd6a81ebc0ee54c786f649885503 [log] [tgz]
author: Debargha Mukherjee <debargha@google.com> Thu Jun 27 14:56:05 2019 -0700
committer: Debargha Mukherjee <debargha@google.com> Tue Jul 02 16:41:07 2019 +0000
tree: 560ca0d812927b6e2d4ed6048ba22fd4bd75810b
parent: 39fca5f5821c4e77a60e0d4c29726b842d388712 [diff] [blame]
diff --git a/av1/encoder/ml.c b/av1/encoder/ml.c
index b5d8a16..57228ec 100644
--- a/av1/encoder/ml.c
+++ b/av1/encoder/ml.c

@@ -15,11 +15,21 @@
 #include "aom_dsp/aom_dsp_common.h"
 #include "av1/encoder/ml.h"
 
+void av1_nn_output_prec_reduce(float *const output, int num_output) {
+  const int prec_bits = 11;
+  const int prec = 1 << prec_bits;
+  const float inv_prec = (float)(1.0 / prec);
+  for (int i = 0; i < num_output; i++) {
+    output[i] = ((int)(output[i] * prec + 0.5)) * inv_prec;
+  }
+}
+
 // Calculate prediction based on the given input features and neural net config.
 // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
 // layer.
 void av1_nn_predict_c(const float *input_nodes,
-                      const NN_CONFIG *const nn_config, float *const output) {
+                      const NN_CONFIG *const nn_config, int reduce_prec,
+                      float *const output) {
   int num_input_nodes = nn_config->num_inputs;
   int buf_index = 0;
   float buf[2][NN_MAX_NODES_PER_LAYER];
@@ -55,6 +65,7 @@
       val += layer_weights[node * num_input_nodes + i] * input_nodes[i];
     output[node] = val;
   }
+  if (reduce_prec) av1_nn_output_prec_reduce(output, nn_config->num_outputs);
 }
 
 #if CONFIG_NN_V2
@@ -107,7 +118,7 @@
 }
 
 void av1_nn_predict_v2(const float *feature, NN_CONFIG_V2 *nn_config,
-                       float *output) {
+                       int reduce_prec, float *output) {
   const float *input_nodes = feature;
 
   // Propagate the layers.
@@ -124,6 +135,7 @@
   assert(nn_config->layer[num_layers].num_outputs == nn_config->num_logits);
   // Copy the final layer output
   memcpy(output, input_nodes, sizeof(*input_nodes) * nn_config->num_logits);
+  if (reduce_prec) av1_nn_output_prec_reduce(output, nn_config->num_logits);
 }
 #endif  // CONFIG_NN_V2
commit	d44f5d12c2c6dd6a81ebc0ee54c786f649885503	[log] [tgz]
author	Debargha Mukherjee <debargha@google.com>	Thu Jun 27 14:56:05 2019 -0700
committer	Debargha Mukherjee <debargha@google.com>	Tue Jul 02 16:41:07 2019 +0000
tree	560ca0d812927b6e2d4ed6048ba22fd4bd75810b
parent	39fca5f5821c4e77a60e0d4c29726b842d388712 [diff] [blame]