| /* |
| * Copyright (c) 2021, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 3-Clause Clear License |
| * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear |
| * License was not distributed with this source code in the LICENSE file, you |
| * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the |
| * Alliance for Open Media Patent License 1.0 was not distributed with this |
| * source code in the PATENTS file, you can obtain it at |
| * aomedia.org/license/patent-license/. |
| */ |
| |
| #ifndef AOM_AV1_ENCODER_ML_H_ |
| #define AOM_AV1_ENCODER_ML_H_ |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| #include "config/av1_rtcd.h" |
| |
| #define NN_MAX_HIDDEN_LAYERS 10 |
| #define NN_MAX_NODES_PER_LAYER 128 |
| |
| struct NN_CONFIG { |
| int num_inputs; // Number of input nodes, i.e. features. |
| int num_outputs; // Number of output nodes. |
| int num_hidden_layers; // Number of hidden layers, maximum 10. |
| // Number of nodes for each hidden layer. |
| int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS]; |
| // Weight parameters, indexed by layer. |
| const float *weights[NN_MAX_HIDDEN_LAYERS + 1]; |
| // Bias parameters, indexed by layer. |
| const float *bias[NN_MAX_HIDDEN_LAYERS + 1]; |
| }; |
| // Typedef from struct NN_CONFIG to NN_CONFIG is in rtcd_defs |
| |
| #if CONFIG_NN_V2 |
| // Fully-connectedly layer configuration |
| struct FC_LAYER { |
| const int num_inputs; // Number of input nodes, i.e. features. |
| const int num_outputs; // Number of output nodes. |
| |
| float *weights; // Weight parameters. |
| float *bias; // Bias parameters. |
| const ACTIVATION activation; // Activation function. |
| |
| float *output; // The output array. |
| float *dY; // Gradient of outputs |
| float *dW; // Gradient of weights. |
| float *db; // Gradient of bias |
| }; |
| |
| // NN configure structure V2 |
| struct NN_CONFIG_V2 { |
| const int num_hidden_layers; // Number of hidden layers, max = 10. |
| FC_LAYER layer[NN_MAX_HIDDEN_LAYERS + 1]; // The layer array |
| const int num_logits; // Number of output nodes. |
| float *logits; // Raw prediction (same as output of final layer) |
| const LOSS loss; // Loss function |
| }; |
| |
| // Calculate prediction based on the given input features and neural net config. |
| // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden |
| // layer. |
| void av1_nn_predict_v2(const float *features, NN_CONFIG_V2 *nn_config, |
| int reduce_prec, float *output); |
| #endif // CONFIG_NN_V2 |
| |
| // Applies the softmax normalization function to the input |
| // to get a valid probability distribution in the output: |
| // output[i] = exp(input[i]) / sum_{k \in [0,n)}(exp(input[k])) |
| void av1_nn_softmax(const float *input, float *output, int n); |
| |
| // Applies a precision reduction to output of av1_nn_predict to prevent |
| // mismatches between C and SIMD implementations. |
| void av1_nn_output_prec_reduce(float *const output, int num_output); |
| |
| #ifdef __cplusplus |
| } // extern "C" |
| #endif |
| |
| #endif // AOM_AV1_ENCODER_ML_H_ |