| /* | 
 |  * Copyright (c) 2019, Alliance for Open Media. All rights reserved. | 
 |  * | 
 |  * This source code is subject to the terms of the BSD 2 Clause License and | 
 |  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
 |  * was not distributed with this source code in the LICENSE file, you can | 
 |  * obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
 |  * Media Patent License 1.0 was not distributed with this source code in the | 
 |  * PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
 |  */ | 
 |  | 
 | #ifndef AOM_AV1_ENCODER_CNN_H_ | 
 | #define AOM_AV1_ENCODER_CNN_H_ | 
 |  | 
 | #ifdef __cplusplus | 
 | extern "C" { | 
 | #endif | 
 |  | 
 | #include <math.h> | 
 | #include <stdbool.h> | 
 |  | 
 | #include "aom_util/aom_thread.h" | 
 | #include "config/av1_rtcd.h" | 
 |  | 
 | // Forward declaration only; no declaration visible in this header uses it. | 
 | struct AV1Common; | 
 |  | 
 | // Compile-time limits of a CNN description. | 
 | // CNN_MAX_LAYERS is the capacity of CNN_CONFIG::layer_config; it allows for | 
 | // one layer in addition to the hidden layers. | 
 | #define CNN_MAX_HIDDEN_LAYERS 64 | 
 | #define CNN_MAX_LAYERS (CNN_MAX_HIDDEN_LAYERS + 1) | 
 | // NOTE(review): presumably the per-layer channel limit -- not used in this | 
 | // header, confirm against the implementation. | 
 | #define CNN_MAX_CHANNELS 256 | 
 | // CNN_LAYER_CONFIG::branch must lie in [0, CNN_MAX_BRANCHES - 1]. | 
 | #define CNN_MAX_BRANCHES 4 | 
 | // NOTE(review): presumably the worker limit for CNN_THREAD_DATA -- not used | 
 | // in this header, confirm against the implementation. | 
 | #define CNN_MAX_THREADS 32 | 
 |  | 
 | // Brace initializers for "feature off": three zeros for the three int fields | 
 | // of CNN_BRANCH_CONFIG, and four NULLs for the four pointers of | 
 | // CNN_BATCHNORM_PARAMS (all-NULL pointers disable batchnorm). | 
 | #define NO_BRANCH_CONFIG { 0, 0, 0 } | 
 | #define NO_BN_PARAMS { NULL, NULL, NULL, NULL } | 
 |  | 
 | // Padding modes selectable per layer through CNN_LAYER_CONFIG::pad. | 
 | enum { | 
 |   // tensorflow's SAME padding; pixels outside the image area are assumed to | 
 |   // be 0 (default). | 
 |   PADDING_SAME_ZERO = 0, | 
 |   // tensorflow's SAME padding; pixels outside the image area are replicated | 
 |   // from the closest edge. | 
 |   PADDING_SAME_REPLICATE = 1, | 
 |   // tensorflow's VALID padding. | 
 |   PADDING_VALID = 2 | 
 | } UENUM1BYTE(PADDING_TYPE); | 
 |  | 
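 | // ACTIVATION is used by CNN_LAYER_CONFIG below but is not defined in this | 
 | // header; it is expected to come from an included header. Its declaration is | 
 | // kept here for reference only: | 
 | // enum { NONE, RELU, SOFTSIGN } UENUM1BYTE(ACTIVATION); | 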
 |  | 
 | // Selects which tensor of a layer, if any, is copied to the branches given | 
 | // in CNN_BRANCH_CONFIG::input_to_branches. | 
 | enum { | 
 |   // Doesn't copy any tensor. | 
 |   BRANCH_NO_COPY = 0, | 
 |   // Copies the input tensor to branches. | 
 |   BRANCH_INPUT = 1, | 
 |   // Copies the convolved tensor to branches. | 
 |   BRANCH_OUTPUT = 2, | 
 |   // Copies the combined tensor (after convolving and branch combining). If | 
 |   // no combination happens at this layer, this option has the same effect as | 
 |   // BRANCH_OUTPUT. | 
 |   BRANCH_COMBINED = 3 | 
 | } UENUM1BYTE(BRANCH_COPY); | 
 |  | 
 | // How previously stored branch tensors are combined with the output of the | 
 | // current layer. | 
 | enum { | 
 |   BRANCH_NOC = 0,  // no branch combining | 
 |   BRANCH_ADD = 1,  // add previously stored branch tensor to output of layer | 
 |   BRANCH_CAT = 2   // concatenate branch tensor to output of layer | 
 | } UENUM1BYTE(BRANCH_COMBINE); | 
 |  | 
 | // Per-channel batch normalization parameters. Processing is done on a | 
 | // per-channel basis: entry c of each array applies to all pixels in channel | 
 | // c (e.g. bn_mean[c] is the mean for channel c). Batchnorm is always applied | 
 | // after activation and computes | 
 | //   norm[c,i,j] = (in[c,i,j] - bn_mean[c]) / bn_std[c] | 
 | //   out[c,i,j]  = norm[c,i,j] * bn_gamma[c] + bn_beta[c] | 
 | // The effect of variance_epsilon is assumed to be already taken into account | 
 | // in bn_std. The pointers need to be either all NULL or all valid: if all | 
 | // NULL, batchnorm is disabled, otherwise batchnorm is applied. | 
 | struct CNN_BATCHNORM_PARAMS { | 
 |   const float *bn_gamma;  // scale applied to the normalized value | 
 |   const float *bn_beta;   // offset added after scaling | 
 |   const float *bn_mean;   // mean subtracted from the input | 
 |   const float *bn_std;    // divisor of the centered input | 
 | }; | 
 |  | 
 | // Describes how a layer exchanges tensors with other branches. | 
 | struct CNN_BRANCH_CONFIG { | 
 |   // Binary mask of branches. If nonzero, the active tensor of the current | 
 |   // layer is copied and stored for future use in the branches whose bits are | 
 |   // set. For example, input_to_branches = 0x06 means the tensor is copied to | 
 |   // branches 1 and 2 (where 0 represents the primary branch). One | 
 |   // restriction is that the mask cannot indicate copying to the current | 
 |   // branch. | 
 |   int input_to_branches; | 
 |   // Applies to the copy made to the branches given in input_to_branches: if | 
 |   // greater than 0, only the channels up to the given index are copied. | 
 |   // NOTE(review): behavior for values <= 0 is not stated here -- confirm | 
 |   // against the implementation. | 
 |   int channels_to_copy; | 
 |   // Mask of branches to combine with the output of the current layer, if | 
 |   // branch_combine_type != BRANCH_NOC. For example, branches_to_combine = | 
 |   // 0x0A means that branches 1 and 3 are combined with the current branch. | 
 |   int branches_to_combine; | 
 | }; | 
 |  | 
 | // Configuration of a single CNN layer. | 
 | struct CNN_LAYER_CONFIG { | 
 |   int in_channels; | 
 |   int filter_width; | 
 |   int filter_height; | 
 |   int out_channels; | 
 |   // When skip_width or skip_height is > 1 the layer changes resolution; the | 
 |   // direction is selected by deconvolve. | 
 |   int skip_width; | 
 |   int skip_height; | 
 |   // Whether to use maxpool or not (only effective when skip_width or | 
 |   // skip_height is > 1). | 
 |   int maxpool; | 
 |   // Array of length filter_height x filter_width x in_channels x | 
 |   // out_channels, where the inner-most scan is out_channels and the | 
 |   // outer-most scan is filter_height. | 
 |   const float *weights; | 
 |   // Array of length out_channels. | 
 |   const float *bias; | 
 |   // Padding type. | 
 |   PADDING_TYPE pad; | 
 |   // The activation function to use after convolution. | 
 |   ACTIVATION activation; | 
 |   // Whether this is a deconvolution layer. | 
 |   //   0: if skip_width or skip_height is > 1, resolution is reduced. | 
 |   //   1: if skip_width or skip_height is > 1, resolution is increased. | 
 |   int deconvolve; | 
 |   // Branch index in [0, CNN_MAX_BRANCHES - 1], where 0 refers to the primary | 
 |   // branch. | 
 |   int branch; | 
 |   BRANCH_COPY branch_copy_type; | 
 |   BRANCH_COMBINE branch_combine_type; | 
 |   struct CNN_BRANCH_CONFIG branch_config; | 
 |   // Parameters used for batch normalization. | 
 |   struct CNN_BATCHNORM_PARAMS bn_params; | 
 |   // The output buffer idx to which the layer output is written. Set to -1 to | 
 |   // disable writing it to the output. If branch_combine_type is BRANCH_CAT, | 
 |   // all concatenated channels are written to the output; if it is | 
 |   // BRANCH_ADD, the output is the result of the summation. | 
 |   int output_num; | 
 | }; | 
 |  | 
 | // Description of a whole network: global options plus per-layer configs. | 
 | struct CNN_CONFIG { | 
 |   // Number of CNN layers (= number of hidden layers + 1). | 
 |   int num_layers; | 
 |   // Whether the output activation is a residue. | 
 |   int is_residue; | 
 |   // Extension horizontally. | 
 |   int ext_width; | 
 |   // Extension vertically. | 
 |   int ext_height; | 
 |   // Whether the input bounds are strict or not. If strict, the extension | 
 |   // area is filled by replication; if not strict, image data is assumed | 
 |   // available beyond the bounds. | 
 |   int strict_bounds; | 
 |   // Per-layer configuration, with room for CNN_MAX_LAYERS entries. | 
 |   CNN_LAYER_CONFIG layer_config[CNN_MAX_LAYERS]; | 
 | }; | 
 |  | 
 | // Worker information passed to the prediction functions as thread_data. | 
 | // NOTE(review): num_workers is presumably the number of entries in workers | 
 | // -- confirm against the implementation. | 
 | struct CNN_THREAD_DATA { | 
 |   int num_workers; | 
 |   AVxWorker *workers; | 
 | }; | 
 |  | 
 | // Output description taken by the multi-output prediction functions. | 
 | // NOTE(review): the three arrays are presumably indexed by output number | 
 | // (see CNN_LAYER_CONFIG::output_num) and hold num_outputs entries each -- | 
 | // confirm against the implementation. | 
 | struct CNN_MULTI_OUT { | 
 |   int num_outputs; | 
 |   const int *output_channels; | 
 |   const int *output_strides; | 
 |   float **output_buffer; | 
 | }; | 
 |  | 
 | // Returns the size of the output of the network described by cnn_config for | 
 | // an input of in_width x in_height. The result is written through out_width, | 
 | // out_height and out_channels. | 
 | // NOTE(review): for multi-output networks these may be per-output arrays -- | 
 | // confirm against the implementation. | 
 | void av1_find_cnn_output_size(int in_width, int in_height, | 
 |                               const CNN_CONFIG *cnn_config, int *out_width, | 
 |                               int *out_height, int *out_channels); | 
 |  | 
 | // Returns the output width and output height of the single layer described | 
 | // by layer_config for an input of in_width x in_height. The result is | 
 | // written through out_width and out_height. | 
 | void av1_find_cnn_layer_output_size(int in_width, int in_height, | 
 |                                     const CNN_LAYER_CONFIG *layer_config, | 
 |                                     int *out_width, int *out_height); | 
 |  | 
 | // Prediction from a set of 8-bit input image buffers (dgd). Supports CNNs | 
 | // with multiple outputs, which are described by output. | 
 | // NOTE(review): the bool result presumably reports success -- confirm | 
 | // against the implementation. | 
 | // The output parameter uses the CNN_MULTI_OUT typedef name, matching | 
 | // av1_cnn_predict_img_multi_out_highbd and the other prototypes here. | 
 | bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height, | 
 |                                    int stride, const CNN_CONFIG *cnn_config, | 
 |                                    const CNN_THREAD_DATA *thread_data, | 
 |                                    CNN_MULTI_OUT *output); | 
 | // Prediction from a set of 16-bit input image buffers (dgd) with the given | 
 | // bit_depth. Supports CNNs with multiple outputs, described by output. | 
 | // NOTE(review): the bool result presumably reports success -- confirm | 
 | // against the implementation. | 
 | bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height, | 
 |                                           int stride, | 
 |                                           const CNN_CONFIG *cnn_config, | 
 |                                           const CNN_THREAD_DATA *thread_data, | 
 |                                           int bit_depth, CNN_MULTI_OUT *output); | 
 | #ifdef __cplusplus | 
 | }  // extern "C" | 
 | #endif | 
 |  | 
 | #endif  // AOM_AV1_ENCODER_CNN_H_ |