/*
 * Copyright (c) 2019, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
|  |  | 
|  | #ifndef AOM_AV1_COMMON_CNN_H_ | 
|  | #define AOM_AV1_COMMON_CNN_H_ | 
|  |  | 
|  | #ifdef __cplusplus | 
|  | extern "C" { | 
|  | #endif | 
|  |  | 
|  | #include <math.h> | 
|  |  | 
|  | #include "aom_util/aom_thread.h" | 
|  | #include "config/av1_rtcd.h" | 
|  |  | 
|  | struct AV1Common; | 
|  |  | 
// Compile-time limits of the CNN description.
#define CNN_MAX_HIDDEN_LAYERS 64
// Total layer limit: the hidden layers plus one more layer. This is the
// length of the layer_config array in struct CNN_CONFIG.
#define CNN_MAX_LAYERS (CNN_MAX_HIDDEN_LAYERS + 1)
#define CNN_MAX_CHANNELS 256
// Branch indices lie in [0, CNN_MAX_BRANCHES - 1]; 0 is the primary branch.
#define CNN_MAX_BRANCHES 4
#define CNN_MAX_THREADS 32
|  |  | 
// Brace initializer with three zeros, matching the three int fields of
// struct CNN_BRANCH_CONFIG.
#define NO_BRANCH_CONFIG { 0, 0, 0 }
// Brace initializer with four NULL pointers, matching the four pointer fields
// of struct CNN_BATCHNORM_PARAMS (all-NULL parameters disable batchnorm).
#define NO_BN_PARAMS { NULL, NULL, NULL, NULL }
|  |  | 
// Padding modes selectable per layer through CNN_LAYER_CONFIG::pad.
enum {
  PADDING_SAME_ZERO,       // tensorflow's SAME padding with pixels outside
                           // the image area assumed to be 0 (default)
  PADDING_SAME_REPLICATE,  // tensorflow's SAME padding with pixels outside
                           // the image area replicated from closest edge
  PADDING_VALID            // tensorflow's VALID padding
} UENUM1BYTE(PADDING_TYPE);
|  |  | 
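// NOTE: the definition below is commented out in this header, yet ACTIVATION
// is used by struct CNN_LAYER_CONFIG, so the type must be provided by one of
// the headers included above.
// enum { NONE, RELU, SOFTSIGN } UENUM1BYTE(ACTIVATION);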
|  |  | 
// Times when the input tensor may be copied to the branches given in
// input_to_branches.
// BRANCH_NO_COPY: doesn't copy any tensor.
// BRANCH_INPUT: copies the input tensor to branches.
// BRANCH_OUTPUT: copies the convolved tensor to branches.
// BRANCH_COMBINED: copies the combined (after convolving and branch combining)
//   tensor. If no combinations happen at this layer, then this option
//   has the same effect as BRANCH_OUTPUT.
enum {
  BRANCH_NO_COPY,
  BRANCH_INPUT,
  BRANCH_OUTPUT,
  BRANCH_COMBINED
} UENUM1BYTE(BRANCH_COPY);
|  |  | 
// Ways of combining branches with the output of the current layer:
// BRANCH_NOC: no branch combining
// BRANCH_ADD: add the previously stored branch tensor to the output of the
//   layer
// BRANCH_CAT: concatenate the branch tensor to the output of the layer
enum { BRANCH_NOC, BRANCH_ADD, BRANCH_CAT } UENUM1BYTE(BRANCH_COMBINE);
|  |  | 
// The parameters used to scale each channel in batch normalization. The
// processing is done on a per-channel basis, e.g. bn_mean[c] is the mean for
// all pixels in channel c. This is always applied after activation. The
// output is given by
//   out[c,i,j] = norm[c,i,j] * bn_gamma[c] + bn_beta[c]
// where
//   norm[c,i,j] = (in[c,i,j] - bn_mean[c]) / bn_std[c]
// Here we assume that the effect of variance_epsilon is already taken into
// account when bn_std is calculated. The pointers need to be either all NULL
// or all valid. If all are NULL, batchnorm is disabled; otherwise batchnorm
// is applied.
struct CNN_BATCHNORM_PARAMS {
  const float *bn_gamma;  // per-channel scale applied to norm
  const float *bn_beta;   // per-channel offset added after scaling
  const float *bn_mean;   // per-channel mean subtracted from the input
  const float *bn_std;    // per-channel divisor used to form norm
};
|  |  | 
// Per-layer description of which branches receive a copy of a tensor and
// which branches are combined with the layer output. Branch sets are binary
// masks in which bit i selects branch i; branch 0 is the primary branch.
struct CNN_BRANCH_CONFIG {
  // If nonzero, copy the active tensor to the current layer and store it for
  // future use in the branches specified in this field as a binary mask. For
  // example, if input_to_branches = 0x06, it means the input tensor to the
  // current branch is copied to branches 1 and 2 (where 0 represents the
  // primary branch). One restriction is that the mask cannot indicate copying
  // to the current branch.
  int input_to_branches;
  // Applies to the copy of the active tensor made, within the layer, to the
  // branches given in input_to_branches. If greater than 0, only copies the
  // channels up to the given index.
  int channels_to_copy;
  // Mask of branches to combine with the output of the current layer, if
  // branch_combine_type != BRANCH_NOC. For example, if
  // branches_to_combine = 0x0A, it means that branches 1 and 3 are combined
  // with the current branch.
  int branches_to_combine;
};
|  |  | 
|  | struct CNN_LAYER_CONFIG { | 
|  | int in_channels; | 
|  | int filter_width; | 
|  | int filter_height; | 
|  | int out_channels; | 
|  | int skip_width; | 
|  | int skip_height; | 
|  | int maxpool;            // whether to use maxpool or not (only effective when | 
|  | // skip width or skip_height are > 1) | 
|  | const float *weights;   // array of length filter_height x filter_width x | 
|  | // in_channels x out_channels where the inner-most | 
|  | // scan is out_channels and the outer most scan is | 
|  | // filter_height. | 
|  | const float *bias;      // array of length out_channels | 
|  | PADDING_TYPE pad;       // padding type | 
|  | ACTIVATION activation;  // the activation function to use after convolution | 
|  | int deconvolve;         // whether this is a deconvolution layer. | 
|  | // 0: If skip_width or skip_height are > 1, then we | 
|  | // reduce resolution | 
|  | // 1: If skip_width or skip_height are > 1, then we | 
|  | // increase resolution | 
|  | int branch;             // branch index in [0, CNN_MAX_BRANCHES - 1], where | 
|  | // 0 refers to the primary branch. | 
|  | BRANCH_COPY branch_copy_type; | 
|  | BRANCH_COMBINE branch_combine_type; | 
|  | struct CNN_BRANCH_CONFIG branch_config; | 
|  | struct CNN_BATCHNORM_PARAMS | 
|  | bn_params;   // A struct that contains the parameters | 
|  | // used for batch normalization. | 
|  | int output_num;  // The output buffer idx to which the layer output is | 
|  | // written. Set to -1 to disable writing it to the output. In | 
|  | // the case that branch_combine_type is BRANCH_CAT, all | 
|  | // concatenated channels will be written to output. In the | 
|  | // case of BRANCH_ADD, the output will be the result of | 
|  | // summation. | 
|  | }; | 
|  |  | 
|  | struct CNN_CONFIG { | 
|  | int num_layers;  // number of CNN layers ( = number of hidden layers + 1) | 
|  | int is_residue;  // whether the output activation is a residue | 
|  | int ext_width, ext_height;  // extension horizontally and vertically | 
|  | int strict_bounds;          // whether the input bounds are strict or not. | 
|  | // If strict, the extension area is filled by | 
|  | // replication; if not strict, image data is | 
|  | // assumed available beyond the bounds. | 
|  | CNN_LAYER_CONFIG layer_config[CNN_MAX_LAYERS]; | 
|  | }; | 
|  |  | 
|  | struct CNN_THREAD_DATA { | 
|  | int num_workers; | 
|  | AVxWorker *workers; | 
|  | }; | 
|  |  | 
// Output description used by the multi-output prediction functions
// (av1_cnn_predict_img_multi_out and its highbd variant).
// NOTE(review): the three pointer fields look like arrays with one entry per
// output (num_outputs entries each) — confirm against the implementation.
struct CNN_MULTI_OUT {
  int num_outputs;
  const int *output_channels;
  const int *output_strides;
  float **output_buffer;
};
|  |  | 
|  | // Function to return size of output | 
|  | void av1_find_cnn_output_size(int in_width, int in_height, | 
|  | const CNN_CONFIG *cnn_config, int *out_width, | 
|  | int *out_height, int *out_channels); | 
|  |  | 
|  | // Function to return output width and output height of given layer. | 
|  | void av1_find_cnn_layer_output_size(int in_width, int in_height, | 
|  | const CNN_LAYER_CONFIG *layer_config, | 
|  | int *out_width, int *out_height); | 
|  |  | 
|  | // Prediction functions from set of input image buffers. This function supports | 
|  | // CNN with multiple outputs. | 
|  | void av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height, | 
|  | int stride, const CNN_CONFIG *cnn_config, | 
|  | const CNN_THREAD_DATA *thread_data, | 
|  | struct CNN_MULTI_OUT *output); | 
|  | void av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height, | 
|  | int stride, | 
|  | const CNN_CONFIG *cnn_config, | 
|  | const CNN_THREAD_DATA *thread_data, | 
|  | int bit_depth, CNN_MULTI_OUT *output); | 
|  |  | 
|  | // Prediction functions from set of input image buffers. This function only | 
|  | // supports a single output. | 
|  | void av1_cnn_predict_img(uint8_t **dgd, int width, int height, int stride, | 
|  | const CNN_CONFIG *cnn_config, | 
|  | const CNN_THREAD_DATA *thread_data, float **output, | 
|  | int out_stride); | 
|  | void av1_cnn_predict_img_highbd(uint16_t **dgd, int width, int height, | 
|  | int stride, const CNN_CONFIG *cnn_config, | 
|  | const CNN_THREAD_DATA *thread_data, | 
|  | int bit_depth, float **output, int out_stride); | 
|  |  | 
|  | #ifdef __cplusplus | 
|  | }  // extern "C" | 
|  | #endif | 
|  |  | 
|  | #endif  // AOM_AV1_COMMON_CNN_H_ |