/*
 * Copyright (c) 2019, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

| #ifndef AOM_AV1_ENCODER_CNN_H_ |
| #define AOM_AV1_ENCODER_CNN_H_ |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| #include <math.h> |
| #include <stdbool.h> |
| |
| #include "aom_util/aom_thread.h" |
| #include "config/av1_rtcd.h" |
| |
struct AV1Common;

// Compile-time limits of a CNN description. CNN_MAX_LAYERS is the number of
// hidden layers plus one and sizes CNN_CONFIG::layer_config.
#define CNN_MAX_HIDDEN_LAYERS 64
#define CNN_MAX_LAYERS (CNN_MAX_HIDDEN_LAYERS + 1)
#define CNN_MAX_CHANNELS 256
#define CNN_MAX_BRANCHES 4
#define CNN_MAX_THREADS 32

// Brace initializer with three zero members, matching the three int fields of
// struct CNN_BRANCH_CONFIG.
#define NO_BRANCH_CONFIG \
  { 0, 0, 0 }
// Brace initializer with four NULL members, matching the four pointers of
// struct CNN_BATCHNORM_PARAMS (all NULL means batchnorm is disabled).
#define NO_BN_PARAMS \
  { NULL, NULL, NULL, NULL }

// Padding modes selectable per layer through CNN_LAYER_CONFIG::pad.
enum {
  PADDING_SAME_ZERO,       // tensorflow's SAME padding with pixels outside
                           // the image area assumed to be 0 (default)
  PADDING_SAME_REPLICATE,  // tensorflow's SAME padding with pixels outside
                           // the image area replicated from closest edge
  PADDING_VALID            // tensorflow's VALID padding
} UENUM1BYTE(PADDING_TYPE);

// NOTE(review): ACTIVATION (used by CNN_LAYER_CONFIG::activation) is not
// declared in this header; the declaration that used to live here is kept
// below for reference. Presumably the type is now provided by one of the
// included headers - confirm.
// enum { NONE, RELU, SOFTSIGN } UENUM1BYTE(ACTIVATION);

// Times when input tensor may be copied to branches given in input_to_branches.
// BRANCH_NO_COPY: doesn't copy any tensor.
// BRANCH_INPUT: copies the input tensor to branches.
// BRANCH_OUTPUT: copies the convolved tensor to branches.
// BRANCH_COMBINED: copies the combined (after convolving and branch combining)
//   tensor. If no combinations happen at this layer, then this option
//   has the same effect as BRANCH_OUTPUT.
enum {
  BRANCH_NO_COPY,
  BRANCH_INPUT,
  BRANCH_OUTPUT,
  BRANCH_COMBINED
} UENUM1BYTE(BRANCH_COPY);

// Types of combining branches with output of current layer:
// BRANCH_NOC: no branch combining
// BRANCH_ADD: Add previously stored branch tensor to output of layer
// BRANCH_CAT: Concatenate branch tensor to output of layer
enum { BRANCH_NOC, BRANCH_ADD, BRANCH_CAT } UENUM1BYTE(BRANCH_COMBINE);

// The parameters used to scale each channel in batch
// normalization. The processing is done on a per-channel basis.
// e.g. bn_mean[c] is the mean for all pixels in channel c. This
// is always applied after activation. The output is given by
// out[c,i,j] = norm[c,i,j] * bn_gamma[c] + bn_beta[c] where
// norm[c,i,j] = (in[c,i,j] - bn_mean[c]) / bn_std[c]
// here we assume that the effect of variance_epsilon is already
// taken into account when bn_std is calculated. The pointers
// need to be either all zero or all valid. If all zero, then
// batchnorm is disabled, else batchnorm is applied.
struct CNN_BATCHNORM_PARAMS {
  const float *bn_gamma;  // per-channel scale applied to norm[c,i,j]
  const float *bn_beta;   // per-channel offset added after scaling
  const float *bn_mean;   // per-channel mean subtracted from the input
  const float *bn_std;    // per-channel divisor (epsilon already folded in)
};

// Per-layer description of which branches receive a copy of the active tensor
// and which branches are combined with the layer output.
struct CNN_BRANCH_CONFIG {
  // If nonzero, copy the active tensor to the current layer and store for
  // future use in branches specified in the field as a binary mask. For
  // example, if input_to_branches = 0x06, it means the input tensor to the
  // current branch is copied to branches 1 and 2 (where 0 represents the
  // primary branch). One restriction is that the mask cannot indicate
  // copying to the current branch.
  int input_to_branches;
  // Within the layer, input a copy of active tensor to branches given in
  // input_to_branches. If greater than 0, only copies the channels up to the
  // given index.
  // NOTE(review): the "greater than 0" sentence was attached to
  // input_to_branches in the original comment; it reads as a description of
  // this field - confirm against the implementation.
  int channels_to_copy;
  // Mask of branches to combine with output of current layer, if
  // branch_combine_type != BRANCH_NOC. For example, if
  // branches_to_combine = 0x0A, it means that branches 1 and 3 are combined
  // with the current branch.
  int branches_to_combine;
};

| struct CNN_LAYER_CONFIG { |
| int in_channels; |
| int filter_width; |
| int filter_height; |
| int out_channels; |
| int skip_width; |
| int skip_height; |
| int maxpool; // whether to use maxpool or not (only effective when |
| // skip width or skip_height are > 1) |
| const float *weights; // array of length filter_height x filter_width x |
| // in_channels x out_channels where the inner-most |
| // scan is out_channels and the outer most scan is |
| // filter_height. |
| const float *bias; // array of length out_channels |
| PADDING_TYPE pad; // padding type |
| ACTIVATION activation; // the activation function to use after convolution |
| int deconvolve; // whether this is a deconvolution layer. |
| // 0: If skip_width or skip_height are > 1, then we |
| // reduce resolution |
| // 1: If skip_width or skip_height are > 1, then we |
| // increase resolution |
| int branch; // branch index in [0, CNN_MAX_BRANCHES - 1], where |
| // 0 refers to the primary branch. |
| BRANCH_COPY branch_copy_type; |
| BRANCH_COMBINE branch_combine_type; |
| struct CNN_BRANCH_CONFIG branch_config; |
| struct CNN_BATCHNORM_PARAMS |
| bn_params; // A struct that contains the parameters |
| // used for batch normalization. |
| int output_num; // The output buffer idx to which the layer output is |
| // written. Set to -1 to disable writing it to the output. In |
| // the case that branch_combine_type is BRANCH_CAT, all |
| // concatenated channels will be written to output. In the |
| // case of BRANCH_ADD, the output will be the result of |
| // summation. |
| }; |
| |
| struct CNN_CONFIG { |
| int num_layers; // number of CNN layers ( = number of hidden layers + 1) |
| int is_residue; // whether the output activation is a residue |
| int ext_width, ext_height; // extension horizontally and vertically |
| int strict_bounds; // whether the input bounds are strict or not. |
| // If strict, the extension area is filled by |
| // replication; if not strict, image data is |
| // assumed available beyond the bounds. |
| CNN_LAYER_CONFIG layer_config[CNN_MAX_LAYERS]; |
| }; |
| |
| struct CNN_THREAD_DATA { |
| int num_workers; |
| AVxWorker *workers; |
| }; |
| |
// Destination description for a CNN with multiple outputs.
// NOTE(review): presumably each of the three arrays has num_outputs entries,
// indexed by CNN_LAYER_CONFIG::output_num - confirm against the
// implementation.
struct CNN_MULTI_OUT {
  int num_outputs;
  const int *output_channels;
  const int *output_strides;
  float **output_buffer;
};

| // Function to return size of output |
| void av1_find_cnn_output_size(int in_width, int in_height, |
| const CNN_CONFIG *cnn_config, int *out_width, |
| int *out_height, int *out_channels); |
| |
| // Function to return output width and output height of given layer. |
| void av1_find_cnn_layer_output_size(int in_width, int in_height, |
| const CNN_LAYER_CONFIG *layer_config, |
| int *out_width, int *out_height); |
| |
| // Prediction functions from set of input image buffers. This function supports |
| // CNN with multiple outputs. |
| bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height, |
| int stride, const CNN_CONFIG *cnn_config, |
| const CNN_THREAD_DATA *thread_data, |
| struct CNN_MULTI_OUT *output); |
| bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height, |
| int stride, |
| const CNN_CONFIG *cnn_config, |
| const CNN_THREAD_DATA *thread_data, |
| int bit_depth, CNN_MULTI_OUT *output); |
| #ifdef __cplusplus |
| } // extern "C" |
| #endif |
| |
| #endif // AOM_AV1_ENCODER_CNN_H_ |