blob: e99b8ed53acfd9250c1ac69316d1855f35cb2160 [file] [log] [blame]
* Copyright (c) 2020, Alliance for Open Media. All rights reserved
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
#include <vector>
#include "aom_dsp/binary_codes_writer.h"
#include "av1/common/av1_common_int.h"
#include "av1/common/cnn_tflite.h"
#include "av1/tflite_models/op_registrations.h"
#include "av1/tflite_models/intra_frame_model/uv_qp0_90.h"
#include "av1/tflite_models/intra_frame_model/uv_qp91_120.h"
#include "av1/tflite_models/intra_frame_model/uv_qp121_145.h"
#include "av1/tflite_models/intra_frame_model/uv_qp146_175.h"
#include "av1/tflite_models/intra_frame_model/uv_qp176_205.h"
#include "av1/tflite_models/intra_frame_model/uv_qp206_255.h"
#include "av1/tflite_models/intra_frame_model/qp0_90.h"
#include "av1/tflite_models/intra_frame_model/qp91_120.h"
#include "av1/tflite_models/intra_frame_model/qp121_145.h"
#include "av1/tflite_models/intra_frame_model/qp146_175.h"
#include "av1/tflite_models/intra_frame_model/qp176_205.h"
#include "av1/tflite_models/intra_frame_model/qp206_255.h"
#include "av1/tflite_models/inter_frame_model/uv_qp0_90.h"
#include "av1/tflite_models/inter_frame_model/uv_qp91_120.h"
#include "av1/tflite_models/inter_frame_model/uv_qp121_145.h"
#include "av1/tflite_models/inter_frame_model/uv_qp146_175.h"
#include "av1/tflite_models/inter_frame_model/uv_qp176_205.h"
#include "av1/tflite_models/inter_frame_model/uv_qp206_255.h"
#include "av1/tflite_models/inter_frame_model/qp0_90.h"
#include "av1/tflite_models/inter_frame_model/qp91_120.h"
#include "av1/tflite_models/inter_frame_model/qp121_145.h"
#include "av1/tflite_models/inter_frame_model/qp146_175.h"
#include "av1/tflite_models/inter_frame_model/qp176_205.h"
#include "av1/tflite_models/inter_frame_model/qp206_255.h"
#include "av1/tflite_models/inter_frame_model/sr5by4ra_1_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr5by4ra_2_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr5by4ra_3_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr3by2ra_1_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr3by2ra_2_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr3by2ra_3_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr7by4ra_1_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr7by4ra_2_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr7by4ra_3_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr2by1ra_1_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr2by1ra_2_tflite.h"
#include "av1/tflite_models/inter_frame_model/sr2by1ra_3_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr5by4ai_1_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr5by4ai_2_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr5by4ai_3_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr3by2ai_1_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr3by2ai_2_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr3by2ai_3_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr7by4ai_1_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr7by4ai_2_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr7by4ai_3_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr2by1ai_1_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr2by1ai_2_tflite.h"
#include "av1/tflite_models/intra_frame_model/sr2by1ai_3_tflite.h"
#include "av1/tflite_models/inter_frame_model/qp0_90_quadtree.h"
#include "av1/tflite_models/inter_frame_model/qp91_120_quadtree.h"
#include "av1/tflite_models/inter_frame_model/qp121_145_quadtree.h"
#include "av1/tflite_models/inter_frame_model/qp146_175_quadtree.h"
#include "av1/tflite_models/inter_frame_model/qp176_205_quadtree.h"
#include "av1/tflite_models/inter_frame_model/qp206_255_quadtree.h"
#include "av1/tflite_models/intra_frame_model/qp0_90_quadtree.h"
#include "av1/tflite_models/intra_frame_model/qp91_120_quadtree.h"
#include "av1/tflite_models/intra_frame_model/qp121_145_quadtree.h"
#include "av1/tflite_models/intra_frame_model/qp146_175_quadtree.h"
#include "av1/tflite_models/intra_frame_model/qp176_205_quadtree.h"
#include "av1/tflite_models/intra_frame_model/qp206_255_quadtree.h"
#include "common/tf_lite_includes.h"
// Compile-time switch for the XNNPack delegate code paths (0 = disabled).
// NOTE(review): presumably the regions closed by `#endif // USE_XNNPACK` in
// this file are guarded by this macro; the matching `#if` lines are not
// present in this text -- confirm against a pristine copy.
#define USE_XNNPACK 0
// Selects the embedded TF-lite model blob used when the frame is intra-only.
//
// What the visible code reads:
//   qindex         - compared against MIN_CNN_Q_INDEX, then against the bucket
//                    limits 90/120/145/175/205 (or 120/180 in the last switch).
//   superres_denom - SCALE_NUMERATOR selects the qindex-bucketed models; the
//                    values 10, 12, 14 and 16 select the sr*ai_* models.
//   is_luma        - non-zero selects the luma tables; the `else` branch
//                    returns the uv_* models.
//   cnn_index      - 0 picks the first candidate, 1 the second, any other
//                    value the third.
// Returns a pointer to the selected model data, or nullptr when
// qindex <= MIN_CNN_Q_INDEX or when a switch hits its default case.
//
// NOTE(review): this text has lost lines (closing braces and, presumably, the
// preprocessor conditionals that choose between the alternative variants
// below). Read each "variant" as one build-time alternative -- TODO confirm
// against a pristine copy of the file.
static const unsigned char *get_intra_model_from_qindex(int qindex,
int superres_denom,
int is_luma,
int cnn_index) {
// Guard: no model is provided for qindex at or below MIN_CNN_Q_INDEX.
if (qindex <= MIN_CNN_Q_INDEX) {
return nullptr;
// NOTE(review): two different asserts on superres_denom follow back to back;
// presumably each belonged to a different build configuration -- confirm.
assert(superres_denom == SCALE_NUMERATOR || superres_denom == 10 ||
superres_denom == 12 || superres_denom == 14 || superres_denom == 16);
assert(superres_denom == SCALE_NUMERATOR);
// Variant A: "quadtree" luma models. For each qindex bucket, cnn_index 0
// returns the bucket's own model and the other indices return models of
// neighbouring buckets.
if (superres_denom == SCALE_NUMERATOR) { // quadtree
if (is_luma) {
if (qindex <= 90) {
return (cnn_index == 0) ? qp0_90_quadtree_model_tflite_data
: (cnn_index == 1) ? qp91_120_quadtree_model_tflite_data
: qp121_145_quadtree_model_tflite_data;
} else if (qindex <= 120) {
return (cnn_index == 0) ? qp91_120_quadtree_model_tflite_data
: (cnn_index == 1) ? qp0_90_quadtree_model_tflite_data
: qp121_145_quadtree_model_tflite_data;
} else if (qindex <= 145) {
return (cnn_index == 0) ? qp121_145_quadtree_model_tflite_data
: (cnn_index == 1) ? qp91_120_quadtree_model_tflite_data
: qp146_175_quadtree_model_tflite_data;
} else if (qindex <= 175) {
return (cnn_index == 0) ? qp146_175_quadtree_model_tflite_data
: (cnn_index == 1) ? qp121_145_quadtree_model_tflite_data
: qp176_205_quadtree_model_tflite_data;
} else if (qindex <= 205) {
return (cnn_index == 0) ? qp176_205_quadtree_model_tflite_data
: (cnn_index == 1) ? qp146_175_quadtree_model_tflite_data
: qp206_255_quadtree_model_tflite_data;
} else {
return (cnn_index == 0) ? qp206_255_quadtree_model_tflite_data
: (cnn_index == 1) ? qp176_205_quadtree_model_tflite_data
: qp146_175_quadtree_model_tflite_data;
// Variant B: non-quadtree models. Same bucket limits as variant A (written
// as `< 91` instead of `<= 90`, etc.), with a separate chroma branch.
if (superres_denom == SCALE_NUMERATOR) {
if (is_luma) {
if (qindex < 91) {
return (cnn_index == 0) ? qp0_90_model_tflite_data
: (cnn_index == 1) ? qp91_120_model_tflite_data
: qp121_145_model_tflite_data;
} else if (qindex < 121) {
return (cnn_index == 0) ? qp91_120_model_tflite_data
: (cnn_index == 1) ? qp0_90_model_tflite_data
: qp121_145_model_tflite_data;
} else if (qindex < 146) {
return (cnn_index == 0) ? qp121_145_model_tflite_data
: (cnn_index == 1) ? qp91_120_model_tflite_data
: qp146_175_model_tflite_data;
} else if (qindex < 176) {
return (cnn_index == 0) ? qp146_175_model_tflite_data
: (cnn_index == 1) ? qp121_145_model_tflite_data
: qp176_205_model_tflite_data;
} else if (qindex < 206) {
return (cnn_index == 0) ? qp176_205_model_tflite_data
: (cnn_index == 1) ? qp146_175_model_tflite_data
: qp206_255_model_tflite_data;
} else {
return (cnn_index == 0) ? qp206_255_model_tflite_data
: (cnn_index == 1) ? qp176_205_model_tflite_data
: qp146_175_model_tflite_data;
} else {
// Chroma: a single model per qindex bucket, so only cnn_index 0 is valid.
assert(cnn_index == 0);
if (qindex < 91) {
return uv_qp0_90_model_tflite_data;
} else if (qindex < 121) {
return uv_qp91_120_model_tflite_data;
} else if (qindex < 146) {
return uv_qp121_145_model_tflite_data;
} else if (qindex < 176) {
return uv_qp146_175_model_tflite_data;
} else if (qindex < 206) {
return uv_qp176_205_model_tflite_data;
} else {
return uv_qp206_255_model_tflite_data;
// Superres variant 1: the model family is chosen by superres_denom and the
// member of the family by cnn_index.
switch (superres_denom) {
case 10:
return (cnn_index == 0) ? sr5by4ai_1_tflite
: (cnn_index == 1) ? sr5by4ai_2_tflite
: sr5by4ai_3_tflite;
case 12:
return (cnn_index == 0) ? sr3by2ai_1_tflite
: (cnn_index == 1) ? sr3by2ai_2_tflite
: sr3by2ai_3_tflite;
case 14:
return (cnn_index == 0) ? sr7by4ai_1_tflite
: (cnn_index == 1) ? sr7by4ai_2_tflite
: sr7by4ai_3_tflite;
case 16:
return (cnn_index == 0) ? sr2by1ai_1_tflite
: (cnn_index == 1) ? sr2by1ai_2_tflite
: sr2by1ai_3_tflite;
default: assert(0); return nullptr;
// Superres variant 2: the model family is chosen by superres_denom and the
// member of the family by qindex (< 120, < 180, otherwise).
switch (superres_denom) {
case 10:
if (qindex < 120)
return sr5by4ai_1_tflite;
else if (qindex < 180)
return sr5by4ai_2_tflite;
return sr5by4ai_3_tflite;
case 12:
if (qindex < 120)
return sr3by2ai_1_tflite;
else if (qindex < 180)
return sr3by2ai_2_tflite;
return sr3by2ai_3_tflite;
case 14:
if (qindex < 120)
return sr7by4ai_1_tflite;
else if (qindex < 180)
return sr7by4ai_2_tflite;
return sr7by4ai_3_tflite;
case 16:
if (qindex < 120)
return sr2by1ai_1_tflite;
else if (qindex < 180)
return sr2by1ai_2_tflite;
return sr2by1ai_3_tflite;
default: assert(0); return nullptr;
// Fallback when no branch above returned.
return nullptr;
// Selects the embedded TF-lite model blob used when the frame is not
// intra-only (the *_inter_* and sr*ra_* models).
//
// What the visible code reads:
//   qindex         - compared against MIN_CNN_Q_INDEX, then against the bucket
//                    limits 90/120/145/175/205 (or 120/180 for superres).
//   superres_denom - SCALE_NUMERATOR selects the qindex-bucketed models; the
//                    values 10, 12, 14 and 16 select the sr*ra_* models.
//   is_luma        - non-zero selects the luma tables; the `else` branch
//                    returns the uv_*_inter models.
//   cnn_index      - 0 picks the first candidate, 1 the second, any other
//                    value the third (luma tables only).
// Returns a pointer to the selected model data, or nullptr when
// qindex <= MIN_CNN_Q_INDEX or when the switch hits its default case.
//
// NOTE(review): this text has lost lines (closing braces and, presumably, the
// preprocessor conditionals that choose between the alternative variants
// below) -- TODO confirm against a pristine copy of the file.
static const unsigned char *get_inter_model_from_qindex(int qindex,
int superres_denom,
int is_luma,
int cnn_index) {
// Guard: no model is provided for qindex at or below MIN_CNN_Q_INDEX.
if (qindex <= MIN_CNN_Q_INDEX) {
return nullptr;
// NOTE(review): two different asserts on superres_denom follow back to back;
// presumably each belonged to a different build configuration -- confirm.
assert(superres_denom == SCALE_NUMERATOR || superres_denom == 10 ||
superres_denom == 12 || superres_denom == 14 || superres_denom == 16);
assert(superres_denom == SCALE_NUMERATOR);
// Variant A: "quadtree" luma models. cnn_index 0 returns the bucket's own
// model; the other indices return models of neighbouring buckets.
if (superres_denom == SCALE_NUMERATOR) { // quadtree
if (is_luma) {
if (qindex <= 90) {
return (cnn_index == 0) ? qp0_90_quadtree_inter_model_tflite_data
: (cnn_index == 1) ? qp91_120_quadtree_inter_model_tflite_data
: qp121_145_quadtree_inter_model_tflite_data;
} else if (qindex <= 120) {
return (cnn_index == 0) ? qp91_120_quadtree_inter_model_tflite_data
: (cnn_index == 1) ? qp0_90_quadtree_inter_model_tflite_data
: qp121_145_quadtree_inter_model_tflite_data;
} else if (qindex <= 145) {
return (cnn_index == 0) ? qp121_145_quadtree_inter_model_tflite_data
: (cnn_index == 1) ? qp91_120_quadtree_inter_model_tflite_data
: qp146_175_quadtree_inter_model_tflite_data;
} else if (qindex <= 175) {
return (cnn_index == 0) ? qp146_175_quadtree_inter_model_tflite_data
: (cnn_index == 1) ? qp121_145_quadtree_inter_model_tflite_data
: qp176_205_quadtree_inter_model_tflite_data;
} else if (qindex <= 205) {
return (cnn_index == 0) ? qp176_205_quadtree_inter_model_tflite_data
: (cnn_index == 1) ? qp146_175_quadtree_inter_model_tflite_data
: qp206_255_quadtree_inter_model_tflite_data;
} else {
return (cnn_index == 0) ? qp206_255_quadtree_inter_model_tflite_data
: (cnn_index == 1) ? qp176_205_quadtree_inter_model_tflite_data
: qp146_175_quadtree_inter_model_tflite_data;
// Variant B: non-quadtree models. Same bucket limits as variant A, with a
// separate chroma branch.
if (superres_denom == SCALE_NUMERATOR) {
if (is_luma) {
if (qindex < 91) {
return (cnn_index == 0) ? qp0_90_inter_model_tflite_data
: (cnn_index == 1) ? qp91_120_inter_model_tflite_data
: qp121_145_inter_model_tflite_data;
} else if (qindex < 121) {
return (cnn_index == 0) ? qp91_120_inter_model_tflite_data
: (cnn_index == 1) ? qp0_90_inter_model_tflite_data
: qp121_145_inter_model_tflite_data;
} else if (qindex < 146) {
return (cnn_index == 0) ? qp121_145_inter_model_tflite_data
: (cnn_index == 1) ? qp91_120_inter_model_tflite_data
: qp146_175_inter_model_tflite_data;
} else if (qindex < 176) {
return (cnn_index == 0) ? qp146_175_inter_model_tflite_data
: (cnn_index == 1) ? qp121_145_inter_model_tflite_data
: qp176_205_inter_model_tflite_data;
} else if (qindex < 206) {
return (cnn_index == 0) ? qp176_205_inter_model_tflite_data
: (cnn_index == 1) ? qp146_175_inter_model_tflite_data
: qp206_255_inter_model_tflite_data;
} else {
return (cnn_index == 0) ? qp206_255_inter_model_tflite_data
: (cnn_index == 1) ? qp176_205_inter_model_tflite_data
: qp146_175_inter_model_tflite_data;
} else {
// Chroma: a single model per qindex bucket, so only cnn_index 0 is valid.
assert(cnn_index == 0);
if (qindex < 91) {
return uv_qp0_90_inter_model_tflite_data;
} else if (qindex < 121) {
return uv_qp91_120_inter_model_tflite_data;
} else if (qindex < 146) {
return uv_qp121_145_inter_model_tflite_data;
} else if (qindex < 176) {
return uv_qp146_175_inter_model_tflite_data;
} else if (qindex < 206) {
return uv_qp176_205_inter_model_tflite_data;
} else {
return uv_qp206_255_inter_model_tflite_data;
// Superres: the model family is chosen by superres_denom and the member of
// the family by qindex (< 120, < 180, otherwise). cnn_index is not consulted
// in this switch.
switch (superres_denom) {
case 10:
if (qindex < 120)
return sr5by4ra_1_tflite;
else if (qindex < 180)
return sr5by4ra_2_tflite;
return sr5by4ra_3_tflite;
case 12:
if (qindex < 120)
return sr3by2ra_1_tflite;
else if (qindex < 180)
return sr3by2ra_2_tflite;
return sr3by2ra_3_tflite;
case 14:
if (qindex < 120)
return sr7by4ra_1_tflite;
else if (qindex < 180)
return sr7by4ra_2_tflite;
return sr7by4ra_3_tflite;
case 16:
if (qindex < 120)
return sr2by1ra_1_tflite;
else if (qindex < 180)
return sr2by1ra_2_tflite;
return sr2by1ra_3_tflite;
default: assert(0); return nullptr;
// Fallback when no branch above returned.
return nullptr;
// Creates an XNNPack delegate configured with max(num_threads, 1) threads and
// returns the pointer produced by TfLiteXNNPackDelegateCreate().
// NOTE(review): the initialiser of `xnnpack_options` (the right-hand side of
// the declaration below) and the opening `#if` for the trailing `#endif` are
// missing from this text -- confirm against a pristine copy.
static TfLiteDelegate *get_tflite_xnnpack_delegate(int num_threads) {
TfLiteXNNPackDelegateOptions xnnpack_options =
xnnpack_options.num_threads = AOMMAX(num_threads, 1);
return TfLiteXNNPackDelegateCreate(&xnnpack_options);
#endif // USE_XNNPACK
// Builds a TF-lite interpreter for the model matching the given parameters and
// resizes its first input tensor to {1, height, width, 1}.
//
// The model is chosen with get_intra_model_from_qindex() when is_intra_only is
// non-zero and with get_inter_model_from_qindex() otherwise. The interpreter
// is asked to use max(num_threads, 1) threads.
// Returns the interpreter, or nullptr (after reporting through the default
// error reporter) if resizing the input tensor, allocating tensors, or
// applying the XNNPack delegate fails.
//
// NOTE(review): several lines appear to be missing from this text: the
// `#if USE_XNNPACK` guards around the `xnnpack_delegate` parameter and its
// use, the op registration on `resolver`, and the call that makes `builder`
// populate `interpreter` -- confirm against a pristine copy.
static std::unique_ptr<tflite::Interpreter> get_tflite_interpreter(
int qindex, int superres_denom, int width, int height, int num_threads,
int is_intra_only, int is_luma, int cnn_index
TfLiteDelegate *xnnpack_delegate
#endif // USE_XNNPACK
) {
// NOTE(review): both selectors return nullptr for qindex <= MIN_CNN_Q_INDEX,
// and that pointer is handed to tflite::GetModel() without a check here.
const unsigned char *const model_tflite_data =
is_intra_only ? get_intra_model_from_qindex(qindex, superres_denom,
is_luma, cnn_index)
: get_inter_model_from_qindex(qindex, superres_denom,
is_luma, cnn_index);
auto model = tflite::GetModel(model_tflite_data);
tflite::MutableOpResolver resolver;
tflite::InterpreterBuilder builder(model, resolver);
// TODO(urvang): Investigate if caching the interpreter object provides
// further speed-up. May still have to re-build the interpreter if qindex
// changes.
// NOTE(review): as written, `interpreter` is still empty when it is
// dereferenced on the next line; presumably a `builder(&interpreter)` call
// belongs in between -- confirm. A failed build is not checked for either.
std::unique_ptr<tflite::Interpreter> interpreter;
interpreter->SetNumThreads(AOMMAX(num_threads, 1));
tflite::ErrorReporter *reporter = tflite::DefaultErrorReporter();
// Dimension order: batch_size, height, width, num_channels.
// Note: height comes before width here!
const std::vector<int> in_out_dims = { 1, height, width, 1 };
// We only need to resize the input tensor. All other tensors (including
// output tensor) will be resized automatically.
if (interpreter->ResizeInputTensor(interpreter->inputs()[0], in_out_dims) !=
kTfLiteOk) {
reporter->Report("Failed at input tensor resize");
return nullptr;
if (interpreter->AllocateTensors() != kTfLiteOk) {
reporter->Report("Failed at tensor allocation");
return nullptr;
// Hand the graph to the XNNPack delegate supplied by the caller.
if (interpreter->ModifyGraphWithDelegate(xnnpack_delegate) != kTfLiteOk) {
reporter->Report("Failed at modifying graph with XNNPack delegate");
return nullptr;
#endif // USE_XNNPACK
return interpreter;
// Runs CNN restoration on one high-bitdepth plane.
//
// Reads the width x height plane `dgd` (row stride dgd_stride), feeds it to
// the TF-lite model as floats normalised by (1 << bit_depth) - 1, and writes
// clip_pixel_highbd(dgd + residue) into `rst` (row stride rst_stride), where
// the residue is the model output scaled back by the same factor and rounded.
// Returns 1 on success and 0 if interpreter invocation fails.
//
// NOTE(review): lines appear to be missing from this text (the
// `#if USE_XNNPACK` guards, the tail of the get_tflite_interpreter() call,
// closing braces and the delegate clean-up) -- confirm against a pristine copy.
extern "C" int av1_restore_cnn_img_tflite_highbd(
int qindex, int superres_denom, const uint16_t *dgd, int width, int height,
int dgd_stride, uint16_t *rst, int rst_stride, int num_threads,
int bit_depth, int is_intra_only, int is_luma, int cnn_index) {
// Ensure image can be downscaled by factor of 8 on each axis
int padding_width = int(ceil(float(width) / 8.0) * 8);
int padding_height = int(ceil(float(height) / 8.0) * 8);
TfLiteDelegate *xnnpack_delegate = get_tflite_xnnpack_delegate(num_threads);
#endif // USE_XNNPACK
// NOTE(review): get_tflite_interpreter() can return nullptr; no null check is
// visible before `interpreter->` is used below.
std::unique_ptr<tflite::Interpreter> interpreter = get_tflite_interpreter(
qindex, superres_denom, padding_width, padding_height, num_threads,
is_intra_only, is_luma, cnn_index
#endif // USE_XNNPACK
// Prepare input.
const auto max_val = static_cast<float>((1 << bit_depth) - 1);
const int in_stride = padding_width;
auto input = interpreter->typed_input_tensor<float>(0);
for (int r = 0; r < padding_height; ++r) {
for (int c = 0; c < padding_width; ++c) {
if (r < height && c < width) {
// Inside the picture: normalise the pixel to [0, 1].
input[r * in_stride + c] =
static_cast<float>(dgd[r * dgd_stride + c]) / max_val;
assert(input[r * in_stride + c] >= 0.0f);
assert(input[r * in_stride + c] <= 1.0f);
} else {
// Padding with either zeros or by copies
// input[r * in_stride + c] = 0; // Pad with zeros
// Mirror the coordinate about the picture edge: column `width` maps to
// `width - 1`, `width + 1` to `width - 2`, and likewise for rows. The
// source sample was already written earlier in this raster scan.
int w_copy_idx = c;
if (c >= width) {
w_copy_idx = width + (width - c) - 1;
int h_copy_idx = r;
if (r >= height) {
h_copy_idx = height + (height - r) - 1;
input[r * in_stride + c] = input[h_copy_idx * in_stride + w_copy_idx];
// Invoke TFlite inference.
tflite::ErrorReporter *reporter = tflite::DefaultErrorReporter();
auto status = interpreter->Invoke();
if (status != kTfLiteOk) {
reporter->Report("Failed at interpreter invocation");
return 0;
// Use the output to restore 'dgd' and store in 'rst'.
// NOTE(review): the input tensor was sized with padding_width columns, yet
// the output is indexed with a row stride of `width`. If the output tensor
// has the same spatial size as the input, rows would be misaligned whenever
// width is not a multiple of 8 (generate_interm_guided_restoration() uses
// padding_width here) -- confirm against the model's output shape.
const auto output = interpreter->typed_output_tensor<float>(0);
const int out_stride = width;
for (int r = 0; r < height; ++r) {
for (int c = 0; c < width; ++c) {
// Scale the output back to pixel units; the + 0.5 rounds before truncation.
const int residue =
static_cast<int>(output[r * out_stride + c] * max_val + 0.5);
rst[r * rst_stride + c] =
clip_pixel_highbd(dgd[r * dgd_stride + c] + residue, bit_depth);
// IMPORTANT: release the interpreter before destroying the delegate.
#endif // USE_XNNPACK
return 1;
// Applies CNN restoration to the planes of the current frame buffer
// (cm->cur_frame->buf).
//
// For each plane index below av1_num_planes(cm), the plane is skipped when
// apply_cnn[plane] is zero; otherwise cnn_indices[plane] is asserted to be in
// [0, av1_num_cnn_indices_for_plane(cm, plane)) and forwarded as `cnn_index`.
// The argument lists below pass the same buffer and stride as both source and
// destination, together with the frame's base_qindex, superres denominator
// and bit depth.
//
// NOTE(review): the `case` labels, the heads of the three calls (presumably
// av1_restore_cnn_img_tflite_highbd) and the `break` statements are missing
// from this text -- confirm against a pristine copy.
extern "C" void av1_restore_cnn_tflite(const AV1_COMMON *cm, int num_threads,
const int apply_cnn[MAX_MB_PLANE],
const int cnn_indices[MAX_MB_PLANE]) {
YV12_BUFFER_CONFIG *buf = &cm->cur_frame->buf;
const int is_intra_only = frame_is_intra_only(cm);
for (int plane = 0; plane < av1_num_planes(cm); ++plane) {
if (!apply_cnn[plane]) continue;
const int is_luma = (plane == AOM_PLANE_Y);
const int cnn_index = cnn_indices[plane];
assert(cnn_index >= 0 &&
cnn_index < av1_num_cnn_indices_for_plane(cm, plane));
switch (plane) {
// Arguments for the Y plane (y_buffer, y_crop_width x y_crop_height).
cm->quant_params.base_qindex, cm->superres_scale_denominator,
CONVERT_TO_SHORTPTR(buf->y_buffer), buf->y_crop_width,
buf->y_crop_height, buf->y_stride,
CONVERT_TO_SHORTPTR(buf->y_buffer), buf->y_stride, num_threads,
cm->seq_params.bit_depth, is_intra_only, is_luma, cnn_index);
// Arguments for the U plane (u_buffer, uv_crop_width x uv_crop_height).
cm->quant_params.base_qindex, cm->superres_scale_denominator,
CONVERT_TO_SHORTPTR(buf->u_buffer), buf->uv_crop_width,
buf->uv_crop_height, buf->uv_stride,
CONVERT_TO_SHORTPTR(buf->u_buffer), buf->uv_stride, num_threads,
cm->seq_params.bit_depth, is_intra_only, is_luma, cnn_index);
// Arguments for the V plane (v_buffer, uv_crop_width x uv_crop_height).
cm->quant_params.base_qindex, cm->superres_scale_denominator,
CONVERT_TO_SHORTPTR(buf->v_buffer), buf->uv_crop_width,
buf->uv_crop_height, buf->uv_stride,
CONVERT_TO_SHORTPTR(buf->v_buffer), buf->uv_stride, num_threads,
cm->seq_params.bit_depth, is_intra_only, is_luma, cnn_index);
default: assert(0 && "Invalid plane index");
// ------------------- Guided Quadtree: Common -------------------------------//
// Given single-channel input in 'dgd' (width x height, row stride dgd_stride),
// runs the TF-lite model and stores its 2-channel output in 'interm'.
//
// The input is padded up to multiples of 16 in both dimensions by repeating
// the nearest in-picture sample, and normalised by (1 << bit_depth) - 1. The
// output is read as two interleaved channels per pixel with a row stride of
// 2 * padding_width, and scaled back by the same factor:
// interm[r][c][0] and interm[r][c][1] for r < height, c < width.
// 'interm' must already be sized by the caller to at least
// height x width x 2; it is only indexed here, never resized.
// Returns 1 on success and 0 if interpreter invocation fails.
//
// NOTE(review): lines appear to be missing from this text (the
// `#if USE_XNNPACK` guards, the tail of the get_tflite_interpreter() call, the
// divisor of the padding expression, closing braces and the clean-up code) --
// confirm against a pristine copy.
static int generate_interm_guided_restoration(
const uint16_t *dgd, int dgd_stride, int qindex, int superres_denom,
int width, int height, int num_threads, int is_intra_only, int is_luma,
int cnn_index, int bit_depth,
std::vector<std::vector<std::vector<double>>> &interm) {
// Make sure we can downscale 4 times.
const int padding_width = (int)ceil(width * 1.0 / 16) * 16;
const int padding_height = (int)ceil(height * 1.0 / 16) * 16;
TfLiteDelegate *xnnpack_delegate = get_tflite_xnnpack_delegate(num_threads);
#endif // USE_XNNPACK
// NOTE(review): get_tflite_interpreter() can return nullptr; no null check is
// visible before `interpreter->` is used below.
std::unique_ptr<tflite::Interpreter> interpreter = get_tflite_interpreter(
qindex, superres_denom, padding_width, padding_height, num_threads,
is_intra_only, is_luma, cnn_index
#endif // USE_XNNPACK
// Prepare input.
const auto max_val = static_cast<float>((1 << bit_depth) - 1);
const int in_stride = padding_width;
auto input = interpreter->typed_input_tensor<float>(0);
for (int r = 0; r < padding_height; ++r) {
for (int c = 0; c < padding_width; ++c) {
if (r < height && c < width) {
// Inside the picture: normalise the pixel to [0, 1].
input[r * in_stride + c] =
static_cast<float>(dgd[r * dgd_stride + c]) / max_val;
assert(input[r * in_stride + c] >= 0.0f);
assert(input[r * in_stride + c] <= 1.0f);
} else {
// Padding area: replicate the nearest sample of the last row/column.
// NOTE(review): the divisor that ends this expression is missing from this
// text (presumably `max_val;`) -- confirm.
input[r * in_stride + c] =
static_cast<float>(dgd[AOMMIN(r, height - 1) * dgd_stride +
AOMMIN(c, width - 1)]) /
// Invoke TFlite inference.
tflite::ErrorReporter *reporter = tflite::DefaultErrorReporter();
auto status = interpreter->Invoke();
if (status != kTfLiteOk) {
reporter->Report("Failed at interpreter invocation");
return 0;
// Store the output in 'interm'.
const auto output = interpreter->typed_output_tensor<float>(0);
const int out_stride = padding_width;
for (int r = 0; r < height; ++r) {
for (int c = 0; c < width; ++c) {
// Two interleaved channels per pixel, hence the factor 2 on row and column.
interm[r][c][0] = output[r * 2 * out_stride + c * 2] * max_val;
interm[r][c][1] = output[r * 2 * out_stride + c * 2 + 1] * max_val;
// Cleanup.
// IMPORTANT: release the interpreter before destroying the delegate.
#endif // USE_XNNPACK
return 1;
// Partition types for a guided-quadtree restoration unit.
// NOTE(review): the enumerator list is missing from this text. Code in this
// file refers to GUIDED_QT_NONE, GUIDED_QT_SPLIT, GUIDED_QT_HORZ,
// GUIDED_QT_VERT, GUIDED_QT_TYPES (used as the loop bound / count) and
// GUIDED_QT_INVALID; their order and values cannot be determined from here --
// restore them from a pristine copy.
typedef enum {
} GuidedQuadTreePartitionType;
// Derives the restoration-unit dimensions for one partition type.
//
//   quadtree_max_size - side length of the unpartitioned (square) unit.
//   partition_type    - must satisfy 0 <= partition_type < GUIDED_QT_TYPES
//                       (asserted).
//   unit_width        - out: quadtree_max_size for GUIDED_QT_NONE and
//                       GUIDED_QT_HORZ, half of it for every other type.
//   unit_height       - out: quadtree_max_size for GUIDED_QT_NONE and
//                       GUIDED_QT_VERT, half of it for every other type.
// "Half" is quadtree_max_size >> 1, so an odd size rounds down.
static void get_unit_size(int quadtree_max_size,
                          GuidedQuadTreePartitionType partition_type,
                          int *unit_width, int *unit_height) {
  assert(partition_type >= 0 && partition_type < GUIDED_QT_TYPES);
  const int full_size = quadtree_max_size;
  const int half_size = quadtree_max_size >> 1;
  // A horizontal cut keeps the full width; a vertical cut keeps the full
  // height; no cut keeps both.
  const bool keeps_full_width =
      (partition_type == GUIDED_QT_NONE || partition_type == GUIDED_QT_HORZ);
  const bool keeps_full_height =
      (partition_type == GUIDED_QT_NONE || partition_type == GUIDED_QT_VERT);
  *unit_width = keeps_full_width ? full_size : half_size;
  *unit_height = keeps_full_height ? full_size : half_size;
}
// ------------------- Guided Quadtree: Encoder ------------------------------//
// Given 2-channel intermediate output 'interm', degraded frame 'dgd' and source
// frame 'src', generates the single-channel output 'out' and corresponding
// linear combination weight pairs 'A'.
//
// The area [start_row, end_row) x [start_col, end_col) is processed in units
// of `unit_width x unit_height` (the last unit in each direction is clipped
// to the area). For every unit:
//   1. a least-squares fit of (src - dgd) onto the two channels of 'interm'
//      gives real-valued weights, which are scaled by quadtset[0]/quadtset[1];
//   2. the weights are quantised to integers in [A0_min, A0_min + 15] and
//      [A1_min, A1_min + 15] (quadtset[2], quadtset[3]), choosing among the
//      floor/floor+1 candidates by cost;
//   3. the pair is appended to 'A' and the restored pixels
//      clip(round(dgd + A0 * interm0 / scale0 + A1 * interm1 / scale1))
//      are written to out[i - start_row][j - start_col].
// 'out' must already be sized by the caller; it is only indexed here.
//
// NOTE(review): several statements are missing from this text (the push_back
// lines that fill sub_r0/sub_r1, the declaration of `bestcost`, the heads of
// the cost computations, closing braces) -- confirm against a pristine copy.
static void generate_linear_combination(
const std::vector<std::vector<std::vector<double>>> &interm,
const uint16_t *src, int src_stride, const uint16_t *dgd, int dgd_stride,
int start_row, int end_row, int start_col, int end_col, int unit_width,
int unit_height, const int *quadtset, int rdmult, const int *norestorecost,
int bit_depth, std::vector<std::vector<uint16_t>> &out,
std::vector<std::pair<int, int>> &A) {
// quadtset layout as used here: { scale0, scale1, A0_min, A1_min }.
const int scale0 = quadtset[0];
const int scale1 = quadtset[1];
const int A0_min = quadtset[2];
const int A1_min = quadtset[3];
for (int row = start_row; row < end_row; row += unit_height) {
const int this_start_row = row;
const int this_end_row = AOMMIN(row + unit_height, end_row);
for (int col = start_col; col < end_col; col += unit_width) {
const int this_start_col = col;
const int this_end_col = AOMMIN(col + unit_width, end_col);
const int num_pixels =
(this_end_row - this_start_row) * (this_end_col - this_start_col);
// Extract some flattened arrays.
// sub_r_flatten: the target residual (src - dgd), in raster order.
std::vector<int> sub_r_flatten;
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
sub_r_flatten.push_back(src[i * src_stride + j] -
dgd[i * dgd_stride + j]);
assert((int)sub_r_flatten.size() == num_pixels);
// NOTE(review): the loop bodies that fill sub_r0 and sub_r1 are missing from
// this text; presumably they push interm[i][j][0] and interm[i][j][1]
// respectively -- confirm.
std::vector<double> sub_r0;
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
assert((int)sub_r0.size() == num_pixels);
std::vector<double> sub_r1;
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
assert((int)sub_r1.size() == num_pixels);
// Get R.
// R is num_pixels x 2: one column per intermediate channel.
std::vector<std::vector<double>> R(num_pixels, std::vector<double>(2));
for (int i = 0; i < num_pixels; i++) {
R[i][0] = sub_r0[i];
R[i][1] = sub_r1[i];
// Get R^T.
std::vector<std::vector<double>> R_T(2, std::vector<double>(num_pixels));
for (int i = 0; i < num_pixels; i++) {
R_T[0][i] = sub_r0[i];
R_T[1][i] = sub_r1[i];
// Get R^T * R.
double R_TDotR[2][2] = { 0 };
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
for (int k = 0; k < num_pixels; k++) {
R_TDotR[i][j] += R_T[i][k] * R[k][j];
// Get (R^T * R)^-1.
// value_R_TDotR is the determinant of the 2x2 matrix; the inverse below is
// the adjugate divided by it.
// NOTE(review): no check for a zero determinant is visible; a singular
// matrix would make the divisions below produce inf/NaN -- confirm whether
// callers can rule this out.
const double value_R_TDotR =
R_TDotR[0][0] * R_TDotR[1][1] - R_TDotR[0][1] * R_TDotR[1][0];
double R_TDotR_inver[2][2] = {
{ R_TDotR[1][1] / value_R_TDotR, -1 * R_TDotR[0][1] / value_R_TDotR },
{ -1 * R_TDotR[1][0] / value_R_TDotR, R_TDotR[0][0] / value_R_TDotR }
// Get (R^T * R)^-1 * R^T.
std::vector<std::vector<double>> mid(2, std::vector<double>(num_pixels));
for (int j = 0; j < num_pixels; j++) {
mid[0][j] =
R_TDotR_inver[0][0] * R_T[0][j] + R_TDotR_inver[0][1] * R_T[1][j];
mid[1][j] =
R_TDotR_inver[1][0] * R_T[0][j] + R_TDotR_inver[1][1] * R_T[1][j];
// Compute A = (R^T * R)^-1 * R^T * residual.
double A0 = 0;
double A1 = 0;
for (int i = 0; i < num_pixels; i++) {
A0 += mid[0][i] * sub_r_flatten[i];
A1 += mid[1][i] * sub_r_flatten[i];
// Move the weights into the integer domain used for signalling.
A0 = A0 * scale0;
A1 = A1 * scale1;
// Do a finer search for best A0, A1 pair amongst four options:
// (1) A0_floor = floor(A0), A1_floor = floor(A1)
// (2) A0_floor, A1_floor + 1
// (3) A0_floor + 1, A1_floor
// (4) A0_floor + 1, A1_floor + 1
// do_finer_search is a constant `true`, so the `else` branch further down
// (plain rounding) is never taken as written.
const bool do_finer_search = true;
if (do_finer_search) {
// Baseline candidate: both weights zero.
double bestA0 = 0;
double bestA1 = 0;
double cost;
// Squared error of leaving the unit unrestored (src vs. dgd).
// NOTE(review): `diff * diff` is evaluated in `int` before being added to the
// 64-bit accumulator -- fine for the usual bit depths, verify if 16-bit
// samples are possible.
int64_t err = 0;
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
const int diff = src[i * src_stride + j] - dgd[i * dgd_stride + j];
err += diff * diff;
// NOTE(review): the head of this call is missing from this text; presumably
// it declares `bestcost` (used below but declared nowhere in the visible
// text) from the no-restoration error and norestorecost[1] -- confirm.
rdmult, norestorecost[1] >> 4, err, bit_depth);
// finer search
// Clamp the floor values into the representable 16-value ranges.
double flrA0 = (floor(A0));
double flrA1 = (floor(A1));
flrA0 = AOMMIN(AOMMAX(flrA0, A0_min), A0_min + 15);
flrA1 = AOMMIN(AOMMAX(flrA1, A1_min), A1_min + 15);
// Candidate (1): (floor A0, floor A1).
A0 = flrA0;
A1 = flrA1;
err = 0;
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
int rest = int(round(dgd[i * dgd_stride + j] +
A0 * interm[i][j][0] / scale0 +
A1 * interm[i][j][1] / scale1));
rest = clip_pixel_highbd(rest, bit_depth);
const int diff = src[i * src_stride + j] - rest;
err += diff * diff;
// approx RD cost assuming 7 bits per a0, a1 pair
// NOTE(review): the head and tail of this cost computation are missing from
// this text (same for the three repetitions below) -- confirm.
rdmult, (norestorecost[0] + (7 << AV1_PROB_COST_SHIFT)) >> 4, err,
if (cost < bestcost) {
bestA0 = A0;
bestA1 = A1;
bestcost = cost;
// Candidate (3): (floor A0 + 1, floor A1), only if A0 + 1 stays in range.
if (flrA0 < A0_min + 15) {
A0 = flrA0 + 1;
A1 = flrA1;
err = 0;
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
int rest = int(round(dgd[i * dgd_stride + j] +
A0 * interm[i][j][0] / scale0 +
A1 * interm[i][j][1] / scale1));
rest = clip_pixel_highbd(rest, bit_depth);
const int diff = src[i * src_stride + j] - rest;
err += diff * diff;
// approx RD cost assuming 7 bits per a0, a1 pair
rdmult, (norestorecost[0] + (7 << AV1_PROB_COST_SHIFT)) >> 4, err,
if (cost < bestcost) {
bestA0 = A0;
bestA1 = A1;
bestcost = cost;
// Candidate (2): (floor A0, floor A1 + 1), only if A1 + 1 stays in range.
if (flrA1 < A1_min + 15) {
A0 = flrA0;
A1 = flrA1 + 1;
err = 0;
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
int rest = int(round(dgd[i * dgd_stride + j] +
A0 * interm[i][j][0] / scale0 +
A1 * interm[i][j][1] / scale1));
rest = clip_pixel_highbd(rest, bit_depth);
const int diff = src[i * src_stride + j] - rest;
err += diff * diff;
// approx RD cost assuming 7 bits per a0, a1 pair
rdmult, (norestorecost[0] + (7 << AV1_PROB_COST_SHIFT)) >> 4, err,
if (cost < bestcost) {
bestA0 = A0;
bestA1 = A1;
bestcost = cost;
// Candidate (4): (floor A0 + 1, floor A1 + 1), only if both stay in range.
if (flrA0 < A0_min + 15 && flrA1 < A1_min + 15) {
A0 = flrA0 + 1;
A1 = flrA1 + 1;
err = 0;
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
int rest = int(round(dgd[i * dgd_stride + j] +
A0 * interm[i][j][0] / scale0 +
A1 * interm[i][j][1] / scale1));
rest = clip_pixel_highbd(rest, bit_depth);
const int diff = src[i * src_stride + j] - rest;
err += diff * diff;
// approx RD cost assuming 7 bits per a0, a1 pair
rdmult, (norestorecost[0] + (7 << AV1_PROB_COST_SHIFT)) >> 4, err,
if (cost < bestcost) {
bestA0 = A0;
bestA1 = A1;
bestcost = cost;
// Adopt the cheapest candidate found.
A0 = bestA0;
A1 = bestA1;
} else {
// Without the finer search: round to nearest and clamp.
A0 = (round(A0));
A1 = (round(A1));
A0 = AOMMIN(AOMMAX(A0, A0_min), A0_min + 15);
A1 = AOMMIN(AOMMAX(A1, A1_min), A1_min + 15);
// Final clamp into [A*_min, A*_min + 15].
// NOTE(review): which scope this pair of lines belongs to cannot be told from
// this text (braces are missing); it may apply to the (0, 0) baseline of the
// finer search as well -- confirm.
A0 = AOMMIN(AOMMAX(A0, A0_min), A0_min + 15);
A1 = AOMMIN(AOMMAX(A1, A1_min), A1_min + 15);
// Record this unit's weight pair.
A.emplace_back((int)A0, (int)A1);
// Write the restored unit; 'out' is indexed relative to the area origin.
for (int i = this_start_row; i < this_end_row; i++) {
for (int j = this_start_col; j < this_end_col; j++) {
const int out_unclipped = int(round(dgd[i * dgd_stride + j] +
A0 * interm[i][j][0] / scale0 +
A1 * interm[i][j][1] / scale1));
out[i - start_row][j - start_col] =
clip_pixel_highbd(out_unclipped, bit_depth);
// Debug-only sanity check: one weight pair per unit. This holds only when
// 'A' was empty on entry.
#ifndef NDEBUG
const auto num_units_row =
(size_t)ceil((double)(end_row - start_row) / unit_height);
const auto num_units_col =
(size_t)ceil((double)(end_col - start_col) / unit_width);
assert(A.size() == num_units_row * num_units_col);
#endif // NDEBUG
// Computes the sum of squared errors between 'rst' and 'src' over the area
// [start_row, end_row) x [start_col, end_col).
//
//   rst        - restored samples, indexed relative to the area origin:
//                rst[r - start_row][c - start_col].
//   src        - source samples, indexed in frame coordinates:
//                src[r * src_stride + c].
//   src_stride - distance in samples between consecutive rows of 'src'.
// Returns the accumulated SSE; 0 when the area is empty.
static int64_t compute_sse(const std::vector<std::vector<uint16_t>> &rst,
                           const uint16_t *src, int src_stride, int start_row,
                           int end_row, int start_col, int end_col) {
  int64_t sse = 0;
  for (int r = start_row; r < end_row; ++r) {
    for (int c = start_col; c < end_col; ++c) {
      const uint16_t this_rst = rst[r - start_row][c - start_col];
      const uint16_t this_src = src[r * src_stride + c];
      // Both operands promote to int, so the difference may be negative;
      // widen before squaring so the product is computed in 64 bits.
      const int64_t diff = (int64_t)(this_rst - this_src);
      sse += diff * diff;
    }
  }
  return sse;
}
// Computes bitrate for the given weight parameters.
//
//   A              - weight pairs (a0, a1), visited in order.
//   prev_A         - pair used as the coding reference for the first entry.
//   quadtset       - quadtset[2] / quadtset[3] are the minimum a0 / a1 values
//                    (A0_min / A1_min); weights are coded as offsets from them.
//   norestorecosts - [1] is charged for a (0, 0) pair, [0] for any other pair.
// A non-zero pair additionally costs
// aom_count_primitive_refsubexpfin(16, 1, ref, value - min) for each weight,
// where `ref` is a reference pair's offset clamped to [0, 15].
// Returns the accumulated cost.
//
// NOTE(review): the tail of the `num_bits +=` expression and the closing
// braces are missing from this text, so it cannot be told from here how the
// primitive counts are scaled or whether the reference update runs for every
// pair or only for non-zero ones -- confirm against a pristine copy.
static int compute_rate(const std::vector<std::pair<int, int>> &A,
const std::pair<int, int> &prev_A, const int *quadtset,
const int *norestorecosts) {
const int A0_min = quadtset[2];
const int A1_min = quadtset[3];
int num_bits = 0;
// Initial references come from the pair preceding this unit.
int ref0 = AOMMIN(AOMMAX(prev_A.first - A0_min, 0), 15);
int ref1 = AOMMIN(AOMMAX(prev_A.second - A1_min, 0), 15);
for (auto &this_A : A) {
if (this_A.first == 0 && this_A.second == 0) {
num_bits += norestorecosts[1];
} else {
num_bits += norestorecosts[0];
num_bits += (aom_count_primitive_refsubexpfin(16, 1, ref0,
this_A.first - A0_min) +
aom_count_primitive_refsubexpfin(16, 1, ref1,
this_A.second - A1_min))
// The pair just coded becomes the reference for the next one.
ref0 = AOMMIN(AOMMAX(this_A.first - A0_min, 0), 15);
ref1 = AOMMIN(AOMMAX(this_A.second - A1_min, 0), 15);
return num_bits;
// Given 2-channel intermediate output in 'interm' as well as 'src' and 'dgd'
// buffers, tries the given partition type on a single quadtree unit covering
// [start_row, end_row) x [start_col, end_col).
//
// Steps:
//   1. get_unit_size() turns (quadtree_max_size, partition_type) into the
//      sub-unit dimensions.
//   2. generate_linear_combination() fills 'out' with the restored unit and
//      appends one (a0, a1) pair per sub-unit to 'A'.
//   3. compute_sse() measures the distortion of 'out' against 'src'.
//   4. compute_rate() prices the pairs in 'A' relative to 'prev_A'; the
//      partition signalling cost splitcosts[partition_type] is added unless
//      the unit is partial, in which case the partition is implied.
// Outputs: '*this_rdcost' (RD cost from RDCOST_DBL_WITH_NATIVE_BD_DIST),
// 'out' (restored samples, indexed relative to the unit origin) and 'A'.
// A partial unit may only be tried with GUIDED_QT_NONE (asserted).
static void try_one_partition(
    const std::vector<std::vector<std::vector<double>>> &interm,
    GuidedQuadTreePartitionType partition_type, const uint16_t *src,
    int src_stride, const uint16_t *dgd, int dgd_stride, int start_row,
    int end_row, int start_col, int end_col, int quadtree_max_size,
    const int *quadtset, int rdmult, const std::pair<int, int> &prev_A,
    const int *splitcosts, const int *norestorecosts, int bit_depth,
    bool is_partial_unit, double *this_rdcost,
    std::vector<std::vector<uint16_t>> &out,
    std::vector<std::pair<int, int>> &A) {
  assert(IMPLIES(is_partial_unit, partition_type == GUIDED_QT_NONE));

  // Get unit width and height based on partition type.
  int unit_width;
  int unit_height;
  get_unit_size(quadtree_max_size, partition_type, &unit_width, &unit_height);

  // Compute restored unit, a0 and a1.
  generate_linear_combination(interm, src, src_stride, dgd, dgd_stride,
                              start_row, end_row, start_col, end_col,
                              unit_width, unit_height, quadtset, rdmult,
                              norestorecosts, bit_depth, out, A);
  // One pair for NONE, two for HORZ/VERT, four for SPLIT.
  assert(IMPLIES(partition_type == GUIDED_QT_NONE, A.size() == 1));
  assert(IMPLIES(partition_type == GUIDED_QT_HORZ, A.size() == 2));
  assert(IMPLIES(partition_type == GUIDED_QT_VERT, A.size() == 2));
  assert(IMPLIES(partition_type == GUIDED_QT_SPLIT, A.size() == 4));

  // Compute SSE.
  const int64_t sse =
      compute_sse(out, src, src_stride, start_row, end_row, start_col, end_col);

  // Compute Rate.
  const int num_bits_for_a = compute_rate(A, prev_A, quadtset, norestorecosts);
  // Partition is implied to be NONE in case of partial unit.
  const int partition_signaling_cost =
      is_partial_unit ? 0 : splitcosts[partition_type];
  const int bitrate = num_bits_for_a + partition_signaling_cost;

  // Compute RDCost.
  *this_rdcost =
      RDCOST_DBL_WITH_NATIVE_BD_DIST(rdmult, bitrate >> 4, sse, bit_depth);
}
// Given intermediate restoration 'interm', source 'src' and degraded frame
// 'dgd', computes the best partitioning out of the GUIDED_QT_* types based on
// RD cost for the quadtree unit starting at 'start_row' and 'start_col'.
//
// The unit spans quadtree_max_size rows and columns, clipped to the frame
// ('height' x 'width'). A unit that is clipped in either direction is
// "partial": only GUIDED_QT_NONE is evaluated for it and nothing is added to
// 'split'. Every candidate is evaluated with try_one_partition() into
// temporary buffers, and the cheapest one is kept.
// Outputs: '*rdcost' receives the best RD cost; the split decision goes to
// 'split' and the winning (a0, a1) pairs to 'A'.
//
// NOTE(review): several lines are missing from this text (the `continue` of
// the partial-unit skip, the statement inside the "Save a0, a1 pairs" loop,
// the `case` bodies of the final switch, closing braces) -- confirm the exact
// encoding written to 'split' against a pristine copy.
static void select_quadtree_partitioning(
const std::vector<std::vector<std::vector<double>>> &interm,
const uint16_t *src, int src_stride, int start_row, int start_col,
int width, int height, int quadtree_max_size, const int *quadtset,
int rdmult, const std::pair<int, int> &prev_A, const int *splitcosts,
const int norestorecosts[2], int bit_depth, const uint16_t *dgd,
int dgd_stride, std::vector<int> &split,
std::vector<std::pair<int, int>> &A, double *rdcost) {
// Clip the unit to the frame boundary.
const int end_row = AOMMIN(start_row + quadtree_max_size, height);
const int end_col = AOMMIN(start_col + quadtree_max_size, width);
const bool is_partial_unit = (start_row + quadtree_max_size > height) ||
(start_col + quadtree_max_size > width);
// Best candidate so far.
auto best_rdcost = DBL_MAX;
std::vector<std::pair<int, int>> best_A;
std::vector<std::vector<uint16_t>> best_out(
quadtree_max_size, std::vector<uint16_t>(quadtree_max_size));
GuidedQuadTreePartitionType best_partition_type = GUIDED_QT_INVALID;
for (int type = 0; type < GUIDED_QT_TYPES; ++type) {
const auto this_partition_type = (GuidedQuadTreePartitionType)type;
// Special case: if only partial unit is within boundary, we implicitly
// use NONE partitioning and do not try the splitting options.
if (is_partial_unit && (this_partition_type != GUIDED_QT_NONE)) {
// Evaluate this candidate into fresh temporaries.
double this_rdcost;
std::vector<std::pair<int, int>> this_A;
std::vector<std::vector<uint16_t>> this_out(
quadtree_max_size, std::vector<uint16_t>(quadtree_max_size));
try_one_partition(interm, this_partition_type, src, src_stride, dgd,
dgd_stride, start_row, end_row, start_col, end_col,
quadtree_max_size, quadtset, rdmult, prev_A, splitcosts,
norestorecosts, bit_depth, is_partial_unit, &this_rdcost,
this_out, this_A);
// Strict '<' keeps the earliest type on ties.
if (this_rdcost < best_rdcost) {
best_rdcost = this_rdcost;
best_A = this_A;
best_out = this_out;
best_partition_type = this_partition_type;
// Save RDCost.
*rdcost = best_rdcost;
// Save a0, a1 pairs.
// NOTE(review): the loop body is missing from this text; presumably each pair
// is appended to 'A' -- confirm.
for (auto &a0a1 : best_A) {
// Save split decision.
if (is_partial_unit) {
// Nothing should be added to 'split' array.
assert(best_partition_type == GUIDED_QT_NONE);
switch (best_partition_type) {
default: assert(0 && "Wrong partition type"); break;
// Forward declaration; the definition appears later in this file, in the
// "Guided Quadtree: Decoder" section. 'split_index' and 'A_index' are taken by
// non-const reference.
static void apply_quadtree_partitioning(
const std::vector<std::vector<std::vector<double>>> &interm, int start_row,
int start_col, int width, int height, int quadtree_max_size,
const int *quadtset, int bit_depth, const std::vector<int> &split,
size_t &split_index, const std::vector<std::pair<int, int>> &A,
size_t &A_index, uint16_t *dgd, int dgd_stride);
// Encoder-side driver for guided quadtree restoration of the Y buffer of the
// current frame (cm->cur_frame->buf).
//
// Steps:
//  1. Generate the 2-channel intermediate restoration 'interm' with
//     generate_interm_guided_restoration().
//  2. For each candidate unit index (0 and 1), visit the quadtree units in
//     raster order, let select_quadtree_partitioning() pick the partitioning
//     and a0,a1 pairs of every unit, and accumulate the RD cost.
//  3. Store the cheapest candidate (unit index, split decisions, a0,a1 pairs)
//     in 'quad_info' and its total RD cost in '*rdcost'.
//  4. Apply the stored options to the degraded frame in place.
//
// Returns 0 if step 1 fails and 1 otherwise.
static int restore_cnn_quadtree_encode_img_tflite_highbd(
    YV12_BUFFER_CONFIG *source_frame, AV1_COMMON *cm, int superres_denom,
    int rdmult, const int *splitcosts, int (*norestorecosts)[2],
    int num_threads, int bit_depth, int is_intra_only, int is_luma,
    int cnn_index, QUADInfo *quad_info, double *rdcost) {
  const int frame_w = cm->superres_upscaled_width;
  const int frame_h = cm->superres_upscaled_height;
  const int base_qindex = cm->quant_params.base_qindex;
  YV12_BUFFER_CONFIG *const recon_buf = &cm->cur_frame->buf;
  uint16_t *const dgd = CONVERT_TO_SHORTPTR(recon_buf->y_buffer);
  const int dgd_stride = recon_buf->y_stride;

  // interm[row][col] holds the two intermediate channels for that pixel.
  std::vector<std::vector<std::vector<double>>> interm(
      frame_h,
      std::vector<std::vector<double>>(frame_w, std::vector<double>(2)));
  if (!generate_interm_guided_restoration(
          dgd, dgd_stride, base_qindex, superres_denom, frame_w, frame_h,
          num_threads, is_intra_only, is_luma, cnn_index, bit_depth, interm))
    return 0;

  // Source pixels and quadtree parameters for this qindex / frame type.
  const uint16_t *const src = CONVERT_TO_SHORTPTR(source_frame->y_buffer);
  const int src_stride = source_frame->y_stride;
  const int *const quadtset = get_quadparm_from_qindex(
      base_qindex, superres_denom, is_intra_only, is_luma, cnn_index);

  // A context of -1 selects an all-zero cost table instead of a row of
  // 'norestorecosts'.
  const int norestore_ctx =
      get_guided_norestore_ctx(base_qindex, superres_denom, is_intra_only);
  const int zero_costs[2] = { 0, 0 };
  const int *unit_norestorecosts = zero_costs;
  if (norestore_ctx != -1) unit_norestorecosts = norestorecosts[norestore_ctx];

  // Previous a0, a1 pair is mid-point of the range by default.
  const std::pair<int, int> initial_prev_A(8 + quadtset[2], 8 + quadtset[3]);

  // Best candidate over all unit sizes tried.
  int best_unit_index = -1;
  double best_rdcost_total = DBL_MAX;
  std::vector<int> best_split;              // selected partitioning options.
  std::vector<std::pair<int, int>> best_A;  // selected a0, a1 weight pairs.

  for (int unit_index = 0; unit_index < 2; ++unit_index) {
    const int unit_size = quad_tree_get_unit_size(frame_w, frame_h, unit_index);
    std::vector<int> cand_split;
    std::vector<std::pair<int, int>> cand_A;
    double cand_rdcost_total = 0.0;
    std::pair<int, int> prev_A = initial_prev_A;

    // TODO(urvang): Include padded area in a unit if it's < unit size / 2?
    // If so, need to modify / replace quad_tree_get_unit_info_length().
    // Also double check: quad_tree_get_split_info_length().
    for (int row = 0; row < frame_h; row += unit_size) {
      for (int col = 0; col < frame_w; col += unit_size) {
        // Best partitioning out of NONE, SPLIT, HORZ and VERT for this unit.
        double unit_rdcost;
        select_quadtree_partitioning(
            interm, src, src_stride, row, col, frame_w, frame_h, unit_size,
            quadtset, rdmult, prev_A, splitcosts, unit_norestorecosts,
            bit_depth, dgd, dgd_stride, cand_split, cand_A, &unit_rdcost);
        cand_rdcost_total += unit_rdcost;
        // The last pair chosen for this unit is passed as 'prev_A' for the
        // next unit.
        prev_A = cand_A.back();
      }
    }

    // Keep this unit size only if it is strictly cheaper.
    if (cand_rdcost_total < best_rdcost_total) {
      best_rdcost_total = cand_rdcost_total;
      best_unit_index = unit_index;
      best_split = cand_split;
      best_A = cand_A;
    }
  }

  // Publish the winning options. The lengths are set before
  // av1_alloc_quadtree_struct() is called.
  quad_info->unit_index = best_unit_index;
  quad_info->split_info_length = static_cast<int>(best_split.size());
  quad_info->unit_info_length = static_cast<int>(best_A.size());
  av1_alloc_quadtree_struct(cm, quad_info);
  for (size_t i = 0; i < best_split.size(); ++i) {
    quad_info->split_info[i].split = best_split[i];
  }
  for (size_t i = 0; i < best_A.size(); ++i) {
    quad_info->unit_info[i].xqd[0] = best_A[i].first;
    quad_info->unit_info[i].xqd[1] = best_A[i].second;
  }
  *rdcost = best_rdcost_total;

  // Apply guided restoration to 'dgd' using the best options above.
  // NOTE(review): quad_info->unit_size is not assigned in this function;
  // presumably av1_alloc_quadtree_struct() derives it from unit_index. Confirm.
  const int applied_unit_size = quad_info->unit_size;
  size_t split_index = 0;
  size_t A_index = 0;
  for (int row = 0; row < frame_h; row += applied_unit_size) {
    for (int col = 0; col < frame_w; col += applied_unit_size) {
      apply_quadtree_partitioning(interm, row, col, frame_w, frame_h,
                                  applied_unit_size, quadtset, bit_depth,
                                  best_split, split_index, best_A, A_index,
                                  dgd, dgd_stride);
    }
  }
  return 1;
}
// Encoder entry point with C linkage. For every plane whose apply_cnn[plane]
// entry is nonzero, one of the following calls is made on the current frame
// buffer (cm->cur_frame->buf):
//  - restore_cnn_quadtree_encode_img_tflite_highbd(), which also receives
//    'source_frame', 'RDMULT', the cost tables, 'quad_info' and 'rdcost';
//  - av1_restore_cnn_img_tflite_highbd() on the U buffer;
//  - av1_restore_cnn_img_tflite_highbd() on the V buffer.
// Returns 0 as soon as one of these calls returns 0 (or for an invalid plane
// index), and 1 otherwise.
extern "C" int av1_restore_cnn_quadtree_encode_tflite(
AV1_COMMON *cm, YV12_BUFFER_CONFIG *source_frame, int RDMULT,
int *splitcosts, int (*norestorecosts)[2], int num_threads,
const int apply_cnn[MAX_MB_PLANE], const int cnn_indices[MAX_MB_PLANE],
QUADInfo *quad_info, double *rdcost) {
YV12_BUFFER_CONFIG *buf = &cm->cur_frame->buf;
const int is_intra_only = frame_is_intra_only(cm);
for (int plane = 0; plane < av1_num_planes(cm); ++plane) {
// Planes with CNN restoration disabled are left untouched.
if (!apply_cnn[plane]) continue;
const int is_luma = (plane == AOM_PLANE_Y);
// Per-plane model index; checked against the number of models available
// for this plane.
const int cnn_index = cnn_indices[plane];
assert(cnn_index >= 0 &&
cnn_index < av1_num_cnn_indices_for_plane(cm, plane));
int ret = 1;
switch (plane) {
// Quadtree-based restoration with RD search; fills 'quad_info' and 'rdcost'.
// NOTE(review): presumably the luma case - confirm.
ret = restore_cnn_quadtree_encode_img_tflite_highbd(
source_frame, cm, cm->superres_scale_denominator, RDMULT,
splitcosts, norestorecosts, num_threads, cm->seq_params.bit_depth,
is_intra_only, is_luma, cnn_index, quad_info, rdcost);
if (ret == 0) return ret;
// U buffer: the same pointer and stride are passed twice, presumably as input
// and output (i.e. restored in place) - confirm against
// av1_restore_cnn_img_tflite_highbd().
ret = av1_restore_cnn_img_tflite_highbd(
cm->quant_params.base_qindex, cm->superres_scale_denominator,
CONVERT_TO_SHORTPTR(buf->u_buffer), buf->uv_crop_width,
buf->uv_crop_height, buf->uv_stride,
CONVERT_TO_SHORTPTR(buf->u_buffer), buf->uv_stride, num_threads,
cm->seq_params.bit_depth, is_intra_only, is_luma, cnn_index);
if (ret == 0) return ret;
// V buffer: same call shape as for the U buffer.
ret = av1_restore_cnn_img_tflite_highbd(
cm->quant_params.base_qindex, cm->superres_scale_denominator,
CONVERT_TO_SHORTPTR(buf->v_buffer), buf->uv_crop_width,
buf->uv_crop_height, buf->uv_stride,
CONVERT_TO_SHORTPTR(buf->v_buffer), buf->uv_stride, num_threads,
cm->seq_params.bit_depth, is_intra_only, is_luma, cnn_index);
if (ret == 0) return ret;
default: assert(0 && "Invalid plane index"); return 0;
return 1;
// ------------------- Guided Quadtree: Decoder ------------------------------//
// Restores rows [start_row, end_row) and columns [start_col, end_col) of 'dgd'
// in place, one 'unit_width' x 'unit_height' sub-unit at a time. Sub-units on
// the right / bottom edge are clipped to the region.
//
// Sub-units are visited in raster order. Each one reads the (a0, a1) pair at
// A[A_index] and advances 'A_index' by one. Every pixel of the sub-unit
// becomes
//   clip_pixel_highbd(round(dgd + a0 * interm[0] / scale0 +
//                           a1 * interm[1] / scale1), bit_depth)
// with scale0 = quadtset[0] and scale1 = quadtset[1].
static void apply_linear_combination(
    const std::vector<std::vector<std::vector<double>>> &interm, int start_row,
    int end_row, int start_col, int end_col, int unit_width, int unit_height,
    const int *quadtset, int bit_depth,
    const std::vector<std::pair<int, int>> &A, size_t &A_index, uint16_t *dgd,
    int dgd_stride) {
  const int scale0 = quadtset[0];
  const int scale1 = quadtset[1];

  for (int top = start_row; top < end_row; top += unit_height) {
    const int bottom = AOMMIN(top + unit_height, end_row);
    for (int left = start_col; left < end_col; left += unit_width) {
      const int right = AOMMIN(left + unit_width, end_col);

      // Consume the next weight pair for this sub-unit.
      const std::pair<int, int> &weights = A[A_index];
      ++A_index;
      const int a0 = weights.first;
      const int a1 = weights.second;

      for (int r = top; r < bottom; ++r) {
        uint16_t *const dgd_row = dgd + r * dgd_stride;
        const std::vector<std::vector<double>> &interm_row = interm[r];
        for (int c = left; c < right; ++c) {
          const std::vector<double> &channels = interm_row[c];
          const int restored =
              int(round(dgd_row[c] + a0 * channels[0] / scale0 +
                        a1 * channels[1] / scale1));
          dgd_row[c] = clip_pixel_highbd(restored, bit_depth);
        }
      }
    }
  }
}
// Restores, inside 'dgd', the quadtree unit whose top-left corner is
// ('start_row', 'start_col'), using intermediate restoration 'interm'.
//
// A unit that would extend past 'width' or 'height' is clipped, reads nothing
// from 'split' and always uses GUIDED_QT_NONE. Otherwise two consecutive
// entries of 'split' (advancing 'split_index' by two) select the type:
//   (0, 0) -> NONE, (0, 1) -> SPLIT, (1, 1) -> HORZ, (1, 0) -> VERT.
// The a0,a1 pairs are then taken from 'A' starting at 'A_index' by
// apply_linear_combination(), which advances 'A_index'.
static void apply_quadtree_partitioning(
    const std::vector<std::vector<std::vector<double>>> &interm, int start_row,
    int start_col, int width, int height, int quadtree_max_size,
    const int *quadtset, int bit_depth, const std::vector<int> &split,
    size_t &split_index, const std::vector<std::pair<int, int>> &A,
    size_t &A_index, uint16_t *dgd, int dgd_stride) {
  const int unclipped_end_row = start_row + quadtree_max_size;
  const int unclipped_end_col = start_col + quadtree_max_size;
  const int end_row = AOMMIN(unclipped_end_row, height);
  const int end_col = AOMMIN(unclipped_end_col, width);
  const bool crosses_frame_edge =
      (unclipped_end_row > height) || (unclipped_end_col > width);

  // Partial units carry no split flags and keep the default type.
  GuidedQuadTreePartitionType partition_type = GUIDED_QT_NONE;
  if (!crosses_frame_edge) {
    const int spl1 = split[split_index++];
    const int spl2 = split[split_index++];
    if (spl1 != 0) {
      assert(spl1 == 1);
      if (spl2 != 1) {
        assert(spl2 == 0);
        partition_type = GUIDED_QT_VERT;  // (1, 0)
      } else {
        partition_type = GUIDED_QT_HORZ;  // (1, 1)
      }
    } else if (spl2 != 0) {
      assert(spl2 == 1);
      partition_type = GUIDED_QT_SPLIT;  // (0, 1)
    }
    // (0, 0) leaves partition_type at GUIDED_QT_NONE.
  }
  assert(partition_type >= 0 && partition_type < GUIDED_QT_TYPES);

  // Sub-unit dimensions implied by the partition type.
  int unit_width;
  int unit_height;
  get_unit_size(quadtree_max_size, partition_type, &unit_width, &unit_height);

  // Restore every sub-unit with the given A parameters.
  apply_linear_combination(interm, start_row, end_row, start_col, end_col,
                           unit_width, unit_height, quadtset, bit_depth, A,
                           A_index, dgd, dgd_stride);
}
// Top-level function to apply guided restoration on decoder side.
// Operates on the Y buffer of the current frame (cm->cur_frame->buf): first
// generates the 2-channel intermediate restoration, then walks the quadtree
// units in raster order and restores each one in place via
// apply_quadtree_partitioning(), using the unit size, split info and unit info
// held in cm->cnn_quad_info.
// Returns 0 if generating the intermediate restoration fails, 1 otherwise.
static int restore_cnn_quadtree_decode_img_tflite_highbd(
AV1_COMMON *cm, int superres_denom, int num_threads, int bit_depth,
int is_intra_only, int is_luma, int cnn_index) {
YV12_BUFFER_CONFIG *dgd_buf = &cm->cur_frame->buf;
uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd_buf->y_buffer);
const int dgd_stride = dgd_buf->y_stride;
const int qindex = cm->quant_params.base_qindex;
const int width = cm->superres_upscaled_width;
const int height = cm->superres_upscaled_height;
// Get 2-channel intermediate restoration.
// 'interm' is indexed as interm[row][col][channel], with two channels.
std::vector<std::vector<std::vector<double>>> interm(
height, std::vector<std::vector<double>>(width, std::vector<double>(2)));
if (!generate_interm_guided_restoration(
dgd, dgd_stride, qindex, superres_denom, width, height, num_threads,
is_intra_only, is_luma, cnn_index, bit_depth, interm)) {
return 0;
// Get quadtree params.
const QUADInfo *const quad_info = &cm->cnn_quad_info;
const int quadtree_max_size = quad_info->unit_size;
const int *quadtset = get_quadparm_from_qindex(
qindex, superres_denom, is_intra_only, is_luma, cnn_index);
// Get partitioning types.
// Iterates over quad_info->split_info_length entries.
std::vector<int> split;
for (int i = 0; i < quad_info->split_info_length; ++i) {
// Get a0,a1 pairs.
// Iterates over quad_info->unit_info_length entries.
std::vector<std::pair<int, int>> A;
for (int i = 0; i < quad_info->unit_info_length; ++i) {
// For each quadtree unit, apply given quadtree partitioning.
// split_index / A_index are running cursors into 'split' / 'A'; they are
// passed by reference and advanced by the callee.
size_t split_index = 0;
size_t A_index = 0;
for (int row = 0; row < height; row += quadtree_max_size) {
for (int col = 0; col < width; col += quadtree_max_size) {
apply_quadtree_partitioning(interm, row, col, width, height,
quadtree_max_size, quadtset, bit_depth, split,
split_index, A, A_index, dgd, dgd_stride);
// Every split entry and every a0,a1 pair must have been consumed exactly.
assert(split_index == split.size());
assert(A_index == A.size());
return 1;
// Decoder entry point with C linkage. For every plane whose apply_cnn[plane]
// entry is nonzero (luma additionally requires a nonzero 'use_quadtree'), one
// of the following calls is made on the current frame buffer
// (cm->cur_frame->buf):
//  - restore_cnn_quadtree_decode_img_tflite_highbd();
//  - av1_restore_cnn_img_tflite_highbd() on the U buffer;
//  - av1_restore_cnn_img_tflite_highbd() on the V buffer.
// Returns 0 as soon as one of these calls returns 0 (or for an invalid plane
// index), and 1 otherwise.
extern "C" int av1_restore_cnn_quadtree_decode_tflite(
struct AV1Common *cm, int num_threads, int use_quadtree,
const int apply_cnn[MAX_MB_PLANE], const int cnn_indices[MAX_MB_PLANE]) {
YV12_BUFFER_CONFIG *buf = &cm->cur_frame->buf;
const int is_intra_only = frame_is_intra_only(cm);
for (int plane = 0; plane < av1_num_planes(cm); ++plane) {
// Planes with CNN restoration disabled are left untouched.
if (!apply_cnn[plane]) continue;
const int is_luma = (plane == AOM_PLANE_Y);
// The luma plane is skipped entirely when the quadtree is not in use.
if (is_luma && !use_quadtree) continue;
// Per-plane model index; checked against the number of models available
// for this plane.
const int cnn_index = cnn_indices[plane];
assert(cnn_index >= 0 &&
cnn_index < av1_num_cnn_indices_for_plane(cm, plane));
int ret = 1;
switch (plane) {
// Quadtree-based restoration driven by cm->cnn_quad_info.
// NOTE(review): presumably the luma case - confirm.
ret = restore_cnn_quadtree_decode_img_tflite_highbd(
cm, cm->superres_scale_denominator, num_threads,
cm->seq_params.bit_depth, is_intra_only, is_luma, cnn_index);
if (ret == 0) return ret;
// U buffer: the same pointer and stride are passed twice, presumably as input
// and output (i.e. restored in place) - confirm against
// av1_restore_cnn_img_tflite_highbd().
ret = av1_restore_cnn_img_tflite_highbd(
cm->quant_params.base_qindex, cm->superres_scale_denominator,
CONVERT_TO_SHORTPTR(buf->u_buffer), buf->uv_crop_width,
buf->uv_crop_height, buf->uv_stride,
CONVERT_TO_SHORTPTR(buf->u_buffer), buf->uv_stride, num_threads,
cm->seq_params.bit_depth, is_intra_only, is_luma, cnn_index);
if (ret == 0) return ret;
// V buffer: same call shape as for the U buffer.
ret = av1_restore_cnn_img_tflite_highbd(
cm->quant_params.base_qindex, cm->superres_scale_denominator,
CONVERT_TO_SHORTPTR(buf->v_buffer), buf->uv_crop_width,
buf->uv_crop_height, buf->uv_stride,
CONVERT_TO_SHORTPTR(buf->v_buffer), buf->uv_stride, num_threads,
cm->seq_params.bit_depth, is_intra_only, is_luma, cnn_index);
if (ret == 0) return ret;
default: assert(0 && "Invalid plane index"); return 0;
return 1;