|  | /* | 
|  | * Copyright (c) 2021, Alliance for Open Media. All rights reserved | 
|  | * | 
|  | * This source code is subject to the terms of the BSD 3-Clause Clear License | 
|  | * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear | 
|  | * License was not distributed with this source code in the LICENSE file, you | 
|  | * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the | 
|  | * Alliance for Open Media Patent License 1.0 was not distributed with this | 
|  | * source code in the PATENTS file, you can obtain it at | 
|  | * aomedia.org/license/patent-license/. | 
|  | */ | 
|  |  | 
|  | #include <math.h> | 
|  |  | 
|  | #include "config/aom_dsp_rtcd.h" | 
|  | #include "config/av1_rtcd.h" | 
|  |  | 
|  | #include "aom_ports/mem.h" | 
|  | #include "av1/common/av1_txfm.h" | 
|  | #include "av1/common/blockd.h" | 
|  | #include "av1/common/enums.h" | 
|  | #include "av1/common/idct.h" | 
|  | #include "av1/common/scan.h" | 
|  | #include "av1/common/txb_common.h" | 
|  |  | 
|  | static INLINE void clamp_buf(int32_t *buf, int32_t size, int8_t bit) { | 
|  | for (int i = 0; i < size; ++i) buf[i] = clamp_value(buf[i], bit); | 
|  | } | 
|  |  | 
|  | void inv_txfm_dct2_size4_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | int j; | 
|  | int a[2], b[2]; | 
|  | int add = 1 << (shift - 1); | 
|  |  | 
|  | const int *tx_mat = tx_kernel_dct2_size4[INV_TXFM][0]; | 
|  |  | 
|  | const int nz_line = line - skip_line; | 
|  | for (j = 0; j < nz_line; j++) { | 
|  | b[0] = tx_mat[1 * 4 + 0] * src[1] + tx_mat[3 * 4 + 0] * src[3]; | 
|  | b[1] = tx_mat[1 * 4 + 1] * src[1] + tx_mat[3 * 4 + 1] * src[3]; | 
|  | a[0] = tx_mat[0 * 4 + 0] * src[0] + tx_mat[2 * 4 + 0] * src[2]; | 
|  | a[1] = tx_mat[0 * 4 + 1] * src[0] + tx_mat[2 * 4 + 1] * src[2]; | 
|  |  | 
|  | dst[0 * line] = clamp((a[0] + b[0] + add) >> shift, coef_min, coef_max); | 
|  | dst[1 * line] = clamp((a[1] + b[1] + add) >> shift, coef_min, coef_max); | 
|  | dst[2 * line] = clamp((a[1] - b[1] + add) >> shift, coef_min, coef_max); | 
|  | dst[3 * line] = clamp((a[0] - b[0] + add) >> shift, coef_min, coef_max); | 
|  |  | 
|  | src += 4; | 
|  | dst++; | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_dct2_size8_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | int j, k; | 
|  | int a[4], b[4]; | 
|  | int c[2], d[2]; | 
|  | int add = 1 << (shift - 1); | 
|  |  | 
|  | const int *tx_mat = tx_kernel_dct2_size8[INV_TXFM][0]; | 
|  |  | 
|  | const int nz_line = line - skip_line; | 
|  | for (j = 0; j < nz_line; j++) { | 
|  | for (k = 0; k < 4; k++) { | 
|  | b[k] = tx_mat[1 * 8 + k] * src[1] + tx_mat[3 * 8 + k] * src[3] + | 
|  | tx_mat[5 * 8 + k] * src[5] + tx_mat[7 * 8 + k] * src[7]; | 
|  | } | 
|  |  | 
|  | d[0] = tx_mat[2 * 8 + 0] * src[2] + tx_mat[6 * 8 + 0] * src[6]; | 
|  | d[1] = tx_mat[2 * 8 + 1] * src[2] + tx_mat[6 * 8 + 1] * src[6]; | 
|  | c[0] = tx_mat[0 * 8 + 0] * src[0] + tx_mat[4 * 8 + 0] * src[4]; | 
|  | c[1] = tx_mat[0 * 8 + 1] * src[0] + tx_mat[4 * 8 + 1] * src[4]; | 
|  |  | 
|  | a[0] = c[0] + d[0]; | 
|  | a[3] = c[0] - d[0]; | 
|  | a[1] = c[1] + d[1]; | 
|  | a[2] = c[1] - d[1]; | 
|  |  | 
|  | for (k = 0; k < 4; k++) { | 
|  | dst[(k)*line] = clamp((a[k] + b[k] + add) >> shift, coef_min, coef_max); | 
|  | dst[(k + 4) * line] = | 
|  | clamp((a[3 - k] - b[3 - k] + add) >> shift, coef_min, coef_max); | 
|  | } | 
|  | src += 8; | 
|  | dst++; | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_dct2_size16_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | int j, k; | 
|  | int a[8], b[8]; | 
|  | int c[4], d[4]; | 
|  | int e[2], f[2]; | 
|  | int add = 1 << (shift - 1); | 
|  |  | 
|  | const int *tx_mat = tx_kernel_dct2_size16[INV_TXFM][0]; | 
|  |  | 
|  | const int nz_line = line - skip_line; | 
|  |  | 
|  | for (j = 0; j < nz_line; j++) { | 
|  | for (k = 0; k < 8; k++) { | 
|  | b[k] = tx_mat[1 * 16 + k] * src[1] + tx_mat[3 * 16 + k] * src[3] + | 
|  | tx_mat[5 * 16 + k] * src[5] + tx_mat[7 * 16 + k] * src[7] + | 
|  | tx_mat[9 * 16 + k] * src[9] + tx_mat[11 * 16 + k] * src[11] + | 
|  | tx_mat[13 * 16 + k] * src[13] + tx_mat[15 * 16 + k] * src[15]; | 
|  | } | 
|  | for (k = 0; k < 4; k++) { | 
|  | d[k] = tx_mat[2 * 16 + k] * src[2] + tx_mat[6 * 16 + k] * src[6] + | 
|  | tx_mat[10 * 16 + k] * src[10] + tx_mat[14 * 16 + k] * src[14]; | 
|  | } | 
|  | f[0] = tx_mat[4 * 16] * src[4] + tx_mat[12 * 16] * src[12]; | 
|  | e[0] = tx_mat[0] * src[0] + tx_mat[8 * 16] * src[8]; | 
|  | f[1] = tx_mat[4 * 16 + 1] * src[4] + tx_mat[12 * 16 + 1] * src[12]; | 
|  | e[1] = tx_mat[0 * 16 + 1] * src[0] + tx_mat[8 * 16 + 1] * src[8]; | 
|  | for (k = 0; k < 2; k++) { | 
|  | c[k] = e[k] + f[k]; | 
|  | c[k + 2] = e[1 - k] - f[1 - k]; | 
|  | } | 
|  | for (k = 0; k < 4; k++) { | 
|  | a[k] = c[k] + d[k]; | 
|  | a[k + 4] = c[3 - k] - d[3 - k]; | 
|  | } | 
|  | for (k = 0; k < 8; k++) { | 
|  | dst[(k)*line] = clamp((a[k] + b[k] + add) >> shift, coef_min, coef_max); | 
|  | dst[(k + 8) * line] = | 
|  | clamp((a[7 - k] - b[7 - k] + add) >> shift, coef_min, coef_max); | 
|  | } | 
|  | src += 16; | 
|  | dst++; | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_dct2_size32_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | int j, k; | 
|  | int a[16], b[16]; | 
|  | int c[8], d[8]; | 
|  | int e[4], f[4]; | 
|  | int g[2], h[2]; | 
|  | int add = 1 << (shift - 1); | 
|  |  | 
|  | const int *tx_mat = tx_kernel_dct2_size32[INV_TXFM][0]; | 
|  |  | 
|  | const int nz_line = line - skip_line; | 
|  |  | 
|  | for (j = 0; j < nz_line; j++) { | 
|  | for (k = 0; k < 16; k++) { | 
|  | b[k] = tx_mat[1 * 32 + k] * src[1] + tx_mat[3 * 32 + k] * src[3] + | 
|  | tx_mat[5 * 32 + k] * src[5] + tx_mat[7 * 32 + k] * src[7] + | 
|  | tx_mat[9 * 32 + k] * src[9] + tx_mat[11 * 32 + k] * src[11] + | 
|  | tx_mat[13 * 32 + k] * src[13] + tx_mat[15 * 32 + k] * src[15] + | 
|  | tx_mat[17 * 32 + k] * src[17] + tx_mat[19 * 32 + k] * src[19] + | 
|  | tx_mat[21 * 32 + k] * src[21] + tx_mat[23 * 32 + k] * src[23] + | 
|  | tx_mat[25 * 32 + k] * src[25] + tx_mat[27 * 32 + k] * src[27] + | 
|  | tx_mat[29 * 32 + k] * src[29] + tx_mat[31 * 32 + k] * src[31]; | 
|  | } | 
|  |  | 
|  | for (k = 0; k < 8; k++) { | 
|  | d[k] = tx_mat[2 * 32 + k] * src[2] + tx_mat[6 * 32 + k] * src[6] + | 
|  | tx_mat[10 * 32 + k] * src[10] + tx_mat[14 * 32 + k] * src[14] + | 
|  | tx_mat[18 * 32 + k] * src[18] + tx_mat[22 * 32 + k] * src[22] + | 
|  | tx_mat[26 * 32 + k] * src[26] + tx_mat[30 * 32 + k] * src[30]; | 
|  | } | 
|  | for (k = 0; k < 4; k++) { | 
|  | f[k] = tx_mat[4 * 32 + k] * src[4] + tx_mat[12 * 32 + k] * src[12] + | 
|  | tx_mat[20 * 32 + k] * src[20] + tx_mat[28 * 32 + k] * src[28]; | 
|  | } | 
|  | h[0] = tx_mat[8 * 32 + 0] * src[8] + tx_mat[24 * 32 + 0] * src[24]; | 
|  | h[1] = tx_mat[8 * 32 + 1] * src[8] + tx_mat[24 * 32 + 1] * src[24]; | 
|  | g[0] = tx_mat[0 * 32 + 0] * src[0] + tx_mat[16 * 32 + 0] * src[16]; | 
|  | g[1] = tx_mat[0 * 32 + 1] * src[0] + tx_mat[16 * 32 + 1] * src[16]; | 
|  |  | 
|  | e[0] = g[0] + h[0]; | 
|  | e[3] = g[0] - h[0]; | 
|  | e[1] = g[1] + h[1]; | 
|  | e[2] = g[1] - h[1]; | 
|  | for (k = 0; k < 4; k++) { | 
|  | c[k] = e[k] + f[k]; | 
|  | c[k + 4] = e[3 - k] - f[3 - k]; | 
|  | } | 
|  | for (k = 0; k < 8; k++) { | 
|  | a[k] = c[k] + d[k]; | 
|  | a[k + 8] = c[7 - k] - d[7 - k]; | 
|  | } | 
|  | for (k = 0; k < 16; k++) { | 
|  | dst[(k)*line] = clamp((a[k] + b[k] + add) >> shift, coef_min, coef_max); | 
|  | dst[(k + 16) * line] = | 
|  | clamp((a[15 - k] - b[15 - k] + add) >> shift, coef_min, coef_max); | 
|  | } | 
|  | src += 32; | 
|  | dst++; | 
|  | } | 
|  | } | 
|  |  | 
|  | // ********************************** IDTX ********************************** | 
|  | void inv_txfm_idtx_size4_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 4; | 
|  | const int scale = 128; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | dst[j * line + i] = | 
|  | clamp((int)(src[i * tx1d_size + j] * scale + offset) >> shift, | 
|  | coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_idtx_size8_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 8; | 
|  | const int scale = 181; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | dst[j * line + i] = | 
|  | clamp((int)(src[i * tx1d_size + j] * scale + offset) >> shift, | 
|  | coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_idtx_size16_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 16; | 
|  | const int scale = 256; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | dst[j * line + i] = | 
|  | clamp((int)(src[i * tx1d_size + j] * scale + offset) >> shift, | 
|  | coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_idtx_size32_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 32; | 
|  | const int scale = 362; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | dst[j * line + i] = | 
|  | clamp((int)(src[i * tx1d_size + j] * scale + offset) >> shift, | 
|  | coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_adst_size4_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 4; | 
|  | const int *tx_mat = tx_kernel_adst_size4[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_fdst_size4_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 4; | 
|  | const int *tx_mat = tx_kernel_fdst_size4[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_adst_size8_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 8; | 
|  | const int *tx_mat = tx_kernel_adst_size8[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_fdst_size8_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 8; | 
|  | const int *tx_mat = tx_kernel_fdst_size8[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_adst_size16_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 16; | 
|  | const int *tx_mat = tx_kernel_adst_size16[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_fdst_size16_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 16; | 
|  | const int *tx_mat = tx_kernel_fdst_size16[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_ddtx_size4_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 4; | 
|  | const int *tx_mat = tx_kernel_ddtx_size4[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_ddtx_size8_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 8; | 
|  | const int *tx_mat = tx_kernel_ddtx_size8[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_ddtx_size16_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 16; | 
|  | const int *tx_mat = tx_kernel_ddtx_size16[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * tx_mat[k * tx1d_size + j]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_fddt_size4_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 4; | 
|  | const int *tx_mat = tx_kernel_ddtx_size4[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * | 
|  | tx_mat[k * tx1d_size + (tx1d_size - 1 - j)]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_fddt_size8_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 8; | 
|  | const int *tx_mat = tx_kernel_ddtx_size8[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * | 
|  | tx_mat[k * tx1d_size + (tx1d_size - 1 - j)]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_fddt_size16_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max) { | 
|  | (void)zero_line; | 
|  | const int offset = 1 << (shift - 1); | 
|  | const int nz_line = line - skip_line; | 
|  | const int tx1d_size = 16; | 
|  | const int *tx_mat = tx_kernel_ddtx_size16[INV_TXFM][0]; | 
|  |  | 
|  | for (int i = 0; i < nz_line; i++) { | 
|  | for (int j = 0; j < tx1d_size; j++) { | 
|  | int sum = 0; | 
|  | for (int k = 0; k < tx1d_size; k++) { | 
|  | sum += src[i * tx1d_size + k] * | 
|  | tx_mat[k * tx1d_size + (tx1d_size - 1 - j)]; | 
|  | } | 
|  | dst[j * line + i] = | 
|  | clamp((int)(sum + offset) >> shift, coef_min, coef_max); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_transform_1d_c(const int *src, int *dst, int shift, int line, | 
|  | int skip_line, int zero_line, const int coef_min, | 
|  | const int coef_max, const int tx_type_index, | 
|  | const int size_index) { | 
|  | switch (size_index) { | 
|  | case 0: | 
|  | switch (tx_type_index) { | 
|  | case 0: | 
|  | inv_txfm_dct2_size4_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 1: | 
|  | inv_txfm_idtx_size4_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 2: | 
|  | inv_txfm_adst_size4_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 3: | 
|  | inv_txfm_fdst_size4_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 4: | 
|  | inv_txfm_ddtx_size4_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 5: | 
|  | inv_txfm_fddt_size4_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | default: assert(0); break; | 
|  | } | 
|  | break; | 
|  | case 1: | 
|  | switch (tx_type_index) { | 
|  | case 0: | 
|  | inv_txfm_dct2_size8_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 1: | 
|  | inv_txfm_idtx_size8_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 2: | 
|  | inv_txfm_adst_size8_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 3: | 
|  | inv_txfm_fdst_size8_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 4: | 
|  | inv_txfm_ddtx_size8_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 5: | 
|  | inv_txfm_fddt_size8_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | default: assert(0); break; | 
|  | } | 
|  | break; | 
|  | case 2: | 
|  | switch (tx_type_index) { | 
|  | case 0: | 
|  | inv_txfm_dct2_size16_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 1: | 
|  | inv_txfm_idtx_size16_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 2: | 
|  | inv_txfm_adst_size16_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 3: | 
|  | inv_txfm_fdst_size16_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 4: | 
|  | inv_txfm_ddtx_size16_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 5: | 
|  | inv_txfm_fddt_size16_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | default: assert(0); break; | 
|  | } | 
|  | break; | 
|  | case 3: | 
|  | switch (tx_type_index) { | 
|  | case 0: | 
|  | inv_txfm_dct2_size32_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | case 1: | 
|  | inv_txfm_idtx_size32_c(src, dst, shift, line, skip_line, zero_line, | 
|  | coef_min, coef_max); | 
|  | break; | 
|  | default: assert(0); break; | 
|  | } | 
|  | break; | 
|  | default: assert(0); break; | 
|  | } | 
|  | } | 
|  |  | 
|  | void inv_txfm_c(const tran_low_t *input, uint16_t *dest, int stride, | 
|  | const TxfmParam *txfm_param) { | 
|  | const TX_SIZE tx_size = txfm_param->tx_size; | 
|  | TX_TYPE tx_type = txfm_param->tx_type; | 
|  |  | 
|  | int width = AOMMIN(MAX_TX_SIZE >> 1, tx_size_wide[tx_size]); | 
|  | int height = AOMMIN(MAX_TX_SIZE >> 1, tx_size_high[tx_size]); | 
|  | const uint32_t tx_wide_index = | 
|  | AOMMIN(MAX_TX_SIZE_LOG2 - 1, tx_size_wide_log2[tx_size]) - 2; | 
|  | const uint32_t tx_high_index = | 
|  | AOMMIN(MAX_TX_SIZE_LOG2 - 1, tx_size_high_log2[tx_size]) - 2; | 
|  |  | 
|  | const int intermediate_bitdepth = txfm_param->bd + 8; | 
|  | const int rng_min = -(1 << (intermediate_bitdepth - 1)); | 
|  | const int rng_max = (1 << (intermediate_bitdepth - 1)) - 1; | 
|  |  | 
|  | const int col_rng_min = -(1 << txfm_param->bd); | 
|  | const int col_rng_max = (1 << txfm_param->bd) - 1; | 
|  |  | 
|  | if (txfm_param->lossless) { | 
|  | assert(tx_type == DCT_DCT); | 
|  | av1_highbd_iwht4x4_add(input, dest, stride, txfm_param->eob, | 
|  | txfm_param->bd); | 
|  | return; | 
|  | } | 
|  |  | 
|  | int tx_type_row = g_hor_tx_type[tx_type]; | 
|  | int tx_type_col = g_ver_tx_type[tx_type]; | 
|  |  | 
|  | if (txfm_param->use_ddt) { | 
|  | const int use_ddt_row = (width == 4 && REPLACE_ADST4) || | 
|  | (width == 8 && REPLACE_ADST8) || | 
|  | (width == 16 && REPLACE_ADST16); | 
|  | if (use_ddt_row && (tx_type_row == DST7 || tx_type_row == DCT8)) { | 
|  | tx_type_row = (tx_type_row == DST7) ? DDTX : FDDT; | 
|  | } | 
|  | const int use_ddt_col = (height == 4 && REPLACE_ADST4) || | 
|  | (height == 8 && REPLACE_ADST8) || | 
|  | (height == 16 && REPLACE_ADST16); | 
|  | if (use_ddt_col && (tx_type_col == DST7 || tx_type_col == DCT8)) { | 
|  | tx_type_col = (tx_type_col == DST7) ? DDTX : FDDT; | 
|  | } | 
|  | } | 
|  |  | 
|  | int skipWidth = width > 32 ? width - 32 : 0; | 
|  | int skipHeight = height > 32 ? height - 32 : 0; | 
|  |  | 
|  | int block[MAX_TX_SQUARE]; | 
|  | int tmp[MAX_TX_SQUARE]; | 
|  |  | 
|  | const int log2width = tx_size_wide_log2[tx_size]; | 
|  | const int log2height = tx_size_high_log2[tx_size]; | 
|  | const int sqrt2 = ((log2width + log2height) & 1) ? 1 : 0; | 
|  |  | 
|  | // This assert is required to silence the static analyzer warnings. | 
|  | assert(width * height > 0); | 
|  |  | 
|  | if (sqrt2) { | 
|  | for (int i = 0; i < AOMMIN(1024, width * height); i++) { | 
|  | tmp[i] = round_shift((int64_t)input[i] * NewInvSqrt2, NewSqrt2Bits); | 
|  | } | 
|  | } else { | 
|  | memcpy(tmp, input, AOMMIN(1024, width * height) * sizeof(tran_low_t)); | 
|  | } | 
|  |  | 
|  | memcpy(block, tmp, AOMMIN(1024, width * height) * sizeof(*tmp)); | 
|  |  | 
|  | clamp_buf(block, AOMMIN(1024, width * height), txfm_param->bd + 8); | 
|  |  | 
|  | if (skipWidth) { | 
|  | for (int y = 0; y < height; y++) { | 
|  | memcpy(block + y * width, tmp + y * 32, 32 * sizeof(*tmp)); | 
|  | } | 
|  | } | 
|  |  | 
|  | const int shift_1st = inv_tx_shift[tx_size][0]; | 
|  | const int shift_2nd = inv_tx_shift[tx_size][1]; | 
|  |  | 
|  | assert(shift_1st >= 0); | 
|  | assert(shift_2nd >= 0); | 
|  |  | 
|  | inv_transform_1d_c(block, tmp, shift_1st, height, skipHeight, skipWidth, | 
|  | rng_min, rng_max, tx_type_row, tx_wide_index); | 
|  |  | 
|  | inv_transform_1d_c(tmp, block, shift_2nd, width, 0, skipHeight, col_rng_min, | 
|  | col_rng_max, tx_type_col, tx_high_index); | 
|  |  | 
|  | if (width < tx_size_wide[tx_size]) { | 
|  | assert(width == 32); | 
|  | memcpy(tmp, block, width * height * sizeof(*block)); | 
|  | for (int y = 0; y < height; y++) { | 
|  | for (int x = 0; x < width; x++) { | 
|  | block[y * 2 * width + 2 * x] = tmp[y * width + x]; | 
|  | block[y * 2 * width + 2 * x + 1] = tmp[y * width + x]; | 
|  | } | 
|  | } | 
|  | width = tx_size_wide[tx_size]; | 
|  | } | 
|  | if (height < tx_size_high[tx_size]) { | 
|  | assert(height == 32); | 
|  | memcpy(tmp, block, width * height * sizeof(*block)); | 
|  | for (int y = 0; y < height; y++) { | 
|  | for (int x = 0; x < width; x++) { | 
|  | block[2 * y * width + x] = tmp[y * width + x]; | 
|  | block[(2 * y + 1) * width + x] = tmp[y * width + x]; | 
|  | } | 
|  | } | 
|  | height = tx_size_high[tx_size]; | 
|  | } | 
|  |  | 
|  | for (int y = 0; y < height; y++) { | 
|  | for (int x = 0; x < width; x++) { | 
|  | dest[y * stride + x] = highbd_clip_pixel_add( | 
|  | dest[y * stride + x], block[(y * width) + x], txfm_param->bd); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | int av1_get_tx_scale(const TX_SIZE tx_size) { | 
|  | const int pels = tx_size_2d[tx_size]; | 
|  | // Largest possible pels is 4096 (64x64). | 
|  | return (pels > 256) + (pels > 1024); | 
|  | } | 
|  |  | 
|  | // NOTE: The implementation of all inverses need to be aware of the fact | 
|  | // that input and output could be the same buffer. | 
|  |  | 
|  | void av1_lossless_inv_idtx_add_c(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, const TxfmParam *txfm_param) { | 
|  | const int txw = tx_size_wide[txfm_param->tx_size]; | 
|  | const int txh = tx_size_high[txfm_param->tx_size]; | 
|  | int scale_bits = 3 - av1_get_tx_scale(txfm_param->tx_size); | 
|  | for (int i = 0; i < txh; i++) { | 
|  | for (int j = 0; j < txw; j++) { | 
|  | dest[i * stride + j] = highbd_clip_pixel_add( | 
|  | dest[i * stride + j], (input[i * txw + j] >> scale_bits), | 
|  | txfm_param->bd); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void av1_lossless_inv_idtx_add_vert_c(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, const TxfmParam *txfm_param) { | 
|  | const int txw = tx_size_wide[txfm_param->tx_size]; | 
|  | const int txh = tx_size_high[txfm_param->tx_size]; | 
|  | int scale_bits = 3 - av1_get_tx_scale(txfm_param->tx_size); | 
|  | for (int j = 0; j < txw; j++) { | 
|  | int delay = 0; | 
|  | for (int i = 0; i < txh; i++) { | 
|  | tran_low_t current_txcoeff = (input[i * txw + j] >> scale_bits); | 
|  | dest[i * stride + j] = highbd_clip_pixel_add( | 
|  | dest[i * stride + j], current_txcoeff + delay, txfm_param->bd); | 
|  | delay += current_txcoeff; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void av1_lossless_inv_idtx_add_horz_c(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, const TxfmParam *txfm_param) { | 
|  | const int txw = tx_size_wide[txfm_param->tx_size]; | 
|  | const int txh = tx_size_high[txfm_param->tx_size]; | 
|  | int scale_bits = 3 - av1_get_tx_scale(txfm_param->tx_size); | 
|  | for (int i = 0; i < txh; i++) { | 
|  | int delay = 0; | 
|  | for (int j = 0; j < txw; j++) { | 
|  | tran_low_t current_txcoeff = (input[i * txw + j] >> scale_bits); | 
|  | dest[i * stride + j] = highbd_clip_pixel_add( | 
|  | dest[i * stride + j], current_txcoeff + delay, txfm_param->bd); | 
|  | delay += current_txcoeff; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // idct | 
|  | void av1_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride, | 
|  | int eob, int bd) { | 
|  | if (eob > 1) | 
|  | av1_highbd_iwht4x4_16_add(input, dest, stride, bd); | 
|  | else | 
|  | av1_highbd_iwht4x4_1_add(input, dest, stride, bd); | 
|  | } | 
|  |  | 
|  | // inverse hadamard transform for DPCM lossless vertical mode | 
|  | void av1_highbd_iwht4x4_vert_add(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, int eob, int bd) { | 
|  | if (eob > 1) | 
|  | av1_highbd_iwht4x4_16_vert_add(input, dest, stride, bd); | 
|  | else | 
|  | av1_highbd_iwht4x4_1_vert_add(input, dest, stride, bd); | 
|  | } | 
|  |  | 
|  | // inverse hadamard transform for DPCM lossless horizontal mode | 
|  | void av1_highbd_iwht4x4_horz_add(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, int eob, int bd) { | 
|  | if (eob > 1) | 
|  | av1_highbd_iwht4x4_16_horz_add(input, dest, stride, bd); | 
|  | else | 
|  | av1_highbd_iwht4x4_1_horz_add(input, dest, stride, bd); | 
|  | } | 
|  |  | 
|  | // inverse transform for 4x4 dpcm lossless vertical mode | 
|  | void av1_highbd_inv_txfm_add_4x4_vert_c(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, | 
|  | const TxfmParam *txfm_param) { | 
|  | assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); | 
|  | int eob = txfm_param->eob; | 
|  | int bd = txfm_param->bd; | 
|  | int lossless = txfm_param->lossless; | 
|  | const TX_TYPE tx_type = txfm_param->tx_type; | 
|  | if (lossless) { | 
|  | assert(tx_type == DCT_DCT); | 
|  | (void)tx_type; | 
|  | av1_highbd_iwht4x4_vert_add(input, dest, stride, eob, bd); | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | // inverse transform for 4x4 dpcm lossless horizontal mode | 
|  | void av1_highbd_inv_txfm_add_4x4_horz_c(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, | 
|  | const TxfmParam *txfm_param) { | 
|  | assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); | 
|  | int eob = txfm_param->eob; | 
|  | int bd = txfm_param->bd; | 
|  | int lossless = txfm_param->lossless; | 
|  | const TX_TYPE tx_type = txfm_param->tx_type; | 
|  | if (lossless) { | 
|  | assert(tx_type == DCT_DCT); | 
|  | (void)tx_type; | 
|  | av1_highbd_iwht4x4_horz_add(input, dest, stride, eob, bd); | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size, | 
|  | TX_TYPE tx_type, int eob, int reduced_tx_set, | 
|  | int use_ddt, TxfmParam *txfm_param) { | 
|  | (void)plane; | 
|  | MB_MODE_INFO *const mbmi = xd->mi[0]; | 
|  | txfm_param->tx_type = get_primary_tx_type(tx_type); | 
|  | txfm_param->sec_tx_set = 0; | 
|  | txfm_param->sec_tx_type = 0; | 
|  | txfm_param->intra_mode = get_intra_mode(mbmi, plane); | 
|  | txfm_param->is_inter = is_inter_block(xd->mi[0], xd->tree_type); | 
|  | const int width = tx_size_wide[tx_size]; | 
|  | const int height = tx_size_high[tx_size]; | 
|  | bool mode_dependent_condition = | 
|  | (txfm_param->is_inter | 
|  | ? (txfm_param->tx_type == DCT_DCT && width >= 16 && height >= 16) | 
|  | : txfm_param->intra_mode < PAETH_PRED); | 
|  | if (mode_dependent_condition && !xd->lossless[mbmi->segment_id]) { | 
|  | // updated EOB condition | 
|  | txfm_param->sec_tx_type = get_secondary_tx_type(tx_type); | 
|  | txfm_param->sec_tx_set = get_secondary_tx_set(tx_type); | 
|  | } | 
|  | txfm_param->tx_size = tx_size; | 
|  | // EOB needs to adjusted after inverse IST | 
|  | if (txfm_param->sec_tx_type) { | 
|  | const int st_size_class = | 
|  | (width == 8 && height == 8 && txfm_param->tx_type == DCT_DCT) ? 1 | 
|  | : (width >= 8 && height >= 8) ? (txfm_param->tx_type == DCT_DCT ? 2 : 3) | 
|  | : 0; | 
|  | txfm_param->eob = | 
|  | (st_size_class == 0) ? IST_4x4_HEIGHT | 
|  | : (st_size_class == 1) | 
|  | ? IST_8x8_HEIGHT_RED | 
|  | : ((st_size_class == 3) ? IST_ADST_NZ_CNT : IST_8x8_HEIGHT); | 
|  | } else { | 
|  | txfm_param->eob = eob; | 
|  | } | 
|  | txfm_param->use_ddt = use_ddt; | 
|  | txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id]; | 
|  | txfm_param->bd = xd->bd; | 
|  | txfm_param->tx_set_type = av1_get_ext_tx_set_type( | 
|  | txfm_param->tx_size, is_inter_block(xd->mi[0], xd->tree_type), | 
|  | reduced_tx_set); | 
|  | } | 
|  |  | 
|  | void av1_highbd_inv_txfm_add_c(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, const TxfmParam *txfm_param) { | 
|  | assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); | 
|  | inv_txfm(input, dest, stride, txfm_param); | 
|  | } | 
|  |  | 
|  | // inverse transform for dpcm lossless horizontal mode | 
|  | void av1_highbd_inv_txfm_add_horz_c(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, const TxfmParam *txfm_param) { | 
|  | assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); | 
|  | const TX_SIZE tx_size = txfm_param->tx_size; | 
|  | assert(txfm_param->lossless); | 
|  | if (txfm_param->tx_type == IDTX) { | 
|  | av1_lossless_inv_idtx_add_horz(input, dest, stride, txfm_param); | 
|  | return; | 
|  | } | 
|  | switch (tx_size) { | 
|  | case TX_4X4: | 
|  | av1_highbd_inv_txfm_add_4x4_horz_c(input, dest, stride, txfm_param); | 
|  | break; | 
|  | default: assert(0 && "Invalid transform size for lossless coding"); break; | 
|  | } | 
|  | } | 
|  |  | 
|  | // inverse transform for dpcm lossless vertical mode | 
|  | void av1_highbd_inv_txfm_add_vert_c(const tran_low_t *input, uint16_t *dest, | 
|  | int stride, const TxfmParam *txfm_param) { | 
|  | assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); | 
|  | const TX_SIZE tx_size = txfm_param->tx_size; | 
|  | assert(txfm_param->lossless); | 
|  | if (txfm_param->tx_type == IDTX) { | 
|  | av1_lossless_inv_idtx_add_vert(input, dest, stride, txfm_param); | 
|  | return; | 
|  | } | 
|  | switch (tx_size) { | 
|  | case TX_4X4: | 
|  | av1_highbd_inv_txfm_add_4x4_vert_c(input, dest, stride, txfm_param); | 
|  | break; | 
|  | default: assert(0 && "Invalid transform size for lossless coding"); break; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Apply inverse cross chroma component transform | 
|  | void av1_inv_cross_chroma_tx_block(tran_low_t *dqcoeff_c1, | 
|  | tran_low_t *dqcoeff_c2, TX_SIZE tx_size, | 
|  | CctxType cctx_type, const int bd) { | 
|  | if (cctx_type == CCTX_NONE) return; | 
|  | const int ncoeffs = av1_get_max_eob(tx_size); | 
|  | int32_t *src_c1 = (int32_t *)dqcoeff_c1; | 
|  | int32_t *src_c2 = (int32_t *)dqcoeff_c2; | 
|  | int64_t tmp[2] = { 0, 0 }; | 
|  |  | 
|  | const int angle_idx = cctx_type - CCTX_START; | 
|  | for (int i = 0; i < ncoeffs; i++) { | 
|  | tmp[0] = (int64_t)cctx_mtx[angle_idx][0] * (int64_t)src_c1[i] - | 
|  | (int64_t)cctx_mtx[angle_idx][1] * (int64_t)src_c2[i]; | 
|  | tmp[1] = (int64_t)cctx_mtx[angle_idx][1] * (int64_t)src_c1[i] + | 
|  | (int64_t)cctx_mtx[angle_idx][0] * (int64_t)src_c2[i]; | 
|  | src_c1[i] = (int32_t)ROUND_POWER_OF_TWO_SIGNED_64(tmp[0], CCTX_PREC_BITS); | 
|  | src_c2[i] = (int32_t)ROUND_POWER_OF_TWO_SIGNED_64(tmp[1], CCTX_PREC_BITS); | 
|  | src_c1[i] = clamp_value(src_c1[i], 8 + bd); | 
|  | src_c2[i] = clamp_value(src_c2[i], 8 + bd); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void av1_highbd_inv_txfm_add_master(const tran_low_t *input, | 
|  | uint16_t *dest, int stride, | 
|  | const TxfmParam *txfm_param) { | 
|  | if (txfm_param->lossless) { | 
|  | if (txfm_param->tx_type == IDTX) { | 
|  | av1_lossless_inv_idtx_add(input, dest, stride, txfm_param); | 
|  | return; | 
|  | } | 
|  | } | 
|  | av1_highbd_inv_txfm_add(input, dest, stride, txfm_param); | 
|  | } | 
|  |  | 
|  | void av1_inverse_transform_block(const MACROBLOCKD *xd, | 
|  | const tran_low_t *dqcoeff, int plane, | 
|  | TX_TYPE tx_type, TX_SIZE tx_size, | 
|  | uint16_t *dst, int stride, int eob, | 
|  | int use_ddt, int reduced_tx_set) { | 
|  | if (!eob) return; | 
|  |  | 
|  | assert(eob <= av1_get_max_eob(tx_size)); | 
|  |  | 
|  | TxfmParam txfm_param; | 
|  | init_txfm_param(xd, plane, tx_size, tx_type, eob, reduced_tx_set, use_ddt, | 
|  | &txfm_param); | 
|  | assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]); | 
|  | assert(IMPLIES(txfm_param.sec_tx_type, | 
|  | block_signals_sec_tx_type(xd, tx_size, txfm_param.tx_type, | 
|  | txfm_param.eob))); | 
|  |  | 
|  | // Work buffer for secondary transform | 
|  | DECLARE_ALIGNED(32, tran_low_t, temp_dqcoeff[MAX_TX_SQUARE]); | 
|  | memcpy(temp_dqcoeff, dqcoeff, sizeof(tran_low_t) * tx_size_2d[tx_size]); | 
|  |  | 
|  | av1_inv_stxfm(temp_dqcoeff, &txfm_param); | 
|  |  | 
|  | MB_MODE_INFO *const mbmi = xd->mi[0]; | 
|  | if (xd->lossless[mbmi->segment_id]) { | 
|  | PREDICTION_MODE cur_pred_mode = | 
|  | (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode); | 
|  | int cur_dpcm_flag = | 
|  | (plane == AOM_PLANE_Y) ? mbmi->use_dpcm_y : mbmi->use_dpcm_uv; | 
|  | int cur_angle_delta = (plane == AOM_PLANE_Y) ? mbmi->angle_delta[0] : 0; | 
|  | if (cur_pred_mode == V_PRED && cur_angle_delta == 0 && cur_dpcm_flag > 0) { | 
|  | av1_highbd_inv_txfm_add_vert(temp_dqcoeff, dst, stride, &txfm_param); | 
|  | } else if (cur_pred_mode == H_PRED && cur_angle_delta == 0 && | 
|  | cur_dpcm_flag > 0) { | 
|  | av1_highbd_inv_txfm_add_horz(temp_dqcoeff, dst, stride, &txfm_param); | 
|  | } else { | 
|  | av1_highbd_inv_txfm_add_master(temp_dqcoeff, dst, stride, &txfm_param); | 
|  | } | 
|  | } else { | 
|  | av1_highbd_inv_txfm_add_master(temp_dqcoeff, dst, stride, &txfm_param); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Inverse secondary transform | 
|  | void inv_stxfm_c(tran_low_t *src, tran_low_t *dst, const PREDICTION_MODE mode, | 
|  | const uint8_t stx_idx, const int size, const int bd) { | 
|  | assert(stx_idx < 4); | 
|  | const int16_t *kernel = (size == 0) ? ist_4x4_kernel[mode][stx_idx][0] | 
|  | : ist_8x8_kernel[mode][stx_idx][0]; | 
|  | int *out = dst; | 
|  | const int shift = 7; | 
|  |  | 
|  | int reduced_width, reduced_height; | 
|  | if (size == 0) { | 
|  | reduced_height = IST_4x4_HEIGHT; | 
|  | reduced_width = IST_4x4_WIDTH; | 
|  | } else { | 
|  | reduced_height = (size == 1) | 
|  | ? IST_8x8_HEIGHT_RED | 
|  | : ((size == 3) ? IST_ADST_NZ_CNT : IST_8x8_HEIGHT); | 
|  | reduced_width = IST_8x8_WIDTH; | 
|  | } | 
|  | for (int j = 0; j < reduced_width; j++) { | 
|  | int32_t resi = 0; | 
|  | const int16_t *kernel_tmp = kernel; | 
|  | int *srcPtr = src; | 
|  | for (int i = 0; i < reduced_height; i++) { | 
|  | resi += *srcPtr++ * *kernel_tmp; | 
|  | kernel_tmp += reduced_width; | 
|  | } | 
|  | *out++ = clamp_value(ROUND_POWER_OF_TWO_SIGNED(resi, shift), 8 + bd); | 
|  | kernel++; | 
|  | } | 
|  | } | 
|  |  | 
|  | void av1_inv_stxfm(tran_low_t *coeff, TxfmParam *txfm_param) { | 
|  | const TX_TYPE stx_type = txfm_param->sec_tx_type; | 
|  |  | 
|  | const int width = tx_size_wide[txfm_param->tx_size] <= 32 | 
|  | ? tx_size_wide[txfm_param->tx_size] | 
|  | : 32; | 
|  | const int height = tx_size_high[txfm_param->tx_size] <= 32 | 
|  | ? tx_size_high[txfm_param->tx_size] | 
|  | : 32; | 
|  |  | 
|  | if ((width >= 4 && height >= 4) && stx_type) { | 
|  | const PREDICTION_MODE intra_mode = | 
|  | (txfm_param->is_inter ? DC_PRED : txfm_param->intra_mode); | 
|  | PREDICTION_MODE mode = 0, mode_t = 0; | 
|  | const int log2width = tx_size_wide_log2[txfm_param->tx_size]; | 
|  |  | 
|  | const int sb_size = (width >= 8 && height >= 8) ? 8 : 4; | 
|  | const int16_t *scan_order_out; | 
|  | // Align scan order of IST with primary transform scan order | 
|  | const SCAN_ORDER *scan_order_in = | 
|  | get_scan(txfm_param->tx_size, txfm_param->tx_type); | 
|  | const int16_t *const scan = scan_order_in->scan; | 
|  | tran_low_t buf0[64] = { 0 }, buf1[64] = { 0 }; | 
|  | tran_low_t *tmp = buf0; | 
|  | tran_low_t *src = coeff; | 
|  |  | 
|  | int reduced_width = sb_size == 8 ? IST_8x8_WIDTH : IST_4x4_WIDTH; | 
|  | for (int r = 0; r < reduced_width; r++) { | 
|  | // Align scan order of IST with primary transform scan order | 
|  | *tmp = src[scan[r]]; | 
|  | tmp++; | 
|  | } | 
|  | int8_t transpose = 0; | 
|  | mode = AOMMIN(intra_mode, SMOOTH_H_PRED); | 
|  | if ((mode == H_PRED) || (mode == D157_PRED) || (mode == D67_PRED) || | 
|  | (mode == SMOOTH_H_PRED)) | 
|  | transpose = 1; | 
|  | #if STX_COEFF_DEBUG | 
|  | fprintf(stderr, | 
|  | "[inv stx] inter %d ptx %d txs %dx%d tp %d stx_set %d stx_type %d\n" | 
|  | "(stx coeff)\n", | 
|  | txfm_param->is_inter, get_primary_tx_type(txfm_param->tx_type), | 
|  | width, height, transpose, txfm_param->sec_tx_set, stx_type); | 
|  | for (int i = 0; i < height; i++) { | 
|  | for (int j = 0; j < width; j++) { | 
|  | fprintf(stderr, "%d,", coeff[i * width + j]); | 
|  | } | 
|  | fprintf(stderr, "\n"); | 
|  | } | 
|  | #endif  // STX_COEFF_DEBUG | 
|  | mode_t = txfm_param->sec_tx_set; | 
|  | assert(mode_t < IST_SET_SIZE); | 
|  | if (transpose) { | 
|  | scan_order_out = (sb_size == 4) | 
|  | ? stx_scan_orders_transpose_4x4[log2width - 2] | 
|  | : stx_scan_orders_transpose_8x8[log2width - 2]; | 
|  | } else { | 
|  | scan_order_out = (sb_size == 4) ? stx_scan_orders_4x4[log2width - 2] | 
|  | : stx_scan_orders_8x8[log2width - 2]; | 
|  | } | 
|  | const int st_size_class = | 
|  | (width == 8 && height == 8 && txfm_param->tx_type == DCT_DCT) ? 1 | 
|  | : (width >= 8 && height >= 8) ? (txfm_param->tx_type == DCT_DCT ? 2 : 3) | 
|  | : 0; | 
|  | inv_stxfm(buf0, buf1, mode_t, stx_type - 1, st_size_class, txfm_param->bd); | 
|  | tmp = buf1; | 
|  | src = coeff; | 
|  | memset(src, 0, width * height * sizeof(tran_low_t)); | 
|  | const int16_t *sup_reg_mapping = | 
|  | &coeff8x8_mapping[txfm_param->sec_tx_set * 3 + stx_type - 1][0]; | 
|  | for (int r = 0; r < reduced_width; r++) { | 
|  | if (sb_size == 8) | 
|  | src[scan_order_out[sup_reg_mapping[r]]] = *tmp; | 
|  | else | 
|  | src[scan_order_out[r]] = *tmp; | 
|  | tmp++; | 
|  | } | 
|  | #if STX_COEFF_DEBUG | 
|  | fprintf(stderr, "(ptx coeff)\n"); | 
|  | for (int i = 0; i < height; i++) { | 
|  | for (int j = 0; j < width; j++) { | 
|  | fprintf(stderr, "%d,", coeff[i * width + j]); | 
|  | } | 
|  | fprintf(stderr, "\n"); | 
|  | } | 
|  | #endif  // STX_COEFF_DEBUG | 
|  | } | 
|  | } |