| /* |
| * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include <math.h> |
| |
| #include "./aom_dsp_rtcd.h" |
| #include "./av1_rtcd.h" |
| #include "aom_dsp/inv_txfm.h" |
| #include "aom_ports/mem.h" |
| #include "av1/common/av1_inv_txfm1d_cfg.h" |
| #include "av1/common/blockd.h" |
| #include "av1/common/enums.h" |
| #include "av1/common/idct.h" |
| #if CONFIG_DAALA_TX4 || CONFIG_DAALA_TX8 || CONFIG_DAALA_TX16 || \ |
| CONFIG_DAALA_TX32 || CONFIG_DAALA_TX64 |
| #include "av1/common/daala_tx.h" |
| #endif |
| |
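// Returns the number of bits by which large transforms are scaled down:
// 0 for blocks of up to 256 pels, 1 for up to 1024 pels, 2 for up to 4096
// pels, and 3 beyond that.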
| int av1_get_tx_scale(const TX_SIZE tx_size) { |
| const int pels = tx_size_2d[tx_size]; |
| return (pels > 256) + (pels > 1024) + (pels > 4096); |
| } |
| |
// NOTE: The implementations of all inverses must be aware that the input and
// output buffers may be the same.
| |
| #if CONFIG_EXT_TX |
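// Each N-point inverse identity transform below scales its input by
// sqrt(N / 2): sqrt(2), 2, 2 * sqrt(2), 4, and 4 * sqrt(2) for N = 4, 8, 16,
// 32, and 64 respectively.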
| static void iidtx4_c(const tran_low_t *input, tran_low_t *output) { |
| int i; |
| for (i = 0; i < 4; ++i) { |
| output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2); |
| } |
| } |
| |
| static void iidtx8_c(const tran_low_t *input, tran_low_t *output) { |
| int i; |
| for (i = 0; i < 8; ++i) { |
| output[i] = input[i] * 2; |
| } |
| } |
| |
| static void iidtx16_c(const tran_low_t *input, tran_low_t *output) { |
| int i; |
| for (i = 0; i < 16; ++i) { |
| output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2); |
| } |
| } |
| |
| static void iidtx32_c(const tran_low_t *input, tran_low_t *output) { |
| int i; |
| for (i = 0; i < 32; ++i) { |
| output[i] = input[i] * 4; |
| } |
| } |
| |
| #if CONFIG_TX64X64 && !CONFIG_DAALA_TX64 |
| static void iidtx64_c(const tran_low_t *input, tran_low_t *output) { |
| int i; |
| for (i = 0; i < 64; ++i) { |
| output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2); |
| } |
| } |
#endif  // CONFIG_TX64X64 && !CONFIG_DAALA_TX64
| #endif // CONFIG_EXT_TX |
| |
| // For use in lieu of ADST |
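// The bottom half of the input, scaled by 4, becomes the top half of the
// output; the top half of the input, scaled by sqrt(2), is passed through a
// 16-point IDCT into the bottom half of the output.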
| static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) { |
| int i; |
| tran_low_t inputhalf[16]; |
| // Multiply input by sqrt(2) |
| for (i = 0; i < 16; ++i) { |
| inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2); |
| } |
| for (i = 0; i < 16; ++i) { |
| output[i] = input[16 + i] * 4; |
| } |
| aom_idct16_c(inputhalf, output + 16); |
| // Note overall scaling factor is 4 times orthogonal |
| } |
| |
| #if CONFIG_TX64X64 && !CONFIG_DAALA_TX64 |
| static void idct64_col_c(const tran_low_t *input, tran_low_t *output) { |
| int32_t in[64], out[64]; |
| int i; |
| for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i]; |
| av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64); |
| for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i]; |
| } |
| |
| static void idct64_row_c(const tran_low_t *input, tran_low_t *output) { |
| int32_t in[64], out[64]; |
| int i; |
| for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i]; |
| av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64); |
| for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i]; |
| } |
| |
| // For use in lieu of ADST |
| static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) { |
| int i; |
| tran_low_t inputhalf[32]; |
| // Multiply input by sqrt(2) |
| for (i = 0; i < 32; ++i) { |
| inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2); |
| } |
| for (i = 0; i < 32; ++i) { |
| output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2); |
| } |
| aom_idct32_c(inputhalf, output + 32); |
| // Note overall scaling factor is 4 * sqrt(2) times orthogonal |
| } |
#endif  // CONFIG_TX64X64 && !CONFIG_DAALA_TX64
| |
| // Inverse identity transform and add. |
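// The per-pixel down-shift below is 3 for blocks of up to 256 pels, 2 for up
// to 1024 pels, and 1 for larger blocks.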
| #if CONFIG_EXT_TX |
| static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| int bsx, int bsy, TX_TYPE tx_type) { |
| int r, c; |
| const int pels = bsx * bsy; |
| const int shift = 3 - ((pels > 256) + (pels > 1024)); |
| if (tx_type == IDTX) { |
| for (r = 0; r < bsy; ++r) { |
| for (c = 0; c < bsx; ++c) |
| dest[c] = clip_pixel_add(dest[c], input[c] >> shift); |
| dest += stride; |
| input += bsx; |
| } |
| } |
| } |
| #endif // CONFIG_EXT_TX |
| |
| #define FLIPUD_PTR(dest, stride, size) \ |
| do { \ |
| (dest) = (dest) + ((size)-1) * (stride); \ |
| (stride) = -(stride); \ |
| } while (0) |
| |
| #if CONFIG_EXT_TX |
| static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src, |
| int *sstride, TX_TYPE tx_type, int sizey, |
| int sizex) { |
| // Note that the transpose of src will be added to dst. In order to LR |
| // flip the addends (in dst coordinates), we UD flip the src. To UD flip |
| // the addends, we UD flip the dst. |
| switch (tx_type) { |
| case DCT_DCT: |
| case ADST_DCT: |
| case DCT_ADST: |
| case ADST_ADST: |
| case IDTX: |
| case V_DCT: |
| case H_DCT: |
| case V_ADST: |
| case H_ADST: break; |
| case FLIPADST_DCT: |
| case FLIPADST_ADST: |
| case V_FLIPADST: |
| // flip UD |
| FLIPUD_PTR(*dst, *dstride, sizey); |
| break; |
| case DCT_FLIPADST: |
| case ADST_FLIPADST: |
| case H_FLIPADST: |
| // flip LR |
| FLIPUD_PTR(*src, *sstride, sizex); |
| break; |
| case FLIPADST_FLIPADST: |
| // flip UD |
| FLIPUD_PTR(*dst, *dstride, sizey); |
| // flip LR |
| FLIPUD_PTR(*src, *sstride, sizex); |
| break; |
| default: assert(0); break; |
| } |
| } |
| #endif // CONFIG_EXT_TX |
| |
| #if CONFIG_HIGHBITDEPTH |
| #if CONFIG_EXT_TX && CONFIG_TX64X64 |
| static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8, |
| int stride, int bsx, int bsy, TX_TYPE tx_type, |
| int bd) { |
| int r, c; |
| const int pels = bsx * bsy; |
| const int shift = 3 - ((pels > 256) + (pels > 1024)); |
| uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| |
| if (tx_type == IDTX) { |
| for (r = 0; r < bsy; ++r) { |
| for (c = 0; c < bsx; ++c) |
| dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd); |
| dest += stride; |
| input += bsx; |
| } |
| } |
| } |
| #endif // CONFIG_EXT_TX && CONFIG_TX64X64 |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| #if CONFIG_LGT || CONFIG_LGT_FROM_PRED |
| void ilgt4(const tran_low_t *input, tran_low_t *output, |
| const tran_high_t *lgtmtx) { |
| if (!lgtmtx) assert(0); |
| #if CONFIG_LGT_FROM_PRED |
| // For DCT/ADST, use butterfly implementations |
| if (lgtmtx[0] == DCT4) { |
| aom_idct4_c(input, output); |
| return; |
| } else if (lgtmtx[0] == ADST4) { |
| aom_iadst4_c(input, output); |
| return; |
| } |
| #endif // CONFIG_LGT_FROM_PRED |
| |
  // Evaluate s[j] = sum over i of lgtmtx[i * 4 + j] * input[i], i = 0, ..., 3
| tran_high_t s[4] = { 0 }; |
| for (int i = 0; i < 4; ++i) |
| for (int j = 0; j < 4; ++j) s[j] += lgtmtx[i * 4 + j] * input[i]; |
| |
| for (int i = 0; i < 4; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i])); |
| } |
| |
| void ilgt8(const tran_low_t *input, tran_low_t *output, |
| const tran_high_t *lgtmtx) { |
| if (!lgtmtx) assert(0); |
| #if CONFIG_LGT_FROM_PRED |
| // For DCT/ADST, use butterfly implementations |
| if (lgtmtx[0] == DCT8) { |
| aom_idct8_c(input, output); |
| return; |
| } else if (lgtmtx[0] == ADST8) { |
| aom_iadst8_c(input, output); |
| return; |
| } |
| #endif // CONFIG_LGT_FROM_PRED |
| |
  // Evaluate s[j] = sum over i of lgtmtx[i * 8 + j] * input[i], i = 0, ..., 7
| tran_high_t s[8] = { 0 }; |
| for (int i = 0; i < 8; ++i) |
| for (int j = 0; j < 8; ++j) s[j] += lgtmtx[i * 8 + j] * input[i]; |
| |
| for (int i = 0; i < 8; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i])); |
| } |
| #endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED |
| |
| #if CONFIG_LGT |
// get_lgt4 and get_lgt8 return 1 and select an LGT matrix if LGT is chosen
// to apply; otherwise they return 0 and set the matrix pointer to NULL.
| int get_lgt4(const TxfmParam *txfm_param, int is_col, |
| const tran_high_t **lgtmtx) { |
| if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D || |
| vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) { |
| lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0]; |
| return 1; |
| } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D || |
| htx_tab[txfm_param->tx_type] == FLIPADST_1D)) { |
| lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0]; |
| return 1; |
| } |
| lgtmtx[0] = NULL; |
| return 0; |
| } |
| |
| int get_lgt8(const TxfmParam *txfm_param, int is_col, |
| const tran_high_t **lgtmtx) { |
| if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D || |
| vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) { |
| lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0]; |
| return 1; |
| } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D || |
| htx_tab[txfm_param->tx_type] == FLIPADST_1D)) { |
| lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0]; |
| return 1; |
| } |
| lgtmtx[0] = NULL; |
| return 0; |
| } |
| #endif // CONFIG_LGT |
| |
| #if CONFIG_LGT_FROM_PRED |
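// No true LGT exists for lengths 16 and 32; ilgt16up dispatches to
// aom_idct16/aom_iadst16 or aom_idct32/ihalfright32 based on the first entry
// of lgtmtx.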
| void ilgt16up(const tran_low_t *input, tran_low_t *output, |
| const tran_high_t *lgtmtx) { |
| if (lgtmtx[0] == DCT16) { |
| aom_idct16_c(input, output); |
| return; |
| } else if (lgtmtx[0] == ADST16) { |
| aom_iadst16_c(input, output); |
| return; |
| } else if (lgtmtx[0] == DCT32) { |
| aom_idct32_c(input, output); |
| return; |
| } else if (lgtmtx[0] == ADST32) { |
| ihalfright32_c(input, output); |
| return; |
| } else { |
| assert(0); |
| } |
| } |
| |
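// Writes to *idx_max_diff the index i in [1, n) that maximizes
// |arr[i] - arr[i - 1]|, or -1 if all adjacent differences are zero.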
| void get_discontinuity_1d(uint8_t *arr, int n, int *idx_max_diff) { |
| *idx_max_diff = -1; |
| |
| int temp = 0, max_diff = 0, min_diff = INT_MAX; |
| for (int i = 1; i < n; ++i) { |
| temp = abs(arr[i] - arr[i - 1]); |
| if (temp > max_diff) { |
| max_diff = temp; |
| *idx_max_diff = i; |
| } |
| if (temp < min_diff) min_diff = temp; |
| } |
| } |
| |
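// For each i in [1, n), sums the squared differences between line i and line
// i - 1 of dst (rows when is_col, columns otherwise) over ntx pixels, and
// writes the i with the largest sum to *idx_max_diff (-1 if all sums are
// zero).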
| void get_discontinuity_2d(uint8_t *dst, int stride, int n, int is_col, |
| int *idx_max_diff, int ntx) { |
| *idx_max_diff = -1; |
| |
| int diff = 0, temp = 0, max_diff = 0, min_diff = INT_MAX; |
| for (int i = 1; i < n; ++i) { |
| temp = 0; |
| for (int j = 0; j < ntx; ++j) { |
| if (is_col) // vertical diff |
| diff = dst[i * stride + j] - dst[(i - 1) * stride + j]; |
| else // horizontal diff |
| diff = dst[j * stride + i] - dst[j * stride + i - 1]; |
| temp += diff * diff; |
| } |
    // temp / ntx is the average squared difference at line i
| if (temp > max_diff) { |
| max_diff = temp; |
| *idx_max_diff = i; |
| } |
| if (temp < min_diff) min_diff = temp; |
| } |
| } |
| |
| int idx_selfloop_wrt_mode(PREDICTION_MODE mode, int is_col) { |
| // 0: no self-loop |
| // 1: small self-loop |
| // 2: medium self-loop |
| // 3: large self-loop |
| switch (mode) { |
| case DC_PRED: |
| case SMOOTH_PRED: |
      // prediction is good for both directions: large SLs for row and col
| return 3; |
| case PAETH_PRED: return 0; |
| #if CONFIG_SMOOTH_HV |
| case SMOOTH_H_PRED: |
| #endif |
| case H_PRED: |
| // prediction is good for H direction: large SL for row only |
| return is_col ? 0 : 3; |
| #if CONFIG_SMOOTH_HV |
| case SMOOTH_V_PRED: |
| #endif |
| case V_PRED: |
| // prediction is good for V direction: large SL for col only |
| return is_col ? 3 : 0; |
| #if LGT_SL_INTRA |
| // directional mode: choose SL based on the direction |
| case D45_PRED: return is_col ? 2 : 0; |
| case D63_PRED: return is_col ? 3 : 0; |
| case D117_PRED: return is_col ? 3 : 1; |
| case D135_PRED: return 2; |
| case D153_PRED: return is_col ? 1 : 3; |
| case D207_PRED: return is_col ? 0 : 3; |
| #else |
| case D45_PRED: |
| case D63_PRED: |
| case D117_PRED: return is_col ? 3 : 0; |
| case D135_PRED: |
| case D153_PRED: |
| case D207_PRED: return is_col ? 0 : 3; |
| #endif |
| // inter: no SL |
| default: return 0; |
| } |
| } |
| |
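// Selects a 4-point LGT matrix from the prediction mode and the position of
// the largest discontinuity in the predictor pixels in dst; the chosen
// matrix is written to lgtmtx[0].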
| void get_lgt4_from_pred(const TxfmParam *txfm_param, int is_col, |
| const tran_high_t **lgtmtx, int ntx) { |
| PREDICTION_MODE mode = txfm_param->mode; |
| int stride = txfm_param->stride; |
| uint8_t *dst = txfm_param->dst; |
| int bp = -1; |
| uint8_t arr[4]; |
| |
| // Each lgt4mtx_arr[k][i] corresponds to a line graph with a self-loop on |
| // the first node, and possibly a weak edge within the line graph. i is |
| // the index of the weak edge (between the i-th and (i+1)-th pixels, i=0 |
  // means no weak edge). k corresponds to the first self-loop's weight.
| const tran_high_t *lgt4mtx_arr[4][4] = { |
| { &lgt4_000[0][0], &lgt4_000w1[0][0], &lgt4_000w2[0][0], |
| &lgt4_000w3[0][0] }, |
| { &lgt4_060[0][0], &lgt4_060_000w1[0][0], &lgt4_060_000w2[0][0], |
| &lgt4_060_000w3[0][0] }, |
| { &lgt4_100[0][0], &lgt4_100_000w1[0][0], &lgt4_100_000w2[0][0], |
| &lgt4_100_000w3[0][0] }, |
| { &lgt4_150[0][0], &lgt4_150_000w1[0][0], &lgt4_150_000w2[0][0], |
| &lgt4_150_000w3[0][0] }, |
| }; |
| |
| // initialize to DCT or some LGTs, and then change later if necessary |
| int idx_sl = idx_selfloop_wrt_mode(mode, is_col); |
| lgtmtx[0] = lgt4mtx_arr[idx_sl][0]; |
| |
| // find the break point and replace the line graph by the one with a |
| // break point |
| if (mode == DC_PRED || mode == SMOOTH_PRED) { |
    // Do not use a break point because: 1) is_left_available and
    // is_top_available in DC_PRED are not known to txfm_param for now, so
    // accessing both boundaries anyway may cause a mismatch, and 2) DC
    // prediction typically yields very smooth residuals, so a break point
    // does not usually improve the RD result.
| return; |
| } else if (mode == PAETH_PRED) { |
| // PAETH_PRED: use both 1D top boundary and 1D left boundary |
| if (is_col) |
| for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride]; |
| else |
| for (int i = 0; i < 4; ++i) arr[i] = dst[i]; |
| get_discontinuity_1d(&arr[0], 4, &bp); |
| } else if (mode == V_PRED) { |
| // V_PRED: use 1D top boundary only |
| if (is_col) return; |
| for (int i = 0; i < 4; ++i) arr[i] = dst[i]; |
| get_discontinuity_1d(&arr[0], 4, &bp); |
| } else if (mode == H_PRED) { |
| // H_PRED: use 1D left boundary only |
| if (!is_col) return; |
| for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride]; |
| get_discontinuity_1d(&arr[0], 4, &bp); |
| #if CONFIG_SMOOTH_HV |
| } else if (mode == SMOOTH_V_PRED) { |
| if (is_col) return; |
| for (int i = 0; i < 4; ++i) arr[i] = dst[-stride + i]; |
| get_discontinuity_1d(&arr[0], 4, &bp); |
| } else if (mode == SMOOTH_H_PRED) { |
| if (!is_col) return; |
| for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride - 1]; |
| get_discontinuity_1d(&arr[0], 4, &bp); |
| #endif |
| } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) { |
| // directional modes closer to vertical (maybe include D135 later) |
| if (!is_col) get_discontinuity_2d(dst, stride, 4, 0, &bp, ntx); |
| } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) { |
| // directional modes closer to horizontal |
| if (is_col) get_discontinuity_2d(dst, stride, 4, 1, &bp, ntx); |
| } else if (mode > PAETH_PRED) { |
| // inter |
| get_discontinuity_2d(dst, stride, 4, is_col, &bp, ntx); |
| } |
| |
| #if LGT_SL_INTRA |
| if (bp != -1) lgtmtx[0] = lgt4mtx_arr[idx_sl][bp]; |
| #else |
| if (bp != -1) lgtmtx[0] = lgt4mtx_arr[0][bp]; |
| #endif |
| } |
| |
| void get_lgt8_from_pred(const TxfmParam *txfm_param, int is_col, |
| const tran_high_t **lgtmtx, int ntx) { |
| PREDICTION_MODE mode = txfm_param->mode; |
| int stride = txfm_param->stride; |
| uint8_t *dst = txfm_param->dst; |
| int bp = -1; |
| uint8_t arr[8]; |
| |
| const tran_high_t *lgt8mtx_arr[4][8] = { |
| { &lgt8_000[0][0], &lgt8_000w1[0][0], &lgt8_000w2[0][0], &lgt8_000w3[0][0], |
| &lgt8_000w4[0][0], &lgt8_000w5[0][0], &lgt8_000w6[0][0], |
| &lgt8_000w7[0][0] }, |
| { &lgt8_060[0][0], &lgt8_060_000w1[0][0], &lgt8_060_000w2[0][0], |
| &lgt8_060_000w3[0][0], &lgt8_060_000w4[0][0], &lgt8_060_000w5[0][0], |
| &lgt8_060_000w6[0][0], &lgt8_060_000w7[0][0] }, |
| { &lgt8_100[0][0], &lgt8_100_000w1[0][0], &lgt8_100_000w2[0][0], |
| &lgt8_100_000w3[0][0], &lgt8_100_000w4[0][0], &lgt8_100_000w5[0][0], |
| &lgt8_100_000w6[0][0], &lgt8_100_000w7[0][0] }, |
| { &lgt8_150[0][0], &lgt8_150_000w1[0][0], &lgt8_150_000w2[0][0], |
| &lgt8_150_000w3[0][0], &lgt8_150_000w4[0][0], &lgt8_150_000w5[0][0], |
| &lgt8_150_000w6[0][0], &lgt8_150_000w7[0][0] }, |
| }; |
| |
| int idx_sl = idx_selfloop_wrt_mode(mode, is_col); |
| lgtmtx[0] = lgt8mtx_arr[idx_sl][0]; |
| |
| if (mode == DC_PRED || mode == SMOOTH_PRED) { |
| return; |
| } else if (mode == PAETH_PRED) { |
| if (is_col) |
| for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride]; |
| else |
| for (int i = 0; i < 8; ++i) arr[i] = dst[i]; |
| get_discontinuity_1d(&arr[0], 8, &bp); |
| } else if (mode == V_PRED) { |
| if (is_col) return; |
| for (int i = 0; i < 8; ++i) arr[i] = dst[i]; |
| get_discontinuity_1d(&arr[0], 8, &bp); |
| } else if (mode == H_PRED) { |
| if (!is_col) return; |
| for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride]; |
| get_discontinuity_1d(&arr[0], 8, &bp); |
| #if CONFIG_SMOOTH_HV |
| } else if (mode == SMOOTH_V_PRED) { |
| if (is_col) return; |
| for (int i = 0; i < 8; ++i) arr[i] = dst[-stride + i]; |
| get_discontinuity_1d(&arr[0], 8, &bp); |
| } else if (mode == SMOOTH_H_PRED) { |
| if (!is_col) return; |
| for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride - 1]; |
| get_discontinuity_1d(&arr[0], 8, &bp); |
| #endif |
| } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) { |
| if (!is_col) get_discontinuity_2d(dst, stride, 8, 0, &bp, ntx); |
| } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) { |
| if (is_col) get_discontinuity_2d(dst, stride, 8, 1, &bp, ntx); |
| } else if (mode > PAETH_PRED) { |
| get_discontinuity_2d(dst, stride, 8, is_col, &bp, ntx); |
| } |
| |
| #if LGT_SL_INTRA |
| if (bp != -1) lgtmtx[0] = lgt8mtx_arr[idx_sl][bp]; |
| #else |
| if (bp != -1) lgtmtx[0] = lgt8mtx_arr[0][bp]; |
| #endif |
| } |
| |
// Since LGTs with length > 8 are not implemented yet, the following function
// simply selects a DCT or ADST matrix.
| void get_lgt16up_from_pred(const TxfmParam *txfm_param, int is_col, |
| const tran_high_t **lgtmtx, int ntx) { |
| int tx_length = is_col ? tx_size_high[txfm_param->tx_size] |
| : tx_size_wide[txfm_param->tx_size]; |
| assert(tx_length == 16 || tx_length == 32); |
| PREDICTION_MODE mode = txfm_param->mode; |
| |
| (void)ntx; |
| const tran_high_t *dctmtx = |
| tx_length == 16 ? &lgt16_000[0][0] : &lgt32_000[0][0]; |
| const tran_high_t *adstmtx = |
| tx_length == 16 ? &lgt16_200[0][0] : &lgt32_200[0][0]; |
| |
| switch (mode) { |
| case DC_PRED: |
| case PAETH_PRED: |
| case SMOOTH_PRED: |
| // prediction from both top and left -> ADST |
| lgtmtx[0] = adstmtx; |
| break; |
| case V_PRED: |
| case D45_PRED: |
| case D63_PRED: |
| case D117_PRED: |
| #if CONFIG_SMOOTH_HV |
| case SMOOTH_V_PRED: |
| #endif |
| // prediction from the top more than from the left -> ADST |
| lgtmtx[0] = is_col ? adstmtx : dctmtx; |
| break; |
| case H_PRED: |
| case D135_PRED: |
| case D153_PRED: |
| case D207_PRED: |
| #if CONFIG_SMOOTH_HV |
| case SMOOTH_H_PRED: |
| #endif |
| // prediction from the left more than from the top -> DCT |
| lgtmtx[0] = is_col ? dctmtx : adstmtx; |
| break; |
| default: lgtmtx[0] = dctmtx; break; |
| } |
| } |
| |
| typedef void (*IlgtFunc)(const tran_low_t *input, tran_low_t *output, |
| const tran_high_t *lgtmtx); |
| |
| static IlgtFunc ilgt_func[4] = { ilgt4, ilgt8, ilgt16up, ilgt16up }; |
| |
| typedef void (*GetLgtFunc)(const TxfmParam *txfm_param, int is_col, |
| const tran_high_t **lgtmtx, int ntx); |
| |
| static GetLgtFunc get_lgt_func[4] = { get_lgt4_from_pred, get_lgt8_from_pred, |
| get_lgt16up_from_pred, |
| get_lgt16up_from_pred }; |
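// Both tables above are indexed by log2(transform length) - 2; 16- and
// 32-point transforms share the ilgt16up / get_lgt16up_from_pred entries.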
| |
// This inline function corresponds to the upscaling before the transpose
// operation in the av1_iht* functions.
| static INLINE tran_low_t inv_upscale_wrt_txsize(const tran_high_t val, |
| const TX_SIZE tx_size) { |
| switch (tx_size) { |
| case TX_4X4: |
| case TX_8X8: |
| case TX_4X16: |
| case TX_16X4: |
| case TX_8X32: |
| case TX_32X8: return (tran_low_t)val; |
| case TX_4X8: |
| case TX_8X4: |
| case TX_8X16: |
| case TX_16X8: return (tran_low_t)dct_const_round_shift(val * Sqrt2); |
| default: assert(0); break; |
| } |
| return 0; |
| } |
| |
| // This inline function corresponds to the bit shift before summing with the |
| // destination in the av1_iht* functions |
| static INLINE tran_low_t inv_downscale_wrt_txsize(const tran_low_t val, |
| const TX_SIZE tx_size) { |
| switch (tx_size) { |
| case TX_4X4: return ROUND_POWER_OF_TWO(val, 4); |
| case TX_4X8: |
| case TX_8X4: |
| case TX_8X8: |
| case TX_4X16: |
| case TX_16X4: return ROUND_POWER_OF_TWO(val, 5); |
| case TX_8X16: |
| case TX_16X8: |
| case TX_8X32: |
| case TX_32X8: return ROUND_POWER_OF_TWO(val, 6); |
| default: assert(0); break; |
| } |
| return 0; |
| } |
| |
| void ilgt2d_from_pred_add(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_SIZE tx_size = txfm_param->tx_size; |
| const int w = tx_size_wide[tx_size]; |
| const int h = tx_size_high[tx_size]; |
| const int wlog2 = tx_size_wide_log2[tx_size]; |
| const int hlog2 = tx_size_high_log2[tx_size]; |
| assert(w <= 8 || h <= 8); |
| |
| int i, j; |
| // largest 1D size allowed for LGT: 32 |
| // largest 2D size allowed for LGT: 8x32=256 |
| tran_low_t tmp[256], out[256], temp1d[32]; |
| const tran_high_t *lgtmtx_col[1]; |
| const tran_high_t *lgtmtx_row[1]; |
| get_lgt_func[hlog2 - 2](txfm_param, 1, lgtmtx_col, w); |
| get_lgt_func[wlog2 - 2](txfm_param, 0, lgtmtx_row, h); |
| |
  // Both column-first (the path enabled by #if 1 below) and row-first
  // versions of the inverse transform are implemented here for future tests
  // (use different lgtmtx_col[i], and choose row- or column-first depending
  // on the transforms).
| #if 1 |
| // inverse column transforms |
| for (i = 0; i < w; ++i) { |
| // transpose |
| for (j = 0; j < h; ++j) tmp[i * h + j] = input[j * w + i]; |
| ilgt_func[hlog2 - 2](&tmp[i * h], temp1d, lgtmtx_col[0]); |
| // upscale, and store in place |
| for (j = 0; j < h; ++j) |
| tmp[i * h + j] = inv_upscale_wrt_txsize(temp1d[j], tx_size); |
| } |
| // inverse row transforms |
| for (i = 0; i < h; ++i) { |
| for (j = 0; j < w; ++j) temp1d[j] = tmp[j * h + i]; |
| ilgt_func[wlog2 - 2](temp1d, &out[i * w], lgtmtx_row[0]); |
| } |
| // downscale + sum with the destination |
| for (i = 0; i < h; ++i) { |
| for (j = 0; j < w; ++j) { |
| int d = i * stride + j; |
| int s = i * w + j; |
| dest[d] = |
| clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size)); |
| } |
| } |
| #else |
| // inverse row transforms |
| for (i = 0; i < h; ++i) { |
| ilgt_func[wlog2 - 2](input, temp1d, lgtmtx_row[0]); |
| // upscale and transpose (tmp[j*h+i] <--> tmp[j][i]) |
| for (j = 0; j < w; ++j) |
| tmp[j * h + i] = inv_upscale_wrt_txsize(temp1d[j], tx_size); |
| input += w; |
| } |
| // inverse column transforms |
| for (i = 0; i < w; ++i) |
| ilgt_func[hlog2 - 2](&tmp[i * h], &out[i * h], lgtmtx_col[0]); |
| // here, out[] is the transpose of 2D block of transform coefficients |
| |
| // downscale + transform + sum with dest |
| for (i = 0; i < h; ++i) { |
| for (j = 0; j < w; ++j) { |
| int d = i * stride + j; |
| int s = j * h + i; |
| dest[d] = |
| clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size)); |
| } |
| } |
| #endif |
| } |
| #endif // CONFIG_LGT_FROM_PRED |
| |
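// The av1_iht* functions below share the same structure: inverse transform
// the rows, transpose into a column-wise buffer, inverse transform the
// columns, optionally flip the source/destination strides for FLIPADST
// transform types, then round and add the result to the destination.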
| void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if !CONFIG_DAALA_TX4 |
| if (tx_type == DCT_DCT) { |
| aom_idct4x4_16_add(input, dest, stride); |
| return; |
| } |
| #endif |
| static const transform_2d IHT_4[] = { |
| #if CONFIG_DAALA_TX4 |
| { daala_idct4, daala_idct4 }, // DCT_DCT = 0 |
| { daala_idst4, daala_idct4 }, // ADST_DCT = 1 |
| { daala_idct4, daala_idst4 }, // DCT_ADST = 2 |
| { daala_idst4, daala_idst4 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst4, daala_idct4 }, // FLIPADST_DCT |
| { daala_idct4, daala_idst4 }, // DCT_FLIPADST |
| { daala_idst4, daala_idst4 }, // FLIPADST_FLIPADST |
| { daala_idst4, daala_idst4 }, // ADST_FLIPADST |
| { daala_idst4, daala_idst4 }, // FLIPADST_ADST |
| { daala_idtx4, daala_idtx4 }, // IDTX |
| { daala_idct4, daala_idtx4 }, // V_DCT |
| { daala_idtx4, daala_idct4 }, // H_DCT |
| { daala_idst4, daala_idtx4 }, // V_ADST |
| { daala_idtx4, daala_idst4 }, // H_ADST |
| { daala_idst4, daala_idtx4 }, // V_FLIPADST |
| { daala_idtx4, daala_idst4 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct4_c, aom_idct4_c }, // DCT_DCT = 0 |
| { aom_iadst4_c, aom_idct4_c }, // ADST_DCT = 1 |
| { aom_idct4_c, aom_iadst4_c }, // DCT_ADST = 2 |
| { aom_iadst4_c, aom_iadst4_c }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { aom_iadst4_c, aom_idct4_c }, // FLIPADST_DCT |
| { aom_idct4_c, aom_iadst4_c }, // DCT_FLIPADST |
| { aom_iadst4_c, aom_iadst4_c }, // FLIPADST_FLIPADST |
| { aom_iadst4_c, aom_iadst4_c }, // ADST_FLIPADST |
| { aom_iadst4_c, aom_iadst4_c }, // FLIPADST_ADST |
| { iidtx4_c, iidtx4_c }, // IDTX |
| { aom_idct4_c, iidtx4_c }, // V_DCT |
| { iidtx4_c, aom_idct4_c }, // H_DCT |
| { aom_iadst4_c, iidtx4_c }, // V_ADST |
| { iidtx4_c, aom_iadst4_c }, // H_ADST |
| { aom_iadst4_c, iidtx4_c }, // V_FLIPADST |
| { iidtx4_c, aom_iadst4_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| int i, j; |
| tran_low_t tmp[4][4]; |
| tran_low_t out[4][4]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = 4; |
| |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_col[1]; |
| const tran_high_t *lgtmtx_row[1]; |
| int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col); |
| int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row); |
| #endif |
| |
| // inverse transform row vectors |
| for (i = 0; i < 4; ++i) { |
| #if CONFIG_DAALA_TX4 |
| tran_low_t temp_in[4]; |
| for (j = 0; j < 4; j++) temp_in[j] = input[j] * 2; |
| IHT_4[tx_type].rows(temp_in, out[i]); |
| #else |
| #if CONFIG_LGT |
| if (use_lgt_row) |
| ilgt4(input, out[i], lgtmtx_row[0]); |
| else |
| #endif |
| IHT_4[tx_type].rows(input, out[i]); |
| #endif |
| input += 4; |
| } |
| |
| // transpose |
| for (i = 0; i < 4; i++) { |
| for (j = 0; j < 4; j++) { |
| tmp[j][i] = out[i][j]; |
| } |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < 4; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| ilgt4(tmp[i], out[i], lgtmtx_col[0]); |
| else |
| #endif |
| IHT_4[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < 4; ++i) { |
| for (j = 0; j < 4; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
| } |
| } |
| } |
| |
| void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_4x8[] = { |
| #if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 |
| { daala_idct8, daala_idct4 }, // DCT_DCT = 0 |
| { daala_idst8, daala_idct4 }, // ADST_DCT = 1 |
| { daala_idct8, daala_idst4 }, // DCT_ADST = 2 |
| { daala_idst8, daala_idst4 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst8, daala_idct4 }, // FLIPADST_DCT |
| { daala_idct8, daala_idst4 }, // DCT_FLIPADST |
| { daala_idst8, daala_idst4 }, // FLIPADST_FLIPADST |
| { daala_idst8, daala_idst4 }, // ADST_FLIPADST |
| { daala_idst8, daala_idst4 }, // FLIPADST_ADST |
| { daala_idtx8, daala_idtx4 }, // IDTX |
| { daala_idct8, daala_idtx4 }, // V_DCT |
| { daala_idtx8, daala_idct4 }, // H_DCT |
| { daala_idst8, daala_idtx4 }, // V_ADST |
| { daala_idtx8, daala_idst4 }, // H_ADST |
| { daala_idst8, daala_idtx4 }, // V_FLIPADST |
| { daala_idtx8, daala_idst4 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct8_c, aom_idct4_c }, // DCT_DCT |
| { aom_iadst8_c, aom_idct4_c }, // ADST_DCT |
| { aom_idct8_c, aom_iadst4_c }, // DCT_ADST |
| { aom_iadst8_c, aom_iadst4_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { aom_iadst8_c, aom_idct4_c }, // FLIPADST_DCT |
| { aom_idct8_c, aom_iadst4_c }, // DCT_FLIPADST |
| { aom_iadst8_c, aom_iadst4_c }, // FLIPADST_FLIPADST |
| { aom_iadst8_c, aom_iadst4_c }, // ADST_FLIPADST |
| { aom_iadst8_c, aom_iadst4_c }, // FLIPADST_ADST |
| { iidtx8_c, iidtx4_c }, // IDTX |
| { aom_idct8_c, iidtx4_c }, // V_DCT |
| { iidtx8_c, aom_idct4_c }, // H_DCT |
| { aom_iadst8_c, iidtx4_c }, // V_ADST |
| { iidtx8_c, aom_iadst4_c }, // H_ADST |
| { aom_iadst8_c, iidtx4_c }, // V_FLIPADST |
| { iidtx8_c, aom_iadst4_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| const int n = 4; |
| const int n2 = 8; |
| int i, j; |
| tran_low_t out[4][8], tmp[4][8], outtmp[4]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n2; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_col[1]; |
| const tran_high_t *lgtmtx_row[1]; |
| int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col); |
| int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row); |
| #endif |
| |
| // Multi-way scaling matrix (bits): |
| // LGT/AV1 row,col input+0, rowTX+.5, mid+.5, colTX+1, out-5 == -3 |
| // LGT row, Daala col input+0, rowTX+.5, mid+.5, colTX+0, out-4 == -3 |
| // Daala row, LGT col input+1, rowTX+0, mid+0, colTX+1, out-5 == -3 |
| // Daala row,col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3 |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n2; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_row) { |
| // Scaling cases 1 and 2 above |
| // No input scaling |
| // Row transform (LGT; scales up .5 bits) |
| ilgt4(input, outtmp, lgtmtx_row[0]); |
| // Transpose and mid scaling up by .5 bit |
| for (j = 0; j < n; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| } else { |
| #endif |
| #if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 |
| // Daala row transform; Scaling cases 3 and 4 above |
| tran_low_t temp_in[4]; |
| // Input scaling up by 1 bit |
| for (j = 0; j < n; j++) temp_in[j] = input[j] * 2; |
| // Row transform; Daala does not scale |
| IHT_4x8[tx_type].rows(temp_in, outtmp); |
| // Transpose; no mid scaling |
| for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j]; |
| #else |
| // AV1 row transform; Scaling case 1 only |
| // Row transform (AV1 scales up .5 bits) |
| IHT_4x8[tx_type].rows(input, outtmp); |
| // Transpose and mid scaling up by .5 bit |
| for (j = 0; j < n; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| #endif |
| #if CONFIG_LGT |
| } |
| #endif |
| input += n; |
| } |
| |
| // inverse transform column vectors |
| // AV1/LGT column TX scales up by 1 bit, Daala does not scale |
| for (i = 0; i < n; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| ilgt8(tmp[i], out[i], lgtmtx_col[0]); |
| else |
| #endif |
| IHT_4x8[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n2; ++i) { |
| for (j = 0; j < n; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| // Output Scaling cases 1, 3 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| else |
| #endif |
| // Output scaling cases 2, 4 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4)); |
| #else |
| // Output scaling case 1 only |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| #endif |
| } |
| } |
| } |
| |
| void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_8x4[] = { |
| #if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 |
| { daala_idct4, daala_idct8 }, // DCT_DCT = 0 |
| { daala_idst4, daala_idct8 }, // ADST_DCT = 1 |
| { daala_idct4, daala_idst8 }, // DCT_ADST = 2 |
| { daala_idst4, daala_idst8 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst4, daala_idct8 }, // FLIPADST_DCT |
| { daala_idct4, daala_idst8 }, // DCT_FLIPADST |
| { daala_idst4, daala_idst8 }, // FLIPADST_FLIPADST |
| { daala_idst4, daala_idst8 }, // ADST_FLIPADST |
| { daala_idst4, daala_idst8 }, // FLIPADST_ADST |
| { daala_idtx4, daala_idtx8 }, // IDTX |
| { daala_idct4, daala_idtx8 }, // V_DCT |
| { daala_idtx4, daala_idct8 }, // H_DCT |
| { daala_idst4, daala_idtx8 }, // V_ADST |
| { daala_idtx4, daala_idst8 }, // H_ADST |
| { daala_idst4, daala_idtx8 }, // V_FLIPADST |
| { daala_idtx4, daala_idst8 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct4_c, aom_idct8_c }, // DCT_DCT |
| { aom_iadst4_c, aom_idct8_c }, // ADST_DCT |
| { aom_idct4_c, aom_iadst8_c }, // DCT_ADST |
| { aom_iadst4_c, aom_iadst8_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { aom_iadst4_c, aom_idct8_c }, // FLIPADST_DCT |
| { aom_idct4_c, aom_iadst8_c }, // DCT_FLIPADST |
| { aom_iadst4_c, aom_iadst8_c }, // FLIPADST_FLIPADST |
| { aom_iadst4_c, aom_iadst8_c }, // ADST_FLIPADST |
| { aom_iadst4_c, aom_iadst8_c }, // FLIPADST_ADST |
| { iidtx4_c, iidtx8_c }, // IDTX |
| { aom_idct4_c, iidtx8_c }, // V_DCT |
| { iidtx4_c, aom_idct8_c }, // H_DCT |
| { aom_iadst4_c, iidtx8_c }, // V_ADST |
| { iidtx4_c, aom_iadst8_c }, // H_ADST |
| { aom_iadst4_c, iidtx8_c }, // V_FLIPADST |
| { iidtx4_c, aom_iadst8_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| const int n = 4; |
| const int n2 = 8; |
| |
| int i, j; |
| tran_low_t out[8][4], tmp[8][4], outtmp[8]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_col[1]; |
| const tran_high_t *lgtmtx_row[1]; |
| int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col); |
| int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row); |
| #endif |
| |
| // Multi-way scaling matrix (bits): |
| // LGT/AV1 row,col input+0, rowTX+1, mid+.5, colTX+.5, out-5 == -3 |
| // LGT row, Daala col input+0, rowTX+1, mid+.5, colTX+.5, out-4 == -3 |
| // Daala row, LGT col input+1, rowTX+0, mid+0, colTX+1, out-5 == -3 |
| // Daala row,col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3 |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_row) { |
| // Scaling cases 1 and 2 above |
| // No input scaling |
| // Row transform (LGT; scales up 1 bit) |
| ilgt8(input, outtmp, lgtmtx_row[0]); |
| // Transpose and mid scaling up by .5 bit |
| for (j = 0; j < n2; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| } else { |
| #endif |
| #if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 |
| // Daala row transform; Scaling cases 3 and 4 above |
| tran_low_t temp_in[8]; |
| // Input scaling up by 1 bit |
| for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2; |
| // Row transform; Daala does not scale |
| IHT_8x4[tx_type].rows(temp_in, outtmp); |
| // Transpose; no mid scaling |
| for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j]; |
| #else |
| // AV1 row transform; Scaling case 1 only |
| // Row transform (AV1 scales up 1 bit) |
| IHT_8x4[tx_type].rows(input, outtmp); |
| // Transpose and mid scaling up by .5 bit |
| for (j = 0; j < n2; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| #endif |
| #if CONFIG_LGT |
| } |
| #endif |
| input += n2; |
| } |
| |
| // inverse transform column vectors |
| // AV1 and LGT scale up by .5 bits; Daala does not scale |
| for (i = 0; i < n2; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| ilgt4(tmp[i], out[i], lgtmtx_col[0]); |
| else |
| #endif |
| IHT_8x4[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n; ++i) { |
| for (j = 0; j < n2; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| // Output scaling cases 1, 3 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| else |
| #endif |
| // Output scaling cases 2, 4 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4)); |
| #else |
| // Output scaling case 1 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| #endif |
| } |
| } |
| } |
| |
| void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_4x16[] = { |
| { aom_idct16_c, aom_idct4_c }, // DCT_DCT |
| { aom_iadst16_c, aom_idct4_c }, // ADST_DCT |
| { aom_idct16_c, aom_iadst4_c }, // DCT_ADST |
| { aom_iadst16_c, aom_iadst4_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { aom_iadst16_c, aom_idct4_c }, // FLIPADST_DCT |
| { aom_idct16_c, aom_iadst4_c }, // DCT_FLIPADST |
| { aom_iadst16_c, aom_iadst4_c }, // FLIPADST_FLIPADST |
| { aom_iadst16_c, aom_iadst4_c }, // ADST_FLIPADST |
| { aom_iadst16_c, aom_iadst4_c }, // FLIPADST_ADST |
| { iidtx16_c, iidtx4_c }, // IDTX |
| { aom_idct16_c, iidtx4_c }, // V_DCT |
| { iidtx16_c, aom_idct4_c }, // H_DCT |
| { aom_iadst16_c, iidtx4_c }, // V_ADST |
| { iidtx16_c, aom_iadst4_c }, // H_ADST |
| { aom_iadst16_c, iidtx4_c }, // V_FLIPADST |
| { iidtx16_c, aom_iadst4_c }, // H_FLIPADST |
| #endif |
| }; |
| |
| const int n = 4; |
| const int n4 = 16; |
| int i, j; |
| tran_low_t out[4][16], tmp[4][16], outtmp[4]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n4; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_row[1]; |
| int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row); |
| #endif |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n4; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_row) |
| ilgt4(input, outtmp, lgtmtx_row[0]); |
| else |
| #endif |
| IHT_4x16[tx_type].rows(input, outtmp); |
| for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j]; |
| input += n; |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < n; ++i) { |
| IHT_4x16[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n4; ++i) { |
| for (j = 0; j < n; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| } |
| } |
| } |
| |
| void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_16x4[] = { |
| { aom_idct4_c, aom_idct16_c }, // DCT_DCT |
| { aom_iadst4_c, aom_idct16_c }, // ADST_DCT |
| { aom_idct4_c, aom_iadst16_c }, // DCT_ADST |
| { aom_iadst4_c, aom_iadst16_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { aom_iadst4_c, aom_idct16_c }, // FLIPADST_DCT |
| { aom_idct4_c, aom_iadst16_c }, // DCT_FLIPADST |
| { aom_iadst4_c, aom_iadst16_c }, // FLIPADST_FLIPADST |
| { aom_iadst4_c, aom_iadst16_c }, // ADST_FLIPADST |
| { aom_iadst4_c, aom_iadst16_c }, // FLIPADST_ADST |
| { iidtx4_c, iidtx16_c }, // IDTX |
| { aom_idct4_c, iidtx16_c }, // V_DCT |
| { iidtx4_c, aom_idct16_c }, // H_DCT |
| { aom_iadst4_c, iidtx16_c }, // V_ADST |
| { iidtx4_c, aom_iadst16_c }, // H_ADST |
| { aom_iadst4_c, iidtx16_c }, // V_FLIPADST |
| { iidtx4_c, aom_iadst16_c }, // H_FLIPADST |
| #endif |
| }; |
| |
| const int n = 4; |
| const int n4 = 16; |
| |
| int i, j; |
| tran_low_t out[16][4], tmp[16][4], outtmp[16]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_col[1]; |
| int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col); |
| #endif |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n; ++i) { |
| IHT_16x4[tx_type].rows(input, outtmp); |
| for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j]; |
| input += n4; |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < n4; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| ilgt4(tmp[i], out[i], lgtmtx_col[0]); |
| else |
| #endif |
| IHT_16x4[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n; ++i) { |
| for (j = 0; j < n4; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| } |
| } |
| } |
| |
| void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_8x16[] = { |
| #if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16 |
| { daala_idct16, daala_idct8 }, // DCT_DCT = 0 |
| { daala_idst16, daala_idct8 }, // ADST_DCT = 1 |
| { daala_idct16, daala_idst8 }, // DCT_ADST = 2 |
| { daala_idst16, daala_idst8 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst16, daala_idct8 }, // FLIPADST_DCT |
| { daala_idct16, daala_idst8 }, // DCT_FLIPADST |
| { daala_idst16, daala_idst8 }, // FLIPADST_FLIPADST |
| { daala_idst16, daala_idst8 }, // ADST_FLIPADST |
| { daala_idst16, daala_idst8 }, // FLIPADST_ADST |
| { daala_idtx16, daala_idtx8 }, // IDTX |
| { daala_idct16, daala_idtx8 }, // V_DCT |
| { daala_idtx16, daala_idct8 }, // H_DCT |
| { daala_idst16, daala_idtx8 }, // V_ADST |
| { daala_idtx16, daala_idst8 }, // H_ADST |
| { daala_idst16, daala_idtx8 }, // V_FLIPADST |
| { daala_idtx16, daala_idst8 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct16_c, aom_idct8_c }, // DCT_DCT |
| { aom_iadst16_c, aom_idct8_c }, // ADST_DCT |
| { aom_idct16_c, aom_iadst8_c }, // DCT_ADST |
| { aom_iadst16_c, aom_iadst8_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { aom_iadst16_c, aom_idct8_c }, // FLIPADST_DCT |
| { aom_idct16_c, aom_iadst8_c }, // DCT_FLIPADST |
| { aom_iadst16_c, aom_iadst8_c }, // FLIPADST_FLIPADST |
| { aom_iadst16_c, aom_iadst8_c }, // ADST_FLIPADST |
| { aom_iadst16_c, aom_iadst8_c }, // FLIPADST_ADST |
| { iidtx16_c, iidtx8_c }, // IDTX |
| { aom_idct16_c, iidtx8_c }, // V_DCT |
| { iidtx16_c, aom_idct8_c }, // H_DCT |
| { aom_iadst16_c, iidtx8_c }, // V_ADST |
| { iidtx16_c, aom_iadst8_c }, // H_ADST |
| { aom_iadst16_c, iidtx8_c }, // V_FLIPADST |
| { iidtx16_c, aom_iadst8_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| const int n = 8; |
| const int n2 = 16; |
| int i, j; |
| tran_low_t out[8][16], tmp[8][16], outtmp[8]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n2; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_row[1]; |
| int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row); |
| #endif |
| |
| // Multi-way scaling matrix (bits): |
| // LGT/AV1 row, AV1 col input+0, rowTX+1, mid+.5, colTX+1.5, out-6 == -3 |
| // LGT row, Daala col input+0, rowTX+1, mid+0, colTX+0, out-4 == -3 |
| // Daala row, LGT col N/A (no 16-point LGT) |
| // Daala row,col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3 |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n2; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_row) { |
| // Scaling cases 1 and 2 above |
| // No input scaling |
| // Row transform (LGT; scales up 1 bit) |
| ilgt8(input, outtmp, lgtmtx_row[0]); |
| // Transpose and mid scaling |
| for (j = 0; j < n; ++j) { |
| #if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16 |
| // Mid scaling case 2 |
| tmp[j][i] = outtmp[j]; |
| #else |
| // Mid scaling case 1 |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| #endif |
| } |
| } else { |
| #endif |
| #if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16 |
| tran_low_t temp_in[8]; |
| // Input scaling case 4 |
| for (j = 0; j < n; j++) temp_in[j] = input[j] * 2; |
| // Row transform (Daala does not scale) |
| IHT_8x16[tx_type].rows(temp_in, outtmp); |
| // Transpose (no mid scaling) |
| for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j]; |
| #else |
| // Case 1; no input scaling |
| // Row transform (AV1 scales up 1 bit) |
| IHT_8x16[tx_type].rows(input, outtmp); |
| // Transpose and mid scaling up .5 bits |
| for (j = 0; j < n; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| #endif |
| #if CONFIG_LGT |
| } |
| #endif |
| input += n; |
| } |
| |
| // inverse transform column vectors |
  // AV1 column TX scales up by 1.5 bits, Daala does not scale
| for (i = 0; i < n; ++i) { |
| IHT_8x16[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n2; ++i) { |
| for (j = 0; j < n; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16 |
| // Output scaling cases 2 and 4 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4)); |
| #else |
| // Output scaling case 1 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| #endif |
| } |
| } |
| } |
| |
| void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_16x8[] = { |
| #if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16 |
| { daala_idct8, daala_idct16 }, // DCT_DCT = 0 |
| { daala_idst8, daala_idct16 }, // ADST_DCT = 1 |
| { daala_idct8, daala_idst16 }, // DCT_ADST = 2 |
| { daala_idst8, daala_idst16 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst8, daala_idct16 }, // FLIPADST_DCT |
| { daala_idct8, daala_idst16 }, // DCT_FLIPADST |
| { daala_idst8, daala_idst16 }, // FLIPADST_FLIPADST |
| { daala_idst8, daala_idst16 }, // ADST_FLIPADST |
| { daala_idst8, daala_idst16 }, // FLIPADST_ADST |
| { daala_idtx8, daala_idtx16 }, // IDTX |
| { daala_idct8, daala_idtx16 }, // V_DCT |
| { daala_idtx8, daala_idct16 }, // H_DCT |
| { daala_idst8, daala_idtx16 }, // V_ADST |
| { daala_idtx8, daala_idst16 }, // H_ADST |
| { daala_idst8, daala_idtx16 }, // V_FLIPADST |
| { daala_idtx8, daala_idst16 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct8_c, aom_idct16_c }, // DCT_DCT |
| { aom_iadst8_c, aom_idct16_c }, // ADST_DCT |
| { aom_idct8_c, aom_iadst16_c }, // DCT_ADST |
| { aom_iadst8_c, aom_iadst16_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { aom_iadst8_c, aom_idct16_c }, // FLIPADST_DCT |
| { aom_idct8_c, aom_iadst16_c }, // DCT_FLIPADST |
| { aom_iadst8_c, aom_iadst16_c }, // FLIPADST_FLIPADST |
| { aom_iadst8_c, aom_iadst16_c }, // ADST_FLIPADST |
| { aom_iadst8_c, aom_iadst16_c }, // FLIPADST_ADST |
| { iidtx8_c, iidtx16_c }, // IDTX |
| { aom_idct8_c, iidtx16_c }, // V_DCT |
| { iidtx8_c, aom_idct16_c }, // H_DCT |
| { aom_iadst8_c, iidtx16_c }, // V_ADST |
| { iidtx8_c, aom_iadst16_c }, // H_ADST |
| { aom_iadst8_c, iidtx16_c }, // V_FLIPADST |
| { iidtx8_c, aom_iadst16_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| const int n = 8; |
| const int n2 = 16; |
| |
| int i, j; |
| tran_low_t out[16][8], tmp[16][8], outtmp[16]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_col[1]; |
| int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col); |
| #endif |
| |
| // Multi-way scaling matrix (bits): |
| // AV1 row, LGT/AV1 col input+0, rowTX+1.5, mid+.5, colTX+1, out-6 == -3 |
| // LGT row, Daala col N/A (no 16-point LGT) |
| // Daala row, LGT col input+1, rowTX+0, mid+1, colTX+1, out-6 == -3 |
| // Daala row, col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3 |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n; ++i) { |
| #if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16 |
| tran_low_t temp_in[16]; |
| // Input scaling cases 3 and 4 |
| for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2; |
| // Daala row TX, no scaling |
| IHT_16x8[tx_type].rows(temp_in, outtmp); |
| // Transpose and mid scaling |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| // Case 3 |
| for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j] * 2; |
| else |
| #endif |
| // Case 4 |
| for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j]; |
| #else |
| // Case 1 |
| // No input scaling |
| // Row transform, AV1 scales up by 1.5 bits |
| IHT_16x8[tx_type].rows(input, outtmp); |
| // Transpose and mid scaling up .5 bits |
| for (j = 0; j < n2; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| #endif |
| input += n2; |
| } |
| |
| // inverse transform column vectors |
  // AV1/LGT scales up by 1 bit, Daala does not scale
| for (i = 0; i < n2; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| ilgt8(tmp[i], out[i], lgtmtx_col[0]); |
| else |
| #endif |
| IHT_16x8[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n; ++i) { |
| for (j = 0; j < n2; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| // Output scaling |
| #if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16 |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| // case 3 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| else |
| #endif |
| // case 4 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4)); |
| #else |
| // case 1 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| #endif |
| } |
| } |
| } |
| |
| void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_8x32[] = { |
| { aom_idct32_c, aom_idct8_c }, // DCT_DCT |
| { ihalfright32_c, aom_idct8_c }, // ADST_DCT |
| { aom_idct32_c, aom_iadst8_c }, // DCT_ADST |
| { ihalfright32_c, aom_iadst8_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { ihalfright32_c, aom_idct8_c }, // FLIPADST_DCT |
| { aom_idct32_c, aom_iadst8_c }, // DCT_FLIPADST |
| { ihalfright32_c, aom_iadst8_c }, // FLIPADST_FLIPADST |
| { ihalfright32_c, aom_iadst8_c }, // ADST_FLIPADST |
| { ihalfright32_c, aom_iadst8_c }, // FLIPADST_ADST |
| { iidtx32_c, iidtx8_c }, // IDTX |
| { aom_idct32_c, iidtx8_c }, // V_DCT |
| { iidtx32_c, aom_idct8_c }, // H_DCT |
| { ihalfright32_c, iidtx8_c }, // V_ADST |
| { iidtx32_c, aom_iadst8_c }, // H_ADST |
| { ihalfright32_c, iidtx8_c }, // V_FLIPADST |
| { iidtx32_c, aom_iadst8_c }, // H_FLIPADST |
| #endif |
| }; |
| |
| const int n = 8; |
| const int n4 = 32; |
| int i, j; |
| tran_low_t out[8][32], tmp[8][32], outtmp[8]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n4; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_row[1]; |
| int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row); |
| #endif |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n4; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_row) |
| ilgt8(input, outtmp, lgtmtx_row[0]); |
| else |
| #endif |
| IHT_8x32[tx_type].rows(input, outtmp); |
| for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j]; |
| input += n; |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < n; ++i) { |
| IHT_8x32[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n4; ++i) { |
| for (j = 0; j < n; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| } |
| } |
| } |
| |
| void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_32x8[] = { |
| { aom_idct8_c, aom_idct32_c }, // DCT_DCT |
| { aom_iadst8_c, aom_idct32_c }, // ADST_DCT |
| { aom_idct8_c, ihalfright32_c }, // DCT_ADST |
| { aom_iadst8_c, ihalfright32_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { aom_iadst8_c, aom_idct32_c }, // FLIPADST_DCT |
| { aom_idct8_c, ihalfright32_c }, // DCT_FLIPADST |
| { aom_iadst8_c, ihalfright32_c }, // FLIPADST_FLIPADST |
| { aom_iadst8_c, ihalfright32_c }, // ADST_FLIPADST |
| { aom_iadst8_c, ihalfright32_c }, // FLIPADST_ADST |
| { iidtx8_c, iidtx32_c }, // IDTX |
| { aom_idct8_c, iidtx32_c }, // V_DCT |
| { iidtx8_c, aom_idct32_c }, // H_DCT |
| { aom_iadst8_c, iidtx32_c }, // V_ADST |
| { iidtx8_c, ihalfright32_c }, // H_ADST |
| { aom_iadst8_c, iidtx32_c }, // V_FLIPADST |
| { iidtx8_c, ihalfright32_c }, // H_FLIPADST |
| #endif |
| }; |
| |
| const int n = 8; |
| const int n4 = 32; |
| |
| int i, j; |
| tran_low_t out[32][8], tmp[32][8], outtmp[32]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_col[1]; |
| int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col); |
| #endif |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n; ++i) { |
| IHT_32x8[tx_type].rows(input, outtmp); |
| for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j]; |
| input += n4; |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < n4; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| ilgt8(tmp[i], out[i], lgtmtx_col[0]); |
| else |
| #endif |
| IHT_32x8[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n; ++i) { |
| for (j = 0; j < n4; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| } |
| } |
| } |
| |
| void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_16x32[] = { |
| #if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32 |
| { daala_idct32, daala_idct16 }, // DCT_DCT = 0 |
| { daala_idst32, daala_idct16 }, // ADST_DCT = 1 |
| { daala_idct32, daala_idst16 }, // DCT_ADST = 2 |
| { daala_idst32, daala_idst16 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst32, daala_idct16 }, // FLIPADST_DCT |
| { daala_idct32, daala_idst16 }, // DCT_FLIPADST |
| { daala_idst32, daala_idst16 }, // FLIPADST_FLIPADST |
| { daala_idst32, daala_idst16 }, // ADST_FLIPADST |
| { daala_idst32, daala_idst16 }, // FLIPADST_ADST |
| { daala_idtx32, daala_idtx16 }, // IDTX |
| { daala_idct32, daala_idtx16 }, // V_DCT |
| { daala_idtx32, daala_idct16 }, // H_DCT |
| { daala_idst32, daala_idtx16 }, // V_ADST |
| { daala_idtx32, daala_idst16 }, // H_ADST |
| { daala_idst32, daala_idtx16 }, // V_FLIPADST |
| { daala_idtx32, daala_idst16 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct32_c, aom_idct16_c }, // DCT_DCT |
| { ihalfright32_c, aom_idct16_c }, // ADST_DCT |
| { aom_idct32_c, aom_iadst16_c }, // DCT_ADST |
| { ihalfright32_c, aom_iadst16_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { ihalfright32_c, aom_idct16_c }, // FLIPADST_DCT |
| { aom_idct32_c, aom_iadst16_c }, // DCT_FLIPADST |
| { ihalfright32_c, aom_iadst16_c }, // FLIPADST_FLIPADST |
| { ihalfright32_c, aom_iadst16_c }, // ADST_FLIPADST |
| { ihalfright32_c, aom_iadst16_c }, // FLIPADST_ADST |
| { iidtx32_c, iidtx16_c }, // IDTX |
| { aom_idct32_c, iidtx16_c }, // V_DCT |
| { iidtx32_c, aom_idct16_c }, // H_DCT |
| { ihalfright32_c, iidtx16_c }, // V_ADST |
| { iidtx32_c, aom_iadst16_c }, // H_ADST |
| { ihalfright32_c, iidtx16_c }, // V_FLIPADST |
| { iidtx32_c, aom_iadst16_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| const int n = 16; |
| const int n2 = 32; |
| int i, j; |
| tran_low_t out[16][32], tmp[16][32], outtmp[16]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n2; |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n2; ++i) { |
| #if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32 |
| tran_low_t temp_in[16]; |
| for (j = 0; j < n; j++) temp_in[j] = input[j] * 2; |
| IHT_16x32[tx_type].rows(temp_in, outtmp); |
| for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j] * 4; |
| #else |
| IHT_16x32[tx_type].rows(input, outtmp); |
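| // The extra sqrt(2) factor compensates for the 2:1 aspect ratio so the |
| // combined row/column scaling matches that of the square transforms. |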
| for (j = 0; j < n; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| #endif |
| input += n; |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]); |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n2; ++i) { |
| for (j = 0; j < n; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| #else |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| #endif |
| } |
| } |
| } |
| |
| void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_32x16[] = { |
| #if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32 |
| { daala_idct16, daala_idct32 }, // DCT_DCT = 0 |
| { daala_idst16, daala_idct32 }, // ADST_DCT = 1 |
| { daala_idct16, daala_idst32 }, // DCT_ADST = 2 |
| { daala_idst16, daala_idst32 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst16, daala_idct32 }, // FLIPADST_DCT |
| { daala_idct16, daala_idst32 }, // DCT_FLIPADST |
| { daala_idst16, daala_idst32 }, // FLIPADST_FLIPADST |
| { daala_idst16, daala_idst32 }, // ADST_FLIPADST |
| { daala_idst16, daala_idst32 }, // FLIPADST_ADST |
| { daala_idtx16, daala_idtx32 }, // IDTX |
| { daala_idct16, daala_idtx32 }, // V_DCT |
| { daala_idtx16, daala_idct32 }, // H_DCT |
| { daala_idst16, daala_idtx32 }, // V_ADST |
| { daala_idtx16, daala_idst32 }, // H_ADST |
| { daala_idst16, daala_idtx32 }, // V_FLIPADST |
| { daala_idtx16, daala_idst32 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct16_c, aom_idct32_c }, // DCT_DCT |
| { aom_iadst16_c, aom_idct32_c }, // ADST_DCT |
| { aom_idct16_c, ihalfright32_c }, // DCT_ADST |
| { aom_iadst16_c, ihalfright32_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { aom_iadst16_c, aom_idct32_c }, // FLIPADST_DCT |
| { aom_idct16_c, ihalfright32_c }, // DCT_FLIPADST |
| { aom_iadst16_c, ihalfright32_c }, // FLIPADST_FLIPADST |
| { aom_iadst16_c, ihalfright32_c }, // ADST_FLIPADST |
| { aom_iadst16_c, ihalfright32_c }, // FLIPADST_ADST |
| { iidtx16_c, iidtx32_c }, // IDTX |
| { aom_idct16_c, iidtx32_c }, // V_DCT |
| { iidtx16_c, aom_idct32_c }, // H_DCT |
| { aom_iadst16_c, iidtx32_c }, // V_ADST |
| { iidtx16_c, ihalfright32_c }, // H_ADST |
| { aom_iadst16_c, iidtx32_c }, // V_FLIPADST |
| { iidtx16_c, ihalfright32_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| const int n = 16; |
| const int n2 = 32; |
| |
| int i, j; |
| tran_low_t out[32][16], tmp[32][16], outtmp[32]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n; |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n; ++i) { |
| #if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32 |
| tran_low_t temp_in[32]; |
| for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2; |
| IHT_32x16[tx_type].rows(temp_in, outtmp); |
| for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j] * 4; |
| #else |
| IHT_32x16[tx_type].rows(input, outtmp); |
| for (j = 0; j < n2; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); |
| #endif |
| input += n2; |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]); |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n; ++i) { |
| for (j = 0; j < n2; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| #else |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| #endif |
| } |
| } |
| } |
| |
| void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_8[] = { |
| #if CONFIG_DAALA_TX8 |
| { daala_idct8, daala_idct8 }, // DCT_DCT = 0 |
| { daala_idst8, daala_idct8 }, // ADST_DCT = 1 |
| { daala_idct8, daala_idst8 }, // DCT_ADST = 2 |
| { daala_idst8, daala_idst8 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst8, daala_idct8 }, // FLIPADST_DCT |
| { daala_idct8, daala_idst8 }, // DCT_FLIPADST |
| { daala_idst8, daala_idst8 }, // FLIPADST_FLIPADST |
| { daala_idst8, daala_idst8 }, // ADST_FLIPADST |
| { daala_idst8, daala_idst8 }, // FLIPADST_ADST |
| { daala_idtx8, daala_idtx8 }, // IDTX |
| { daala_idct8, daala_idtx8 }, // V_DCT |
| { daala_idtx8, daala_idct8 }, // H_DCT |
| { daala_idst8, daala_idtx8 }, // V_ADST |
| { daala_idtx8, daala_idst8 }, // H_ADST |
| { daala_idst8, daala_idtx8 }, // V_FLIPADST |
| { daala_idtx8, daala_idst8 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct8_c, aom_idct8_c }, // DCT_DCT = 0 |
| { aom_iadst8_c, aom_idct8_c }, // ADST_DCT = 1 |
| { aom_idct8_c, aom_iadst8_c }, // DCT_ADST = 2 |
| { aom_iadst8_c, aom_iadst8_c }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { aom_iadst8_c, aom_idct8_c }, // FLIPADST_DCT |
| { aom_idct8_c, aom_iadst8_c }, // DCT_FLIPADST |
| { aom_iadst8_c, aom_iadst8_c }, // FLIPADST_FLIPADST |
| { aom_iadst8_c, aom_iadst8_c }, // ADST_FLIPADST |
| { aom_iadst8_c, aom_iadst8_c }, // FLIPADST_ADST |
| { iidtx8_c, iidtx8_c }, // IDTX |
| { aom_idct8_c, iidtx8_c }, // V_DCT |
| { iidtx8_c, aom_idct8_c }, // H_DCT |
| { aom_iadst8_c, iidtx8_c }, // V_ADST |
| { iidtx8_c, aom_iadst8_c }, // H_ADST |
| { aom_iadst8_c, iidtx8_c }, // V_FLIPADST |
| { iidtx8_c, aom_iadst8_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| int i, j; |
| tran_low_t tmp[8][8]; |
| tran_low_t out[8][8]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = 8; |
| |
| #if CONFIG_LGT |
| const tran_high_t *lgtmtx_col[1]; |
| const tran_high_t *lgtmtx_row[1]; |
| int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col); |
| int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row); |
| #endif |
| |
| // inverse transform row vectors |
| for (i = 0; i < 8; ++i) { |
| #if CONFIG_DAALA_TX8 |
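| // The Daala path scales the input up by 2 and compensates with a smaller |
| // final downshift (4 instead of 5). |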
| tran_low_t temp_in[8]; |
| for (j = 0; j < 8; j++) temp_in[j] = input[j] * 2; |
| IHT_8[tx_type].rows(temp_in, out[i]); |
| #else |
| #if CONFIG_LGT |
| if (use_lgt_row) |
| ilgt8(input, out[i], lgtmtx_row[0]); |
| else |
| #endif |
| IHT_8[tx_type].rows(input, out[i]); |
| #endif |
| input += 8; |
| } |
| |
| // transpose |
| for (i = 0; i < 8; i++) { |
| for (j = 0; j < 8; j++) { |
| tmp[j][i] = out[i][j]; |
| } |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < 8; ++i) { |
| #if CONFIG_LGT |
| if (use_lgt_col) |
| ilgt8(tmp[i], out[i], lgtmtx_col[0]); |
| else |
| #endif |
| IHT_8[tx_type].cols(tmp[i], out[i]); |
| } |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < 8; ++i) { |
| for (j = 0; j < 8; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX8 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4)); |
| #else |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| #endif |
| } |
| } |
| } |
| |
| void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_16[] = { |
| #if CONFIG_DAALA_TX16 |
| { daala_idct16, daala_idct16 }, // DCT_DCT = 0 |
| { daala_idst16, daala_idct16 }, // ADST_DCT = 1 |
| { daala_idct16, daala_idst16 }, // DCT_ADST = 2 |
| { daala_idst16, daala_idst16 }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { daala_idst16, daala_idct16 }, // FLIPADST_DCT |
| { daala_idct16, daala_idst16 }, // DCT_FLIPADST |
| { daala_idst16, daala_idst16 }, // FLIPADST_FLIPADST |
| { daala_idst16, daala_idst16 }, // ADST_FLIPADST |
| { daala_idst16, daala_idst16 }, // FLIPADST_ADST |
| { daala_idtx16, daala_idtx16 }, // IDTX |
| { daala_idct16, daala_idtx16 }, // V_DCT |
| { daala_idtx16, daala_idct16 }, // H_DCT |
| { daala_idst16, daala_idtx16 }, // V_ADST |
| { daala_idtx16, daala_idst16 }, // H_ADST |
| { daala_idst16, daala_idtx16 }, // V_FLIPADST |
| { daala_idtx16, daala_idst16 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct16_c, aom_idct16_c }, // DCT_DCT = 0 |
| { aom_iadst16_c, aom_idct16_c }, // ADST_DCT = 1 |
| { aom_idct16_c, aom_iadst16_c }, // DCT_ADST = 2 |
| { aom_iadst16_c, aom_iadst16_c }, // ADST_ADST = 3 |
| #if CONFIG_EXT_TX |
| { aom_iadst16_c, aom_idct16_c }, // FLIPADST_DCT |
| { aom_idct16_c, aom_iadst16_c }, // DCT_FLIPADST |
| { aom_iadst16_c, aom_iadst16_c }, // FLIPADST_FLIPADST |
| { aom_iadst16_c, aom_iadst16_c }, // ADST_FLIPADST |
| { aom_iadst16_c, aom_iadst16_c }, // FLIPADST_ADST |
| { iidtx16_c, iidtx16_c }, // IDTX |
| { aom_idct16_c, iidtx16_c }, // V_DCT |
| { iidtx16_c, aom_idct16_c }, // H_DCT |
| { aom_iadst16_c, iidtx16_c }, // V_ADST |
| { iidtx16_c, aom_iadst16_c }, // H_ADST |
| { aom_iadst16_c, iidtx16_c }, // V_FLIPADST |
| { iidtx16_c, aom_iadst16_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| int i, j; |
| tran_low_t tmp[16][16]; |
| tran_low_t out[16][16]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = 16; |
| |
| // inverse transform row vectors |
| for (i = 0; i < 16; ++i) { |
| #if CONFIG_DAALA_TX16 |
| tran_low_t temp_in[16]; |
| for (j = 0; j < 16; j++) temp_in[j] = input[j] * 2; |
| IHT_16[tx_type].rows(temp_in, out[i]); |
| #else |
| IHT_16[tx_type].rows(input, out[i]); |
| #endif |
| input += 16; |
| } |
| |
| // transpose |
| for (i = 0; i < 16; i++) { |
| for (j = 0; j < 16; j++) { |
| tmp[j][i] = out[i][j]; |
| } |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < 16; ++i) IHT_16[tx_type].cols(tmp[i], out[i]); |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < 16; ++i) { |
| for (j = 0; j < 16; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX16 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4)); |
| #else |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| #endif |
| } |
| } |
| } |
| |
| #if CONFIG_EXT_TX || CONFIG_DAALA_TX32 |
| void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_32[] = { |
| #if CONFIG_DAALA_TX32 |
| { daala_idct32, daala_idct32 }, // DCT_DCT |
| #if CONFIG_EXT_TX |
| { daala_idst32, daala_idct32 }, // ADST_DCT |
| { daala_idct32, daala_idst32 }, // DCT_ADST |
| { daala_idst32, daala_idst32 }, // ADST_ADST |
| { daala_idst32, daala_idct32 }, // FLIPADST_DCT |
| { daala_idct32, daala_idst32 }, // DCT_FLIPADST |
| { daala_idst32, daala_idst32 }, // FLIPADST_FLIPADST |
| { daala_idst32, daala_idst32 }, // ADST_FLIPADST |
| { daala_idst32, daala_idst32 }, // FLIPADST_ADST |
| { daala_idtx32, daala_idtx32 }, // IDTX |
| { daala_idct32, daala_idtx32 }, // V_DCT |
| { daala_idtx32, daala_idct32 }, // H_DCT |
| { daala_idst32, daala_idtx32 }, // V_ADST |
| { daala_idtx32, daala_idst32 }, // H_ADST |
| { daala_idst32, daala_idtx32 }, // V_FLIPADST |
| { daala_idtx32, daala_idst32 }, // H_FLIPADST |
| #endif |
| #else |
| { aom_idct32_c, aom_idct32_c }, // DCT_DCT |
| #if CONFIG_EXT_TX |
| { ihalfright32_c, aom_idct32_c }, // ADST_DCT |
| { aom_idct32_c, ihalfright32_c }, // DCT_ADST |
| { ihalfright32_c, ihalfright32_c }, // ADST_ADST |
| { ihalfright32_c, aom_idct32_c }, // FLIPADST_DCT |
| { aom_idct32_c, ihalfright32_c }, // DCT_FLIPADST |
| { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST |
| { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST |
| { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST |
| { iidtx32_c, iidtx32_c }, // IDTX |
| { aom_idct32_c, iidtx32_c }, // V_DCT |
| { iidtx32_c, aom_idct32_c }, // H_DCT |
| { ihalfright32_c, iidtx32_c }, // V_ADST |
| { iidtx32_c, ihalfright32_c }, // H_ADST |
| { ihalfright32_c, iidtx32_c }, // V_FLIPADST |
| { iidtx32_c, ihalfright32_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| int i, j; |
| tran_low_t tmp[32][32]; |
| tran_low_t out[32][32]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = 32; |
| |
| // inverse transform row vectors |
| for (i = 0; i < 32; ++i) { |
| #if CONFIG_DAALA_TX32 |
| tran_low_t temp_in[32]; |
| for (j = 0; j < 32; j++) temp_in[j] = input[j] * 2; |
| IHT_32[tx_type].rows(temp_in, out[i]); |
| #else |
| IHT_32[tx_type].rows(input, out[i]); |
| #endif |
| input += 32; |
| } |
| |
| // transpose |
| for (i = 0; i < 32; i++) { |
| for (j = 0; j < 32; j++) { |
| #if CONFIG_DAALA_TX32 |
| tmp[j][i] = out[i][j] * 4; |
| #else |
| tmp[j][i] = out[i][j]; |
| #endif |
| } |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < 32; ++i) IHT_32[tx_type].cols(tmp[i], out[i]); |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < 32; ++i) { |
| for (j = 0; j < 32; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX32 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| #else |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6)); |
| #endif |
| } |
| } |
| } |
| #endif // CONFIG_EXT_TX || CONFIG_DAALA_TX32 |
| |
| #if CONFIG_TX64X64 |
| void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_64[] = { |
| #if CONFIG_DAALA_TX64 |
| { daala_idct64, daala_idct64 }, // DCT_DCT |
| { daala_idst64, daala_idct64 }, // ADST_DCT |
| { daala_idct64, daala_idst64 }, // DCT_ADST |
| { daala_idst64, daala_idst64 }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { daala_idst64, daala_idct64 }, // FLIPADST_DCT |
| { daala_idct64, daala_idst64 }, // DCT_FLIPADST |
| { daala_idst64, daala_idst64 }, // FLIPADST_FLIPADST |
| { daala_idst64, daala_idst64 }, // ADST_FLIPADST |
| { daala_idst64, daala_idst64 }, // FLIPADST_ADST |
| { daala_idtx64, daala_idtx64 }, // IDTX |
| { daala_idct64, daala_idtx64 }, // V_DCT |
| { daala_idtx64, daala_idct64 }, // H_DCT |
| { daala_idst64, daala_idtx64 }, // V_ADST |
| { daala_idtx64, daala_idst64 }, // H_ADST |
| { daala_idst64, daala_idtx64 }, // V_FLIPADST |
| { daala_idtx64, daala_idst64 }, // H_FLIPADST |
| #endif |
| #else |
| { idct64_col_c, idct64_row_c }, // DCT_DCT |
| { ihalfright64_c, idct64_row_c }, // ADST_DCT |
| { idct64_col_c, ihalfright64_c }, // DCT_ADST |
| { ihalfright64_c, ihalfright64_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { ihalfright64_c, idct64_row_c }, // FLIPADST_DCT |
| { idct64_col_c, ihalfright64_c }, // DCT_FLIPADST |
| { ihalfright64_c, ihalfright64_c }, // FLIPADST_FLIPADST |
| { ihalfright64_c, ihalfright64_c }, // ADST_FLIPADST |
| { ihalfright64_c, ihalfright64_c }, // FLIPADST_ADST |
| { iidtx64_c, iidtx64_c }, // IDTX |
| { idct64_col_c, iidtx64_c }, // V_DCT |
| { iidtx64_c, idct64_row_c }, // H_DCT |
| { ihalfright64_c, iidtx64_c }, // V_ADST |
| { iidtx64_c, ihalfright64_c }, // H_ADST |
| { ihalfright64_c, iidtx64_c }, // V_FLIPADST |
| { iidtx64_c, ihalfright64_c }, // H_FLIPADST |
| #endif |
| #endif |
| }; |
| |
| int i, j; |
| tran_low_t tmp[64][64]; |
| tran_low_t out[64][64]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = 64; |
| |
| // inverse transform row vectors |
| for (i = 0; i < 64; ++i) { |
| #if CONFIG_DAALA_TX64 |
| tran_low_t temp_in[64]; |
| for (j = 0; j < 64; j++) temp_in[j] = input[j] * 2; |
| IHT_64[tx_type].rows(temp_in, out[i]); |
| // Do not rescale intermediate for Daala |
| #else |
| IHT_64[tx_type].rows(input, out[i]); |
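| // Scale the intermediate row output down by one bit; the Daala path above |
| // skips this rescale. |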
| for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1); |
| #endif |
| input += 64; |
| } |
| |
| // transpose |
| for (i = 0; i < 64; i++) { |
| for (j = 0; j < 64; j++) { |
| tmp[j][i] = out[i][j]; |
| } |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < 64; ++i) IHT_64[tx_type].cols(tmp[i], out[i]); |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64); |
| #endif // CONFIG_EXT_TX |
| |
| // Sum with the destination |
| for (i = 0; i < 64; ++i) { |
| for (j = 0; j < 64; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| #if CONFIG_DAALA_TX64 |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 2)); |
| #else |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| #endif |
| } |
| } |
| } |
| |
| void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_64x32[] = { |
| { aom_idct32_c, idct64_row_c }, // DCT_DCT |
| { ihalfright32_c, idct64_row_c }, // ADST_DCT |
| { aom_idct32_c, ihalfright64_c }, // DCT_ADST |
| { ihalfright32_c, ihalfright64_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { ihalfright32_c, idct64_row_c }, // FLIPADST_DCT |
| { aom_idct32_c, ihalfright64_c }, // DCT_FLIPADST |
| { ihalfright32_c, ihalfright64_c }, // FLIPADST_FLIPADST |
| { ihalfright32_c, ihalfright64_c }, // ADST_FLIPADST |
| { ihalfright32_c, ihalfright64_c }, // FLIPADST_ADST |
| { iidtx32_c, iidtx64_c }, // IDTX |
| { aom_idct32_c, iidtx64_c }, // V_DCT |
| { iidtx32_c, idct64_row_c }, // H_DCT |
| { ihalfright32_c, iidtx64_c }, // V_ADST |
| { iidtx32_c, ihalfright64_c }, // H_ADST |
| { ihalfright32_c, iidtx64_c }, // V_FLIPADST |
| { iidtx32_c, ihalfright64_c }, // H_FLIPADST |
| #endif |
| }; |
| const int n = 32; |
| const int n2 = 64; |
| |
| int i, j; |
| tran_low_t out[64][32], tmp[64][32], outtmp[64]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n; |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n; ++i) { |
| IHT_64x32[tx_type].rows(input, outtmp); |
| for (j = 0; j < n2; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2); |
| input += n2; |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < n2; ++i) IHT_64x32[tx_type].cols(tmp[i], out[i]); |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n; ++i) { |
| for (j = 0; j < n2; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| } |
| } |
| } |
| |
| void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| #if CONFIG_MRC_TX |
| assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); |
| #endif // CONFIG_MRC_TX |
| #if CONFIG_DCT_ONLY |
| assert(tx_type == DCT_DCT); |
| #endif |
| static const transform_2d IHT_32x64[] = { |
| { idct64_col_c, aom_idct32_c }, // DCT_DCT |
| { ihalfright64_c, aom_idct32_c }, // ADST_DCT |
| { idct64_col_c, ihalfright32_c }, // DCT_ADST |
| { ihalfright64_c, ihalfright32_c }, // ADST_ADST |
| #if CONFIG_EXT_TX |
| { ihalfright64_c, aom_idct32_c }, // FLIPADST_DCT |
| { idct64_col_c, ihalfright32_c }, // DCT_FLIPADST |
| { ihalfright64_c, ihalfright32_c }, // FLIPADST_FLIPADST |
| { ihalfright64_c, ihalfright32_c }, // ADST_FLIPADST |
| { ihalfright64_c, ihalfright32_c }, // FLIPADST_ADST |
| { iidtx64_c, iidtx32_c }, // IDTX |
| { idct64_col_c, iidtx32_c }, // V_DCT |
| { iidtx64_c, aom_idct32_c }, // H_DCT |
| { ihalfright64_c, iidtx32_c }, // V_ADST |
| { iidtx64_c, ihalfright32_c }, // H_ADST |
| { ihalfright64_c, iidtx32_c }, // V_FLIPADST |
| { iidtx64_c, ihalfright32_c }, // H_FLIPADST |
| #endif |
| }; |
| |
| const int n = 32; |
| const int n2 = 64; |
| int i, j; |
| tran_low_t out[32][64], tmp[32][64], outtmp[32]; |
| tran_low_t *outp = &out[0][0]; |
| int outstride = n2; |
| |
| // inverse transform row vectors and transpose |
| for (i = 0; i < n2; ++i) { |
| IHT_32x64[tx_type].rows(input, outtmp); |
| for (j = 0; j < n; ++j) |
| tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2); |
| input += n; |
| } |
| |
| // inverse transform column vectors |
| for (i = 0; i < n; ++i) IHT_32x64[tx_type].cols(tmp[i], out[i]); |
| |
| #if CONFIG_EXT_TX |
| maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n); |
| #endif |
| |
| // Sum with the destination |
| for (i = 0; i < n2; ++i) { |
| for (j = 0; j < n; ++j) { |
| int d = i * stride + j; |
| int s = j * outstride + i; |
| dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5)); |
| } |
| } |
| } |
| |
| #endif // CONFIG_TX64X64 |
| |
| // idct |
| void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const int eob = txfm_param->eob; |
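| // eob > 1 means coefficients beyond DC are present; otherwise take the |
| // cheaper DC-only path. |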
| if (eob > 1) |
| av1_iht4x4_16_add(input, dest, stride, txfm_param); |
| else |
| aom_idct4x4_1_add(input, dest, stride); |
| } |
| |
| void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const int eob = txfm_param->eob; |
| if (eob > 1) |
| aom_iwht4x4_16_add(input, dest, stride); |
| else |
| aom_iwht4x4_1_add(input, dest, stride); |
| } |
| |
| #if !CONFIG_DAALA_TX8 |
| static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| // When dc is 1, input[0] is the reconstructed value and does not need |
| // dequantization. Also, when dc is 1, dc is counted in eob, so eob >= 1. |
| |
| // The calculation can be simplified if there are not many non-zero DCT |
| // coefficients. Use eob to decide what to do. |
| // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c. |
| // Combine that with code here. |
| #if CONFIG_ADAPT_SCAN |
| const int16_t half = txfm_param->eob_threshold[0]; |
| #else |
| const int16_t half = 12; |
| #endif |
| |
| const int eob = txfm_param->eob; |
| if (eob == 1) |
| // DC only DCT coefficient |
| aom_idct8x8_1_add(input, dest, stride); |
| else if (eob <= half) |
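| // non-zero coeff only in upper-left 4x4 |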
| aom_idct8x8_12_add(input, dest, stride); |
| else |
| aom_idct8x8_64_add(input, dest, stride); |
| } |
| #endif |
| |
| #if !CONFIG_DAALA_TX16 |
| static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| // The calculation can be simplified if there are not many non-zero DCT |
| // coefficients. Use eob to separate the different cases. |
| #if CONFIG_ADAPT_SCAN |
| const int16_t half = txfm_param->eob_threshold[0]; |
| const int16_t quarter = txfm_param->eob_threshold[1]; |
| #else |
| const int16_t half = 38; |
| const int16_t quarter = 10; |
| #endif |
| |
| const int eob = txfm_param->eob; |
| if (eob == 1) /* DC only DCT coefficient. */ |
| aom_idct16x16_1_add(input, dest, stride); |
| else if (eob <= quarter) |
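| // non-zero coeff only in upper-left 4x4 |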
| aom_idct16x16_10_add(input, dest, stride); |
| else if (eob <= half) |
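| // non-zero coeff only in upper-left 8x8 |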
| aom_idct16x16_38_add(input, dest, stride); |
| else |
| aom_idct16x16_256_add(input, dest, stride); |
| } |
| #endif |
| |
| #if CONFIG_MRC_TX |
| static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| #if CONFIG_ADAPT_SCAN |
| const int16_t half = txfm_param->eob_threshold[0]; |
| const int16_t quarter = txfm_param->eob_threshold[1]; |
| #else |
| const int16_t half = 135; |
| const int16_t quarter = 34; |
| #endif |
| |
| const int eob = txfm_param->eob; |
| int n_masked_vals = 0; |
| uint8_t *mask; |
| uint8_t mask_tmp[32 * 32]; |
| if (eob == 1) { |
| aom_idct32x32_1_add_c(input, dest, stride); |
| } else { |
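| // Use the mask signaled in the bitstream when MRC mask signaling is enabled |
| // for this block type; otherwise derive the mask from the predictor. |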
| if ((txfm_param->is_inter && SIGNAL_MRC_MASK_INTER) || |
| (!txfm_param->is_inter && SIGNAL_MRC_MASK_INTRA)) { |
| mask = txfm_param->mask; |
| } else { |
| n_masked_vals = |
| get_mrc_pred_mask(txfm_param->dst, txfm_param->stride, mask_tmp, 32, |
| 32, 32, txfm_param->is_inter); |
| if (!is_valid_mrc_mask(n_masked_vals, 32, 32)) |
| assert(0 && "Invalid MRC mask"); |
| mask = mask_tmp; |
| } |
| if (eob <= quarter) |
| // non-zero coeff only in upper-left 8x8 |
| aom_imrc32x32_34_add_c(input, dest, stride, mask); |
| else if (eob <= half) |
| // non-zero coeff only in upper-left 16x16 |
| aom_imrc32x32_135_add_c(input, dest, stride, mask); |
| else |
| aom_imrc32x32_1024_add_c(input, dest, stride, mask); |
| } |
| } |
| #endif // CONFIG_MRC_TX |
| |
| #if !CONFIG_DAALA_TX32 |
| static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| #if CONFIG_ADAPT_SCAN |
| const int16_t half = txfm_param->eob_threshold[0]; |
| const int16_t quarter = txfm_param->eob_threshold[1]; |
| #else |
| const int16_t half = 135; |
| const int16_t quarter = 34; |
| #endif |
| |
| const int eob = txfm_param->eob; |
| if (eob == 1) |
| aom_idct32x32_1_add(input, dest, stride); |
| else if (eob <= quarter) |
| // non-zero coeff only in upper-left 8x8 |
| aom_idct32x32_34_add(input, dest, stride); |
| else if (eob <= half) |
| // non-zero coeff only in upper-left 16x16 |
| aom_idct32x32_135_add(input, dest, stride); |
| else |
| aom_idct32x32_1024_add(input, dest, stride); |
| } |
| #endif |
| |
| #if CONFIG_TX64X64 && !CONFIG_DAALA_TX64 |
| static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| av1_iht64x64_4096_add(input, dest, stride, txfm_param); |
| } |
| #endif // CONFIG_TX64X64 && !CONFIG_DAALA_TX64 |
| |
| static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride, |
| const TxfmParam *txfm_param) { |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| if (txfm_param->lossless) { |
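| // Lossless blocks always use the 4x4 inverse Walsh-Hadamard transform. |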
| assert(tx_type == DCT_DCT); |
| av1_iwht4x4_add(input, dest, stride, txfm_param); |
| return; |
| } |
| |
| switch (tx_type) { |
| #if !CONFIG_DAALA_TX4 |
| case DCT_DCT: av1_idct4x4_add(input, dest, stride, txfm_param); break; |
| #else |
| case DCT_DCT: |
| #endif |
| case ADST_DCT: |
| case DCT_ADST: |
| case ADST_ADST: |
| #if CONFIG_LGT || CONFIG_DAALA_TX4 |
| // LGT only exists in the C version |
| av1_iht4x4_16_add_c(input, dest, stride, txfm_param); |
| break; |
| #else |
| av1_iht4x4_16_add(input, dest, stride, txfm_param); |
| break; |
| #endif |
| #if CONFIG_EXT_TX |
| case FLIPADST_DCT: |
| case DCT_FLIPADST: |
| case FLIPADST_FLIPADST: |
| case ADST_FLIPADST: |
|