blob: b51acb7f855ea05c6c292e06b7f3b9156e095e5e [file] [log] [blame]
/*
* Copyright (c) 2021, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* aomedia.org/license/patent-license/.
*/
#include <assert.h>
#include <math.h>
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/intrapred_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/bitops.h"
// Vertical prediction: each of the bh output rows receives a copy of the
// first bw pixels of `above`. The left column is not consulted.
static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                               const uint8_t *above, const uint8_t *left) {
  (void)left;
  uint8_t *row = dst;
  for (int rows_left = bh; rows_left > 0; --rows_left) {
    memcpy(row, above, bw);
    row += stride;
  }
}
// Horizontal prediction: output row y is filled with left[y] across all bw
// columns. The above row is not consulted.
static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                               const uint8_t *above, const uint8_t *left) {
  (void)above;
  uint8_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    memset(row, left[y], bw);
  }
}
// Returns |a - b|, always subtracting the smaller value from the larger one.
static INLINE int abs_diff(int a, int b) {
  if (a > b) return a - b;
  return b - a;
}
// Paeth selection for a single pixel: forms base = top + left - top_left and
// returns whichever of left, top, top_left is nearest to base. Ties are
// resolved in the order left, then top, then top_left.
static INLINE uint16_t paeth_predictor_single(uint16_t left, uint16_t top,
                                              uint16_t top_left) {
  const int base = top + left - top_left;
  const int dist_left = abs_diff(base, left);
  const int dist_top = abs_diff(base, top);
  const int dist_top_left = abs_diff(base, top_left);
  if (dist_left <= dist_top && dist_left <= dist_top_left) return left;
  if (dist_top <= dist_top_left) return top;
  return top_left;
}
// 8-bit Paeth predictor: every output pixel (y, x) is the Paeth choice among
// left[y], above[x] and the top-left corner above[-1].
static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint8_t *above,
                                   const uint8_t *left) {
  const uint8_t corner = above[-1];
  uint8_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    for (int x = 0; x < bw; ++x) {
      row[x] = (uint8_t)paeth_predictor_single(left[y], above[x], corner);
    }
  }
}
// Some basic checks on weights for smooth predictor.
// Assert-only validation of a pair of smooth weight tables:
//  - the first weight of each table is below weights_scale,
//  - weights_scale minus the last weight of each table is below
//    weights_scale (i.e. the last weight is non-zero),
//  - pred_scale is below 31, which ensures no overflow when calculating the
//    predictor.
// NOTE: the expansion reads `bw` and `bh` from the scope of the call site;
// they are not macro parameters.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays correct under an unbraced if/else; every argument is
// parenthesized so that expression arguments keep their meaning.
#define sm_weights_sanity_checks(weights_w, weights_h, weights_scale, \
                                 pred_scale)                          \
  do {                                                                \
    assert((weights_w)[0] < (weights_scale));                         \
    assert((weights_h)[0] < (weights_scale));                         \
    assert((weights_scale) - (weights_w)[bw - 1] < (weights_scale));  \
    assert((weights_scale) - (weights_h)[bh - 1] < (weights_scale));  \
    assert((pred_scale) < 31);                                        \
  } while (0)
// Rounding right shift: adds 1 << (bits - 1), i.e. half of 2^bits, to `value`
// and then shifts the sum right by `bits`. `bits` must be at least 1 because
// of the (bits)-1 shift count; `bits` is expanded twice, so it should not have
// side effects.
#define divide_round(value, bits) (((value) + (1 << ((bits)-1))) >> (bits))
// 8-bit SMOOTH predictor. Each output pixel is a weighted sum of four
// samples: above[x] and the bottom estimate left[bh - 1] (weighted by the
// row-dependent table entry and its complement), plus left[y] and the right
// estimate above[bw - 1] (weighted by the column-dependent entry and its
// complement). The sum is rounded and shifted by 1 + sm_weight_log2_scale.
static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                    int bh, const uint8_t *above,
                                    const uint8_t *left) {
  const uint8_t below_pred = left[bh - 1];   // estimated by bottom-left pixel
  const uint8_t right_pred = above[bw - 1];  // estimated by top-right pixel
  const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
  const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
  // Two weight pairs are summed, hence the extra bit:
  // total scale = 2 * 2^sm_weight_log2_scale.
  const int log2_scale = 1 + sm_weight_log2_scale;
  const uint16_t scale = (1 << sm_weight_log2_scale);
  // NOTE(review): sizeof(*dst) is a byte count, not a bit count; kept as-is.
  sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
                           log2_scale + sizeof(*dst));
  uint8_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    for (int x = 0; x < bw; ++x) {
      assert(scale >= sm_weights_h[y] && scale >= sm_weights_w[x]);
      // The complements are deliberately narrowed to 8 bits, exactly as the
      // weights are stored.
      const uint8_t w_above = sm_weights_h[y];
      const uint8_t w_below = (uint8_t)(scale - sm_weights_h[y]);
      const uint8_t w_left = sm_weights_w[x];
      const uint8_t w_right = (uint8_t)(scale - sm_weights_w[x]);
      uint32_t acc = 0;
      acc += w_above * above[x];
      acc += w_below * below_pred;
      acc += w_left * left[y];
      acc += w_right * right_pred;
      row[x] = divide_round(acc, log2_scale);
    }
  }
}
// 8-bit SMOOTH_V predictor. Each output pixel blends above[x] with the bottom
// estimate left[bh - 1]; the blend weight depends only on the row and comes
// from the weight table selected by bh.
static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                      int bh, const uint8_t *above,
                                      const uint8_t *left) {
  const uint8_t below_pred = left[bh - 1];  // estimated by bottom-left pixel
  const uint8_t *const sm_weights = sm_weight_arrays + bh;
  // scale = 2^sm_weight_log2_scale
  const int log2_scale = sm_weight_log2_scale;
  const uint16_t scale = (1 << sm_weight_log2_scale);
  sm_weights_sanity_checks(sm_weights, sm_weights, scale,
                           log2_scale + sizeof(*dst));
  uint8_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    for (int x = 0; x < bw; ++x) {
      assert(scale >= sm_weights[y]);
      // The complement is deliberately narrowed to 8 bits, matching the
      // stored weight type.
      const uint8_t w_above = sm_weights[y];
      const uint8_t w_below = (uint8_t)(scale - sm_weights[y]);
      uint32_t acc = 0;
      acc += w_above * above[x];
      acc += w_below * below_pred;
      row[x] = divide_round(acc, log2_scale);
    }
  }
}
// 8-bit SMOOTH_H predictor. Each output pixel blends left[y] with the right
// estimate above[bw - 1]; the blend weight depends only on the column and
// comes from the weight table selected by bw.
static INLINE void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                      int bh, const uint8_t *above,
                                      const uint8_t *left) {
  const uint8_t right_pred = above[bw - 1];  // estimated by top-right pixel
  const uint8_t *const sm_weights = sm_weight_arrays + bw;
  // scale = 2^sm_weight_log2_scale
  const int log2_scale = sm_weight_log2_scale;
  const uint16_t scale = (1 << sm_weight_log2_scale);
  sm_weights_sanity_checks(sm_weights, sm_weights, scale,
                           log2_scale + sizeof(*dst));
  uint8_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    for (int x = 0; x < bw; ++x) {
      assert(scale >= sm_weights[x]);
      // The complement is deliberately narrowed to 8 bits, matching the
      // stored weight type.
      const uint8_t w_left = sm_weights[x];
      const uint8_t w_right = (uint8_t)(scale - sm_weights[x]);
      uint32_t acc = 0;
      acc += w_left * left[y];
      acc += w_right * right_pred;
      row[x] = divide_round(acc, log2_scale);
    }
  }
}
// DC_128 predictor: fills the bw x bh block with the constant 128. Neither
// edge is consulted.
static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                    int bh, const uint8_t *above,
                                    const uint8_t *left) {
  (void)above;
  (void)left;
  uint8_t *row = dst;
  for (int rows_left = bh; rows_left > 0; --rows_left) {
    memset(row, 128, bw);
    row += stride;
  }
}
// DC_LEFT predictor: fills the block with the rounded average of the bh
// pixels in `left`. The above row is not consulted.
static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                     int bh, const uint8_t *above,
                                     const uint8_t *left) {
  (void)above;
  int total = 0;
  for (int i = 0; i < bh; ++i) {
    total += left[i];
  }
  // Adding half the divisor first rounds to nearest.
  const int dc = (total + (bh >> 1)) / bh;
  uint8_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    memset(row, dc, bw);
  }
}
// DC_TOP predictor: fills the block with the rounded average of the bw pixels
// in `above`. The left column is not consulted.
static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                    int bh, const uint8_t *above,
                                    const uint8_t *left) {
  (void)left;
  int total = 0;
  for (int i = 0; i < bw; ++i) {
    total += above[i];
  }
  // Adding half the divisor first rounds to nearest.
  const int dc = (total + (bw >> 1)) / bw;
  uint8_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    memset(row, dc, bw);
  }
}
// DC predictor: fills the block with the rounded average of the bw pixels in
// `above` together with the bh pixels in `left`.
static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                                const uint8_t *above, const uint8_t *left) {
  const int count = bw + bh;
  int total = 0;
  for (int x = 0; x < bw; ++x) total += above[x];
  for (int y = 0; y < bh; ++y) total += left[y];
  // Adding half the divisor first rounds to nearest.
  const int dc = (total + (count >> 1)) / count;
  uint8_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    memset(row, dc, bw);
  }
}
// Divides without a division instruction: shifts `num` right by `shift1`,
// multiplies the result by `multiplier` in 64-bit arithmetic and shifts the
// product right by `shift2`.
static INLINE int divide_using_multiply_shift(int num, int shift1,
                                              int multiplier, int shift2) {
  const int64_t product = (int64_t)(num >> shift1) * multiplier;
  return (int)(product >> shift2);
}
// High bit-depth vertical prediction: each of the bh output rows receives a
// copy of the first bw 16-bit pixels of `above`. `left` and `bd` are unused.
static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
                                      int bh, const uint16_t *above,
                                      const uint16_t *left, int bd) {
  (void)left;
  (void)bd;
  uint16_t *row = dst;
  for (int rows_left = bh; rows_left > 0; --rows_left) {
    memcpy(row, above, bw * sizeof(uint16_t));
    row += stride;
  }
}
// High bit-depth horizontal prediction: output row y is filled with left[y]
// across all bw columns. `above` and `bd` are unused.
static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
                                      int bh, const uint16_t *above,
                                      const uint16_t *left, int bd) {
  (void)above;
  (void)bd;
  uint16_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    aom_memset16(row, left[y], bw);
  }
}
// High bit-depth Paeth predictor: every output pixel (y, x) is the Paeth
// choice among left[y], above[x] and the top-left corner above[-1]. `bd` is
// unused.
static INLINE void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
                                          int bw, int bh, const uint16_t *above,
                                          const uint16_t *left, int bd) {
  (void)bd;
  const uint16_t corner = above[-1];
  uint16_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    for (int x = 0; x < bw; ++x) {
      row[x] = paeth_predictor_single(left[y], above[x], corner);
    }
  }
}
#if CONFIG_BLEND_MODE
// Upper bound of the s_top / s_left weights used by the CONFIG_BLEND_MODE
// smooth predictors below; each such weight is paired with
// (2 * BLEND_WEIGHT_MAX - weight).
#define BLEND_WEIGHT_MAX 32
// Lookup table with blk_size_log2[n] == floor(log2(n + 1)) for n in [0, 64].
// For the power-of-two sizes 4, 8, 16, 32 and 64 this is exactly log2(n)
// (2, 3, 4, 5 and 6 respectively).
static const uint8_t blk_size_log2[65] = {
0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6
};
#endif // CONFIG_BLEND_MODE
// High bit-depth SMOOTH predictor. One of two implementations is selected at
// compile time by CONFIG_BLEND_MODE; `bd` is unused in both.
static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
(void)bd;
#if CONFIG_BLEND_MODE
// Blend-mode variant. Note that it reads left[bh] and above[bw], i.e. one
// entry past the bh / bw edge pixels, so those samples must be provided by
// the caller.
const uint16_t bl = left[bh]; // estimated by bottom-left pixel
const uint16_t tr = above[bw]; // estimated by top-right pixel
uint16_t *pred = dst;
// Shift applied to the doubled row/column index when deriving s_top/s_left.
// NOTE(review): ROUND_POWER_OF_TWO is defined outside this file; presumably
// a rounding right shift — confirm.
const int scale =
ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2);
assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1);
for (int r = 0; r < bh; r++) {
// Weight of the pixel directly above: BLEND_WEIGHT_MAX shifted right by
// min(blk_size_log2[2 * BLEND_WEIGHT_MAX], (2 * r) >> scale), so it never
// grows as r increases.
const int s_top =
BLEND_WEIGHT_MAX >>
AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((r << 1) >> scale));
const uint32_t l = left[r];
for (int c = 0; c < bw; c++) {
// Weight of the pixel directly to the left, derived from c the same way.
const int s_left =
BLEND_WEIGHT_MAX >>
AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((c << 1) >> scale));
const uint32_t top = above[c];
// predv interpolates between above[c] and bl (weights sum to bh) and is
// pre-multiplied by bw; predh interpolates between left[r] and tr (weights
// sum to bw) and is pre-multiplied by bh. Both therefore carry a factor of
// bw * bh.
uint32_t predv = (above[c] * (bh - 1 - r) + bl * (r + 1)) * bw;
uint32_t predh = (left[r] * (bw - 1 - c) + tr * (c + 1)) * bh;
// Blend each interpolation with its nearest edge pixel; the two weights sum
// to 2 * BLEND_WEIGHT_MAX.
// NOTE(review): UINT_MAX comes from <limits.h>, which this file does not
// include directly — presumably provided by a project header; confirm.
predv = (s_top * top * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_top) * predv);
assert(predv < UINT_MAX);
predh = (s_left * l * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_left) * predh);
assert(predh < UINT_MAX);
// Normalization: 1 bit for averaging predv and predh, 6 bits for the
// 2 * BLEND_WEIGHT_MAX (= 64) weight sum, plus log2(bh) + log2(bw).
const int bits = 1 + 6 + blk_size_log2[bh] + blk_size_log2[bw];
pred[c] = divide_round((predv + predh), bits);
}
pred += stride;
}
#else
// Weight-table variant: the same computation as the 8-bit smooth_predictor
// in this file, applied to 16-bit pixels.
const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
// scale = 2 * 2^sm_weight_log2_scale
const int log2_scale = 1 + sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);
sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
log2_scale + sizeof(*dst));
int r;
for (r = 0; r < bh; ++r) {
int c;
for (c = 0; c < bw; ++c) {
// pixels[i] is paired with weights[i]; the complementary weights
// (scale - w) are stored as uint8_t.
const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r],
sm_weights_w[c], scale - sm_weights_w[c] };
uint32_t this_pred = 0;
int i;
assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]);
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
#endif // CONFIG_BLEND_MODE
}
// High bit-depth SMOOTH_V predictor. One of two implementations is selected
// at compile time by CONFIG_BLEND_MODE; `bd` is unused in both.
static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
(void)bd;
#if CONFIG_BLEND_MODE
// Blend-mode variant. Note that it reads left[bh], i.e. one entry past the
// bh left-edge pixels, so that sample must be provided by the caller.
const uint16_t bl = left[bh]; // estimated by bottom-left pixel
uint16_t *pred = dst;
// Shift applied to the doubled row index when deriving s_top.
// NOTE(review): ROUND_POWER_OF_TWO is defined outside this file; presumably
// a rounding right shift — confirm.
const int scale =
ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2);
assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1);
for (int r = 0; r < bh; ++r) {
// Weight of the pixel directly above: BLEND_WEIGHT_MAX shifted right by
// min(blk_size_log2[2 * BLEND_WEIGHT_MAX], (2 * r) >> scale).
const int s_top =
BLEND_WEIGHT_MAX >>
AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((r << 1) >> scale));
for (int c = 0; c < bw; ++c) {
const uint32_t top = above[c];
// Interpolation between above[c] and bl (weights sum to bh), pre-multiplied
// by bw so that it carries a factor of bw * bh.
uint32_t predv = (above[c] * (bh - 1 - r) + bl * (r + 1)) * bw;
// NOTE(review): UINT_MAX comes from <limits.h>, which this file does not
// include directly — presumably provided by a project header; confirm.
assert(predv < UINT_MAX);
// Normalization: 6 bits for the 2 * BLEND_WEIGHT_MAX (= 64) weight sum plus
// log2(bh) + log2(bw).
const int bits = 6 + blk_size_log2[bh] + blk_size_log2[bw];
pred[c] = divide_round(
(s_top * top * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_top) * predv),
bits);
}
pred += stride;
}
#else
// Weight-table variant: the same computation as the 8-bit
// smooth_v_predictor in this file, applied to 16-bit pixels.
const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
const uint8_t *const sm_weights = sm_weight_arrays + bh;
// scale = 2^sm_weight_log2_scale
const int log2_scale = sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
int r;
for (r = 0; r < bh; r++) {
int c;
for (c = 0; c < bw; ++c) {
// pixels[i] is paired with weights[i]; the complementary weight
// (scale - w) is stored as uint8_t.
const uint16_t pixels[] = { above[c], below_pred };
const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] };
uint32_t this_pred = 0;
assert(scale >= sm_weights[r]);
int i;
for (i = 0; i < 2; ++i) {
this_pred += weights[i] * pixels[i];
}
dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
#endif // CONFIG_BLEND_MODE
}
// High bit-depth SMOOTH_H predictor. One of two implementations is selected
// at compile time by CONFIG_BLEND_MODE; `bd` is unused in both.
static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
(void)bd;
#if CONFIG_BLEND_MODE
// Blend-mode variant. Note that it reads above[bw], i.e. one entry past the
// bw above-edge pixels, so that sample must be provided by the caller.
const uint16_t tr = above[bw]; // estimated by top-right pixel
uint16_t *pred = dst;
// Shift applied to the doubled column index when deriving s_left.
// NOTE(review): ROUND_POWER_OF_TWO is defined outside this file; presumably
// a rounding right shift — confirm.
const int scale =
ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2);
assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1);
for (int r = 0; r < bh; r++) {
const uint32_t l = left[r];
for (int c = 0; c < bw; c++) {
// Weight of the pixel directly to the left: BLEND_WEIGHT_MAX shifted right
// by min(blk_size_log2[2 * BLEND_WEIGHT_MAX], (2 * c) >> scale).
const int s_left =
BLEND_WEIGHT_MAX >>
AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((c << 1) >> scale));
// Interpolation between left[r] and tr (weights sum to bw), pre-multiplied
// by bh so that it carries a factor of bw * bh.
uint32_t predh = (left[r] * (bw - 1 - c) + tr * (c + 1)) * bh;
// NOTE(review): UINT_MAX comes from <limits.h>, which this file does not
// include directly — presumably provided by a project header; confirm.
assert(predh < UINT_MAX);
// Normalization: 6 bits for the 2 * BLEND_WEIGHT_MAX (= 64) weight sum plus
// log2(bh) + log2(bw).
const int bits = 6 + blk_size_log2[bh] + blk_size_log2[bw];
pred[c] = divide_round(
(s_left * l * (bw * bh) + (BLEND_WEIGHT_MAX * 2 - s_left) * predh),
bits);
}
pred += stride;
}
#else
// Weight-table variant: the same computation as the 8-bit
// smooth_h_predictor in this file, applied to 16-bit pixels.
const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
const uint8_t *const sm_weights = sm_weight_arrays + bw;
// scale = 2^sm_weight_log2_scale
const int log2_scale = sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
int r;
for (r = 0; r < bh; r++) {
int c;
for (c = 0; c < bw; ++c) {
// pixels[i] is paired with weights[i]; the complementary weight
// (scale - w) is stored as uint8_t.
const uint16_t pixels[] = { left[r], right_pred };
const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] };
uint32_t this_pred = 0;
assert(scale >= sm_weights[c]);
int i;
for (i = 0; i < 2; ++i) {
this_pred += weights[i] * pixels[i];
}
dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
#endif // CONFIG_BLEND_MODE
}
// High bit-depth DC_128 predictor: fills the block with 128 << (bd - 8), the
// value 128 scaled from 8-bit up to the given bit depth. Neither edge is
// consulted.
static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
                                           int bw, int bh,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
  (void)above;
  (void)left;
  uint16_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    aom_memset16(row, 128 << (bd - 8), bw);
  }
}
// High bit-depth DC_LEFT predictor: fills the block with the rounded average
// of the bh pixels in `left`. `above` and `bd` are unused.
static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
                                            int bw, int bh,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd) {
  (void)above;
  (void)bd;
  int total = 0;
  for (int i = 0; i < bh; ++i) {
    total += left[i];
  }
  // Adding half the divisor first rounds to nearest.
  const int dc = (total + (bh >> 1)) / bh;
  uint16_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    aom_memset16(row, dc, bw);
  }
}
// High bit-depth DC_TOP predictor: fills the block with the rounded average
// of the bw pixels in `above`. `left` and `bd` are unused.
static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
                                           int bw, int bh,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
  (void)left;
  (void)bd;
  int total = 0;
  for (int i = 0; i < bw; ++i) {
    total += above[i];
  }
  // Adding half the divisor first rounds to nearest.
  const int dc = (total + (bw >> 1)) / bw;
  uint16_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    aom_memset16(row, dc, bw);
  }
}
// High bit-depth DC predictor: fills the block with the rounded average of
// the bw pixels in `above` together with the bh pixels in `left`. `bd` is
// unused.
static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
                                       int bh, const uint16_t *above,
                                       const uint16_t *left, int bd) {
  (void)bd;
  const int count = bw + bh;
  int total = 0;
  for (int x = 0; x < bw; ++x) total += above[x];
  for (int y = 0; y < bh; ++y) total += left[y];
  // Adding half the divisor first rounds to nearest.
  const int dc = (total + (count >> 1)) / count;
  uint16_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    aom_memset16(row, dc, bw);
  }
}
#if CONFIG_IBP_DC
// Blend weights used by the ibp_dc_* predictors below. Row k holds 2^k
// non-zero, increasing weights (1, 2, 4, 8 or 16 of them) followed by zero
// padding. A weight w is applied to the existing dst pixel and (256 - w) to
// the neighbouring edge pixel.
const uint8_t ibp_weights[5][16] = {
{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 171, 213, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 154, 179, 205, 230, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 142, 156, 171, 185, 199, 213, 228, 242, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 136, 143, 151, 158, 166, 173, 181, 188, 196, 203, 211, 218, 226, 233, 241,
248 }
};
// Maps (block dimension >> 3) to a row of ibp_weights: indices 0, 1, 2, 4
// and 8 select rows 0..4; the remaining indices hold 0.
const uint8_t size_to_weights_index[9] = { 0, 1, 2, 0, 3, 0, 0, 0, 4 };
static INLINE void highbd_ibp_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
int r, c;
(void)above;
(void)bd;
int len = bw >> 2;
const uint8_t weights_index = size_to_weights_index[bw >> 3];
const uint8_t *weights = ibp_weights[weights_index];
for (r = 0; r < bh; r++) {
for (c = 0; c < len; c++) {
int val = ROUND_POWER_OF_TWO(
left[r] * (256 - weights[c]) + dst[c] * weights[c], IBP_WEIGHT_SHIFT);
dst[c] = val;
}
dst += stride;
}
}
static INLINE void highbd_ibp_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
int r, c;
(void)left;
(void)bd;
int len = bh >> 2;
const uint8_t weights_index = size_to_weights_index[bh >> 3];
const uint8_t *weights = ibp_weights[weights_index];
for (r = 0; r < len; r++) {
for (c = 0; c < bw; c++) {
int val = ROUND_POWER_OF_TWO(
above[c] * (256 - weights[r]) + dst[c] * weights[r],
IBP_WEIGHT_SHIFT);
dst[c] = val;
}
dst += stride;
}
}
static INLINE void highbd_ibp_dc_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
int r, c;
(void)bd;
uint16_t *orig_dst = dst;
int len_h = bh >> 2;
int len_w = bw >> 2;
uint8_t weights_index = size_to_weights_index[bh >> 3];
const uint8_t *weights = ibp_weights[weights_index];
for (r = 0; r < len_h; r++) {
for (c = 0; c < bw; c++) {
int val = ROUND_POWER_OF_TWO(
above[c] * (256 - weights[r]) + dst[c] * weights[r],
IBP_WEIGHT_SHIFT);
dst[c] = val;
}
dst += stride;
}
dst = orig_dst;
weights_index = size_to_weights_index[bw >> 3];
weights = ibp_weights[weights_index];
for (r = 0; r < bh; r++) {
for (c = 0; c < len_w; c++) {
int val = ROUND_POWER_OF_TWO(
left[r] * (256 - weights[c]) + dst[c] * weights[c], IBP_WEIGHT_SHIFT);
dst[c] = val;
}
dst += stride;
}
}
static INLINE void ibp_dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
const uint8_t *left) {
int r, c;
(void)above;
const uint8_t weights_index = size_to_weights_index[bw >> 3];
const uint8_t *weights = ibp_weights[weights_index];
int len = bw >> 2;
for (r = 0; r < bh; r++) {
for (c = 0; c < len; c++) {
int val = ROUND_POWER_OF_TWO(
left[r] * (256 - weights[c]) + dst[c] * weights[c], IBP_WEIGHT_SHIFT);
dst[c] = val;
}
dst += stride;
}
}
static INLINE void ibp_dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
const uint8_t *left) {
int r, c;
(void)left;
const uint8_t weights_index = size_to_weights_index[bh >> 3];
const uint8_t *weights = ibp_weights[weights_index];
int len = bh >> 2;
for (r = 0; r < len; r++) {
for (c = 0; c < bw; c++) {
int val = ROUND_POWER_OF_TWO(
above[c] * (256 - weights[r]) + dst[c] * weights[r],
IBP_WEIGHT_SHIFT);
dst[c] = val;
}
dst += stride;
}
}
static INLINE void ibp_dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
const uint8_t *left) {
int r, c;
uint8_t *orig_dst = dst;
uint8_t weights_index = size_to_weights_index[bh >> 3];
const uint8_t *weights = ibp_weights[weights_index];
int len_w = bw >> 2;
int len_h = bh >> 2;
for (r = 0; r < len_h; r++) {
for (c = 0; c < bw; c++) {
int val = ROUND_POWER_OF_TWO(
above[c] * (256 - weights[r]) + dst[c] * weights[r],
IBP_WEIGHT_SHIFT);
dst[c] = val;
}
dst += stride;
}
dst = orig_dst;
weights_index = size_to_weights_index[bw >> 3];
weights = ibp_weights[weights_index];
for (r = 0; r < bh; r++) {
for (c = 0; c < len_w; c++) {
int val = ROUND_POWER_OF_TWO(
left[r] * (256 - weights[c]) + dst[c] * weights[c], IBP_WEIGHT_SHIFT);
dst[c] = val;
}
dst += stride;
}
}
#endif
// The constants (multiplier and shifts) for a given block size are obtained
// as follows:
// - Let sum_w_h = block width + block height.
// - Shift 'sum_w_h' right until we reach an odd number. Let the number of
// shifts for that block size be called 'shift1' (see the parameter in
// highbd_dc_predictor_rect() function), and let the odd number be 'd'.
#if CONFIG_FLEX_PARTITION
// d has only 4 possible values:
// * d = 3 for a 1:2 rect block,
// * d = 5 for a 1:4 rect block,
// * d = 9 for a 1:8 rect block,
// * d = 17 for a 1:16 rect block,
// - Find multipliers for dividing by 3, 5, 9 and 17 using the "Algorithm 1" in:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632
// by ensuring that m + n = 21 (in that algorithm). This ensures that our 2nd
// shift will be 21, regardless of the block size.
// Each multiplier below equals ceil(2^21 / d).
// Note: Strictly speaking, 2nd shift needs to be 21 only for bit depth = 12
// and rectangular blocks with ratio 1:16/16:1.
// Other cases can use scaled-down multipliers with a smaller shifts instead.
// This special optimization can be used when writing assembly code.
#define HIGHBD_DC_SHIFT2 21
#define HIGHBD_DC_MULTIPLIER_1X2 0xAAAAB
// Note: This constant is odd, but a smaller even constant (0x199a) with the
// appropriate shift should work for neon in 8/10-bit.
#define HIGHBD_DC_MULTIPLIER_1X4 0x66667
#define HIGHBD_DC_MULTIPLIER_1X8 0x38E39
#define HIGHBD_DC_MULTIPLIER_1X16 0x1E1E2
#else
// d has only 2 possible values:
// * d = 3 for a 1:2 rect block,
// * d = 5 for a 1:4 rect block.
// - Find multipliers for (i) dividing by 3, and (ii) dividing by 5,
// using the "Algorithm 1" in:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632
// by ensuring that m + n = 17 (in that algorithm). This ensures that our 2nd
// shift will be 17, regardless of the block size.
// Each multiplier below equals ceil(2^17 / d).
// Note: For low bitdepth, assembly code may be optimized by using smaller
// constants for smaller block sizes, where the range of the 'sum' is
// restricted to fewer bits.
// Note: Strictly speaking, 2nd shift needs to be 17 only when:
// - bit depth == 12, and
// - bw + bh is divisible by 5 (as opposed to divisible by 3).
// All other cases can use half the multipliers with a shift of 16 instead.
// This special optimization can be used when writing assembly code.
#define HIGHBD_DC_SHIFT2 17
#define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB
// Note: This constant is odd, but a smaller even constant (0x199a) with the
// appropriate shift should work for neon in 8/10-bit.
#define HIGHBD_DC_MULTIPLIER_1X4 0x6667
#endif // CONFIG_FLEX_PARTITION
// High bit-depth DC predictor for rectangular blocks. Sums the bw above
// pixels and the bh left pixels, adds (bw + bh) / 2 for rounding, and divides
// by (bw + bh) without a division instruction: the sum is shifted right by
// `shift1`, multiplied by `multiplier` and shifted right by HIGHBD_DC_SHIFT2.
// The block is then filled with the result. `bd` is only used by the assert.
static INLINE void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride,
                                            int bw, int bh,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd,
                                            int shift1, uint32_t multiplier) {
  (void)bd;
  int total = 0;
  for (int x = 0; x < bw; ++x) total += above[x];
  for (int y = 0; y < bh; ++y) total += left[y];
  const int rounded_total = total + ((bw + bh) >> 1);
  const int expected_dc = divide_using_multiply_shift(
      rounded_total, shift1, multiplier, HIGHBD_DC_SHIFT2);
  assert(expected_dc < (1 << bd));
  uint16_t *row = dst;
  for (int y = 0; y < bh; ++y, row += stride) {
    aom_memset16(row, expected_dc, bw);
  }
}
#undef HIGHBD_DC_SHIFT2
// 4x8 high bit-depth DC predictor: bw + bh = 12 = 3 << 2, so shift1 is 2 and
// the divide-by-3 multiplier is used.
void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X2);
}
// 8x4 high bit-depth DC predictor: bw + bh = 12 = 3 << 2, so shift1 is 2 and
// the divide-by-3 multiplier is used.
void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X2);
}
// 4x16 high bit-depth DC predictor: bw + bh = 20 = 5 << 2, so shift1 is 2 and
// the divide-by-5 multiplier is used.
void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X4);
}
// 16x4 high bit-depth DC predictor: bw + bh = 20 = 5 << 2, so shift1 is 2 and
// the divide-by-5 multiplier is used.
void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X4);
}
// 8x16 high bit-depth DC predictor: bw + bh = 24 = 3 << 3, so shift1 is 3 and
// the divide-by-3 multiplier is used.
void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X2);
}
// 16x8 high bit-depth DC predictor: bw + bh = 24 = 3 << 3, so shift1 is 3 and
// the divide-by-3 multiplier is used.
void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X2);
}
// 8x32 high bit-depth DC predictor: bw + bh = 40 = 5 << 3, so shift1 is 3 and
// the divide-by-5 multiplier is used.
void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X4);
}
// 32x8 high bit-depth DC predictor: bw + bh = 40 = 5 << 3, so shift1 is 3 and
// the divide-by-5 multiplier is used.
void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X4);
}
// 16x32 high bit-depth DC predictor: bw + bh = 48 = 3 << 4, so shift1 is 4
// and the divide-by-3 multiplier is used.
void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4,
HIGHBD_DC_MULTIPLIER_1X2);
}
// 32x16 high bit-depth DC predictor: bw + bh = 48 = 3 << 4, so shift1 is 4
// and the divide-by-3 multiplier is used.
void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4,
HIGHBD_DC_MULTIPLIER_1X2);
}
// 16x64 high bit-depth DC predictor: bw + bh = 80 = 5 << 4, so shift1 is 4
// and the divide-by-5 multiplier is used.
void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4,
HIGHBD_DC_MULTIPLIER_1X4);
}
// 64x16 high bit-depth DC predictor: bw + bh = 80 = 5 << 4, so shift1 is 4
// and the divide-by-5 multiplier is used.
void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4,
HIGHBD_DC_MULTIPLIER_1X4);
}
// 32x64 high bit-depth DC predictor: bw + bh = 96 = 3 << 5, so shift1 is 5
// and the divide-by-3 multiplier is used.
void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5,
HIGHBD_DC_MULTIPLIER_1X2);
}
// 64x32 high bit-depth DC predictor: bw + bh = 96 = 3 << 5, so shift1 is 5
// and the divide-by-3 multiplier is used.
void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5,
HIGHBD_DC_MULTIPLIER_1X2);
}
#if CONFIG_FLEX_PARTITION
// 4x32 high bit-depth DC predictor: bw + bh = 36 = 9 << 2, so shift1 is 2 and
// the divide-by-9 multiplier is used.
void aom_highbd_dc_predictor_4x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 4, 32, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X8);
}
// 32x4 high bit-depth DC predictor: bw + bh = 36 = 9 << 2, so shift1 is 2 and
// the divide-by-9 multiplier is used.
void aom_highbd_dc_predictor_32x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 4, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X8);
}
// 8x64 high bit-depth DC predictor: bw + bh = 72 = 9 << 3, so shift1 is 3 and
// the divide-by-9 multiplier is used.
void aom_highbd_dc_predictor_8x64_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 64, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X8);
}
// 64x8 high bit-depth DC predictor: bw + bh = 72 = 9 << 3, so shift1 is 3 and
// the divide-by-9 multiplier is used.
void aom_highbd_dc_predictor_64x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 64, 8, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X8);
}
// 4x64 high bit-depth DC predictor: bw + bh = 68 = 17 << 2, so shift1 is 2
// and the divide-by-17 multiplier is used.
void aom_highbd_dc_predictor_4x64_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 4, 64, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X16);
}
// 64x4 high bit-depth DC predictor: bw + bh = 68 = 17 << 2, so shift1 is 2
// and the divide-by-17 multiplier is used.
void aom_highbd_dc_predictor_64x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 64, 4, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X16);
}
#endif // CONFIG_FLEX_PARTITION
#undef HIGHBD_DC_MULTIPLIER_1X2
#undef HIGHBD_DC_MULTIPLIER_1X4
#if CONFIG_FLEX_PARTITION
#undef HIGHBD_DC_MULTIPLIER_1X8
#undef HIGHBD_DC_MULTIPLIER_1X16
#endif // CONFIG_FLEX_PARTITION
// This serves as a wrapper function, so that all the prediction functions
// can be unified and accessed as a pointer array. Note that the boundary
// above and left are not necessarily used all the time.
// intra_pred_sized(type, W, H) defines the 8-bit entry point
// aom_<type>_predictor_WxH_c(), which forwards to the static
// <type>_predictor() helper with W and H passed as constants.
#define intra_pred_sized(type, width, height) \
void aom_##type##_predictor_##width##x##height##_c( \
uint8_t *dst, ptrdiff_t stride, const uint8_t *above, \
const uint8_t *left) { \
type##_predictor(dst, stride, width, height, above, left); \
}
// intra_pred_highbd_sized(type, W, H) defines the 16-bit entry point
// aom_highbd_<type>_predictor_WxH_c(), which forwards to the static
// highbd_<type>_predictor() helper with W and H passed as constants and the
// bit depth `bd` passed through.
#define intra_pred_highbd_sized(type, width, height) \
void aom_highbd_##type##_predictor_##width##x##height##_c( \
uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
const uint16_t *left, int bd) { \
highbd_##type##_predictor(dst, stride, width, height, above, left, bd); \
}
/* clang-format off */
// intra_pred_rectangular(type) emits the 8-bit and 16-bit entry points of
// `type` for every non-square block size. With CONFIG_FLEX_PARTITION the
// list additionally covers 4x32, 32x4, 8x64, 64x8, 4x64 and 64x4.
#if CONFIG_FLEX_PARTITION
#define intra_pred_rectangular(type) \
intra_pred_sized(type, 4, 8) \
intra_pred_sized(type, 8, 4) \
intra_pred_sized(type, 8, 16) \
intra_pred_sized(type, 16, 8) \
intra_pred_sized(type, 16, 32) \
intra_pred_sized(type, 32, 16) \
intra_pred_sized(type, 32, 64) \
intra_pred_sized(type, 64, 32) \
intra_pred_sized(type, 4, 16) \
intra_pred_sized(type, 16, 4) \
intra_pred_sized(type, 8, 32) \
intra_pred_sized(type, 32, 8) \
intra_pred_sized(type, 16, 64) \
intra_pred_sized(type, 64, 16) \
intra_pred_sized(type, 4, 32) \
intra_pred_sized(type, 32, 4) \
intra_pred_sized(type, 8, 64) \
intra_pred_sized(type, 64, 8) \
intra_pred_sized(type, 4, 64) \
intra_pred_sized(type, 64, 4) \
intra_pred_highbd_sized(type, 4, 8) \
intra_pred_highbd_sized(type, 8, 4) \
intra_pred_highbd_sized(type, 8, 16) \
intra_pred_highbd_sized(type, 16, 8) \
intra_pred_highbd_sized(type, 16, 32) \
intra_pred_highbd_sized(type, 32, 16) \
intra_pred_highbd_sized(type, 32, 64) \
intra_pred_highbd_sized(type, 64, 32) \
intra_pred_highbd_sized(type, 4, 16) \
intra_pred_highbd_sized(type, 16, 4) \
intra_pred_highbd_sized(type, 8, 32) \
intra_pred_highbd_sized(type, 32, 8) \
intra_pred_highbd_sized(type, 16, 64) \
intra_pred_highbd_sized(type, 64, 16) \
intra_pred_highbd_sized(type, 4, 32) \
intra_pred_highbd_sized(type, 32, 4) \
intra_pred_highbd_sized(type, 8, 64) \
intra_pred_highbd_sized(type, 64, 8) \
intra_pred_highbd_sized(type, 4, 64) \
intra_pred_highbd_sized(type, 64, 4)
#else
#define intra_pred_rectangular(type) \
intra_pred_sized(type, 4, 8) \
intra_pred_sized(type, 8, 4) \
intra_pred_sized(type, 8, 16) \
intra_pred_sized(type, 16, 8) \
intra_pred_sized(type, 16, 32) \
intra_pred_sized(type, 32, 16) \
intra_pred_sized(type, 32, 64) \
intra_pred_sized(type, 64, 32) \
intra_pred_sized(type, 4, 16) \
intra_pred_sized(type, 16, 4) \
intra_pred_sized(type, 8, 32) \
intra_pred_sized(type, 32, 8) \
intra_pred_sized(type, 16, 64) \
intra_pred_sized(type, 64, 16) \
intra_pred_highbd_sized(type, 4, 8) \
intra_pred_highbd_sized(type, 8, 4) \
intra_pred_highbd_sized(type, 8, 16) \
intra_pred_highbd_sized(type, 16, 8) \
intra_pred_highbd_sized(type, 16, 32) \
intra_pred_highbd_sized(type, 32, 16) \
intra_pred_highbd_sized(type, 32, 64) \
intra_pred_highbd_sized(type, 64, 32) \
intra_pred_highbd_sized(type, 4, 16) \
intra_pred_highbd_sized(type, 16, 4) \
intra_pred_highbd_sized(type, 8, 32) \
intra_pred_highbd_sized(type, 32, 8) \
intra_pred_highbd_sized(type, 16, 64) \
intra_pred_highbd_sized(type, 64, 16)
#endif // CONFIG_FLEX_PARTITION
// intra_pred_above_4x4(type) emits every entry point of `type` except the
// 8-bit 4x4 one: 8-bit squares from 8x8 to 64x64, 16-bit squares from 4x4 to
// 64x64, and all rectangular sizes via intra_pred_rectangular().
#define intra_pred_above_4x4(type) \
intra_pred_sized(type, 8, 8) \
intra_pred_sized(type, 16, 16) \
intra_pred_sized(type, 32, 32) \
intra_pred_sized(type, 64, 64) \
intra_pred_highbd_sized(type, 4, 4) \
intra_pred_highbd_sized(type, 8, 8) \
intra_pred_highbd_sized(type, 16, 16) \
intra_pred_highbd_sized(type, 32, 32) \
intra_pred_highbd_sized(type, 64, 64) \
intra_pred_rectangular(type)
// intra_pred_allsizes(type) emits the 8-bit 4x4 entry point plus everything
// produced by intra_pred_above_4x4(), i.e. all square and rectangular sizes
// in both bit-depth flavours.
#define intra_pred_allsizes(type) \
intra_pred_sized(type, 4, 4) \
intra_pred_above_4x4(type)
// intra_pred_square(type) emits only the square entry points of `type`
// (4x4 to 64x64) in both the 8-bit and the 16-bit flavour.
#define intra_pred_square(type) \
intra_pred_sized(type, 4, 4) \
intra_pred_sized(type, 8, 8) \
intra_pred_sized(type, 16, 16) \
intra_pred_sized(type, 32, 32) \
intra_pred_sized(type, 64, 64) \
intra_pred_highbd_sized(type, 4, 4) \
intra_pred_highbd_sized(type, 8, 8) \
intra_pred_highbd_sized(type, 16, 16) \
intra_pred_highbd_sized(type, 32, 32) \
intra_pred_highbd_sized(type, 64, 64)
// Emit the public aom_<type>_predictor_WxH_c() and
// aom_highbd_<type>_predictor_WxH_c() entry points for each predictor type.
intra_pred_allsizes(v)
intra_pred_allsizes(h)
intra_pred_allsizes(smooth)
intra_pred_allsizes(smooth_v)
intra_pred_allsizes(smooth_h)
intra_pred_allsizes(paeth)
intra_pred_allsizes(dc_128)
intra_pred_allsizes(dc_left)
intra_pred_allsizes(dc_top)
// Plain DC is generated for square sizes only; the rectangular high bit-depth
// DC entry points are written out by hand earlier in this file on top of
// highbd_dc_predictor_rect().
intra_pred_square(dc)
#if CONFIG_IBP_DC
intra_pred_allsizes(ibp_dc_left)
intra_pred_allsizes(ibp_dc_top)
intra_pred_allsizes(ibp_dc)
#endif
/* clang-format on */
#undef intra_pred_allsizes