Merge "Add unit test for delta-q (aq-mode=4)" into nextgenv2
diff --git a/aom_dsp/arm/fwd_txfm_neon.c b/aom_dsp/arm/fwd_txfm_neon.c
index 17ce29e..1cf8a3a 100644
--- a/aom_dsp/arm/fwd_txfm_neon.c
+++ b/aom_dsp/arm/fwd_txfm_neon.c
@@ -53,10 +53,10 @@
v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64);
v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64);
v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x2), (int16_t)cospi_8_64);
- v_t0_lo = vmulq_n_s32(v_t0_lo, cospi_16_64);
- v_t0_hi = vmulq_n_s32(v_t0_hi, cospi_16_64);
- v_t1_lo = vmulq_n_s32(v_t1_lo, cospi_16_64);
- v_t1_hi = vmulq_n_s32(v_t1_hi, cospi_16_64);
+ v_t0_lo = vmulq_n_s32(v_t0_lo, (int32_t)cospi_16_64);
+ v_t0_hi = vmulq_n_s32(v_t0_hi, (int32_t)cospi_16_64);
+ v_t1_lo = vmulq_n_s32(v_t1_lo, (int32_t)cospi_16_64);
+ v_t1_hi = vmulq_n_s32(v_t1_hi, (int32_t)cospi_16_64);
{
const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
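The cast pattern above repeats through every NEON file in this change: the cospi_*_64 constants in aom_dsp/txfm_common.h are declared with a type wider than the intrinsic parameter, so each use now narrows explicitly instead of implicitly. A minimal sketch of the issue, with a stand-in typedef (the real tran_high_t is int32_t, or int64_t in high-bitdepth builds):

    #include <arm_neon.h>
    #include <stdint.h>

    typedef int32_t tran_high_t; /* stand-in; int64_t in high-bitdepth builds */
    static const tran_high_t cospi_16_64 = 11585;

    static int16x4_t dup_cospi16(void) {
      /* vdup_n_s16() takes int16_t; without the explicit cast, the implicit
         narrowing from tran_high_t trips conversion warnings on strict builds. */
      return vdup_n_s16((int16_t)cospi_16_64);
    }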
diff --git a/aom_dsp/arm/idct16x16_add_neon.c b/aom_dsp/arm/idct16x16_add_neon.c
index 3d545f8..b4cb7a0 100644
--- a/aom_dsp/arm/idct16x16_add_neon.c
+++ b/aom_dsp/arm/idct16x16_add_neon.c
@@ -137,8 +137,8 @@
d31s16 = vget_high_s16(q15s16);
// stage 3
- d0s16 = vdup_n_s16(cospi_28_64);
- d1s16 = vdup_n_s16(cospi_4_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_4_64);
q2s32 = vmull_s16(d18s16, d0s16);
q3s32 = vmull_s16(d19s16, d0s16);
@@ -150,8 +150,8 @@
q5s32 = vmlal_s16(q5s32, d30s16, d0s16);
q6s32 = vmlal_s16(q6s32, d31s16, d0s16);
- d2s16 = vdup_n_s16(cospi_12_64);
- d3s16 = vdup_n_s16(cospi_20_64);
+ d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+ d3s16 = vdup_n_s16((int16_t)cospi_20_64);
d8s16 = vqrshrn_n_s32(q2s32, 14);
d9s16 = vqrshrn_n_s32(q3s32, 14);
@@ -178,15 +178,15 @@
q6s16 = vcombine_s16(d12s16, d13s16);
// stage 4
- d30s16 = vdup_n_s16(cospi_16_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_16_64);
q2s32 = vmull_s16(d16s16, d30s16);
q11s32 = vmull_s16(d17s16, d30s16);
q0s32 = vmull_s16(d24s16, d30s16);
q1s32 = vmull_s16(d25s16, d30s16);
- d30s16 = vdup_n_s16(cospi_24_64);
- d31s16 = vdup_n_s16(cospi_8_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_24_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_8_64);
q3s32 = vaddq_s32(q2s32, q0s32);
q12s32 = vaddq_s32(q11s32, q1s32);
@@ -232,7 +232,7 @@
q2s16 = vsubq_s16(q9s16, q10s16);
q3s16 = vsubq_s16(q8s16, q11s16);
- d16s16 = vdup_n_s16(cospi_16_64);
+ d16s16 = vdup_n_s16((int16_t)cospi_16_64);
q11s32 = vmull_s16(d26s16, d16s16);
q12s32 = vmull_s16(d27s16, d16s16);
@@ -378,8 +378,8 @@
d31s16 = vget_high_s16(q15s16);
// stage 3
- d12s16 = vdup_n_s16(cospi_30_64);
- d13s16 = vdup_n_s16(cospi_2_64);
+ d12s16 = vdup_n_s16((int16_t)cospi_30_64);
+ d13s16 = vdup_n_s16((int16_t)cospi_2_64);
q2s32 = vmull_s16(d16s16, d12s16);
q3s32 = vmull_s16(d17s16, d12s16);
@@ -398,8 +398,8 @@
q0s16 = vcombine_s16(d0s16, d1s16);
q7s16 = vcombine_s16(d14s16, d15s16);
- d30s16 = vdup_n_s16(cospi_14_64);
- d31s16 = vdup_n_s16(cospi_18_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_14_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_18_64);
q2s32 = vmull_s16(d24s16, d30s16);
q3s32 = vmull_s16(d25s16, d30s16);
@@ -418,8 +418,8 @@
q1s16 = vcombine_s16(d2s16, d3s16);
q6s16 = vcombine_s16(d12s16, d13s16);
- d30s16 = vdup_n_s16(cospi_22_64);
- d31s16 = vdup_n_s16(cospi_10_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_22_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_10_64);
q11s32 = vmull_s16(d20s16, d30s16);
q12s32 = vmull_s16(d21s16, d30s16);
@@ -438,8 +438,8 @@
q2s16 = vcombine_s16(d4s16, d5s16);
q5s16 = vcombine_s16(d10s16, d11s16);
- d30s16 = vdup_n_s16(cospi_6_64);
- d31s16 = vdup_n_s16(cospi_26_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_6_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_26_64);
q10s32 = vmull_s16(d28s16, d30s16);
q11s32 = vmull_s16(d29s16, d30s16);
@@ -478,8 +478,8 @@
d28s16 = vget_low_s16(q14s16);
d29s16 = vget_high_s16(q14s16);
- d30s16 = vdup_n_s16(cospi_8_64);
- d31s16 = vdup_n_s16(cospi_24_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_24_64);
q2s32 = vmull_s16(d18s16, d31s16);
q3s32 = vmull_s16(d19s16, d31s16);
@@ -539,7 +539,7 @@
d26s16 = vget_low_s16(q13s16);
d27s16 = vget_high_s16(q13s16);
- d14s16 = vdup_n_s16(cospi_16_64);
+ d14s16 = vdup_n_s16((int16_t)cospi_16_64);
q3s32 = vmull_s16(d26s16, d14s16);
q4s32 = vmull_s16(d27s16, d14s16);
@@ -903,15 +903,15 @@
&q15s16);
// stage 3
- q0s16 = vdupq_n_s16(cospi_28_64 * 2);
- q1s16 = vdupq_n_s16(cospi_4_64 * 2);
+ q0s16 = vdupq_n_s16((int16_t)(cospi_28_64 * 2));
+ q1s16 = vdupq_n_s16((int16_t)(cospi_4_64 * 2));
q4s16 = vqrdmulhq_s16(q9s16, q0s16);
q7s16 = vqrdmulhq_s16(q9s16, q1s16);
// stage 4
- q1s16 = vdupq_n_s16(cospi_16_64 * 2);
- d4s16 = vdup_n_s16(cospi_16_64);
+ q1s16 = vdupq_n_s16((int16_t)(cospi_16_64 * 2));
+ d4s16 = vdup_n_s16((int16_t)cospi_16_64);
q8s16 = vqrdmulhq_s16(q8s16, q1s16);
@@ -1046,13 +1046,13 @@
&q15s16);
// stage 3
- q6s16 = vdupq_n_s16(cospi_30_64 * 2);
+ q6s16 = vdupq_n_s16((int16_t)(cospi_30_64 * 2));
q0s16 = vqrdmulhq_s16(q8s16, q6s16);
- q6s16 = vdupq_n_s16(cospi_2_64 * 2);
+ q6s16 = vdupq_n_s16((int16_t)(cospi_2_64 * 2));
q7s16 = vqrdmulhq_s16(q8s16, q6s16);
q15s16 = vdupq_n_s16(-cospi_26_64 * 2);
- q14s16 = vdupq_n_s16(cospi_6_64 * 2);
+ q14s16 = vdupq_n_s16((int16_t)(cospi_6_64 * 2));
q3s16 = vqrdmulhq_s16(q9s16, q15s16);
q4s16 = vqrdmulhq_s16(q9s16, q14s16);
@@ -1066,8 +1066,8 @@
d14s16 = vget_low_s16(q7s16);
d15s16 = vget_high_s16(q7s16);
- d30s16 = vdup_n_s16(cospi_8_64);
- d31s16 = vdup_n_s16(cospi_24_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_24_64);
q12s32 = vmull_s16(d14s16, d31s16);
q5s32 = vmull_s16(d15s16, d31s16);
@@ -1124,7 +1124,7 @@
d26s16 = vget_low_s16(q13s16);
d27s16 = vget_high_s16(q13s16);
- d14s16 = vdup_n_s16(cospi_16_64);
+ d14s16 = vdup_n_s16((int16_t)cospi_16_64);
q3s32 = vmull_s16(d26s16, d14s16);
q4s32 = vmull_s16(d27s16, d14s16);
q0s32 = vmull_s16(d20s16, d14s16);
diff --git a/aom_dsp/arm/idct4x4_add_neon.c b/aom_dsp/arm/idct4x4_add_neon.c
index 397c617..763be1a 100644
--- a/aom_dsp/arm/idct4x4_add_neon.c
+++ b/aom_dsp/arm/idct4x4_add_neon.c
@@ -11,6 +11,8 @@
#include <arm_neon.h>
+#include "aom_dsp/txfm_common.h"
+
void aom_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
uint8x8_t d26u8, d27u8;
uint32x2_t d26u32, d27u32;
@@ -22,9 +24,6 @@
int16x4x2_t d0x2s16, d1x2s16;
int32x4x2_t q0x2s32;
uint8_t *d;
- int16_t cospi_8_64 = 15137;
- int16_t cospi_16_64 = 11585;
- int16_t cospi_24_64 = 6270;
d26u32 = d27u32 = vdup_n_u32(0);
@@ -41,8 +40,8 @@
q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]);
q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]);
- d20s16 = vdup_n_s16(cospi_8_64);
- d21s16 = vdup_n_s16(cospi_16_64);
+ d20s16 = vdup_n_s16((int16_t)cospi_8_64);
+ d21s16 = vdup_n_s16((int16_t)cospi_16_64);
q0x2s32 =
vtrnq_s32(vreinterpretq_s32_s16(q8s16), vreinterpretq_s32_s16(q9s16));
@@ -51,7 +50,7 @@
d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
- d22s16 = vdup_n_s16(cospi_24_64);
+ d22s16 = vdup_n_s16((int16_t)cospi_24_64);
// stage 1
d23s16 = vadd_s16(d16s16, d18s16);
diff --git a/aom_dsp/arm/idct8x8_1_add_neon.c b/aom_dsp/arm/idct8x8_1_add_neon.c
index fcc2a2f..c7926f9 100644
--- a/aom_dsp/arm/idct8x8_1_add_neon.c
+++ b/aom_dsp/arm/idct8x8_1_add_neon.c
@@ -20,7 +20,7 @@
uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;
int16x8_t q0s16;
uint8_t *d1, *d2;
- int16_t i, a1, cospi_16_64 = 11585;
+ int16_t i, a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
a1 = ROUND_POWER_OF_TWO(out, 5);
diff --git a/aom_dsp/arm/idct8x8_add_neon.c b/aom_dsp/arm/idct8x8_add_neon.c
index 8e75210..8ad7086 100644
--- a/aom_dsp/arm/idct8x8_add_neon.c
+++ b/aom_dsp/arm/idct8x8_add_neon.c
@@ -90,10 +90,10 @@
int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
- d0s16 = vdup_n_s16(cospi_28_64);
- d1s16 = vdup_n_s16(cospi_4_64);
- d2s16 = vdup_n_s16(cospi_12_64);
- d3s16 = vdup_n_s16(cospi_20_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_4_64);
+ d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+ d3s16 = vdup_n_s16((int16_t)cospi_20_64);
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
@@ -146,7 +146,7 @@
q6s16 = vcombine_s16(d12s16, d13s16);
q7s16 = vcombine_s16(d14s16, d15s16);
- d0s16 = vdup_n_s16(cospi_16_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_16_64);
q2s32 = vmull_s16(d16s16, d0s16);
q3s32 = vmull_s16(d17s16, d0s16);
@@ -158,8 +158,8 @@
q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
- d0s16 = vdup_n_s16(cospi_24_64);
- d1s16 = vdup_n_s16(cospi_8_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_24_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_8_64);
d18s16 = vqrshrn_n_s32(q2s32, 14);
d19s16 = vqrshrn_n_s32(q3s32, 14);
@@ -199,7 +199,7 @@
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
- d16s16 = vdup_n_s16(cospi_16_64);
+ d16s16 = vdup_n_s16((int16_t)cospi_16_64);
q9s32 = vmull_s16(d28s16, d16s16);
q10s32 = vmull_s16(d29s16, d16s16);
@@ -356,29 +356,29 @@
// First transform rows
// stage 1
- q0s16 = vdupq_n_s16(cospi_28_64 * 2);
- q1s16 = vdupq_n_s16(cospi_4_64 * 2);
+ q0s16 = vdupq_n_s16((int16_t)cospi_28_64 * 2);
+ q1s16 = vdupq_n_s16((int16_t)cospi_4_64 * 2);
q4s16 = vqrdmulhq_s16(q9s16, q0s16);
- q0s16 = vdupq_n_s16(-cospi_20_64 * 2);
+ q0s16 = vdupq_n_s16(-(int16_t)cospi_20_64 * 2);
q7s16 = vqrdmulhq_s16(q9s16, q1s16);
- q1s16 = vdupq_n_s16(cospi_12_64 * 2);
+ q1s16 = vdupq_n_s16((int16_t)cospi_12_64 * 2);
q5s16 = vqrdmulhq_s16(q11s16, q0s16);
- q0s16 = vdupq_n_s16(cospi_16_64 * 2);
+ q0s16 = vdupq_n_s16((int16_t)cospi_16_64 * 2);
q6s16 = vqrdmulhq_s16(q11s16, q1s16);
// stage 2 & stage 3 - even half
- q1s16 = vdupq_n_s16(cospi_24_64 * 2);
+ q1s16 = vdupq_n_s16((int16_t)cospi_24_64 * 2);
q9s16 = vqrdmulhq_s16(q8s16, q0s16);
- q0s16 = vdupq_n_s16(cospi_8_64 * 2);
+ q0s16 = vdupq_n_s16((int16_t)cospi_8_64 * 2);
q13s16 = vqrdmulhq_s16(q10s16, q1s16);
@@ -400,7 +400,7 @@
d28s16 = vget_low_s16(q14s16);
d29s16 = vget_high_s16(q14s16);
- d16s16 = vdup_n_s16(cospi_16_64);
+ d16s16 = vdup_n_s16((int16_t)cospi_16_64);
q9s32 = vmull_s16(d28s16, d16s16);
q10s32 = vmull_s16(d29s16, d16s16);
q11s32 = vmull_s16(d28s16, d16s16);
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h
index 68e1339..d0282f5 100644
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -103,7 +103,7 @@
}
// Returns the position in the bit reader in bits.
-static INLINE ptrdiff_t aom_reader_tell(const aom_reader *r) {
+static INLINE uint32_t aom_reader_tell(const aom_reader *r) {
#if CONFIG_ANS
(void)r;
assert(0 && "aom_reader_tell() is unimplemented for ANS");
@@ -116,7 +116,7 @@
}
// Returns the position in the bit reader in 1/8th bits.
-static INLINE ptrdiff_t aom_reader_tell_frac(const aom_reader *r) {
+static INLINE uint32_t aom_reader_tell_frac(const aom_reader *r) {
#if CONFIG_ANS
(void)r;
assert(0 && "aom_reader_tell_frac() is unimplemented for ANS");
diff --git a/aom_dsp/daalaboolreader.c b/aom_dsp/daalaboolreader.c
index f0da8eb..0fc7b14 100644
--- a/aom_dsp/daalaboolreader.c
+++ b/aom_dsp/daalaboolreader.c
@@ -28,10 +28,10 @@
return r->buffer_end;
}
-ptrdiff_t aom_daala_reader_tell(const daala_reader *r) {
+uint32_t aom_daala_reader_tell(const daala_reader *r) {
return od_ec_dec_tell(&r->ec);
}
-ptrdiff_t aom_daala_reader_tell_frac(const daala_reader *r) {
+uint32_t aom_daala_reader_tell_frac(const daala_reader *r) {
return od_ec_dec_tell_frac(&r->ec);
}
diff --git a/aom_dsp/daalaboolreader.h b/aom_dsp/daalaboolreader.h
index 10dc391..9d6cebd 100644
--- a/aom_dsp/daalaboolreader.h
+++ b/aom_dsp/daalaboolreader.h
@@ -36,8 +36,8 @@
int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size);
const uint8_t *aom_daala_reader_find_end(daala_reader *r);
-ptrdiff_t aom_daala_reader_tell(const daala_reader *r);
-ptrdiff_t aom_daala_reader_tell_frac(const daala_reader *r);
+uint32_t aom_daala_reader_tell(const daala_reader *r);
+uint32_t aom_daala_reader_tell_frac(const daala_reader *r);
static INLINE int aom_daala_read(daala_reader *r, int prob) {
if (prob == 128) {
diff --git a/aom_dsp/dkboolreader.h b/aom_dsp/dkboolreader.h
index bc4b02f..add480a 100644
--- a/aom_dsp/dkboolreader.h
+++ b/aom_dsp/dkboolreader.h
@@ -12,6 +12,7 @@
#ifndef AOM_DSP_DKBOOLREADER_H_
#define AOM_DSP_DKBOOLREADER_H_
+#include <assert.h>
#include <stddef.h>
#include <limits.h>
@@ -67,10 +68,11 @@
const uint8_t *aom_dk_reader_find_end(struct aom_dk_reader *r);
-static INLINE ptrdiff_t aom_dk_reader_tell(const struct aom_dk_reader *r) {
- const size_t bits_read = (r->buffer - r->buffer_start) * CHAR_BIT;
+static INLINE uint32_t aom_dk_reader_tell(const struct aom_dk_reader *r) {
+ const uint32_t bits_read = (r->buffer - r->buffer_start) * CHAR_BIT;
const int count =
(r->count < LOTS_OF_BITS) ? r->count : r->count - LOTS_OF_BITS;
+ assert(r->buffer >= r->buffer_start);
return bits_read - (count + CHAR_BIT);
}
@@ -78,7 +80,7 @@
3 => 1/8th bits.*/
#define DK_BITRES (3)
-static INLINE ptrdiff_t aom_dk_reader_tell_frac(const struct aom_dk_reader *r) {
+static INLINE uint32_t aom_dk_reader_tell_frac(const struct aom_dk_reader *r) {
uint32_t num_bits;
uint32_t range;
int l;
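All three boolean-reader variants (bitreader.h, daalaboolreader, dkboolreader) switch tell()/tell_frac() from ptrdiff_t to uint32_t, giving the bit-position API one fixed width on every target rather than the platform's pointer-difference width. A hypothetical helper showing the shape of the computation and where the deliberate narrowing now lives:

    #include <limits.h>
    #include <stdint.h>

    /* Mirrors the core of aom_dk_reader_tell(): the pointer difference is
       ptrdiff_t (64-bit on LP64, 32-bit on ILP32), so the narrowing to the
       API's fixed 32-bit width happens once, here. */
    static uint32_t tell_bits(const uint8_t *cur, const uint8_t *start) {
      return (uint32_t)((cur - start) * CHAR_BIT);
    }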
diff --git a/aom_dsp/fwd_txfm.c b/aom_dsp/fwd_txfm.c
index fadae2b..547919f 100644
--- a/aom_dsp/fwd_txfm.c
+++ b/aom_dsp/fwd_txfm.c
@@ -9,8 +9,9 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./aom_dsp_rtcd.h"
#include "aom_dsp/fwd_txfm.h"
+#include <assert.h>
+#include "./aom_dsp_rtcd.h"
void aom_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
@@ -22,36 +23,37 @@
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[4 * 4];
- const int16_t *in_pass0 = input;
- const tran_low_t *in = NULL;
+ const tran_low_t *in_low = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
- tran_high_t input[4]; // canbe16
+ tran_high_t in_high[4]; // canbe16
tran_high_t step[4]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 4; ++i) {
// Load inputs.
- if (0 == pass) {
- input[0] = in_pass0[0 * stride] * 16;
- input[1] = in_pass0[1 * stride] * 16;
- input[2] = in_pass0[2 * stride] * 16;
- input[3] = in_pass0[3 * stride] * 16;
- if (i == 0 && input[0]) {
- input[0] += 1;
+ if (pass == 0) {
+ in_high[0] = input[0 * stride] * 16;
+ in_high[1] = input[1 * stride] * 16;
+ in_high[2] = input[2 * stride] * 16;
+ in_high[3] = input[3 * stride] * 16;
+ if (i == 0 && in_high[0]) {
+ ++in_high[0];
}
} else {
- input[0] = in[0 * 4];
- input[1] = in[1 * 4];
- input[2] = in[2 * 4];
- input[3] = in[3 * 4];
+ assert(in_low != NULL);
+ in_high[0] = in_low[0 * 4];
+ in_high[1] = in_low[1 * 4];
+ in_high[2] = in_low[2 * 4];
+ in_high[3] = in_low[3 * 4];
+ ++in_low;
}
// Transform.
- step[0] = input[0] + input[3];
- step[1] = input[1] + input[2];
- step[2] = input[1] - input[2];
- step[3] = input[0] - input[3];
+ step[0] = in_high[0] + in_high[3];
+ step[1] = in_high[1] + in_high[2];
+ step[2] = in_high[1] - in_high[2];
+ step[3] = in_high[0] - in_high[3];
temp1 = (step[0] + step[1]) * cospi_16_64;
temp2 = (step[0] - step[1]) * cospi_16_64;
out[0] = (tran_low_t)fdct_round_shift(temp1);
@@ -61,12 +63,11 @@
out[1] = (tran_low_t)fdct_round_shift(temp1);
out[3] = (tran_low_t)fdct_round_shift(temp2);
// Do next column (which is a transposed row in second/horizontal pass)
- in_pass0++;
- in++;
+ ++input;
out += 4;
}
-    // Setup in/out for next pass.
+    // Setup in_low/out for next pass.
- in = intermediate;
+ in_low = intermediate;
out = output;
}
@@ -100,7 +101,6 @@
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
- int i;
for (i = 0; i < 8; i++) {
// stage 1
if (pass == 0) {
@@ -191,56 +191,57 @@
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[256];
- const int16_t *in_pass0 = input;
- const tran_low_t *in = NULL;
+ const tran_low_t *in_low = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16
- tran_high_t input[8]; // canbe16
+ tran_high_t in_high[8]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 16; i++) {
if (0 == pass) {
// Calculate input for the first 8 results.
- input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
- input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
- input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
- input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
- input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
- input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
- input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4;
- input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4;
+ in_high[0] = (input[0 * stride] + input[15 * stride]) * 4;
+ in_high[1] = (input[1 * stride] + input[14 * stride]) * 4;
+ in_high[2] = (input[2 * stride] + input[13 * stride]) * 4;
+ in_high[3] = (input[3 * stride] + input[12 * stride]) * 4;
+ in_high[4] = (input[4 * stride] + input[11 * stride]) * 4;
+ in_high[5] = (input[5 * stride] + input[10 * stride]) * 4;
+ in_high[6] = (input[6 * stride] + input[9 * stride]) * 4;
+ in_high[7] = (input[7 * stride] + input[8 * stride]) * 4;
// Calculate input for the next 8 results.
- step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4;
- step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4;
- step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
- step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
- step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
- step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
- step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
- step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+ step1[0] = (input[7 * stride] - input[8 * stride]) * 4;
+ step1[1] = (input[6 * stride] - input[9 * stride]) * 4;
+ step1[2] = (input[5 * stride] - input[10 * stride]) * 4;
+ step1[3] = (input[4 * stride] - input[11 * stride]) * 4;
+ step1[4] = (input[3 * stride] - input[12 * stride]) * 4;
+ step1[5] = (input[2 * stride] - input[13 * stride]) * 4;
+ step1[6] = (input[1 * stride] - input[14 * stride]) * 4;
+ step1[7] = (input[0 * stride] - input[15 * stride]) * 4;
} else {
// Calculate input for the first 8 results.
- input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
- input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
- input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
- input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
- input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
- input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
- input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2);
- input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2);
+ assert(in_low != NULL);
+ in_high[0] = ((in_low[0 * 16] + 1) >> 2) + ((in_low[15 * 16] + 1) >> 2);
+ in_high[1] = ((in_low[1 * 16] + 1) >> 2) + ((in_low[14 * 16] + 1) >> 2);
+ in_high[2] = ((in_low[2 * 16] + 1) >> 2) + ((in_low[13 * 16] + 1) >> 2);
+ in_high[3] = ((in_low[3 * 16] + 1) >> 2) + ((in_low[12 * 16] + 1) >> 2);
+ in_high[4] = ((in_low[4 * 16] + 1) >> 2) + ((in_low[11 * 16] + 1) >> 2);
+ in_high[5] = ((in_low[5 * 16] + 1) >> 2) + ((in_low[10 * 16] + 1) >> 2);
+ in_high[6] = ((in_low[6 * 16] + 1) >> 2) + ((in_low[9 * 16] + 1) >> 2);
+ in_high[7] = ((in_low[7 * 16] + 1) >> 2) + ((in_low[8 * 16] + 1) >> 2);
// Calculate input for the next 8 results.
- step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2);
- step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2);
- step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
- step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
- step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
- step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
- step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
- step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+ step1[0] = ((in_low[7 * 16] + 1) >> 2) - ((in_low[8 * 16] + 1) >> 2);
+ step1[1] = ((in_low[6 * 16] + 1) >> 2) - ((in_low[9 * 16] + 1) >> 2);
+ step1[2] = ((in_low[5 * 16] + 1) >> 2) - ((in_low[10 * 16] + 1) >> 2);
+ step1[3] = ((in_low[4 * 16] + 1) >> 2) - ((in_low[11 * 16] + 1) >> 2);
+ step1[4] = ((in_low[3 * 16] + 1) >> 2) - ((in_low[12 * 16] + 1) >> 2);
+ step1[5] = ((in_low[2 * 16] + 1) >> 2) - ((in_low[13 * 16] + 1) >> 2);
+ step1[6] = ((in_low[1 * 16] + 1) >> 2) - ((in_low[14 * 16] + 1) >> 2);
+ step1[7] = ((in_low[0 * 16] + 1) >> 2) - ((in_low[15 * 16] + 1) >> 2);
+ in_low++;
}
-      // Work on the first eight values; fdct8(input, even_results);
+      // Work on the first eight values; fdct8(in_high, even_results);
{
@@ -249,14 +250,14 @@
tran_high_t x0, x1, x2, x3; // canbe16
// stage 1
- s0 = input[0] + input[7];
- s1 = input[1] + input[6];
- s2 = input[2] + input[5];
- s3 = input[3] + input[4];
- s4 = input[3] - input[4];
- s5 = input[2] - input[5];
- s6 = input[1] - input[6];
- s7 = input[0] - input[7];
+ s0 = in_high[0] + in_high[7];
+ s1 = in_high[1] + in_high[6];
+ s2 = in_high[2] + in_high[5];
+ s3 = in_high[3] + in_high[4];
+ s4 = in_high[3] - in_high[4];
+ s5 = in_high[2] - in_high[5];
+ s6 = in_high[1] - in_high[6];
+ s7 = in_high[0] - in_high[7];
// fdct4(step, step);
x0 = s0 + s3;
@@ -351,12 +352,11 @@
out[15] = (tran_low_t)fdct_round_shift(temp2);
}
// Do next column (which is a transposed row in second/horizontal pass)
- in++;
- in_pass0++;
+ input++;
out += 16;
}
-    // Setup in/out for next pass.
+    // Setup in_low/out for next pass.
- in = intermediate;
+ in_low = intermediate;
out = output;
}
}
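The fdct rewrites in this file (mirrored in av1/common/av1_fwd_txfm.c below) remove a local array named input that shadowed the function parameter of the same name, and add an assert that the second pass never reads through a NULL in_low. A hypothetical minimal example of the shadowing that was removed:

    #include <stdint.h>

    void fdct_like(const int16_t *input, int stride) {
      int pass;
      for (pass = 0; pass < 2; ++pass) {
        /* Old shape: this local shadowed the parameter, so `input` named
           different objects at different scopes of the same function. */
        int64_t input[4];
        input[0] = 0; /* the array, not the parameter */
        (void)input;
        (void)stride;
      }
    }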
diff --git a/aom_dsp/mips/aom_convolve_msa.h b/aom_dsp/mips/aom_convolve_msa.h
index 4efbcbc..1a0ae4d 100644
--- a/aom_dsp/mips/aom_convolve_msa.h
+++ b/aom_dsp/mips/aom_convolve_msa.h
@@ -17,18 +17,18 @@
extern const uint8_t mc_filt_mask_arr[16 * 3];
-#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \
- filt3) \
- ({ \
- v8i16 tmp0, tmp1; \
- \
- tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
- tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)vec1, (v16i8)filt1); \
- tmp1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \
- tmp1 = __msa_dpadd_s_h(tmp1, (v16i8)vec3, (v16i8)filt3); \
- tmp0 = __msa_adds_s_h(tmp0, tmp1); \
- \
- tmp0; \
+#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \
+ filt3) \
+ ({ \
+ v8i16 tmp_dpadd_0, tmp_dpadd_1; \
+ \
+ tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
+ tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \
+ tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \
+ tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \
+ tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1); \
+ \
+ tmp_dpadd_0; \
})
#define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_h0, \
@@ -115,11 +115,10 @@
stride) \
{ \
v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
\
PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m); \
PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
- ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
+ ST8x4_UB(tmp0_m, tmp1_m, pdst, stride); \
}
#endif /* AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_ */
diff --git a/aom_dsp/mips/inv_txfm_msa.h b/aom_dsp/mips/inv_txfm_msa.h
index ce2065b..122667a 100644
--- a/aom_dsp/mips/inv_txfm_msa.h
+++ b/aom_dsp/mips/inv_txfm_msa.h
@@ -197,18 +197,18 @@
out2, out3) \
{ \
v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \
- v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v4i32 tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd; \
\
ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m); \
ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m); \
DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m, cst0, cst0, cst1, \
- cst1, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
- SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
- PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out1); \
+ cst1, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd); \
+ SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS); \
+ PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out0, out1); \
DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m, cst2, cst2, cst3, \
- cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
- SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
- PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out2, out3); \
+ cst3, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd); \
+ SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS); \
+ PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out2, out3); \
}
/* idct 8x8 macro */
diff --git a/aom_dsp/mips/loopfilter_msa.h b/aom_dsp/mips/loopfilter_msa.h
index c1cabc2..4505942 100644
--- a/aom_dsp/mips/loopfilter_msa.h
+++ b/aom_dsp/mips/loopfilter_msa.h
@@ -123,35 +123,35 @@
p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
}
-#define AOM_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \
- { \
- v16u8 tmp, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
- v16u8 zero_in = { 0 }; \
- \
- tmp = __msa_ori_b(zero_in, 1); \
- p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \
- q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \
- p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \
- q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \
- \
- p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \
- flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \
- p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \
- flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \
- \
- flat_out = (tmp < (v16u8)flat_out); \
- flat_out = __msa_xori_b(flat_out, 0xff); \
- flat_out = flat_out & (mask); \
+#define AOM_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \
+ { \
+ v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
+ v16u8 zero_in = { 0 }; \
+ \
+ tmp_flat4 = __msa_ori_b(zero_in, 1); \
+ p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \
+ q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \
+ p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \
+ q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \
+ \
+ p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \
+ flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \
+ p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \
+ flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \
+ \
+ flat_out = (tmp_flat4 < (v16u8)flat_out); \
+ flat_out = __msa_xori_b(flat_out, 0xff); \
+ flat_out = flat_out & (mask); \
}
#define AOM_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \
q6_in, q7_in, flat_in, flat2_out) \
{ \
- v16u8 tmp, zero_in = { 0 }; \
+ v16u8 tmp_flat5, zero_in = { 0 }; \
v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0; \
v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0; \
\
- tmp = __msa_ori_b(zero_in, 1); \
+ tmp_flat5 = __msa_ori_b(zero_in, 1); \
p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in); \
q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in); \
p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in); \
@@ -169,7 +169,7 @@
p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0); \
flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out); \
\
- flat2_out = (tmp < (v16u8)flat2_out); \
+ flat2_out = (tmp_flat5 < (v16u8)flat2_out); \
flat2_out = __msa_xori_b(flat2_out, 0xff); \
flat2_out = flat2_out & flat_in; \
}
@@ -178,38 +178,38 @@
p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \
q1_filt8_out, q2_filt8_out) \
{ \
- v8u16 tmp0, tmp1, tmp2; \
+ v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2; \
\
- tmp2 = p2_in + p1_in + p0_in; \
- tmp0 = p3_in << 1; \
+ tmp_filt8_2 = p2_in + p1_in + p0_in; \
+ tmp_filt8_0 = p3_in << 1; \
\
- tmp0 = tmp0 + tmp2 + q0_in; \
- tmp1 = tmp0 + p3_in + p2_in; \
- p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in; \
+ tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in; \
+ p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
\
- tmp1 = tmp0 + p1_in + q1_in; \
- p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in; \
+ p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
\
- tmp1 = q2_in + q1_in + q0_in; \
- tmp2 = tmp2 + tmp1; \
- tmp0 = tmp2 + (p0_in); \
- tmp0 = tmp0 + (p3_in); \
- p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp0, 3); \
+ tmp_filt8_1 = q2_in + q1_in + q0_in; \
+ tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1; \
+ tmp_filt8_0 = tmp_filt8_2 + (p0_in); \
+ tmp_filt8_0 = tmp_filt8_0 + (p3_in); \
+ p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3); \
\
- tmp0 = q2_in + q3_in; \
- tmp0 = p0_in + tmp1 + tmp0; \
- tmp1 = q3_in + q3_in; \
- tmp1 = tmp1 + tmp0; \
- q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_0 = q2_in + q3_in; \
+ tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0; \
+ tmp_filt8_1 = q3_in + q3_in; \
+ tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0; \
+ q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
\
- tmp0 = tmp2 + q3_in; \
- tmp1 = tmp0 + q0_in; \
- q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_0 = tmp_filt8_2 + q3_in; \
+ tmp_filt8_1 = tmp_filt8_0 + q0_in; \
+ q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
\
- tmp1 = tmp0 - p2_in; \
- tmp0 = q1_in + q3_in; \
- tmp1 = tmp0 + tmp1; \
- q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_1 = tmp_filt8_0 - p2_in; \
+ tmp_filt8_0 = q1_in + q3_in; \
+ tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1; \
+ q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
}
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
diff --git a/aom_dsp/mips/macros_msa.h b/aom_dsp/mips/macros_msa.h
index 7d0ba4b..48fbcfd 100644
--- a/aom_dsp/mips/macros_msa.h
+++ b/aom_dsp/mips/macros_msa.h
@@ -169,20 +169,20 @@
val_m; \
})
#else // !(__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \
- uint32_t val0_m, val1_m; \
- uint64_t val_m = 0; \
- \
- val0_m = LW(psrc_m1); \
- val1_m = LW(psrc_m1 + 4); \
- \
- val_m = (uint64_t)(val1_m); \
- val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \
- val_m = (uint64_t)(val_m | (uint64_t)val0_m); \
- \
- val_m; \
+#define LD(psrc) \
+ ({ \
+ const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \
+ uint32_t val0_m, val1_m; \
+ uint64_t val_m_combined = 0; \
+ \
+ val0_m = LW(psrc_m1); \
+ val1_m = LW(psrc_m1 + 4); \
+ \
+ val_m_combined = (uint64_t)(val1_m); \
+ val_m_combined = (uint64_t)((val_m_combined << 32) & 0xFFFFFFFF00000000); \
+ val_m_combined = (uint64_t)(val_m_combined | (uint64_t)val0_m); \
+ \
+ val_m_combined; \
})
#endif // (__mips == 64)
@@ -2020,13 +2020,12 @@
pdst, stride) \
{ \
v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
\
tmp0_m = PCKEV_XORI128_UB(in0, in1); \
tmp1_m = PCKEV_XORI128_UB(in2, in3); \
ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
- ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
+ ST8x4_UB(tmp0_m, tmp1_m, pdst, stride); \
}
/* Description : Pack even byte elements and store byte vector in destination
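The MSA macro edits all follow one rule: a local declared inside a macro body gets a name tied to that macro (tmp_flat4, tmp_filt8_0, tmp_dpadd_0, val_m_combined), so expanding one macro inside another cannot shadow a name, and the unused pdst_m copies are dropped outright. A hypothetical two-macro illustration of the naming scheme:

    /* If both macros declared `tmp`, expanding INNER inside OUTER would
       shadow OUTER's copy; per-macro suffixes make that impossible. */
    #define INNER(y) { int tmp_inner = (y) + 1; (void)tmp_inner; }
    #define OUTER(x) { int tmp_outer = (x); INNER(tmp_outer); }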
diff --git a/aom_dsp/x86/inv_txfm_sse2.c b/aom_dsp/x86/inv_txfm_sse2.c
index 61f548a..4735d97 100644
--- a/aom_dsp/x86/inv_txfm_sse2.c
+++ b/aom_dsp/x86/inv_txfm_sse2.c
@@ -2372,7 +2372,6 @@
#define IDCT32_34 \
/* Stage1 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \
const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero); \
\
@@ -2397,7 +2396,6 @@
\
/* Stage2 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \
const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \
\
@@ -2424,7 +2422,6 @@
\
/* Stage3 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \
const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \
\
@@ -2465,7 +2462,6 @@
\
/* Stage4 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \
const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \
\
@@ -3002,6 +2998,7 @@
// Only upper-left 8x8 has non-zero coeff
void aom_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride) {
+ const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -3107,7 +3104,6 @@
col[31] = _mm_sub_epi16(stp1_0, stp1_31);
for (i = 0; i < 4; i++) {
int j;
- const __m128i zero = _mm_setzero_si128();
// Transpose 32x8 block to 8x32 block
array_transpose_8x8(col + i * 8, in);
IDCT32_34
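The inv_txfm_sse2.c change is a pure hoist: each stage of IDCT32_34 re-created the same zero vector, and now the caller creates it once and the macro picks it up from the enclosing scope. A minimal sketch of that shape (hypothetical names, not the real macro):

    #include <emmintrin.h>

    /* The macro now expects a `zero` visible at its expansion site. */
    #define STAGE(in) \
      { const __m128i lo = _mm_unpacklo_epi16((in), zero); (void)lo; }

    static void idct_like(__m128i in0, __m128i in1) {
      const __m128i zero = _mm_setzero_si128(); /* once, not per stage */
      STAGE(in0);
      STAGE(in1);
    }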
diff --git a/aomdec.c b/aomdec.c
index d9f229d..e88c81f 100644
--- a/aomdec.c
+++ b/aomdec.c
@@ -893,7 +893,7 @@
if (single_file) {
if (use_y4m) {
- char buf[Y4M_BUFFER_SIZE] = { 0 };
+ char y4m_buf[Y4M_BUFFER_SIZE] = { 0 };
size_t len = 0;
if (img->fmt == AOM_IMG_FMT_I440 || img->fmt == AOM_IMG_FMT_I44016) {
fprintf(stderr, "Cannot produce y4m output for 440 sampling.\n");
@@ -902,21 +902,22 @@
if (frame_out == 1) {
// Y4M file header
len = y4m_write_file_header(
- buf, sizeof(buf), aom_input_ctx.width, aom_input_ctx.height,
- &aom_input_ctx.framerate, img->fmt, img->bit_depth);
+ y4m_buf, sizeof(y4m_buf), aom_input_ctx.width,
+ aom_input_ctx.height, &aom_input_ctx.framerate, img->fmt,
+ img->bit_depth);
if (do_md5) {
- MD5Update(&md5_ctx, (md5byte *)buf, (unsigned int)len);
+ MD5Update(&md5_ctx, (md5byte *)y4m_buf, (unsigned int)len);
} else {
- fputs(buf, outfile);
+ fputs(y4m_buf, outfile);
}
}
// Y4M frame header
- len = y4m_write_frame_header(buf, sizeof(buf));
+ len = y4m_write_frame_header(y4m_buf, sizeof(y4m_buf));
if (do_md5) {
- MD5Update(&md5_ctx, (md5byte *)buf, (unsigned int)len);
+ MD5Update(&md5_ctx, (md5byte *)y4m_buf, (unsigned int)len);
} else {
- fputs(buf, outfile);
+ fputs(y4m_buf, outfile);
}
} else {
if (frame_out == 1) {
diff --git a/aomenc.c b/aomenc.c
index 63ef753..3f9a87d 100644
--- a/aomenc.c
+++ b/aomenc.c
@@ -1753,13 +1753,11 @@
/* Get the internal reference frame */
if (strcmp(codec->name, "vp8") == 0) {
struct aom_ref_frame ref_enc, ref_dec;
- int width, height;
-
- width = (stream->config.cfg.g_w + 15) & ~15;
- height = (stream->config.cfg.g_h + 15) & ~15;
- aom_img_alloc(&ref_enc.img, AOM_IMG_FMT_I420, width, height, 1);
+ const unsigned int frame_width = (stream->config.cfg.g_w + 15) & ~15;
+ const unsigned int frame_height = (stream->config.cfg.g_h + 15) & ~15;
+ aom_img_alloc(&ref_enc.img, AOM_IMG_FMT_I420, frame_width, frame_height, 1);
enc_img = ref_enc.img;
- aom_img_alloc(&ref_dec.img, AOM_IMG_FMT_I420, width, height, 1);
+ aom_img_alloc(&ref_dec.img, AOM_IMG_FMT_I420, frame_width, frame_height, 1);
dec_img = ref_dec.img;
ref_enc.frame_type = AOM_LAST_FRAME;
@@ -2131,10 +2129,10 @@
} else {
const int64_t input_pos = ftello(input.file);
const int64_t input_pos_lagged = input_pos - lagged_count;
- const int64_t limit = input.length;
+ const int64_t input_limit = input.length;
rate = cx_time ? input_pos_lagged * (int64_t)1000000 / cx_time : 0;
- remaining = limit - input_pos + lagged_count;
+ remaining = input_limit - input_pos + lagged_count;
}
average_rate =
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index d815ca7..fae7d04 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -871,7 +871,6 @@
index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - 1);
if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) {
uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz;
- int i, j;
#ifdef TEST_SUPPLEMENTAL_SUPERFRAME_DATA
uint8_t marker_test = 0xc0;
int mag_test = 2; // 1 - 4
@@ -890,6 +889,7 @@
*x++ = marker;
for (i = 0; i < ctx->pending_frame_count - 1; i++) {
unsigned int this_sz;
+ int j;
assert(ctx->pending_frame_sizes[i] > 0);
this_sz = (unsigned int)ctx->pending_frame_sizes[i] - 1;
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 7b9d21c..7da80f0 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -580,7 +580,7 @@
// Initialize the decoder workers on the first frame.
if (ctx->frame_workers == NULL) {
- const aom_codec_err_t res = init_decoder(ctx);
+ res = init_decoder(ctx);
if (res != AOM_CODEC_OK) return res;
}
@@ -646,7 +646,6 @@
for (i = 0; i < frame_count; ++i) {
const uint8_t *data_start_copy = data_start;
const uint32_t frame_size = frame_sizes[i];
- aom_codec_err_t res;
if (data_start < data ||
frame_size > (uint32_t)(data_end - data_start)) {
set_error_detail(ctx, "Invalid frame size in index");
@@ -662,8 +661,7 @@
} else {
while (data_start < data_end) {
const uint32_t frame_size = (uint32_t)(data_end - data_start);
- const aom_codec_err_t res =
- decode_one(ctx, &data_start, frame_size, user_priv, deadline);
+ res = decode_one(ctx, &data_start, frame_size, user_priv, deadline);
if (res != AOM_CODEC_OK) return res;
// Account for suboptimal termination by the encoder.
diff --git a/av1/common/arm/neon/iht4x4_add_neon.c b/av1/common/arm/neon/iht4x4_add_neon.c
index baa786f..02572d4 100644
--- a/av1/common/arm/neon/iht4x4_add_neon.c
+++ b/av1/common/arm/neon/iht4x4_add_neon.c
@@ -12,18 +12,11 @@
#include <arm_neon.h>
#include <assert.h>
-#include "./av1_rtcd.h"
#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_dsp/txfm_common.h"
#include "av1/common/common.h"
-static int16_t sinpi_1_9 = 0x14a3;
-static int16_t sinpi_2_9 = 0x26c9;
-static int16_t sinpi_3_9 = 0x3441;
-static int16_t sinpi_4_9 = 0x3b6c;
-static int16_t cospi_8_64 = 0x3b21;
-static int16_t cospi_16_64 = 0x2d41;
-static int16_t cospi_24_64 = 0x187e;
-
static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
int32x4_t q8s32, q9s32;
int16x4x2_t d0x2s16, d1x2s16;
@@ -43,18 +36,18 @@
static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
int16x4_t *d2s16) {
- *d0s16 = vdup_n_s16(cospi_8_64);
- *d1s16 = vdup_n_s16(cospi_16_64);
- *d2s16 = vdup_n_s16(cospi_24_64);
+ *d0s16 = vdup_n_s16((int16_t)cospi_8_64);
+ *d1s16 = vdup_n_s16((int16_t)cospi_16_64);
+ *d2s16 = vdup_n_s16((int16_t)cospi_24_64);
return;
}
static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
int16x4_t *d5s16, int16x8_t *q3s16) {
- *d3s16 = vdup_n_s16(sinpi_1_9);
- *d4s16 = vdup_n_s16(sinpi_2_9);
- *q3s16 = vdupq_n_s16(sinpi_3_9);
- *d5s16 = vdup_n_s16(sinpi_4_9);
+ *d3s16 = vdup_n_s16((int16_t)sinpi_1_9);
+ *d4s16 = vdup_n_s16((int16_t)sinpi_2_9);
+ *q3s16 = vdupq_n_s16((int16_t)sinpi_3_9);
+ *d5s16 = vdup_n_s16((int16_t)sinpi_4_9);
return;
}
@@ -121,7 +114,7 @@
q10s32 = vaddq_s32(q10s32, q13s32);
q10s32 = vaddq_s32(q10s32, q8s32);
q11s32 = vsubq_s32(q11s32, q14s32);
- q8s32 = vdupq_n_s32(sinpi_3_9);
+ q8s32 = vdupq_n_s32((int32_t)sinpi_3_9);
q11s32 = vsubq_s32(q11s32, q9s32);
q15s32 = vmulq_s32(q15s32, q8s32);
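Both NEON hybrid-transform files stop carrying private copies of the trigonometric constants and include aom_dsp/txfm_common.h instead. The dropped hex literals are the same values the shared header defines, so behavior is unchanged while the duplication, and the chance of the copies drifting, goes away:

    #include <assert.h>

    /* Spot check of the equivalence (decimal values as in txfm_common.h,
       hex spellings as in the removed locals). */
    static void check_constants(void) {
      assert(0x2d41 == 11585); /* cospi_16_64 */
      assert(0x14a3 == 5283);  /* sinpi_1_9 */
    }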
diff --git a/av1/common/arm/neon/iht8x8_add_neon.c b/av1/common/arm/neon/iht8x8_add_neon.c
index 15deabe..86798cc 100644
--- a/av1/common/arm/neon/iht8x8_add_neon.c
+++ b/av1/common/arm/neon/iht8x8_add_neon.c
@@ -12,26 +12,11 @@
#include <arm_neon.h>
#include <assert.h>
-#include "./av1_rtcd.h"
#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_dsp/txfm_common.h"
#include "av1/common/common.h"
-static int16_t cospi_2_64 = 16305;
-static int16_t cospi_4_64 = 16069;
-static int16_t cospi_6_64 = 15679;
-static int16_t cospi_8_64 = 15137;
-static int16_t cospi_10_64 = 14449;
-static int16_t cospi_12_64 = 13623;
-static int16_t cospi_14_64 = 12665;
-static int16_t cospi_16_64 = 11585;
-static int16_t cospi_18_64 = 10394;
-static int16_t cospi_20_64 = 9102;
-static int16_t cospi_22_64 = 7723;
-static int16_t cospi_24_64 = 6270;
-static int16_t cospi_26_64 = 4756;
-static int16_t cospi_28_64 = 3196;
-static int16_t cospi_30_64 = 1606;
-
static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q11s16,
int16x8_t *q12s16, int16x8_t *q13s16,
@@ -108,10 +93,10 @@
int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
- d0s16 = vdup_n_s16(cospi_28_64);
- d1s16 = vdup_n_s16(cospi_4_64);
- d2s16 = vdup_n_s16(cospi_12_64);
- d3s16 = vdup_n_s16(cospi_20_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_4_64);
+ d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+ d3s16 = vdup_n_s16((int16_t)cospi_20_64);
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
@@ -164,7 +149,7 @@
q6s16 = vcombine_s16(d12s16, d13s16);
q7s16 = vcombine_s16(d14s16, d15s16);
- d0s16 = vdup_n_s16(cospi_16_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_16_64);
q2s32 = vmull_s16(d16s16, d0s16);
q3s32 = vmull_s16(d17s16, d0s16);
@@ -176,8 +161,8 @@
q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
- d0s16 = vdup_n_s16(cospi_24_64);
- d1s16 = vdup_n_s16(cospi_8_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_24_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_8_64);
d18s16 = vqrshrn_n_s32(q2s32, 14);
d19s16 = vqrshrn_n_s32(q3s32, 14);
@@ -217,7 +202,7 @@
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
- d16s16 = vdup_n_s16(cospi_16_64);
+ d16s16 = vdup_n_s16((int16_t)cospi_16_64);
q9s32 = vmull_s16(d28s16, d16s16);
q10s32 = vmull_s16(d29s16, d16s16);
@@ -276,16 +261,16 @@
d30s16 = vget_low_s16(*q15s16);
d31s16 = vget_high_s16(*q15s16);
- d14s16 = vdup_n_s16(cospi_2_64);
- d15s16 = vdup_n_s16(cospi_30_64);
+ d14s16 = vdup_n_s16((int16_t)cospi_2_64);
+ d15s16 = vdup_n_s16((int16_t)cospi_30_64);
q1s32 = vmull_s16(d30s16, d14s16);
q2s32 = vmull_s16(d31s16, d14s16);
q3s32 = vmull_s16(d30s16, d15s16);
q4s32 = vmull_s16(d31s16, d15s16);
- d30s16 = vdup_n_s16(cospi_18_64);
- d31s16 = vdup_n_s16(cospi_14_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_18_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_14_64);
q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
@@ -324,15 +309,15 @@
d7s16 = vqrshrn_n_s32(q4s32, 14);
*q12s16 = vcombine_s16(d24s16, d25s16);
- d0s16 = vdup_n_s16(cospi_10_64);
- d1s16 = vdup_n_s16(cospi_22_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_10_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_22_64);
q4s32 = vmull_s16(d26s16, d0s16);
q5s32 = vmull_s16(d27s16, d0s16);
q2s32 = vmull_s16(d26s16, d1s16);
q6s32 = vmull_s16(d27s16, d1s16);
- d30s16 = vdup_n_s16(cospi_26_64);
- d31s16 = vdup_n_s16(cospi_6_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_26_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_6_64);
q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
@@ -367,8 +352,8 @@
q4s32 = vsubq_s32(q4s32, q0s32);
q5s32 = vsubq_s32(q5s32, q13s32);
- d30s16 = vdup_n_s16(cospi_8_64);
- d31s16 = vdup_n_s16(cospi_24_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_24_64);
d18s16 = vqrshrn_n_s32(q9s32, 14);
d19s16 = vqrshrn_n_s32(q10s32, 14);
@@ -423,7 +408,7 @@
d15s16 = vqrshrn_n_s32(q0s32, 14);
*q14s16 = vcombine_s16(d28s16, d29s16);
- d30s16 = vdup_n_s16(cospi_16_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_16_64);
d22s16 = vget_low_s16(*q11s16);
d23s16 = vget_high_s16(*q11s16);
diff --git a/av1/common/av1_fwd_txfm.c b/av1/common/av1_fwd_txfm.c
index 94bd043..84a3876 100644
--- a/av1/common/av1_fwd_txfm.c
+++ b/av1/common/av1_fwd_txfm.c
@@ -9,8 +9,9 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./av1_rtcd.h"
#include "av1/common/av1_fwd_txfm.h"
+#include <assert.h>
+#include "./av1_rtcd.h"
void av1_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
@@ -22,36 +23,37 @@
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[4 * 4];
- const int16_t *in_pass0 = input;
- const tran_low_t *in = NULL;
+ const tran_low_t *in_low = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
- tran_high_t input[4]; // canbe16
+ tran_high_t in_high[4]; // canbe16
tran_high_t step[4]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 4; ++i) {
// Load inputs.
if (0 == pass) {
- input[0] = in_pass0[0 * stride] * 16;
- input[1] = in_pass0[1 * stride] * 16;
- input[2] = in_pass0[2 * stride] * 16;
- input[3] = in_pass0[3 * stride] * 16;
- if (i == 0 && input[0]) {
- input[0] += 1;
+ in_high[0] = input[0 * stride] * 16;
+ in_high[1] = input[1 * stride] * 16;
+ in_high[2] = input[2 * stride] * 16;
+ in_high[3] = input[3 * stride] * 16;
+ if (i == 0 && in_high[0]) {
+ in_high[0] += 1;
}
} else {
- input[0] = in[0 * 4];
- input[1] = in[1 * 4];
- input[2] = in[2 * 4];
- input[3] = in[3 * 4];
+ assert(in_low != NULL);
+ in_high[0] = in_low[0 * 4];
+ in_high[1] = in_low[1 * 4];
+ in_high[2] = in_low[2 * 4];
+ in_high[3] = in_low[3 * 4];
+ in_low++;
}
// Transform.
- step[0] = input[0] + input[3];
- step[1] = input[1] + input[2];
- step[2] = input[1] - input[2];
- step[3] = input[0] - input[3];
+ step[0] = in_high[0] + in_high[3];
+ step[1] = in_high[1] + in_high[2];
+ step[2] = in_high[1] - in_high[2];
+ step[3] = in_high[0] - in_high[3];
temp1 = (step[0] + step[1]) * cospi_16_64;
temp2 = (step[0] - step[1]) * cospi_16_64;
out[0] = (tran_low_t)fdct_round_shift(temp1);
@@ -61,12 +63,11 @@
out[1] = (tran_low_t)fdct_round_shift(temp1);
out[3] = (tran_low_t)fdct_round_shift(temp2);
// Do next column (which is a transposed row in second/horizontal pass)
- in_pass0++;
- in++;
+ input++;
out += 4;
}
- // Setup in/out for next pass.
- in = intermediate;
+ // Setup in_low/out for next pass.
+ in_low = intermediate;
out = output;
}
@@ -101,7 +102,6 @@
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
- int i;
for (i = 0; i < 8; i++) {
// stage 1
if (pass == 0) {
@@ -193,56 +193,57 @@
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[256];
- const int16_t *in_pass0 = input;
- const tran_low_t *in = NULL;
+ const tran_low_t *in_low = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16
- tran_high_t input[8]; // canbe16
+ tran_high_t in_high[8]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 16; i++) {
if (0 == pass) {
// Calculate input for the first 8 results.
- input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
- input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
- input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
- input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
- input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
- input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
- input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4;
- input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4;
+ in_high[0] = (input[0 * stride] + input[15 * stride]) * 4;
+ in_high[1] = (input[1 * stride] + input[14 * stride]) * 4;
+ in_high[2] = (input[2 * stride] + input[13 * stride]) * 4;
+ in_high[3] = (input[3 * stride] + input[12 * stride]) * 4;
+ in_high[4] = (input[4 * stride] + input[11 * stride]) * 4;
+ in_high[5] = (input[5 * stride] + input[10 * stride]) * 4;
+ in_high[6] = (input[6 * stride] + input[9 * stride]) * 4;
+ in_high[7] = (input[7 * stride] + input[8 * stride]) * 4;
// Calculate input for the next 8 results.
- step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4;
- step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4;
- step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
- step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
- step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
- step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
- step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
- step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+ step1[0] = (input[7 * stride] - input[8 * stride]) * 4;
+ step1[1] = (input[6 * stride] - input[9 * stride]) * 4;
+ step1[2] = (input[5 * stride] - input[10 * stride]) * 4;
+ step1[3] = (input[4 * stride] - input[11 * stride]) * 4;
+ step1[4] = (input[3 * stride] - input[12 * stride]) * 4;
+ step1[5] = (input[2 * stride] - input[13 * stride]) * 4;
+ step1[6] = (input[1 * stride] - input[14 * stride]) * 4;
+ step1[7] = (input[0 * stride] - input[15 * stride]) * 4;
} else {
// Calculate input for the first 8 results.
- input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
- input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
- input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
- input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
- input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
- input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
- input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2);
- input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2);
+ assert(in_low != NULL);
+ in_high[0] = ((in_low[0 * 16] + 1) >> 2) + ((in_low[15 * 16] + 1) >> 2);
+ in_high[1] = ((in_low[1 * 16] + 1) >> 2) + ((in_low[14 * 16] + 1) >> 2);
+ in_high[2] = ((in_low[2 * 16] + 1) >> 2) + ((in_low[13 * 16] + 1) >> 2);
+ in_high[3] = ((in_low[3 * 16] + 1) >> 2) + ((in_low[12 * 16] + 1) >> 2);
+ in_high[4] = ((in_low[4 * 16] + 1) >> 2) + ((in_low[11 * 16] + 1) >> 2);
+ in_high[5] = ((in_low[5 * 16] + 1) >> 2) + ((in_low[10 * 16] + 1) >> 2);
+ in_high[6] = ((in_low[6 * 16] + 1) >> 2) + ((in_low[9 * 16] + 1) >> 2);
+ in_high[7] = ((in_low[7 * 16] + 1) >> 2) + ((in_low[8 * 16] + 1) >> 2);
// Calculate input for the next 8 results.
- step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2);
- step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2);
- step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
- step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
- step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
- step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
- step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
- step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+ step1[0] = ((in_low[7 * 16] + 1) >> 2) - ((in_low[8 * 16] + 1) >> 2);
+ step1[1] = ((in_low[6 * 16] + 1) >> 2) - ((in_low[9 * 16] + 1) >> 2);
+ step1[2] = ((in_low[5 * 16] + 1) >> 2) - ((in_low[10 * 16] + 1) >> 2);
+ step1[3] = ((in_low[4 * 16] + 1) >> 2) - ((in_low[11 * 16] + 1) >> 2);
+ step1[4] = ((in_low[3 * 16] + 1) >> 2) - ((in_low[12 * 16] + 1) >> 2);
+ step1[5] = ((in_low[2 * 16] + 1) >> 2) - ((in_low[13 * 16] + 1) >> 2);
+ step1[6] = ((in_low[1 * 16] + 1) >> 2) - ((in_low[14 * 16] + 1) >> 2);
+ step1[7] = ((in_low[0 * 16] + 1) >> 2) - ((in_low[15 * 16] + 1) >> 2);
+ in_low++;
}
-      // Work on the first eight values; fdct8(input, even_results);
+      // Work on the first eight values; fdct8(in_high, even_results);
{
@@ -251,14 +252,14 @@
tran_high_t x0, x1, x2, x3; // canbe16
// stage 1
- s0 = input[0] + input[7];
- s1 = input[1] + input[6];
- s2 = input[2] + input[5];
- s3 = input[3] + input[4];
- s4 = input[3] - input[4];
- s5 = input[2] - input[5];
- s6 = input[1] - input[6];
- s7 = input[0] - input[7];
+ s0 = in_high[0] + in_high[7];
+ s1 = in_high[1] + in_high[6];
+ s2 = in_high[2] + in_high[5];
+ s3 = in_high[3] + in_high[4];
+ s4 = in_high[3] - in_high[4];
+ s5 = in_high[2] - in_high[5];
+ s6 = in_high[1] - in_high[6];
+ s7 = in_high[0] - in_high[7];
// fdct4(step, step);
x0 = s0 + s3;
@@ -353,12 +354,11 @@
out[15] = (tran_low_t)fdct_round_shift(temp2);
}
// Do next column (which is a transposed row in second/horizontal pass)
- in++;
- in_pass0++;
+ input++;
out += 16;
}
-    // Setup in/out for next pass.
+    // Setup in_low/out for next pass.
- in = intermediate;
+ in_low = intermediate;
out = output;
}
}
diff --git a/av1/common/av1_fwd_txfm1d.c b/av1/common/av1_fwd_txfm1d.c
index 4c695ae..3e9d5ec 100644
--- a/av1/common/av1_fwd_txfm1d.c
+++ b/av1/common/av1_fwd_txfm1d.c
@@ -40,6 +40,7 @@
}
#endif
+// TODO(angiebird): Make 1-d txfm functions static
void av1_fdct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
const int8_t *stage_range) {
const int32_t size = 4;
@@ -1528,3 +1529,798 @@
bf1[31] = -bf0[1];
range_check(stage, input, bf1, size, stage_range[stage]);
}
+
+#if CONFIG_TX64X64
+void av1_fdct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 64;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[64];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf1 = output;
+ bf1[0] = input[0] + input[63];
+ bf1[1] = input[1] + input[62];
+ bf1[2] = input[2] + input[61];
+ bf1[3] = input[3] + input[60];
+ bf1[4] = input[4] + input[59];
+ bf1[5] = input[5] + input[58];
+ bf1[6] = input[6] + input[57];
+ bf1[7] = input[7] + input[56];
+ bf1[8] = input[8] + input[55];
+ bf1[9] = input[9] + input[54];
+ bf1[10] = input[10] + input[53];
+ bf1[11] = input[11] + input[52];
+ bf1[12] = input[12] + input[51];
+ bf1[13] = input[13] + input[50];
+ bf1[14] = input[14] + input[49];
+ bf1[15] = input[15] + input[48];
+ bf1[16] = input[16] + input[47];
+ bf1[17] = input[17] + input[46];
+ bf1[18] = input[18] + input[45];
+ bf1[19] = input[19] + input[44];
+ bf1[20] = input[20] + input[43];
+ bf1[21] = input[21] + input[42];
+ bf1[22] = input[22] + input[41];
+ bf1[23] = input[23] + input[40];
+ bf1[24] = input[24] + input[39];
+ bf1[25] = input[25] + input[38];
+ bf1[26] = input[26] + input[37];
+ bf1[27] = input[27] + input[36];
+ bf1[28] = input[28] + input[35];
+ bf1[29] = input[29] + input[34];
+ bf1[30] = input[30] + input[33];
+ bf1[31] = input[31] + input[32];
+ bf1[32] = -input[32] + input[31];
+ bf1[33] = -input[33] + input[30];
+ bf1[34] = -input[34] + input[29];
+ bf1[35] = -input[35] + input[28];
+ bf1[36] = -input[36] + input[27];
+ bf1[37] = -input[37] + input[26];
+ bf1[38] = -input[38] + input[25];
+ bf1[39] = -input[39] + input[24];
+ bf1[40] = -input[40] + input[23];
+ bf1[41] = -input[41] + input[22];
+ bf1[42] = -input[42] + input[21];
+ bf1[43] = -input[43] + input[20];
+ bf1[44] = -input[44] + input[19];
+ bf1[45] = -input[45] + input[18];
+ bf1[46] = -input[46] + input[17];
+ bf1[47] = -input[47] + input[16];
+ bf1[48] = -input[48] + input[15];
+ bf1[49] = -input[49] + input[14];
+ bf1[50] = -input[50] + input[13];
+ bf1[51] = -input[51] + input[12];
+ bf1[52] = -input[52] + input[11];
+ bf1[53] = -input[53] + input[10];
+ bf1[54] = -input[54] + input[9];
+ bf1[55] = -input[55] + input[8];
+ bf1[56] = -input[56] + input[7];
+ bf1[57] = -input[57] + input[6];
+ bf1[58] = -input[58] + input[5];
+ bf1[59] = -input[59] + input[4];
+ bf1[60] = -input[60] + input[3];
+ bf1[61] = -input[61] + input[2];
+ bf1[62] = -input[62] + input[1];
+ bf1[63] = -input[63] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = -bf0[16] + bf0[15];
+ bf1[17] = -bf0[17] + bf0[14];
+ bf1[18] = -bf0[18] + bf0[13];
+ bf1[19] = -bf0[19] + bf0[12];
+ bf1[20] = -bf0[20] + bf0[11];
+ bf1[21] = -bf0[21] + bf0[10];
+ bf1[22] = -bf0[22] + bf0[9];
+ bf1[23] = -bf0[23] + bf0[8];
+ bf1[24] = -bf0[24] + bf0[7];
+ bf1[25] = -bf0[25] + bf0[6];
+ bf1[26] = -bf0[26] + bf0[5];
+ bf1[27] = -bf0[27] + bf0[4];
+ bf1[28] = -bf0[28] + bf0[3];
+ bf1[29] = -bf0[29] + bf0[2];
+ bf1[30] = -bf0[30] + bf0[1];
+ bf1[31] = -bf0[31] + bf0[0];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = bf0[37];
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit[stage]);
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = bf0[58];
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
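+ // half_btf() is the shared half-butterfly primitive; it is assumed here
+ // (see av1_txfm.h for the actual definition) to compute
+ //   round_shift(w0 * in0 + w1 * in1, cos_bit),
+ // a fixed-point planar rotation, with cospi[j] understood to be
+ // round(cos(j * PI / 128) * (1 << cos_bit)). Under that reading, the
+ // cospi[32] pairs in stage 2 are 45-degree rotations.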
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = -bf0[8] + bf0[7];
+ bf1[9] = -bf0[9] + bf0[6];
+ bf1[10] = -bf0[10] + bf0[5];
+ bf1[11] = -bf0[11] + bf0[4];
+ bf1[12] = -bf0[12] + bf0[3];
+ bf1[13] = -bf0[13] + bf0[2];
+ bf1[14] = -bf0[14] + bf0[1];
+ bf1[15] = -bf0[15] + bf0[0];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[47];
+ bf1[33] = bf0[33] + bf0[46];
+ bf1[34] = bf0[34] + bf0[45];
+ bf1[35] = bf0[35] + bf0[44];
+ bf1[36] = bf0[36] + bf0[43];
+ bf1[37] = bf0[37] + bf0[42];
+ bf1[38] = bf0[38] + bf0[41];
+ bf1[39] = bf0[39] + bf0[40];
+ bf1[40] = -bf0[40] + bf0[39];
+ bf1[41] = -bf0[41] + bf0[38];
+ bf1[42] = -bf0[42] + bf0[37];
+ bf1[43] = -bf0[43] + bf0[36];
+ bf1[44] = -bf0[44] + bf0[35];
+ bf1[45] = -bf0[45] + bf0[34];
+ bf1[46] = -bf0[46] + bf0[33];
+ bf1[47] = -bf0[47] + bf0[32];
+ bf1[48] = -bf0[48] + bf0[63];
+ bf1[49] = -bf0[49] + bf0[62];
+ bf1[50] = -bf0[50] + bf0[61];
+ bf1[51] = -bf0[51] + bf0[60];
+ bf1[52] = -bf0[52] + bf0[59];
+ bf1[53] = -bf0[53] + bf0[58];
+ bf1[54] = -bf0[54] + bf0[57];
+ bf1[55] = -bf0[55] + bf0[56];
+ bf1[56] = bf0[56] + bf0[55];
+ bf1[57] = bf0[57] + bf0[54];
+ bf1[58] = bf0[58] + bf0[53];
+ bf1[59] = bf0[59] + bf0[52];
+ bf1[60] = bf0[60] + bf0[51];
+ bf1[61] = bf0[61] + bf0[50];
+ bf1[62] = bf0[62] + bf0[49];
+ bf1[63] = bf0[63] + bf0[48];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = -bf0[20] + bf0[19];
+ bf1[21] = -bf0[21] + bf0[18];
+ bf1[22] = -bf0[22] + bf0[17];
+ bf1[23] = -bf0[23] + bf0[16];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[28] + bf0[27];
+ bf1[29] = bf0[29] + bf0[26];
+ bf1[30] = bf0[30] + bf0[25];
+ bf1[31] = bf0[31] + bf0[24];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+ bf1[44] = bf0[44];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = bf0[50];
+ bf1[51] = bf0[51];
+ bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit[stage]);
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
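+ // From stage 4 on, the even half (indices 0-31) keeps folding down as a
+ // 32-point transform while the odd half (indices 32-63) begins its
+ // rotation cascade with the (cospi[16], cospi[48]) pairs, a 22.5-degree
+ // rotation under the cospi convention noted above.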
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[39];
+ bf1[33] = bf0[33] + bf0[38];
+ bf1[34] = bf0[34] + bf0[37];
+ bf1[35] = bf0[35] + bf0[36];
+ bf1[36] = -bf0[36] + bf0[35];
+ bf1[37] = -bf0[37] + bf0[34];
+ bf1[38] = -bf0[38] + bf0[33];
+ bf1[39] = -bf0[39] + bf0[32];
+ bf1[40] = -bf0[40] + bf0[47];
+ bf1[41] = -bf0[41] + bf0[46];
+ bf1[42] = -bf0[42] + bf0[45];
+ bf1[43] = -bf0[43] + bf0[44];
+ bf1[44] = bf0[44] + bf0[43];
+ bf1[45] = bf0[45] + bf0[42];
+ bf1[46] = bf0[46] + bf0[41];
+ bf1[47] = bf0[47] + bf0[40];
+ bf1[48] = bf0[48] + bf0[55];
+ bf1[49] = bf0[49] + bf0[54];
+ bf1[50] = bf0[50] + bf0[53];
+ bf1[51] = bf0[51] + bf0[52];
+ bf1[52] = -bf0[52] + bf0[51];
+ bf1[53] = -bf0[53] + bf0[50];
+ bf1[54] = -bf0[54] + bf0[49];
+ bf1[55] = -bf0[55] + bf0[48];
+ bf1[56] = -bf0[56] + bf0[63];
+ bf1[57] = -bf0[57] + bf0[62];
+ bf1[58] = -bf0[58] + bf0[61];
+ bf1[59] = -bf0[59] + bf0[60];
+ bf1[60] = bf0[60] + bf0[59];
+ bf1[61] = bf0[61] + bf0[58];
+ bf1[62] = bf0[62] + bf0[57];
+ bf1[63] = bf0[63] + bf0[56];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = -bf0[18] + bf0[17];
+ bf1[19] = -bf0[19] + bf0[16];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[22] + bf0[21];
+ bf1[23] = bf0[23] + bf0[20];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = -bf0[26] + bf0[25];
+ bf1[27] = -bf0[27] + bf0[24];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[30] + bf0[29];
+ bf1[31] = bf0[31] + bf0[28];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = bf0[41];
+ bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit[stage]);
+ bf1[54] = bf0[54];
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit[stage]);
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[35];
+ bf1[33] = bf0[33] + bf0[34];
+ bf1[34] = -bf0[34] + bf0[33];
+ bf1[35] = -bf0[35] + bf0[32];
+ bf1[36] = -bf0[36] + bf0[39];
+ bf1[37] = -bf0[37] + bf0[38];
+ bf1[38] = bf0[38] + bf0[37];
+ bf1[39] = bf0[39] + bf0[36];
+ bf1[40] = bf0[40] + bf0[43];
+ bf1[41] = bf0[41] + bf0[42];
+ bf1[42] = -bf0[42] + bf0[41];
+ bf1[43] = -bf0[43] + bf0[40];
+ bf1[44] = -bf0[44] + bf0[47];
+ bf1[45] = -bf0[45] + bf0[46];
+ bf1[46] = bf0[46] + bf0[45];
+ bf1[47] = bf0[47] + bf0[44];
+ bf1[48] = bf0[48] + bf0[51];
+ bf1[49] = bf0[49] + bf0[50];
+ bf1[50] = -bf0[50] + bf0[49];
+ bf1[51] = -bf0[51] + bf0[48];
+ bf1[52] = -bf0[52] + bf0[55];
+ bf1[53] = -bf0[53] + bf0[54];
+ bf1[54] = bf0[54] + bf0[53];
+ bf1[55] = bf0[55] + bf0[52];
+ bf1[56] = bf0[56] + bf0[59];
+ bf1[57] = bf0[57] + bf0[58];
+ bf1[58] = -bf0[58] + bf0[57];
+ bf1[59] = -bf0[59] + bf0[56];
+ bf1[60] = -bf0[60] + bf0[63];
+ bf1[61] = -bf0[61] + bf0[62];
+ bf1[62] = bf0[62] + bf0[61];
+ bf1[63] = bf0[63] + bf0[60];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = -bf0[17] + bf0[16];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[19] + bf0[18];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = -bf0[21] + bf0[20];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[23] + bf0[22];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = -bf0[25] + bf0[24];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[27] + bf0[26];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = -bf0[29] + bf0[28];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[31] + bf0[30];
+ bf1[32] = bf0[32];
+ bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+ bf1[43] = bf0[43];
+ bf1[44] = bf0[44];
+ bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit[stage]);
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[52];
+ bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit[stage]);
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit[stage]);
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit[stage]);
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
+ bf1[32] = bf0[32] + bf0[33];
+ bf1[33] = -bf0[33] + bf0[32];
+ bf1[34] = -bf0[34] + bf0[35];
+ bf1[35] = bf0[35] + bf0[34];
+ bf1[36] = bf0[36] + bf0[37];
+ bf1[37] = -bf0[37] + bf0[36];
+ bf1[38] = -bf0[38] + bf0[39];
+ bf1[39] = bf0[39] + bf0[38];
+ bf1[40] = bf0[40] + bf0[41];
+ bf1[41] = -bf0[41] + bf0[40];
+ bf1[42] = -bf0[42] + bf0[43];
+ bf1[43] = bf0[43] + bf0[42];
+ bf1[44] = bf0[44] + bf0[45];
+ bf1[45] = -bf0[45] + bf0[44];
+ bf1[46] = -bf0[46] + bf0[47];
+ bf1[47] = bf0[47] + bf0[46];
+ bf1[48] = bf0[48] + bf0[49];
+ bf1[49] = -bf0[49] + bf0[48];
+ bf1[50] = -bf0[50] + bf0[51];
+ bf1[51] = bf0[51] + bf0[50];
+ bf1[52] = bf0[52] + bf0[53];
+ bf1[53] = -bf0[53] + bf0[52];
+ bf1[54] = -bf0[54] + bf0[55];
+ bf1[55] = bf0[55] + bf0[54];
+ bf1[56] = bf0[56] + bf0[57];
+ bf1[57] = -bf0[57] + bf0[56];
+ bf1[58] = -bf0[58] + bf0[59];
+ bf1[59] = bf0[59] + bf0[58];
+ bf1[60] = bf0[60] + bf0[61];
+ bf1[61] = -bf0[61] + bf0[60];
+ bf1[62] = -bf0[62] + bf0[63];
+ bf1[63] = bf0[63] + bf0[62];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit[stage]);
+ bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit[stage]);
+ bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[32];
+ bf1[2] = bf0[16];
+ bf1[3] = bf0[48];
+ bf1[4] = bf0[8];
+ bf1[5] = bf0[40];
+ bf1[6] = bf0[24];
+ bf1[7] = bf0[56];
+ bf1[8] = bf0[4];
+ bf1[9] = bf0[36];
+ bf1[10] = bf0[20];
+ bf1[11] = bf0[52];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[44];
+ bf1[14] = bf0[28];
+ bf1[15] = bf0[60];
+ bf1[16] = bf0[2];
+ bf1[17] = bf0[34];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[50];
+ bf1[20] = bf0[10];
+ bf1[21] = bf0[42];
+ bf1[22] = bf0[26];
+ bf1[23] = bf0[58];
+ bf1[24] = bf0[6];
+ bf1[25] = bf0[38];
+ bf1[26] = bf0[22];
+ bf1[27] = bf0[54];
+ bf1[28] = bf0[14];
+ bf1[29] = bf0[46];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[62];
+ bf1[32] = bf0[1];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[17];
+ bf1[35] = bf0[49];
+ bf1[36] = bf0[9];
+ bf1[37] = bf0[41];
+ bf1[38] = bf0[25];
+ bf1[39] = bf0[57];
+ bf1[40] = bf0[5];
+ bf1[41] = bf0[37];
+ bf1[42] = bf0[21];
+ bf1[43] = bf0[53];
+ bf1[44] = bf0[13];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[29];
+ bf1[47] = bf0[61];
+ bf1[48] = bf0[3];
+ bf1[49] = bf0[35];
+ bf1[50] = bf0[19];
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[11];
+ bf1[53] = bf0[43];
+ bf1[54] = bf0[27];
+ bf1[55] = bf0[59];
+ bf1[56] = bf0[7];
+ bf1[57] = bf0[39];
+ bf1[58] = bf0[23];
+ bf1[59] = bf0[55];
+ bf1[60] = bf0[15];
+ bf1[61] = bf0[47];
+ bf1[62] = bf0[31];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
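+ // Stage 11 is a pure permutation: bf1[n] = bf0[bitrev6(n)], the 6-bit
+ // bit-reversal (e.g. bf1[1] = bf0[32] since bitrev6(000001b) = 100000b)
+ // that moves each intermediate result to its natural coefficient index.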
+}
+#endif // CONFIG_TX64X64
diff --git a/av1/common/av1_inv_txfm1d.c b/av1/common/av1_inv_txfm1d.c
index 2e9bbcb..40d8403 100644
--- a/av1/common/av1_inv_txfm1d.c
+++ b/av1/common/av1_inv_txfm1d.c
@@ -40,6 +40,7 @@
}
#endif
+// TODO(angiebird): Make 1-d txfm functions static
void av1_idct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
const int8_t *stage_range) {
const int32_t size = 4;
@@ -1535,3 +1536,798 @@
bf1[31] = bf0[0];
range_check(stage, input, bf1, size, stage_range[stage]);
}
+
+#if CONFIG_TX64X64
+void av1_idct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 64;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[64];
+
+ // stage 0
+ range_check(stage, input, input, size, stage_range[stage]);
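+ // Stage 0 performs no arithmetic; range_check() is presumed to be the
+ // debug-only helper defined near the top of this file, asserting that
+ // every value fits within stage_range[stage] bits.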
+
+ // stage 1
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[32];
+ bf1[2] = input[16];
+ bf1[3] = input[48];
+ bf1[4] = input[8];
+ bf1[5] = input[40];
+ bf1[6] = input[24];
+ bf1[7] = input[56];
+ bf1[8] = input[4];
+ bf1[9] = input[36];
+ bf1[10] = input[20];
+ bf1[11] = input[52];
+ bf1[12] = input[12];
+ bf1[13] = input[44];
+ bf1[14] = input[28];
+ bf1[15] = input[60];
+ bf1[16] = input[2];
+ bf1[17] = input[34];
+ bf1[18] = input[18];
+ bf1[19] = input[50];
+ bf1[20] = input[10];
+ bf1[21] = input[42];
+ bf1[22] = input[26];
+ bf1[23] = input[58];
+ bf1[24] = input[6];
+ bf1[25] = input[38];
+ bf1[26] = input[22];
+ bf1[27] = input[54];
+ bf1[28] = input[14];
+ bf1[29] = input[46];
+ bf1[30] = input[30];
+ bf1[31] = input[62];
+ bf1[32] = input[1];
+ bf1[33] = input[33];
+ bf1[34] = input[17];
+ bf1[35] = input[49];
+ bf1[36] = input[9];
+ bf1[37] = input[41];
+ bf1[38] = input[25];
+ bf1[39] = input[57];
+ bf1[40] = input[5];
+ bf1[41] = input[37];
+ bf1[42] = input[21];
+ bf1[43] = input[53];
+ bf1[44] = input[13];
+ bf1[45] = input[45];
+ bf1[46] = input[29];
+ bf1[47] = input[61];
+ bf1[48] = input[3];
+ bf1[49] = input[35];
+ bf1[50] = input[19];
+ bf1[51] = input[51];
+ bf1[52] = input[11];
+ bf1[53] = input[43];
+ bf1[54] = input[27];
+ bf1[55] = input[59];
+ bf1[56] = input[7];
+ bf1[57] = input[39];
+ bf1[58] = input[23];
+ bf1[59] = input[55];
+ bf1[60] = input[15];
+ bf1[61] = input[47];
+ bf1[62] = input[31];
+ bf1[63] = input[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
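+ // Stage 1 of the inverse transform applies the same 6-bit bit-reversal
+ // permutation that ends the forward transform, so the coefficients
+ // re-enter the butterfly network in its internal order.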
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit[stage]);
+ bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit[stage]);
+ bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
+ bf1[32] = bf0[32] + bf0[33];
+ bf1[33] = bf0[32] - bf0[33];
+ bf1[34] = -bf0[34] + bf0[35];
+ bf1[35] = bf0[34] + bf0[35];
+ bf1[36] = bf0[36] + bf0[37];
+ bf1[37] = bf0[36] - bf0[37];
+ bf1[38] = -bf0[38] + bf0[39];
+ bf1[39] = bf0[38] + bf0[39];
+ bf1[40] = bf0[40] + bf0[41];
+ bf1[41] = bf0[40] - bf0[41];
+ bf1[42] = -bf0[42] + bf0[43];
+ bf1[43] = bf0[42] + bf0[43];
+ bf1[44] = bf0[44] + bf0[45];
+ bf1[45] = bf0[44] - bf0[45];
+ bf1[46] = -bf0[46] + bf0[47];
+ bf1[47] = bf0[46] + bf0[47];
+ bf1[48] = bf0[48] + bf0[49];
+ bf1[49] = bf0[48] - bf0[49];
+ bf1[50] = -bf0[50] + bf0[51];
+ bf1[51] = bf0[50] + bf0[51];
+ bf1[52] = bf0[52] + bf0[53];
+ bf1[53] = bf0[52] - bf0[53];
+ bf1[54] = -bf0[54] + bf0[55];
+ bf1[55] = bf0[54] + bf0[55];
+ bf1[56] = bf0[56] + bf0[57];
+ bf1[57] = bf0[56] - bf0[57];
+ bf1[58] = -bf0[58] + bf0[59];
+ bf1[59] = bf0[58] + bf0[59];
+ bf1[60] = bf0[60] + bf0[61];
+ bf1[61] = bf0[60] - bf0[61];
+ bf1[62] = -bf0[62] + bf0[63];
+ bf1[63] = bf0[62] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
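+ // Each inverse stage undoes a forward stage: the half_btf pair for
+ // (bf1[16], bf1[31]) above applies the rotation [[cospi[62], -cospi[2]],
+ // [cospi[2], cospi[62]]], the transpose (and hence, up to round_shift
+ // error, the inverse) of the rotation used on the same pair in the
+ // forward transform.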
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = bf0[16] - bf0[17];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[18] + bf0[19];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = bf0[20] - bf0[21];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[22] + bf0[23];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = bf0[24] - bf0[25];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[26] + bf0[27];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = bf0[28] - bf0[29];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[30] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
+ bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+ bf1[43] = bf0[43];
+ bf1[44] = bf0[44];
+ bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit[stage]);
+ bf1[51] = bf0[51];
+ bf1[52] = bf0[52];
+ bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit[stage]);
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit[stage]);
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit[stage]);
+ bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit[stage]);
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = bf0[8] - bf0[9];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[10] + bf0[11];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = bf0[12] - bf0[13];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[14] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[35];
+ bf1[33] = bf0[33] + bf0[34];
+ bf1[34] = bf0[33] - bf0[34];
+ bf1[35] = bf0[32] - bf0[35];
+ bf1[36] = -bf0[36] + bf0[39];
+ bf1[37] = -bf0[37] + bf0[38];
+ bf1[38] = bf0[37] + bf0[38];
+ bf1[39] = bf0[36] + bf0[39];
+ bf1[40] = bf0[40] + bf0[43];
+ bf1[41] = bf0[41] + bf0[42];
+ bf1[42] = bf0[41] - bf0[42];
+ bf1[43] = bf0[40] - bf0[43];
+ bf1[44] = -bf0[44] + bf0[47];
+ bf1[45] = -bf0[45] + bf0[46];
+ bf1[46] = bf0[45] + bf0[46];
+ bf1[47] = bf0[44] + bf0[47];
+ bf1[48] = bf0[48] + bf0[51];
+ bf1[49] = bf0[49] + bf0[50];
+ bf1[50] = bf0[49] - bf0[50];
+ bf1[51] = bf0[48] - bf0[51];
+ bf1[52] = -bf0[52] + bf0[55];
+ bf1[53] = -bf0[53] + bf0[54];
+ bf1[54] = bf0[53] + bf0[54];
+ bf1[55] = bf0[52] + bf0[55];
+ bf1[56] = bf0[56] + bf0[59];
+ bf1[57] = bf0[57] + bf0[58];
+ bf1[58] = bf0[57] - bf0[58];
+ bf1[59] = bf0[56] - bf0[59];
+ bf1[60] = -bf0[60] + bf0[63];
+ bf1[61] = -bf0[61] + bf0[62];
+ bf1[62] = bf0[61] + bf0[62];
+ bf1[63] = bf0[60] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = bf0[17] - bf0[18];
+ bf1[19] = bf0[16] - bf0[19];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[21] + bf0[22];
+ bf1[23] = bf0[20] + bf0[23];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = bf0[25] - bf0[26];
+ bf1[27] = bf0[24] - bf0[27];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[29] + bf0[30];
+ bf1[31] = bf0[28] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
+ bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
+ bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = bf0[40];
+ bf1[41] = bf0[41];
+ bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit[stage]);
+ bf1[54] = bf0[54];
+ bf1[55] = bf0[55];
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit[stage]);
+ bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit[stage]);
+ bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit[stage]);
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = bf0[9] - bf0[10];
+ bf1[11] = bf0[8] - bf0[11];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[13] + bf0[14];
+ bf1[15] = bf0[12] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[39];
+ bf1[33] = bf0[33] + bf0[38];
+ bf1[34] = bf0[34] + bf0[37];
+ bf1[35] = bf0[35] + bf0[36];
+ bf1[36] = bf0[35] - bf0[36];
+ bf1[37] = bf0[34] - bf0[37];
+ bf1[38] = bf0[33] - bf0[38];
+ bf1[39] = bf0[32] - bf0[39];
+ bf1[40] = -bf0[40] + bf0[47];
+ bf1[41] = -bf0[41] + bf0[46];
+ bf1[42] = -bf0[42] + bf0[45];
+ bf1[43] = -bf0[43] + bf0[44];
+ bf1[44] = bf0[43] + bf0[44];
+ bf1[45] = bf0[42] + bf0[45];
+ bf1[46] = bf0[41] + bf0[46];
+ bf1[47] = bf0[40] + bf0[47];
+ bf1[48] = bf0[48] + bf0[55];
+ bf1[49] = bf0[49] + bf0[54];
+ bf1[50] = bf0[50] + bf0[53];
+ bf1[51] = bf0[51] + bf0[52];
+ bf1[52] = bf0[51] - bf0[52];
+ bf1[53] = bf0[50] - bf0[53];
+ bf1[54] = bf0[49] - bf0[54];
+ bf1[55] = bf0[48] - bf0[55];
+ bf1[56] = -bf0[56] + bf0[63];
+ bf1[57] = -bf0[57] + bf0[62];
+ bf1[58] = -bf0[58] + bf0[61];
+ bf1[59] = -bf0[59] + bf0[60];
+ bf1[60] = bf0[59] + bf0[60];
+ bf1[61] = bf0[58] + bf0[61];
+ bf1[62] = bf0[57] + bf0[62];
+ bf1[63] = bf0[56] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = bf0[19] - bf0[20];
+ bf1[21] = bf0[18] - bf0[21];
+ bf1[22] = bf0[17] - bf0[22];
+ bf1[23] = bf0[16] - bf0[23];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[27] + bf0[28];
+ bf1[29] = bf0[26] + bf0[29];
+ bf1[30] = bf0[25] + bf0[30];
+ bf1[31] = bf0[24] + bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
+ bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
+ bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
+ bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+ bf1[44] = bf0[44];
+ bf1[45] = bf0[45];
+ bf1[46] = bf0[46];
+ bf1[47] = bf0[47];
+ bf1[48] = bf0[48];
+ bf1[49] = bf0[49];
+ bf1[50] = bf0[50];
+ bf1[51] = bf0[51];
+ bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit[stage]);
+ bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit[stage]);
+ bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit[stage]);
+ bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit[stage]);
+ bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit[stage]);
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = bf0[7] - bf0[8];
+ bf1[9] = bf0[6] - bf0[9];
+ bf1[10] = bf0[5] - bf0[10];
+ bf1[11] = bf0[4] - bf0[11];
+ bf1[12] = bf0[3] - bf0[12];
+ bf1[13] = bf0[2] - bf0[13];
+ bf1[14] = bf0[1] - bf0[14];
+ bf1[15] = bf0[0] - bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ bf1[32] = bf0[32] + bf0[47];
+ bf1[33] = bf0[33] + bf0[46];
+ bf1[34] = bf0[34] + bf0[45];
+ bf1[35] = bf0[35] + bf0[44];
+ bf1[36] = bf0[36] + bf0[43];
+ bf1[37] = bf0[37] + bf0[42];
+ bf1[38] = bf0[38] + bf0[41];
+ bf1[39] = bf0[39] + bf0[40];
+ bf1[40] = bf0[39] - bf0[40];
+ bf1[41] = bf0[38] - bf0[41];
+ bf1[42] = bf0[37] - bf0[42];
+ bf1[43] = bf0[36] - bf0[43];
+ bf1[44] = bf0[35] - bf0[44];
+ bf1[45] = bf0[34] - bf0[45];
+ bf1[46] = bf0[33] - bf0[46];
+ bf1[47] = bf0[32] - bf0[47];
+ bf1[48] = -bf0[48] + bf0[63];
+ bf1[49] = -bf0[49] + bf0[62];
+ bf1[50] = -bf0[50] + bf0[61];
+ bf1[51] = -bf0[51] + bf0[60];
+ bf1[52] = -bf0[52] + bf0[59];
+ bf1[53] = -bf0[53] + bf0[58];
+ bf1[54] = -bf0[54] + bf0[57];
+ bf1[55] = -bf0[55] + bf0[56];
+ bf1[56] = bf0[55] + bf0[56];
+ bf1[57] = bf0[54] + bf0[57];
+ bf1[58] = bf0[53] + bf0[58];
+ bf1[59] = bf0[52] + bf0[59];
+ bf1[60] = bf0[51] + bf0[60];
+ bf1[61] = bf0[50] + bf0[61];
+ bf1[62] = bf0[49] + bf0[62];
+ bf1[63] = bf0[48] + bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = bf0[15] - bf0[16];
+ bf1[17] = bf0[14] - bf0[17];
+ bf1[18] = bf0[13] - bf0[18];
+ bf1[19] = bf0[12] - bf0[19];
+ bf1[20] = bf0[11] - bf0[20];
+ bf1[21] = bf0[10] - bf0[21];
+ bf1[22] = bf0[9] - bf0[22];
+ bf1[23] = bf0[8] - bf0[23];
+ bf1[24] = bf0[7] - bf0[24];
+ bf1[25] = bf0[6] - bf0[25];
+ bf1[26] = bf0[5] - bf0[26];
+ bf1[27] = bf0[4] - bf0[27];
+ bf1[28] = bf0[3] - bf0[28];
+ bf1[29] = bf0[2] - bf0[29];
+ bf1[30] = bf0[1] - bf0[30];
+ bf1[31] = bf0[0] - bf0[31];
+ bf1[32] = bf0[32];
+ bf1[33] = bf0[33];
+ bf1[34] = bf0[34];
+ bf1[35] = bf0[35];
+ bf1[36] = bf0[36];
+ bf1[37] = bf0[37];
+ bf1[38] = bf0[38];
+ bf1[39] = bf0[39];
+ bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+ bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+ bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+ bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+ bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+ bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+ bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+ bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[56] = bf0[56];
+ bf1[57] = bf0[57];
+ bf1[58] = bf0[58];
+ bf1[59] = bf0[59];
+ bf1[60] = bf0[60];
+ bf1[61] = bf0[61];
+ bf1[62] = bf0[62];
+ bf1[63] = bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[63];
+ bf1[1] = bf0[1] + bf0[62];
+ bf1[2] = bf0[2] + bf0[61];
+ bf1[3] = bf0[3] + bf0[60];
+ bf1[4] = bf0[4] + bf0[59];
+ bf1[5] = bf0[5] + bf0[58];
+ bf1[6] = bf0[6] + bf0[57];
+ bf1[7] = bf0[7] + bf0[56];
+ bf1[8] = bf0[8] + bf0[55];
+ bf1[9] = bf0[9] + bf0[54];
+ bf1[10] = bf0[10] + bf0[53];
+ bf1[11] = bf0[11] + bf0[52];
+ bf1[12] = bf0[12] + bf0[51];
+ bf1[13] = bf0[13] + bf0[50];
+ bf1[14] = bf0[14] + bf0[49];
+ bf1[15] = bf0[15] + bf0[48];
+ bf1[16] = bf0[16] + bf0[47];
+ bf1[17] = bf0[17] + bf0[46];
+ bf1[18] = bf0[18] + bf0[45];
+ bf1[19] = bf0[19] + bf0[44];
+ bf1[20] = bf0[20] + bf0[43];
+ bf1[21] = bf0[21] + bf0[42];
+ bf1[22] = bf0[22] + bf0[41];
+ bf1[23] = bf0[23] + bf0[40];
+ bf1[24] = bf0[24] + bf0[39];
+ bf1[25] = bf0[25] + bf0[38];
+ bf1[26] = bf0[26] + bf0[37];
+ bf1[27] = bf0[27] + bf0[36];
+ bf1[28] = bf0[28] + bf0[35];
+ bf1[29] = bf0[29] + bf0[34];
+ bf1[30] = bf0[30] + bf0[33];
+ bf1[31] = bf0[31] + bf0[32];
+ bf1[32] = bf0[31] - bf0[32];
+ bf1[33] = bf0[30] - bf0[33];
+ bf1[34] = bf0[29] - bf0[34];
+ bf1[35] = bf0[28] - bf0[35];
+ bf1[36] = bf0[27] - bf0[36];
+ bf1[37] = bf0[26] - bf0[37];
+ bf1[38] = bf0[25] - bf0[38];
+ bf1[39] = bf0[24] - bf0[39];
+ bf1[40] = bf0[23] - bf0[40];
+ bf1[41] = bf0[22] - bf0[41];
+ bf1[42] = bf0[21] - bf0[42];
+ bf1[43] = bf0[20] - bf0[43];
+ bf1[44] = bf0[19] - bf0[44];
+ bf1[45] = bf0[18] - bf0[45];
+ bf1[46] = bf0[17] - bf0[46];
+ bf1[47] = bf0[16] - bf0[47];
+ bf1[48] = bf0[15] - bf0[48];
+ bf1[49] = bf0[14] - bf0[49];
+ bf1[50] = bf0[13] - bf0[50];
+ bf1[51] = bf0[12] - bf0[51];
+ bf1[52] = bf0[11] - bf0[52];
+ bf1[53] = bf0[10] - bf0[53];
+ bf1[54] = bf0[9] - bf0[54];
+ bf1[55] = bf0[8] - bf0[55];
+ bf1[56] = bf0[7] - bf0[56];
+ bf1[57] = bf0[6] - bf0[57];
+ bf1[58] = bf0[5] - bf0[58];
+ bf1[59] = bf0[4] - bf0[59];
+ bf1[60] = bf0[3] - bf0[60];
+ bf1[61] = bf0[2] - bf0[61];
+ bf1[62] = bf0[1] - bf0[62];
+ bf1[63] = bf0[0] - bf0[63];
+ range_check(stage, input, bf1, size, stage_range[stage]);
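+ // The final stage recombines mirrored pairs (bf1[i] = bf0[i] + bf0[63 - i]
+ // and bf1[63 - i] = bf0[i] - bf0[63 - i]), producing the 64 reconstructed
+ // samples directly in output order, so no trailing permutation is needed.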
+}
+#endif // CONFIG_TX64X64
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index b16f512..cc990ea 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -357,7 +357,7 @@
FRAME_CONTEXT *fc;
/* pointers to reference frames */
- RefBuffer *block_refs[2];
+ const RefBuffer *block_refs[2];
/* pointer to current frame */
const YV12_BUFFER_CONFIG *cur_buf;
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 3368ea2..23e0409 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1489,7 +1489,6 @@
#if CONFIG_SUPERTX
for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
- int j;
for (j = 1; j < TX_SIZES; ++j) {
fc->supertx_prob[i][j] = av1_mode_mv_merge_probs(
pre_fc->supertx_prob[i][j], counts->supertx[i][j]);
@@ -1583,7 +1582,6 @@
}
for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
if (use_intra_ext_tx_for_txsize[s][i]) {
- int j;
for (j = 0; j < INTRA_MODES; ++j)
aom_tree_merge_probs(
av1_ext_tx_intra_tree[s], pre_fc->intra_ext_tx_prob[s][i][j],
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
index f45f3db..c8022f2 100644
--- a/av1/common/loopfilter.c
+++ b/av1/common/loopfilter.c
@@ -889,42 +889,44 @@
break;
default:
for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
- const int shift_y = shift_32_y[idx_32];
- const int shift_uv = shift_32_uv[idx_32];
+ const int shift_y_32 = shift_32_y[idx_32];
+ const int shift_uv_32 = shift_32_uv[idx_32];
const int mi_32_col_offset = ((idx_32 & 1) << 2);
const int mi_32_row_offset = ((idx_32 >> 1) << 2);
if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
continue;
switch (mip[0]->mbmi.sb_type) {
case BLOCK_32X32:
- build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
break;
- case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ case BLOCK_32X16:
+ build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
#if CONFIG_SUPERTX
if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
if (mi_32_row_offset + 2 >= max_rows) continue;
mip2 = mip + mode_info_stride * 2;
- build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
+ build_masks(lfi_n, mip2[0], shift_y_32 + 16, shift_uv_32 + 4, lfm);
break;
- case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ case BLOCK_16X32:
+ build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
#if CONFIG_SUPERTX
if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
if (mi_32_col_offset + 2 >= max_cols) continue;
mip2 = mip + 2;
- build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
+ build_masks(lfi_n, mip2[0], shift_y_32 + 2, shift_uv_32 + 1, lfm);
break;
default:
#if CONFIG_SUPERTX
if (mip[0]->mbmi.tx_size == TX_32X32) {
- build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
break;
}
#endif
for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
- const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
- const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
+ const int shift_y_32_16 = shift_y_32 + shift_16_y[idx_16];
+ const int shift_uv_32_16 = shift_uv_32 + shift_16_uv[idx_16];
const int mi_16_col_offset =
mi_32_col_offset + ((idx_16 & 1) << 1);
const int mi_16_row_offset =
@@ -935,16 +937,18 @@
switch (mip[0]->mbmi.sb_type) {
case BLOCK_16X16:
- build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+ lfm);
break;
case BLOCK_16X8:
#if CONFIG_SUPERTX
if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
- build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+ lfm);
if (mi_16_row_offset + 1 >= max_rows) continue;
mip2 = mip + mode_info_stride;
- build_y_mask(lfi_n, mip2[0], shift_y + 8,
+ build_y_mask(lfi_n, mip2[0], shift_y_32_16 + 8,
#if CONFIG_SUPERTX
0,
#endif
@@ -954,29 +958,31 @@
#if CONFIG_SUPERTX
if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
- build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+ lfm);
if (mi_16_col_offset + 1 >= max_cols) continue;
mip2 = mip + 1;
- build_y_mask(lfi_n, mip2[0], shift_y + 1,
+ build_y_mask(lfi_n, mip2[0], shift_y_32_16 + 1,
#if CONFIG_SUPERTX
0,
#endif
lfm);
break;
default: {
- const int shift_y =
- shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0];
+ const int shift_y_32_16_8_zero = shift_y_32_16 + shift_8_y[0];
#if CONFIG_SUPERTX
if (mip[0]->mbmi.tx_size == TX_16X16) {
- build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ build_masks(lfi_n, mip[0], shift_y_32_16_8_zero,
+ shift_uv_32_16, lfm);
break;
}
#endif
- build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ build_masks(lfi_n, mip[0], shift_y_32_16_8_zero,
+ shift_uv_32_16, lfm);
mip += offset[0];
for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
- const int shift_y = shift_32_y[idx_32] +
- shift_16_y[idx_16] + shift_8_y[idx_8];
+ const int shift_y_32_16_8 =
+ shift_y_32_16 + shift_8_y[idx_8];
const int mi_8_col_offset =
mi_16_col_offset + ((idx_8 & 1));
const int mi_8_row_offset =
@@ -985,7 +991,7 @@
if (mi_8_col_offset >= max_cols ||
mi_8_row_offset >= max_rows)
continue;
- build_y_mask(lfi_n, mip[0], shift_y,
+ build_y_mask(lfi_n, mip[0], shift_y_32_16_8,
#if CONFIG_SUPERTX
supertx_enabled(&mip[0]->mbmi),
#endif
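
Note: the renames in the hunks above remove nested shadowing, where each loop level re-declared shift_y and shift_uv and silently hid the outer value. A minimal sketch of the accumulation the distinct names make explicit (helper name is illustrative only):

static int shift_y_for_8x8(int idx_32, int idx_16, int idx_8,
                           const int *shift_32_y, const int *shift_16_y,
                           const int *shift_8_y) {
  const int shift_y_32 = shift_32_y[idx_32];                // 32x32 level
  const int shift_y_32_16 = shift_y_32 + shift_16_y[idx_16];  // + 16x16 level
  return shift_y_32_16 + shift_8_y[idx_8];                  // + 8x8 level
}
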
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index 4fdeefe..2344bc1 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -763,8 +763,8 @@
*near_mv = mvlist[1];
}
-void av1_append_sub8x8_mvs_for_idx(AV1_COMMON *cm, MACROBLOCKD *xd, int block,
- int ref, int mi_row, int mi_col,
+void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int block, int ref, int mi_row, int mi_col,
#if CONFIG_REF_MV
CANDIDATE_MV *ref_mv_stack,
uint8_t *ref_mv_count,
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
index b5e7094..25ebbfd 100644
--- a/av1/common/mvref_common.h
+++ b/av1/common/mvref_common.h
@@ -465,8 +465,8 @@
void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
int_mv *near_mv);
-void av1_append_sub8x8_mvs_for_idx(AV1_COMMON *cm, MACROBLOCKD *xd, int block,
- int ref, int mi_row, int mi_col,
+void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int block, int ref, int mi_row, int mi_col,
#if CONFIG_REF_MV
CANDIDATE_MV *ref_mv_stack,
uint8_t *ref_mv_count,
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
index 8927f26..6b0a3d5 100644
--- a/av1/common/pred_common.h
+++ b/av1/common/pred_common.h
@@ -20,7 +20,7 @@
extern "C" {
#endif
-static INLINE int get_segment_id(const AV1_COMMON *cm,
+static INLINE int get_segment_id(const AV1_COMMON *const cm,
const uint8_t *segment_ids, BLOCK_SIZE bsize,
int mi_row, int mi_col) {
const int mi_offset = mi_row * cm->mi_cols + mi_col;
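
Note: for reference, a sketch of how a lookup like the one started above typically completes: take the minimum segment id over the mode-info units the block covers, clamped at the frame edge. The bound of 8 segments and the exact clamping are assumptions, not a copy of the real function body.

#include <stdint.h>

#define MIN_TOY(a, b) ((a) < (b) ? (a) : (b))

static int get_segment_id_sketch(const uint8_t *segment_ids, int mi_cols,
                                 int mi_rows, int mi_row, int mi_col, int bw,
                                 int bh) {
  const int xmis = MIN_TOY(mi_cols - mi_col, bw);  // clamp at right edge
  const int ymis = MIN_TOY(mi_rows - mi_row, bh);  // clamp at bottom edge
  int x, y, segment_id = 8;                        // assumed MAX_SEGMENTS == 8
  for (y = 0; y < ymis; ++y)
    for (x = 0; x < xmis; ++x)
      segment_id = MIN_TOY(
          segment_id, segment_ids[(mi_row + y) * mi_cols + (mi_col + x)]);
  return segment_id;
}
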
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index ae57a43..c0fc494 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -1170,7 +1170,7 @@
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction (bmc) is stored in
// xd->plane[].dst.buf
-void av1_build_obmc_inter_prediction(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *above[MAX_MB_PLANE],
int above_stride[MAX_MB_PLANE],
@@ -1281,7 +1281,7 @@
}
#endif // CONFIG_EXT_INTER
-void av1_build_prediction_by_above_preds(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *tmp_buf[MAX_MB_PLANE],
int tmp_width[MAX_MB_PLANE],
@@ -1319,8 +1319,8 @@
pd->subsampling_y);
}
for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
- MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+ const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+ const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
xd->block_refs[ref] = ref_buf;
if ((!av1_is_valid_scale(&ref_buf->sf)))
@@ -1378,7 +1378,7 @@
xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
}
-void av1_build_prediction_by_left_preds(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *tmp_buf[MAX_MB_PLANE],
int tmp_width[MAX_MB_PLANE],
@@ -1416,8 +1416,8 @@
pd->subsampling_y);
}
for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
- MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+ const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+ const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
xd->block_refs[ref] = ref_buf;
if ((!av1_is_valid_scale(&ref_buf->sf)))
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 4f86354..bfa7e95 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -517,19 +517,19 @@
#if CONFIG_MOTION_VAR
const uint8_t *av1_get_obmc_mask(int length);
-void av1_build_obmc_inter_prediction(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *above[MAX_MB_PLANE],
int above_stride[MAX_MB_PLANE],
uint8_t *left[MAX_MB_PLANE],
int left_stride[MAX_MB_PLANE]);
-void av1_build_prediction_by_above_preds(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *tmp_buf[MAX_MB_PLANE],
int tmp_width[MAX_MB_PLANE],
int tmp_height[MAX_MB_PLANE],
int tmp_stride[MAX_MB_PLANE]);
-void av1_build_prediction_by_left_preds(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *tmp_buf[MAX_MB_PLANE],
int tmp_width[MAX_MB_PLANE],
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index d6bd87d..483b0b2 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -233,6 +233,14 @@
const int w = AOMMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1);
const int step = 1 << txsz;
+ // TODO(bshacklett, huisu): Currently the RD loop traverses 4X8 blocks in
+ // inverted N order while in the bitstream the subblocks are stored in Z
+ // order. This discrepancy makes this function incorrect when considering 4X8
+ // blocks in the RD loop, so we disable the extended right edge for these
+ // blocks. The correct solution is to change the bitstream to store these
+ // blocks in inverted N order, and then update this function appropriately.
+ if (bsize == BLOCK_4X8 && y == 1) return 0;
+
if (!right_available) {
return 0;
} else {
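
Note: one way to picture the order mismatch the TODO above describes, for a generic 2x2 arrangement of sub-blocks at raster positions 0 1 / 2 3. Z order walks rows first, while a column-first (inverted N) walk visits the lower-left block before the upper-right one, so whether the block above-right of sub-block y == 1 has been coded yet differs between the two walks; that is exactly what the extended right edge depends on. The index lists below are an illustration of this reading, not code from the patch:

static const int z_order[4] = { 0, 1, 2, 3 };           // bitstream order
static const int inverted_n_order[4] = { 0, 2, 1, 3 };  // RD-loop order
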
@@ -702,7 +710,7 @@
const uint8_t *above,
const uint8_t *left, int mode) {
int k, r, c;
- int pred[33][65];
+ int preds[33][65];
int mean, ipred;
const TX_SIZE tx_size =
(bs == 32) ? TX_32X32
@@ -721,20 +729,20 @@
}
mean = (mean + bs) / (2 * bs);
- for (r = 0; r < bs; ++r) pred[r + 1][0] = (int)left[r] - mean;
+ for (r = 0; r < bs; ++r) preds[r + 1][0] = (int)left[r] - mean;
- for (c = 0; c < 2 * bs + 1; ++c) pred[0][c] = (int)above[c - 1] - mean;
+ for (c = 0; c < 2 * bs + 1; ++c) preds[0][c] = (int)above[c - 1] - mean;
for (r = 1; r < bs + 1; ++r)
for (c = 1; c < 2 * bs + 1 - r; ++c) {
- ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
- c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
- pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
+ ipred = c0 * preds[r - 1][c] + c1 * preds[r][c - 1] +
+ c2 * preds[r - 1][c - 1] + c3 * preds[r - 1][c + 1];
+ preds[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
}
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c) {
- ipred = pred[r + 1][c + 1] + mean;
+ ipred = preds[r + 1][c + 1] + mean;
dst[c] = clip_pixel(ipred);
}
dst += stride;
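
Note: the renamed preds[][] grid above is filled by a recursive 4-tap filter over the causal neighborhood. A self-contained sketch of that inner recursion, assuming the weights c0..c3 and precision from the patch are passed in:

#include <stdlib.h>

// Signed rounding shift: round half away from zero, matching the intent of
// ROUND_POWER_OF_TWO_SIGNED in the patch.
static int round_shift_signed(int v, int n) {
  const int r = (abs(v) + (1 << (n - 1))) >> n;
  return v < 0 ? -r : r;
}

static void filter_intra_grid(int preds[33][65], int bs, int c0, int c1,
                              int c2, int c3, int prec_bits) {
  int r, c;
  for (r = 1; r < bs + 1; ++r)
    for (c = 1; c < 2 * bs + 1 - r; ++c) {
      // Each cell is predicted from its above, left, above-left and
      // above-right neighbors, already in mean-removed form.
      const int p = c0 * preds[r - 1][c] + c1 * preds[r][c - 1] +
                    c2 * preds[r - 1][c - 1] + c3 * preds[r - 1][c + 1];
      preds[r][c] = round_shift_signed(p, prec_bits);
    }
}
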
@@ -997,7 +1005,7 @@
const uint16_t *left, int mode,
int bd) {
int k, r, c;
- int pred[33][65];
+ int preds[33][65];
int mean, ipred;
const TX_SIZE tx_size =
(bs == 32) ? TX_32X32
@@ -1016,20 +1024,20 @@
}
mean = (mean + bs) / (2 * bs);
- for (r = 0; r < bs; ++r) pred[r + 1][0] = (int)left[r] - mean;
+ for (r = 0; r < bs; ++r) preds[r + 1][0] = (int)left[r] - mean;
- for (c = 0; c < 2 * bs + 1; ++c) pred[0][c] = (int)above[c - 1] - mean;
+ for (c = 0; c < 2 * bs + 1; ++c) preds[0][c] = (int)above[c - 1] - mean;
for (r = 1; r < bs + 1; ++r)
for (c = 1; c < 2 * bs + 1 - r; ++c) {
- ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
- c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
- pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
+ ipred = c0 * preds[r - 1][c] + c1 * preds[r][c - 1] +
+ c2 * preds[r - 1][c - 1] + c3 * preds[r - 1][c + 1];
+ preds[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
}
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c) {
- ipred = pred[r + 1][c + 1] + mean;
+ ipred = preds[r + 1][c + 1] + mean;
dst[c] = clip_pixel_highbd(ipred, bd);
}
dst += stride;
@@ -1188,8 +1196,6 @@
}
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
- EXT_INTRA_MODE ext_intra_mode =
- ext_intra_mode_info->ext_intra_mode[plane != 0];
need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
}
@@ -1202,7 +1208,6 @@
assert(n_bottomleft_px >= 0);
if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
- int i;
const int val = (n_left_px == 0) ? base + 1 : base - 1;
for (i = 0; i < bs; ++i) {
aom_memset16(dst, val, bs);
@@ -1351,8 +1356,6 @@
}
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
- EXT_INTRA_MODE ext_intra_mode =
- ext_intra_mode_info->ext_intra_mode[plane != 0];
need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
}
@@ -1373,7 +1376,6 @@
assert(n_bottomleft_px >= 0);
if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
- int i;
const int val = (n_left_px == 0) ? 129 : 127;
for (i = 0; i < bs; ++i) {
memset(dst, val, bs);
diff --git a/av1/common/scan.c b/av1/common/scan.c
index 3be4ed2..919fe8c 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -3799,7 +3799,7 @@
};
#endif // CONFIG_EXT_TX
-const scan_order av1_default_scan_orders[TX_SIZES] = {
+const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = {
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16, default_scan_16x16_neighbors },
@@ -3807,7 +3807,7 @@
};
#if CONFIG_EXT_TX
-const scan_order av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{
// TX_4X4
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -3894,7 +3894,7 @@
}
};
-const scan_order av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
+const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{
// TX_4X4
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -4135,7 +4135,7 @@
#else // CONFIG_EXT_TX
-const scan_order av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ // TX_4X4
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
diff --git a/av1/common/scan.h b/av1/common/scan.h
index d3032aa..c183ba9 100644
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@@ -28,10 +28,10 @@
const int16_t *scan;
const int16_t *iscan;
const int16_t *neighbors;
-} scan_order;
+} SCAN_ORDER;
-extern const scan_order av1_default_scan_orders[TX_SIZES];
-extern const scan_order av1_intra_scan_orders[TX_SIZES][TX_TYPES];
+extern const SCAN_ORDER av1_default_scan_orders[TX_SIZES];
+extern const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES];
static INLINE int get_coef_context(const int16_t *neighbors,
const uint8_t *token_cache, int c) {
@@ -40,21 +40,21 @@
1;
}
-static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size,
+static INLINE const SCAN_ORDER *get_intra_scan(TX_SIZE tx_size,
TX_TYPE tx_type) {
return &av1_intra_scan_orders[tx_size][tx_type];
}
#if CONFIG_EXT_TX
-extern const scan_order av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
+extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
-static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size,
+static INLINE const SCAN_ORDER *get_inter_scan(TX_SIZE tx_size,
TX_TYPE tx_type) {
return &av1_inter_scan_orders[tx_size][tx_type];
}
#endif // CONFIG_EXT_TX
-static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type,
+static INLINE const SCAN_ORDER *get_scan(TX_SIZE tx_size, TX_TYPE tx_type,
int is_inter) {
#if CONFIG_EXT_TX
return is_inter ? &av1_inter_scan_orders[tx_size][tx_type]
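
Note: a hypothetical caller of the renamed SCAN_ORDER type, to show the intended use of the accessors above; the 64 bound assumes an 8x8 transform:

static void visit_coeffs_in_scan_order(void) {
  const SCAN_ORDER *const so = get_scan(TX_8X8, DCT_DCT, /*is_inter=*/1);
  int c;
  for (c = 0; c < 64; ++c) {
    const int pos = so->scan[c];  // c-th coefficient in scan order -> raster
    (void)pos;                    // visit the coefficient at `pos` here
  }
}
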
diff --git a/av1/common/x86/av1_inv_txfm_sse2.c b/av1/common/x86/av1_inv_txfm_sse2.c
index 4fe709e..365c124 100644
--- a/av1/common/x86/av1_inv_txfm_sse2.c
+++ b/av1/common/x86/av1_inv_txfm_sse2.c
@@ -2388,7 +2388,6 @@
#define IDCT32_34 \
/* Stage1 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \
const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero); \
\
@@ -2413,7 +2412,6 @@
\
/* Stage2 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \
const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \
\
@@ -2440,7 +2438,6 @@
\
/* Stage3 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \
const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \
\
@@ -2481,7 +2478,6 @@
\
/* Stage4 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \
const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \
\
@@ -3018,6 +3014,7 @@
// Only upper-left 8x8 has non-zero coeff
void av1_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
int stride) {
+ const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -3123,7 +3120,6 @@
col[31] = _mm_sub_epi16(stp1_0, stp1_31);
for (i = 0; i < 4; i++) {
int j;
- const __m128i zero = _mm_setzero_si128();
// Transpose 32x8 block to 8x32 block
array_transpose_8x8(col + i * 8, in);
IDCT32_34
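
Note: this file's change hoists the zero vector out of the IDCT32_34 macro into function scope, so it is materialized once per call rather than once per stage. A minimal sketch of the pattern, not the transform itself:

#include <emmintrin.h>
#include <stdint.h>

static void widen_row(const int16_t *in, int32_t *out) {
  const __m128i zero = _mm_setzero_si128();  // created once, reused below
  const __m128i v = _mm_loadu_si128((const __m128i *)in);
  // Interleaving with zero widens eight 16-bit lanes to 32-bit lanes, as the
  // unpacklo/unpackhi pairs in IDCT32_34 do for each stage input.
  _mm_storeu_si128((__m128i *)(out + 0), _mm_unpacklo_epi16(v, zero));
  _mm_storeu_si128((__m128i *)(out + 4), _mm_unpackhi_epi16(v, zero));
}
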
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 81a4692..6744572 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -296,9 +296,9 @@
if (!mbmi->skip) {
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
- const scan_order *sc = get_scan(tx_size, tx_type, 0);
- const int eob = av1_decode_block_tokens(xd, plane, sc, col, row, tx_size,
- tx_type, r, mbmi->segment_id);
+ const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type, 0);
+ const int eob = av1_decode_block_tokens(
+ xd, plane, scan_order, col, row, tx_size, tx_type, r, mbmi->segment_id);
inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
eob);
}
@@ -330,7 +330,7 @@
if (tx_size == plane_tx_size) {
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, plane_tx_size);
- const scan_order *sc = get_scan(plane_tx_size, tx_type, 1);
+ const SCAN_ORDER *sc = get_scan(plane_tx_size, tx_type, 1);
const int eob =
av1_decode_block_tokens(xd, plane, sc, blk_col, blk_row, plane_tx_size,
tx_type, r, mbmi->segment_id);
@@ -373,9 +373,9 @@
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
int block_idx = (row << 1) + col;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
- const scan_order *sc = get_scan(tx_size, tx_type, 1);
- const int eob = av1_decode_block_tokens(xd, plane, sc, col, row, tx_size,
- tx_type, r, segment_id);
+ const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type, 1);
+ const int eob = av1_decode_block_tokens(xd, plane, scan_order, col, row,
+ tx_size, tx_type, r, segment_id);
inverse_transform_block(xd, plane, tx_type, tx_size,
&pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
@@ -418,12 +418,9 @@
// passing bsize from decode_partition().
xd->mi[0]->mbmi.sb_type = bsize;
for (y = 0; y < y_mis; ++y)
- for (x = !y; x < x_mis; ++x) {
- xd->mi[y * cm->mi_stride + x] = xd->mi[0];
- }
+ for (x = !y; x < x_mis; ++x) xd->mi[y * cm->mi_stride + x] = xd->mi[0];
set_plane_n4(xd, bw, bh, bwl, bhl);
-
set_skip_context(xd, mi_row, mi_col);
#if CONFIG_VAR_TX
@@ -1311,8 +1308,8 @@
const BLOCK_SIZE plane_bsize =
get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd);
const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
- int bw = num_4x4_blocks_wide_txsize_lookup[max_tx_size];
- int bh = num_4x4_blocks_high_txsize_lookup[max_tx_size];
+ const int bw_var_tx = num_4x4_blocks_wide_txsize_lookup[max_tx_size];
+ const int bh_var_tx = num_4x4_blocks_high_txsize_lookup[max_tx_size];
const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
int block = 0;
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -1336,8 +1333,8 @@
plane, row, col, tx_size);
} else {
#endif
- for (row = 0; row < num_4x4_h; row += bh) {
- for (col = 0; col < num_4x4_w; col += bw) {
+ for (row = 0; row < num_4x4_h; row += bh_var_tx) {
+ for (col = 0; col < num_4x4_w; col += bw_var_tx) {
decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block, row,
col, max_tx_size, &eobtotal);
block += step;
@@ -3481,16 +3478,13 @@
setup_segmentation(cm, rb);
- {
- int i;
- for (i = 0; i < MAX_SEGMENTS; ++i) {
- const int qindex = cm->seg.enabled
- ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
- : cm->base_qindex;
- xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
- cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
- xd->qindex[i] = qindex;
- }
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ const int qindex = cm->seg.enabled
+ ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
+ : cm->base_qindex;
+ xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
+ cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+ xd->qindex[i] = qindex;
}
setup_segmentation_dequant(cm);
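
Note: the loop above marks a segment lossless only when its effective qindex is zero and no dc/ac delta-q offsets are active. The same predicate, factored into a standalone sketch:

static int is_lossless_segment(int qindex, int y_dc_delta_q, int uv_dc_delta_q,
                               int uv_ac_delta_q) {
  return qindex == 0 && y_dc_delta_q == 0 && uv_dc_delta_q == 0 &&
         uv_ac_delta_q == 0;
}
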
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index e19b3e3..66056c0 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1512,7 +1512,6 @@
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
} else {
- int ref;
int_mv ref_mv[2];
ref_mv[0] = nearestmv[0];
ref_mv[1] = nearestmv[1];
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index fa235b6..1bd4c0d 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -367,7 +367,7 @@
#endif // CONFIG_PALETTE
int av1_decode_block_tokens(MACROBLOCKD *const xd, int plane,
- const scan_order *sc, int x, int y, TX_SIZE tx_size,
+ const SCAN_ORDER *sc, int x, int y, TX_SIZE tx_size,
TX_TYPE tx_type,
#if CONFIG_ANS
struct AnsDecoder *const r,
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
index dc96cf5..9c08ff9 100644
--- a/av1/decoder/detokenize.h
+++ b/av1/decoder/detokenize.h
@@ -27,7 +27,7 @@
#endif // CONFIG_PALETTE
int av1_decode_block_tokens(MACROBLOCKD *const xd, int plane,
- const scan_order *sc, int x, int y, TX_SIZE tx_size,
+ const SCAN_ORDER *sc, int x, int y, TX_SIZE tx_size,
TX_TYPE tx_type,
#if CONFIG_ANS
struct AnsDecoder *const r,
diff --git a/av1/encoder/aq_complexity.c b/av1/encoder/aq_complexity.c
index 3c9c92f..5c4a5e3 100644
--- a/av1/encoder/aq_complexity.c
+++ b/av1/encoder/aq_complexity.c
@@ -111,9 +111,9 @@
// Select a segment for the current block.
// The choice of segment for a block depends on the ratio of the projected
// bits for the block vs a target average and its spatial complexity.
-void av1_caq_select_segment(AV1_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
+void av1_caq_select_segment(const AV1_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
int mi_row, int mi_col, int projected_rate) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int xmis = AOMMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
diff --git a/av1/encoder/aq_complexity.h b/av1/encoder/aq_complexity.h
index 1d966ac..af525b3 100644
--- a/av1/encoder/aq_complexity.h
+++ b/av1/encoder/aq_complexity.h
@@ -22,7 +22,7 @@
struct macroblock;
// Select a segment for the current Block.
-void av1_caq_select_segment(struct AV1_COMP *cpi, struct macroblock *,
+void av1_caq_select_segment(const struct AV1_COMP *cpi, struct macroblock *,
BLOCK_SIZE bs, int mi_row, int mi_col,
int projected_rate);
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index e0e4b88..bcf11a7 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -209,7 +209,7 @@
// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
-void av1_cyclic_refresh_update_segment(AV1_COMP *const cpi,
+void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi,
MB_MODE_INFO *const mbmi, int mi_row,
int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip) {
diff --git a/av1/encoder/aq_cyclicrefresh.h b/av1/encoder/aq_cyclicrefresh.h
index cdc9815..459ab80 100644
--- a/av1/encoder/aq_cyclicrefresh.h
+++ b/av1/encoder/aq_cyclicrefresh.h
@@ -49,7 +49,7 @@
// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
-void av1_cyclic_refresh_update_segment(struct AV1_COMP *const cpi,
+void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi,
MB_MODE_INFO *const mbmi, int mi_row,
int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip);
diff --git a/av1/encoder/aq_variance.c b/av1/encoder/aq_variance.c
index 1f5554e..01528ec 100644
--- a/av1/encoder/aq_variance.c
+++ b/av1/encoder/aq_variance.c
@@ -141,7 +141,7 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
-static unsigned int block_variance(AV1_COMP *cpi, MACROBLOCK *x,
+static unsigned int block_variance(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bs) {
MACROBLOCKD *xd = &x->e_mbd;
unsigned int var, sse;
@@ -189,14 +189,14 @@
}
}
-double av1_log_block_var(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
+double av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
unsigned int var = block_variance(cpi, x, bs);
aom_clear_system_state();
return log(var + 1.0);
}
#define DEFAULT_E_MIDPOINT 10.0
-int av1_block_energy(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
+int av1_block_energy(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
double energy;
double energy_midpoint;
aom_clear_system_state();
diff --git a/av1/encoder/aq_variance.h b/av1/encoder/aq_variance.h
index 4900aa7..05725c5 100644
--- a/av1/encoder/aq_variance.h
+++ b/av1/encoder/aq_variance.h
@@ -21,8 +21,8 @@
unsigned int av1_vaq_segment_id(int energy);
void av1_vaq_frame_setup(AV1_COMP *cpi);
-int av1_block_energy(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
-double av1_log_block_var(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+int av1_block_energy(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+double av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
#ifdef __cplusplus
} // extern "C"
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index f1e8828..948c4f5 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -664,91 +664,92 @@
#endif
while (p < stop && p->token != EOSB_TOKEN) {
- const int t = p->token;
+ const int token = p->token;
+ aom_tree_index index = 0;
#if !CONFIG_ANS
- const struct av1_token *const a = &av1_coef_encodings[t];
- int v = a->value;
- int n = a->len;
+ const struct av1_token *const coef_encoding = &av1_coef_encodings[token];
+ int coef_value = coef_encoding->value;
+ int coef_length = coef_encoding->len;
#endif // !CONFIG_ANS
#if CONFIG_AOM_HIGHBITDEPTH
- const av1_extra_bit *b;
- if (bit_depth == AOM_BITS_12)
- b = &av1_extra_bits_high12[t];
- else if (bit_depth == AOM_BITS_10)
- b = &av1_extra_bits_high10[t];
- else
- b = &av1_extra_bits[t];
+ const av1_extra_bit *const extra_bits_av1 =
+ (bit_depth == AOM_BITS_12)
+ ? &av1_extra_bits_high12[token]
+ : (bit_depth == AOM_BITS_10) ? &av1_extra_bits_high10[token]
+ : &av1_extra_bits[token];
#else
- const av1_extra_bit *const b = &av1_extra_bits[t];
+ const av1_extra_bit *const extra_bits_av1 = &av1_extra_bits[token];
(void)bit_depth;
#endif // CONFIG_AOM_HIGHBITDEPTH
#if CONFIG_ANS
/* skip one or two nodes */
- if (!p->skip_eob_node) aom_write(w, t != EOB_TOKEN, p->context_tree[0]);
+ if (!p->skip_eob_node) aom_write(w, token != EOB_TOKEN, p->context_tree[0]);
- if (t != EOB_TOKEN) {
- aom_write(w, t != ZERO_TOKEN, p->context_tree[1]);
+ if (token != EOB_TOKEN) {
+ aom_write(w, token != ZERO_TOKEN, p->context_tree[1]);
- if (t != ZERO_TOKEN) {
- aom_write_symbol(w, t - ONE_TOKEN, *p->token_cdf,
+ if (token != ZERO_TOKEN) {
+ aom_write_symbol(w, token - ONE_TOKEN, *p->token_cdf,
CATEGORY6_TOKEN - ONE_TOKEN + 1);
}
}
#else
/* skip one or two nodes */
if (p->skip_eob_node)
- n -= p->skip_eob_node;
+ coef_length -= p->skip_eob_node;
else
- aom_write(w, t != EOB_TOKEN, p->context_tree[0]);
+ aom_write(w, token != EOB_TOKEN, p->context_tree[0]);
- if (t != EOB_TOKEN) {
- aom_write(w, t != ZERO_TOKEN, p->context_tree[1]);
+ if (token != EOB_TOKEN) {
+ aom_write(w, token != ZERO_TOKEN, p->context_tree[1]);
- if (t != ZERO_TOKEN) {
- aom_write(w, t != ONE_TOKEN, p->context_tree[2]);
+ if (token != ZERO_TOKEN) {
+ aom_write(w, token != ONE_TOKEN, p->context_tree[2]);
- if (t != ONE_TOKEN) {
- int len = UNCONSTRAINED_NODES - p->skip_eob_node;
+ if (token != ONE_TOKEN) {
+ const int unconstrained_len = UNCONSTRAINED_NODES - p->skip_eob_node;
aom_write_tree(w, av1_coef_con_tree,
- av1_pareto8_full[p->context_tree[PIVOT_NODE] - 1], v,
- n - len, 0);
+ av1_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
+ coef_value, coef_length - unconstrained_len, 0);
}
}
}
#endif // CONFIG_ANS
- if (b->base_val) {
- const int e = p->extra, l = b->len;
- int skip_bits = (b->base_val == CAT6_MIN_VAL)
+ if (extra_bits_av1->base_val) {
+ const int extra_bits = p->extra;
+ const int extra_bits_av1_length = extra_bits_av1->len;
+ int skip_bits = (extra_bits_av1->base_val == CAT6_MIN_VAL)
? TX_SIZES - 1 - txsize_sqr_up_map[tx]
: 0;
- if (l) {
- const unsigned char *pb = b->prob;
- int v = e >> 1;
- int n = l; /* number of bits in v, assumed nonzero */
- int i = 0;
+ if (extra_bits_av1_length) {
+ const unsigned char *pb = extra_bits_av1->prob;
+ const int value = extra_bits >> 1;
+ int num_bits = extra_bits_av1_length; // number of bits in value
+ assert(num_bits > 0);
+ index = 0;
do {
- const int bb = (v >> --n) & 1;
+ const int bb = (value >> --num_bits) & 1;
if (skip_bits) {
- skip_bits--;
+ --skip_bits;
assert(!bb);
} else {
- aom_write(w, bb, pb[i >> 1]);
+ aom_write(w, bb, pb[index >> 1]);
}
- i = b->tree[i + bb];
- } while (n);
+ index = extra_bits_av1->tree[index + bb];
+ } while (num_bits);
}
- aom_write_bit(w, e & 1);
+ aom_write_bit(w, extra_bits & 1);
}
++p;
#if CONFIG_VAR_TX
++count;
- if (t == EOB_TOKEN || count == seg_eob) break;
+ if (token == EOB_TOKEN || count == seg_eob) break;
#endif
}
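
Note: the extra-bits loop rewritten above emits the bits of `value` MSB-first, silently skipping leading positions that are provably zero for CAT6 tokens at smaller transform sizes. A standalone sketch, with `emit_bit` as a hypothetical stand-in for aom_write() (probabilities omitted for brevity):

#include <assert.h>

static void write_extra_bits(int value, int num_bits, int skip_bits,
                             void (*emit_bit)(int)) {
  while (num_bits) {
    const int bit = (value >> --num_bits) & 1;  // MSB first
    if (skip_bits) {
      --skip_bits;       // these positions carry no information...
      assert(bit == 0);  // ...so nothing is written for them
    } else {
      emit_bit(bit);
    }
  }
}
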
@@ -1701,9 +1702,9 @@
#endif
const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
- int bw = num_4x4_blocks_wide_lookup[txb_size];
int block = 0;
const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+ bw = num_4x4_blocks_wide_lookup[txb_size];
for (row = 0; row < num_4x4_h; row += bw) {
for (col = 0; col < num_4x4_w; col += bw) {
pack_txb_tokens(w, tok, tok_end, xd, mbmi, plane, plane_bsize,
@@ -1715,8 +1716,8 @@
TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
: m->mbmi.tx_size;
BLOCK_SIZE txb_size = txsize_to_bsize[tx];
- int bw = num_4x4_blocks_wide_lookup[txb_size];
- int bh = num_4x4_blocks_high_lookup[txb_size];
+ bw = num_4x4_blocks_wide_lookup[txb_size];
+ bh = num_4x4_blocks_high_lookup[txb_size];
for (row = 0; row < num_4x4_h; row += bh)
for (col = 0; col < num_4x4_w; col += bw)
@@ -2096,7 +2097,6 @@
for (t = 0; t < entropy_nodes_update; ++t) {
aom_prob newp = new_coef_probs[i][j][k][l][t];
aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
- const aom_prob upd = DIFF_UPDATE_PROB;
int s;
int u = 0;
if (t == PIVOT_NODE)
@@ -2300,7 +2300,6 @@
for (t = 0; t < entropy_nodes_update; ++t) {
aom_prob newp = new_coef_probs[i][j][k][l][t];
aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
- const aom_prob upd = DIFF_UPDATE_PROB;
int s;
int u = 0;
@@ -2423,8 +2422,6 @@
#if CONFIG_ENTROPY
if (cm->do_subframe_update &&
cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- unsigned int eob_counts_copy[PLANE_TYPES][REF_TYPES][COEF_BANDS]
- [COEFF_CONTEXTS];
av1_coeff_count coef_counts_copy[PLANE_TYPES];
av1_copy(eob_counts_copy, cpi->common.counts.eob_branch[tx_size]);
av1_copy(coef_counts_copy, cpi->td.rd_counts.coef_counts[tx_size]);
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index a4dcba2..310325e 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -26,7 +26,7 @@
unsigned int sse;
int sum;
unsigned int var;
-} diff;
+} DIFF;
typedef struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 855b140..63b71a5 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -1498,7 +1498,6 @@
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
- int i;
for (i = 0; i < 8; i++) {
// stage 1
s0 = (input[0 * stride] + input[7 * stride]) * 4;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 41a71bb..004ad68 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -60,17 +60,18 @@
#define IF_HBD(...)
#endif // CONFIG_AOM_HIGHBITDEPTH
-static void encode_superblock(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
- RUN_TYPE dry_run, int mi_row, int mi_col,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int *rate);
+static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
+ TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx, int *rate);
#if CONFIG_SUPERTX
static int check_intra_b(PICK_MODE_CONTEXT *ctx);
-static int check_intra_sb(AV1_COMP *cpi, const TileInfo *const tile, int mi_row,
- int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree);
-static void predict_superblock(AV1_COMP *cpi, ThreadData *td,
+static int check_intra_sb(const AV1_COMP *cpi, const TileInfo *const tile,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ PC_TREE *pc_tree);
+static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_EXT_INTER
int mi_row_ori, int mi_col_ori,
#endif // CONFIG_EXT_INTER
@@ -78,17 +79,17 @@
BLOCK_SIZE bsize_pred, int b_sub8x8, int block);
static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
PC_TREE *pc_tree);
-static void predict_sb_complex(AV1_COMP *cpi, ThreadData *td,
+static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int mi_row,
int mi_col, int mi_row_ori, int mi_col_ori,
RUN_TYPE dry_run, BLOCK_SIZE bsize,
BLOCK_SIZE top_bsize, uint8_t *dst_buf[3],
int dst_stride[3], PC_TREE *pc_tree);
-static void update_state_sb_supertx(AV1_COMP *cpi, ThreadData *td,
+static void update_state_sb_supertx(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int mi_row,
int mi_col, BLOCK_SIZE bsize,
RUN_TYPE dry_run, PC_TREE *pc_tree);
-static void rd_supertx_sb(AV1_COMP *cpi, ThreadData *td,
+static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *tmp_rate, int64_t *tmp_dist,
TX_TYPE *best_tx, PC_TREE *pc_tree);
@@ -176,7 +177,7 @@
};
#endif // CONFIG_AOM_HIGHBITDEPTH
-unsigned int av1_get_sby_perpixel_variance(AV1_COMP *cpi,
+unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs) {
unsigned int sse;
@@ -186,7 +187,7 @@
}
#if CONFIG_AOM_HIGHBITDEPTH
-unsigned int av1_high_get_sby_perpixel_variance(AV1_COMP *cpi,
+unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs, int bd) {
unsigned int var, sse;
@@ -212,7 +213,7 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
-static unsigned int get_sby_perpixel_diff_variance(AV1_COMP *cpi,
+static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
const struct buf_2d *ref,
int mi_row, int mi_col,
BLOCK_SIZE bs) {
@@ -243,21 +244,21 @@
// Lighter version of set_offsets that only sets the mode info
// pointers.
-static void set_mode_info_offsets(AV1_COMP *const cpi, MACROBLOCK *const x,
- MACROBLOCKD *const xd, int mi_row,
- int mi_col) {
- AV1_COMMON *const cm = &cpi->common;
+static void set_mode_info_offsets(const AV1_COMP *const cpi,
+ MACROBLOCK *const x, MACROBLOCKD *const xd,
+ int mi_row, int mi_col) {
+ const AV1_COMMON *const cm = &cpi->common;
const int idx_str = xd->mi_stride * mi_row + mi_col;
xd->mi = cm->mi_grid_visible + idx_str;
xd->mi[0] = cm->mi + idx_str;
x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
}
-static void set_offsets_without_segment_id(AV1_COMP *cpi,
+static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
const TileInfo *const tile,
MACROBLOCK *const x, int mi_row,
int mi_col, BLOCK_SIZE bsize) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -299,10 +300,10 @@
xd->tile = *tile;
}
-static void set_offsets(AV1_COMP *cpi, const TileInfo *const tile,
+static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
MACROBLOCK *const x, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi;
const struct segmentation *const seg = &cm->seg;
@@ -332,11 +333,11 @@
}
#if CONFIG_SUPERTX
-static void set_offsets_supertx(AV1_COMP *cpi, ThreadData *td,
+static void set_offsets_supertx(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int mi_row,
int mi_col, BLOCK_SIZE bsize) {
MACROBLOCK *const x = &td->mb;
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -349,7 +350,7 @@
cm->mi_cols);
}
-static void set_offsets_extend(AV1_COMP *cpi, ThreadData *td,
+static void set_offsets_extend(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int mi_row_pred,
int mi_col_pred, int mi_row_ori, int mi_col_ori,
BLOCK_SIZE bsize_pred) {
@@ -357,7 +358,7 @@
// (mi_row_ori, mi_col_ori, bsize_ori): region for mv
// (mi_row_pred, mi_col_pred, bsize_pred): region to predict
MACROBLOCK *const x = &td->mb;
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const int mi_width = num_8x8_blocks_wide_lookup[bsize_pred];
const int mi_height = num_8x8_blocks_high_lookup[bsize_pred];
@@ -973,7 +974,7 @@
}
#if CONFIG_DUAL_FILTER
-static void reset_intmv_filter_type(AV1_COMMON *cm, MACROBLOCKD *xd,
+static void reset_intmv_filter_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
MB_MODE_INFO *mbmi) {
int dir;
for (dir = 0; dir < 2; ++dir) {
@@ -1011,11 +1012,11 @@
}
#endif // CONFIG_GLOBAL_MOTION
-static void update_state(AV1_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- RUN_TYPE dry_run) {
+static void update_state(const AV1_COMP *const cpi, ThreadData *td,
+ PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, RUN_TYPE dry_run) {
int i, x_idx, y;
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
RD_COUNTS *const rdc = &td->rd_counts;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1132,18 +1133,22 @@
if (dry_run) return;
#if CONFIG_INTERNAL_STATS
- if (frame_is_intra_only(cm)) {
- static const int kf_mode_index[] = {
- THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/,
- THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/,
- THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
- THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
- THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/,
- };
- ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
- } else {
- // Note how often each mode chosen as best
- ++cpi->mode_chosen_counts[ctx->best_mode_index];
+ {
+ unsigned int *const mode_chosen_counts =
+ (unsigned int *)cpi->mode_chosen_counts; // Cast const away.
+ if (frame_is_intra_only(cm)) {
+ static const int kf_mode_index[] = {
+ THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/,
+ THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/,
+ THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
+ THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
+ THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/,
+ };
+ ++mode_chosen_counts[kf_mode_index[mbmi->mode]];
+ } else {
+ // Note how often each mode is chosen as best.
+ ++mode_chosen_counts[ctx->best_mode_index];
+ }
}
#endif
if (!frame_is_intra_only(cm)) {
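
Note: the hunk above keeps cpi const at the interface and casts const away only for the one stats counter it must bump. A toy sketch of the pattern with a hypothetical struct; this is well-defined only because the underlying object is not itself const, the qualifier lives on the access path:

typedef struct { unsigned int mode_chosen_counts[64]; } EncoderToy;

static void bump_mode_count(const EncoderToy *cpi, int mode_index) {
  unsigned int *const counts =
      (unsigned int *)cpi->mode_chosen_counts;  // cast const away, as above
  ++counts[mode_index];
}
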
@@ -1172,8 +1177,8 @@
#if CONFIG_DUAL_FILTER
update_filter_type_count(td->counts, xd, mbmi);
#else
- const int ctx = av1_get_pred_context_switchable_interp(xd);
- ++td->counts->switchable_interp[ctx][mbmi->interp_filter];
+ const int switchable_ctx = av1_get_pred_context_switchable_interp(xd);
+ ++td->counts->switchable_interp[switchable_ctx][mbmi->interp_filter];
#endif
}
}
@@ -1196,14 +1201,14 @@
}
#if CONFIG_SUPERTX
-static void update_state_supertx(AV1_COMP *cpi, ThreadData *td,
+static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td,
PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
BLOCK_SIZE bsize, RUN_TYPE dry_run) {
int y, x_idx;
#if CONFIG_VAR_TX || CONFIG_REF_MV
int i;
#endif
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
RD_COUNTS *const rdc = &td->rd_counts;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1341,11 +1346,11 @@
}
}
-static void update_state_sb_supertx(AV1_COMP *cpi, ThreadData *td,
+static void update_state_sb_supertx(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int mi_row,
int mi_col, BLOCK_SIZE bsize,
RUN_TYPE dry_run, PC_TREE *pc_tree) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = x->plane;
@@ -1497,10 +1502,11 @@
ctx->mic.mbmi.tx_type = best_tx;
}
-static void update_supertx_param_sb(AV1_COMP *cpi, ThreadData *td, int mi_row,
- int mi_col, BLOCK_SIZE bsize, int best_tx,
- TX_SIZE supertx_size, PC_TREE *pc_tree) {
- AV1_COMMON *const cm = &cpi->common;
+static void update_supertx_param_sb(const AV1_COMP *const cpi, ThreadData *td,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int best_tx, TX_SIZE supertx_size,
+ PC_TREE *pc_tree) {
+ const AV1_COMMON *const cm = &cpi->common;
int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
PARTITION_TYPE partition = pc_tree->partitioning;
BLOCK_SIZE subsize = get_subsize(bsize, partition);
@@ -1584,10 +1590,10 @@
x->e_mbd.plane[i].subsampling_y);
}
-static int set_segment_rdmult(AV1_COMP *const cpi, MACROBLOCK *const x,
+static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
int8_t segment_id) {
int segment_qindex;
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
av1_init_plane_quantizers(cpi, x, segment_id);
aom_clear_system_state();
segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
@@ -1595,7 +1601,7 @@
return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
-static void rd_pick_sb_modes(AV1_COMP *cpi, TileDataEnc *tile_data,
+static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
MACROBLOCK *const x, int mi_row, int mi_col,
RD_COST *rd_cost,
#if CONFIG_SUPERTX
@@ -1606,7 +1612,7 @@
#endif
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi;
@@ -1787,7 +1793,7 @@
}
#endif
-static void update_stats(AV1_COMMON *cm, ThreadData *td
+static void update_stats(const AV1_COMMON *const cm, ThreadData *td
#if CONFIG_SUPERTX
,
int supertx_enabled
@@ -2099,9 +2105,9 @@
#endif
}
-static void encode_b(AV1_COMP *cpi, const TileInfo *const tile, ThreadData *td,
- TOKENEXTRA **tp, int mi_row, int mi_col, RUN_TYPE dry_run,
- BLOCK_SIZE bsize,
+static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
+ ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
+ RUN_TYPE dry_run, BLOCK_SIZE bsize,
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_TYPE partition,
#endif
@@ -2123,9 +2129,10 @@
}
}
-static void encode_sb(AV1_COMP *cpi, ThreadData *td, const TileInfo *const tile,
- TOKENEXTRA **tp, int mi_row, int mi_col, RUN_TYPE dry_run,
- BLOCK_SIZE bsize, PC_TREE *pc_tree, int *rate) {
+static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
+ const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
+ int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
+ PC_TREE *pc_tree, int *rate) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2429,7 +2436,7 @@
int splits_below = 0;
BLOCK_SIZE bs_type = mib[0]->mbmi.sb_type;
int do_partition_search = 1;
- PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+ PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
#if CONFIG_SUPERTX
int last_part_rate_nocoef = INT_MAX;
int none_rate_nocoef = INT_MAX;
@@ -2488,7 +2495,7 @@
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_NONE,
#endif
- bsize, ctx, INT64_MAX);
+ bsize, ctx_none, INT64_MAX);
if (none_rdc.rate < INT_MAX) {
none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
@@ -2515,7 +2522,7 @@
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_NONE,
#endif
- bsize, ctx, INT64_MAX);
+ bsize, ctx_none, INT64_MAX);
break;
case PARTITION_HORZ:
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
@@ -2532,11 +2539,11 @@
#if CONFIG_SUPERTX
int rt_nocoef = 0;
#endif
- PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+ PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
av1_rd_cost_init(&tmp_rdc);
- update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+ update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
- ctx, NULL);
+ ctx_h, NULL);
rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
#if CONFIG_SUPERTX
&rt_nocoef,
@@ -2575,11 +2582,11 @@
#if CONFIG_SUPERTX
int rt_nocoef = 0;
#endif
- PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
+ PICK_MODE_CONTEXT *ctx_v = &pc_tree->vertical[0];
av1_rd_cost_init(&tmp_rdc);
- update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+ update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
- ctx, NULL);
+ ctx_v, NULL);
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
#if CONFIG_SUPERTX
&rt_nocoef,
@@ -2698,8 +2705,6 @@
#if CONFIG_SUPERTX
int rt_nocoef = 0;
#endif
- RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
-
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
@@ -2933,26 +2938,25 @@
}
// TODO(jingning) refactor functions setting partition search range
-static void set_partition_range(AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row,
+static void set_partition_range(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd, int mi_row,
int mi_col, BLOCK_SIZE bsize,
- BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
- int mi_width = num_8x8_blocks_wide_lookup[bsize];
- int mi_height = num_8x8_blocks_high_lookup[bsize];
+ BLOCK_SIZE *const min_bs,
+ BLOCK_SIZE *const max_bs) {
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
int idx, idy;
- MODE_INFO *mi;
const int idx_str = cm->mi_stride * mi_row + mi_col;
- MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
- BLOCK_SIZE bs, min_size, max_size;
-
- min_size = BLOCK_LARGEST;
- max_size = BLOCK_4X4;
+ MODE_INFO **const prev_mi = &cm->prev_mi_grid_visible[idx_str];
+ BLOCK_SIZE min_size = BLOCK_64X64; // default values
+ BLOCK_SIZE max_size = BLOCK_4X4;
if (prev_mi) {
for (idy = 0; idy < mi_height; ++idy) {
for (idx = 0; idx < mi_width; ++idx) {
- mi = prev_mi[idy * cm->mi_stride + idx];
- bs = mi ? mi->mbmi.sb_type : bsize;
+ const MODE_INFO *const mi = prev_mi[idy * cm->mi_stride + idx];
+ const BLOCK_SIZE bs = mi ? mi->mbmi.sb_type : bsize;
min_size = AOMMIN(min_size, bs);
max_size = AOMMAX(max_size, bs);
}
@@ -2961,8 +2965,8 @@
if (xd->left_available) {
for (idy = 0; idy < mi_height; ++idy) {
- mi = xd->mi[idy * cm->mi_stride - 1];
- bs = mi ? mi->mbmi.sb_type : bsize;
+ const MODE_INFO *const mi = xd->mi[idy * cm->mi_stride - 1];
+ const BLOCK_SIZE bs = mi ? mi->mbmi.sb_type : bsize;
min_size = AOMMIN(min_size, bs);
max_size = AOMMAX(max_size, bs);
}
@@ -2970,8 +2974,8 @@
if (xd->up_available) {
for (idx = 0; idx < mi_width; ++idx) {
- mi = xd->mi[idx - cm->mi_stride];
- bs = mi ? mi->mbmi.sb_type : bsize;
+ const MODE_INFO *const mi = xd->mi[idx - cm->mi_stride];
+ const BLOCK_SIZE bs = mi ? mi->mbmi.sb_type : bsize;
min_size = AOMMIN(min_size, bs);
max_size = AOMMAX(max_size, bs);
}
@@ -3094,10 +3098,10 @@
#if CONFIG_EXT_PARTITION_TYPES
static void rd_test_partition3(
- AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp,
- PC_TREE *pc_tree, RD_COST *best_rdc, PICK_MODE_CONTEXT ctxs[3],
- PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize,
- PARTITION_TYPE partition,
+ const AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
+ TOKENEXTRA **tp, PC_TREE *pc_tree, RD_COST *best_rdc,
+ PICK_MODE_CONTEXT ctxs[3], PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, PARTITION_TYPE partition,
#if CONFIG_SUPERTX
int64_t best_rd, int *best_rate_nocoef, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
#endif
@@ -3107,7 +3111,7 @@
MACROBLOCKD *const xd = &x->e_mbd;
RD_COST this_rdc, sum_rdc;
#if CONFIG_SUPERTX
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
int this_rate_nocoef, sum_rate_nocoef;
int abort_flag;
@@ -3134,12 +3138,12 @@
#else
if (sum_rdc.rdcost < best_rdc->rdcost) {
#endif
- PICK_MODE_CONTEXT *ctx = &ctxs[0];
- update_state(cpi, td, ctx, mi_row0, mi_col0, subsize0, 1);
+ PICK_MODE_CONTEXT *ctx_0 = &ctxs[0];
+ update_state(cpi, td, ctx_0, mi_row0, mi_col0, subsize0, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row0, mi_col0, subsize0,
- ctx, NULL);
+ ctx_0, NULL);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_0);
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row1, mi_col1, &this_rdc,
@@ -3175,12 +3179,12 @@
#else
if (sum_rdc.rdcost < best_rdc->rdcost) {
#endif
- PICK_MODE_CONTEXT *ctx = &ctxs[1];
- update_state(cpi, td, ctx, mi_row1, mi_col1, subsize1, 1);
+ PICK_MODE_CONTEXT *ctx_1 = &ctxs[1];
+ update_state(cpi, td, ctx_1, mi_row1, mi_col1, subsize1, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row1, mi_col1, subsize1,
- ctx, NULL);
+ ctx_1, NULL);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_1);
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row2, mi_col2, &this_rdc,
@@ -3274,7 +3278,7 @@
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
-static void rd_pick_partition(AV1_COMP *cpi, ThreadData *td,
+static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE bsize,
RD_COST *rd_cost,
@@ -3282,17 +3286,16 @@
int *rate_nocoef,
#endif
int64_t best_rd, PC_TREE *pc_tree) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
- TOKENEXTRA *tp_orig = *tp;
- PICK_MODE_CONTEXT *ctx = &pc_tree->none;
- int i;
+ const TOKENEXTRA *const tp_orig = *tp;
+ PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- int *partition_cost = cpi->partition_cost[pl];
+ const int *partition_cost = cpi->partition_cost[pl];
int tmp_partition_cost[PARTITION_TYPES];
BLOCK_SIZE subsize;
RD_COST this_rdc, sum_rdc, best_rdc;
@@ -3303,8 +3306,9 @@
bsize <= MAX_SUPERTX_BLOCK_SIZE &&
!xd->lossless[0];
#endif // CONFIG_SUPERTX
- int do_split = bsize >= BLOCK_8X8;
- int do_rect = 1;
+ const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
+ int do_square_split = bsize_at_least_8x8;
+ int do_rectangular_split = 1;
#if CONFIG_EXT_PARTITION_TYPES
BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
#endif
@@ -3325,9 +3329,9 @@
int partition_none_allowed = !force_horz_split && !force_vert_split;
int partition_horz_allowed =
- !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
+ !force_vert_split && yss <= xss && bsize_at_least_8x8;
int partition_vert_allowed =
- !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
+ !force_horz_split && xss <= yss && bsize_at_least_8x8;
(void)*tp_orig;
if (force_horz_split || force_vert_split) {
@@ -3377,7 +3381,7 @@
x->mb_energy = av1_block_energy(cpi, x, bsize);
if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
- int cb_partition_search_ctrl =
+ const int cb_partition_search_ctrl =
((pc_tree->index == 0 || pc_tree->index == 3) +
get_chessboard_index(cm->current_video_frame)) &
0x1;
@@ -3389,12 +3393,13 @@
// Determine partition types in search according to the speed features.
// The threshold set here has to be of square block size.
if (cpi->sf.auto_min_max_partition_size) {
- partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
- partition_horz_allowed &=
- ((bsize <= max_size && bsize > min_size) || force_horz_split);
- partition_vert_allowed &=
- ((bsize <= max_size && bsize > min_size) || force_vert_split);
- do_split &= bsize > min_size;
+ const int no_partition_allowed = (bsize <= max_size && bsize >= min_size);
+ // Note: further partitioning is NOT allowed when bsize is already min_size.
+ const int partition_allowed = (bsize <= max_size && bsize > min_size);
+ partition_none_allowed &= no_partition_allowed;
+ partition_horz_allowed &= partition_allowed || force_horz_split;
+ partition_vert_allowed &= partition_allowed || force_vert_split;
+ do_square_split &= bsize > min_size;
}
if (cpi->sf.use_square_partition_only) {
partition_horz_allowed &= force_horz_split;
@@ -3420,7 +3425,7 @@
#if CONFIG_FP_MB_STATS
// Decide whether we shall split directly and skip searching NONE by using
// the first pass block statistics
- if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
+ if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_square_split &&
partition_none_allowed && src_diff_var > 4 &&
cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
int mb_row = mi_row >> 1;
@@ -3477,9 +3482,9 @@
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_NONE,
#endif
- bsize, ctx, best_rdc.rdcost);
+ bsize, ctx_none, best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
- if (bsize >= BLOCK_8X8) {
+ if (bsize_at_least_8x8) {
this_rdc.rate += partition_cost[PARTITION_NONE];
this_rdc.rdcost =
RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
@@ -3489,22 +3494,21 @@
}
if (this_rdc.rdcost < best_rdc.rdcost) {
- int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
- int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
+ // Adjust dist breakout threshold according to the partition size.
+ const int64_t dist_breakout_thr =
+ cpi->sf.partition_search_breakout_dist_thr >>
+ ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
+ (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
+ const int rate_breakout_thr =
+ cpi->sf.partition_search_breakout_rate_thr *
+ num_pels_log2_lookup[bsize];
best_rdc = this_rdc;
#if CONFIG_SUPERTX
best_rate_nocoef = this_rate_nocoef;
assert(best_rate_nocoef >= 0);
#endif
- if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
-
- // Adjust dist breakout threshold according to the partition size.
- dist_breakout_thr >>=
- (2 * (MAX_SB_SIZE_LOG2 - 2)) -
- (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
-
- rate_breakout_thr *= num_pels_log2_lookup[bsize];
+ if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
// If all y, u, v transform blocks in this partition are skippable, and
// the dist & rate are within the thresholds, the partition search is
@@ -3512,10 +3516,10 @@
// The dist & rate thresholds are set to 0 at speed 0 to disable the
// early termination at that speed.
if (!x->e_mbd.lossless[xd->mi[0]->mbmi.segment_id] &&
- (ctx->skippable && best_rdc.dist < dist_breakout_thr &&
+ (ctx_none->skippable && best_rdc.dist < dist_breakout_thr &&
best_rdc.rate < rate_breakout_thr)) {
- do_split = 0;
- do_rect = 0;
+ do_square_split = 0;
+ do_rectangular_split = 0;
}
#if CONFIG_FP_MB_STATS
@@ -3524,7 +3528,7 @@
// If that is the case, check the difference variance between the
// current frame and the last frame. If the variance is small enough,
// stop further splitting in RD optimization
- if (cpi->use_fp_mb_stats && do_split != 0 &&
+ if (cpi->use_fp_mb_stats && do_square_split &&
cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
int mb_row = mi_row >> 1;
int mb_col = mi_col >> 1;
@@ -3557,8 +3561,8 @@
cpi, &x->plane[0].src, mi_row, mi_col, bsize);
}
if (src_diff_var < 8) {
- do_split = 0;
- do_rect = 0;
+ do_square_split = 0;
+ do_rectangular_split = 0;
}
}
}
@@ -3570,23 +3574,23 @@
}
// store estimated motion vector
- if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx);
+ if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
// PARTITION_SPLIT
// TODO(jingning): use the motion vectors given by the above search as
// the starting point of motion search in the following partition type check.
- if (do_split) {
+ if (do_square_split) {
+ int reached_last_index = 0;
subsize = get_subsize(bsize, PARTITION_SPLIT);
if (bsize == BLOCK_8X8) {
- i = 4;
#if CONFIG_DUAL_FILTER
if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
pc_tree->leaf_split[0]->pred_interp_filter =
- ctx->mic.mbmi.interp_filter[0];
+ ctx_none->mic.mbmi.interp_filter[0];
#else
if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
pc_tree->leaf_split[0]->pred_interp_filter =
- ctx->mic.mbmi.interp_filter;
+ ctx_none->mic.mbmi.interp_filter;
#endif
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
@@ -3649,29 +3653,31 @@
pc_tree->partitioning = best_partition;
}
#endif // CONFIG_SUPERTX
+ reached_last_index = 1;
} else {
+ int idx;
#if CONFIG_SUPERTX
- for (i = 0; i < 4 && sum_rdc.rdcost < INT64_MAX; ++i) {
+ for (idx = 0; idx < 4 && sum_rdc.rdcost < INT64_MAX; ++idx) {
#else
- for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
+ for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
#endif // CONFIG_SUPERTX
- const int x_idx = (i & 1) * mi_step;
- const int y_idx = (i >> 1) * mi_step;
+ const int x_idx = (idx & 1) * mi_step;
+ const int y_idx = (idx >> 1) * mi_step;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
- pc_tree->split[i]->index = i;
+ pc_tree->split[idx]->index = idx;
#if CONFIG_SUPERTX
rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
mi_col + x_idx, subsize, &this_rdc, &this_rate_nocoef,
- INT64_MAX - sum_rdc.rdcost, pc_tree->split[i]);
+ INT64_MAX - sum_rdc.rdcost, pc_tree->split[idx]);
#else
- rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
- mi_col + x_idx, subsize, &this_rdc,
- best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
+ rd_pick_partition(
+ cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
+ &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]);
#endif // CONFIG_SUPERTX
if (this_rdc.rate == INT_MAX) {
@@ -3689,8 +3695,9 @@
#endif // CONFIG_SUPERTX
}
}
+ reached_last_index = (idx == 4);
#if CONFIG_SUPERTX
- if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && i == 4) {
+ if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) {
TX_SIZE supertx_size = max_txsize_lookup[bsize];
const PARTITION_TYPE best_partition = pc_tree->partitioning;
@@ -3732,7 +3739,7 @@
#endif // CONFIG_SUPERTX
}
- if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
+ if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
#if CONFIG_SUPERTX
@@ -3747,10 +3754,10 @@
#endif // CONFIG_SUPERTX
pc_tree->partitioning = PARTITION_SPLIT;
}
- } else {
+ } else if (cpi->sf.less_rectangular_check) {
// skip rectangular partition test when larger block size
// gives better rd cost
- if (cpi->sf.less_rectangular_check) do_rect &= !partition_none_allowed;
+ do_rectangular_split &= !partition_none_allowed;
}
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
@@ -3758,18 +3765,19 @@
// PARTITION_HORZ
if (partition_horz_allowed &&
- (do_rect || av1_active_h_edge(cpi, mi_row, mi_step))) {
+ (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
subsize = get_subsize(bsize, PARTITION_HORZ);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
#if CONFIG_DUAL_FILTER
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[0].pred_interp_filter =
- ctx->mic.mbmi.interp_filter[0];
+ ctx_none->mic.mbmi.interp_filter[0];
#else
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
- pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+ pc_tree->horizontal[0].pred_interp_filter =
+ ctx_none->mic.mbmi.interp_filter;
#endif
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
#if CONFIG_SUPERTX
@@ -3787,23 +3795,24 @@
#else
if (sum_rdc.rdcost < best_rdc.rdcost &&
#endif // CONFIG_SUPERTX
- mi_row + mi_step < cm->mi_rows && bsize > BLOCK_8X8) {
- PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
- update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+ !force_horz_split && bsize > BLOCK_8X8) {
+ PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
+ update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
- ctx, NULL);
+ ctx_h, NULL);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
#if CONFIG_DUAL_FILTER
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[1].pred_interp_filter =
- ctx->mic.mbmi.interp_filter[0];
+ ctx_h->mic.mbmi.interp_filter[0];
#else
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
- pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+ pc_tree->horizontal[1].pred_interp_filter =
+ ctx_h->mic.mbmi.interp_filter;
#endif
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
@@ -3896,19 +3905,21 @@
// PARTITION_VERT
if (partition_vert_allowed &&
- (do_rect || av1_active_v_edge(cpi, mi_col, mi_step))) {
+ (do_rectangular_split || av1_active_v_edge(cpi, mi_col, mi_step))) {
subsize = get_subsize(bsize, PARTITION_VERT);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
#if CONFIG_DUAL_FILTER
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
- pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter[0];
+ pc_tree->vertical[0].pred_interp_filter =
+ ctx_none->mic.mbmi.interp_filter[0];
#else
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
- pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+ pc_tree->vertical[0].pred_interp_filter =
+ ctx_none->mic.mbmi.interp_filter;
#endif
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
#if CONFIG_SUPERTX
@@ -3925,22 +3936,23 @@
#else
if (sum_rdc.rdcost < best_rdc.rdcost &&
#endif // CONFIG_SUPERTX
- mi_col + mi_step < cm->mi_cols && bsize > BLOCK_8X8) {
+ !force_vert_split && bsize > BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
&pc_tree->vertical[0], NULL);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
#if CONFIG_DUAL_FILTER
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->vertical[1].pred_interp_filter =
- ctx->mic.mbmi.interp_filter[0];
+ ctx_none->mic.mbmi.interp_filter[0];
#else
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
- pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+ pc_tree->vertical[1].pred_interp_filter =
+ ctx_none->mic.mbmi.interp_filter;
#endif
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
@@ -4032,11 +4044,11 @@
#if CONFIG_EXT_PARTITION_TYPES
// PARTITION_HORZ_A
- if (partition_horz_allowed && do_rect && bsize > BLOCK_8X8 &&
+ if (partition_horz_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
partition_none_allowed) {
subsize = get_subsize(bsize, PARTITION_HORZ_A);
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
- pc_tree->horizontala, ctx, mi_row, mi_col, bsize,
+ pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
PARTITION_HORZ_A,
#if CONFIG_SUPERTX
best_rd, &best_rate_nocoef, &x_ctx,
@@ -4046,11 +4058,11 @@
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
}
// PARTITION_HORZ_B
- if (partition_horz_allowed && do_rect && bsize > BLOCK_8X8 &&
+ if (partition_horz_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
partition_none_allowed) {
subsize = get_subsize(bsize, PARTITION_HORZ_B);
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
- pc_tree->horizontalb, ctx, mi_row, mi_col, bsize,
+ pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
PARTITION_HORZ_B,
#if CONFIG_SUPERTX
best_rd, &best_rate_nocoef, &x_ctx,
@@ -4060,11 +4072,11 @@
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
}
// PARTITION_VERT_A
- if (partition_vert_allowed && do_rect && bsize > BLOCK_8X8 &&
+ if (partition_vert_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
partition_none_allowed) {
subsize = get_subsize(bsize, PARTITION_VERT_A);
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
- pc_tree->verticala, ctx, mi_row, mi_col, bsize,
+ pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
PARTITION_VERT_A,
#if CONFIG_SUPERTX
best_rd, &best_rate_nocoef, &x_ctx,
@@ -4074,11 +4086,11 @@
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
}
// PARTITION_VERT_B
- if (partition_vert_allowed && do_rect && bsize > BLOCK_8X8 &&
+ if (partition_vert_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
partition_none_allowed) {
subsize = get_subsize(bsize, PARTITION_VERT_B);
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
- pc_tree->verticalb, ctx, mi_row, mi_col, bsize,
+ pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
PARTITION_VERT_B,
#if CONFIG_SUPERTX
best_rd, &best_rate_nocoef, &x_ctx,
@@ -4924,9 +4936,10 @@
}
}
-static void tx_partition_count_update(AV1_COMMON *cm, MACROBLOCKD *xd,
- BLOCK_SIZE plane_bsize, int mi_row,
- int mi_col, FRAME_COUNTS *td_counts) {
+static void tx_partition_count_update(const AV1_COMMON *const cm,
+ MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
+ int mi_row, int mi_col,
+ FRAME_COUNTS *td_counts) {
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
@@ -4985,9 +4998,9 @@
}
}
-static void tx_partition_set_contexts(AV1_COMMON *cm, MACROBLOCKD *xd,
- BLOCK_SIZE plane_bsize, int mi_row,
- int mi_col) {
+static void tx_partition_set_contexts(const AV1_COMMON *const cm,
+ MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
+ int mi_row, int mi_col) {
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
@@ -5005,11 +5018,11 @@
}
#endif
-static void encode_superblock(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
- RUN_TYPE dry_run, int mi_row, int mi_col,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int *rate) {
- AV1_COMMON *const cm = &cpi->common;
+static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
+ TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx, int *rate) {
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO **mi_8x8 = xd->mi;
@@ -5179,7 +5192,7 @@
#endif
++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
} else {
- int x, y;
+ int i, j;
TX_SIZE tx_size;
// The new intra coding scheme requires no change of transform size
if (is_inter_block(&mi->mbmi)) {
@@ -5196,10 +5209,10 @@
tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
}
- for (y = 0; y < mi_height; y++)
- for (x = 0; x < mi_width; x++)
- if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
- mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
+ for (j = 0; j < mi_height; j++)
+ for (i = 0; i < mi_width; i++)
+ if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows)
+ mi_8x8[mis * j + i]->mbmi.tx_size = tx_size;
}
++td->counts->tx_size_totals[txsize_sqr_map[mbmi->tx_size]];
++td->counts
@@ -5278,8 +5291,9 @@
return 0;
}
-static int check_intra_sb(AV1_COMP *cpi, const TileInfo *const tile, int mi_row,
- int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
+static int check_intra_sb(const AV1_COMP *const cpi, const TileInfo *const tile,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ PC_TREE *pc_tree) {
const AV1_COMMON *const cm = &cpi->common;
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
@@ -5388,7 +5402,7 @@
}
}
-static void predict_superblock(AV1_COMP *cpi, ThreadData *td,
+static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_EXT_INTER
int mi_row_ori, int mi_col_ori,
#endif // CONFIG_EXT_INTER
@@ -5397,7 +5411,7 @@
// Used in supertx
// (mi_row_ori, mi_col_ori): location for mv
// (mi_row_pred, mi_col_pred, bsize_pred): region to predict
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *mi_8x8 = xd->mi[0];
@@ -5429,7 +5443,7 @@
bsize_pred, block);
}
-static void predict_b_extend(AV1_COMP *cpi, ThreadData *td,
+static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int block,
int mi_row_ori, int mi_col_ori, int mi_row_pred,
int mi_col_pred, int mi_row_top, int mi_col_top,
@@ -5445,7 +5459,7 @@
// bextend: 1: region to predict is an extension of ori; 0: not
MACROBLOCK *const x = &td->mb;
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
int r = (mi_row_pred - mi_row_top) * MI_SIZE;
int c = (mi_col_pred - mi_col_top) * MI_SIZE;
@@ -5482,7 +5496,7 @@
if (!dry_run && !bextend) update_stats(&cpi->common, td, 1);
}
-static void extend_dir(AV1_COMP *cpi, ThreadData *td,
+static void extend_dir(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int block, BLOCK_SIZE bsize,
BLOCK_SIZE top_bsize, int mi_row, int mi_col,
int mi_row_top, int mi_col_top, RUN_TYPE dry_run,
@@ -5554,7 +5568,7 @@
}
}
-static void extend_all(AV1_COMP *cpi, ThreadData *td,
+static void extend_all(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int block, BLOCK_SIZE bsize,
BLOCK_SIZE top_bsize, int mi_row, int mi_col,
int mi_row_top, int mi_col_top, RUN_TYPE dry_run,
@@ -5586,13 +5600,13 @@
// then applied to the 2 masked predictions mentioned above in the vertical
// direction.
// If the block is split into more than one level, at every stage, masked
// prediction is stored in dst_buf[] passed from the higher level.
-static void predict_sb_complex(AV1_COMP *cpi, ThreadData *td,
+static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int mi_row,
int mi_col, int mi_row_top, int mi_col_top,
RUN_TYPE dry_run, BLOCK_SIZE bsize,
BLOCK_SIZE top_bsize, uint8_t *dst_buf[3],
int dst_stride[3], PC_TREE *pc_tree) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -5644,7 +5658,11 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
- if (!dry_run && bsize < top_bsize) cm->counts.partition[ctx][partition]++;
+ if (!dry_run && bsize < top_bsize) {
+ // Explicitly cast away const.
+ FRAME_COUNTS *const frame_counts = (FRAME_COUNTS *)&cm->counts;
+ frame_counts->partition[ctx][partition]++;
+ }
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = dst_buf[i];
@@ -6029,11 +6047,11 @@
#endif // CONFIG_EXT_PARTITION_TYPES
}
-static void rd_supertx_sb(AV1_COMP *cpi, ThreadData *td,
+static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *tmp_rate, int64_t *tmp_dist,
TX_TYPE *best_tx, PC_TREE *pc_tree) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
int plane, pnskip, skippable, skippable_uv, rate_uv, this_rate,
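
Note on the breakout-threshold hunk above: dist_breakout_thr shrinks by the log2 area deficit of the current block relative to the largest superblock, while rate_breakout_thr grows with the block's log2 pixel count. A minimal standalone sketch of that scaling, assuming MAX_SB_SIZE_LOG2 == 6 and square blocks; the tables below are illustrative stand-ins for the encoder's lookup tables, and the base thresholds are demo values:

#include <stdio.h>
#include <stdint.h>

enum { BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, BLOCK_SIZES };
/* Stand-ins: log2 of block width/height in units of 4 samples. */
static const int b_width_log2_lookup[BLOCK_SIZES] = { 1, 2, 3, 4 };
static const int b_height_log2_lookup[BLOCK_SIZES] = { 1, 2, 3, 4 };
/* log2 of the pixel count per block. */
static const int num_pels_log2_lookup[BLOCK_SIZES] = { 6, 8, 10, 12 };
#define MAX_SB_SIZE_LOG2 6

int main(void) {
  const int64_t base_dist_thr = 1 << 16; /* demo speed-feature value */
  const int base_rate_thr = 100;         /* demo speed-feature value */
  int bsize;
  for (bsize = BLOCK_8X8; bsize < BLOCK_SIZES; ++bsize) {
    /* Smaller partitions get a proportionally smaller distortion budget. */
    const int64_t dist_thr =
        base_dist_thr >>
        ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
         (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
    /* The rate budget instead scales up with the log2 pixel count. */
    const int rate_thr = base_rate_thr * num_pels_log2_lookup[bsize];
    printf("bsize %d: dist_thr %lld, rate_thr %d\n", bsize,
           (long long)dist_thr, rate_thr);
  }
  return 0;
}

For BLOCK_64X64 the shift is zero, so the full distortion budget applies only at the superblock size.
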
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index c5dfadd..192ec47 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -85,10 +85,10 @@
const int16_t *const dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const scan_order *const so =
+ const SCAN_ORDER *const scan_order =
get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
- const int16_t *const scan = so->scan;
- const int16_t *const nb = so->neighbors;
+ const int16_t *const scan = scan_order->scan;
+ const int16_t *const nb = scan_order->neighbors;
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
@@ -447,7 +447,7 @@
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
- const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
+ const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type, is_inter);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -520,7 +520,7 @@
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
- const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
+ const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type, is_inter);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -593,7 +593,7 @@
const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
+ const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type, is_inter);
int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
is_inter, plane_type);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index 13c8d87..da6f35c 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c
@@ -111,8 +111,7 @@
if (c == MV_CLASS_0) {
cost += class0_cost[d];
} else {
- int i, b;
- b = c + CLASS0_BITS - 1; /* number of bits */
+ const int b = c + CLASS0_BITS - 1; /* number of bits */
for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
}
if (c == MV_CLASS_0) {
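
The encodemv.c hunk folds the bit-count computation into a const initializer. For context, a standalone sketch of the per-bit offset costing, with CLASS0_BITS assumed to be 1 (as in the reference MV coder) and a made-up cost table:

#include <stdio.h>

#define CLASS0_BITS 1     /* assumed value */
#define MV_OFFSET_BITS 10 /* demo table size */

int main(void) {
  int bits_cost[MV_OFFSET_BITS][2];
  int i, cost = 0;
  const int c = 3;                   /* MV class (demo) */
  const int d = 5;                   /* integer offset in the class (demo) */
  const int b = c + CLASS0_BITS - 1; /* number of bits, as in the hunk */
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
    bits_cost[i][0] = 50; /* demo cost of coding a 0 bit */
    bits_cost[i][1] = 70; /* demo cost of coding a 1 bit */
  }
  for (i = 0; i < b; ++i) cost += bits_cost[i][(d >> i) & 1];
  printf("class %d -> %d offset bits, cost %d\n", c, b, cost);
  return 0;
}
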
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index ad33134..c39b78a 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -101,6 +101,10 @@
FILE *keyfile;
#endif
+#if CONFIG_INTERNAL_STATS
+typedef enum { Y, U, V, ALL } STAT_TYPE;
+#endif // CONFIG_INTERNAL_STATS
+
static INLINE void Scale2Ratio(AOM_SCALING mode, int *hr, int *hs) {
switch (mode) {
case NORMAL:
@@ -2267,7 +2271,8 @@
av1_set_speed_features_framesize_dependent(cpi);
// Allocate memory to store variances for a frame.
- CHECK_MEM_ERROR(cm, cpi->source_diff_var, aom_calloc(cm->MBs, sizeof(diff)));
+ CHECK_MEM_ERROR(cm, cpi->source_diff_var,
+ aom_calloc(cm->MBs, sizeof(*cpi->source_diff_var)));
cpi->source_var_thresh = 0;
cpi->frames_till_next_var_check = 0;
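
The allocation change above is the sizeof(*ptr) idiom: the element size is derived from the pointer being assigned, so it cannot drift from the pointee type (which matters here, since the diff type is renamed to DIFF in encoder.h below). A minimal sketch with a stand-in struct:

#include <stdlib.h>

typedef struct { int sum; unsigned int sse; } DIFF; /* stand-in struct */

int main(void) {
  const int num_mbs = 64; /* demo value for cm->MBs */
  DIFF *source_diff_var = calloc(num_mbs, sizeof(*source_diff_var));
  if (source_diff_var == NULL) return 1;
  /* ... use the array ... */
  free(source_diff_var);
  return 0;
}
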
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 8819655..e2046ec 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -306,13 +306,15 @@
unsigned char *map;
} ActiveMap;
-typedef enum { Y, U, V, ALL } STAT_TYPE;
+#define NUM_STAT_TYPES 4 // types of stats: Y, U, V and ALL
typedef struct IMAGE_STAT {
- double stat[ALL + 1];
+ double stat[NUM_STAT_TYPES];
double worst;
} ImageStat;
+#undef NUM_STAT_TYPES
+
typedef struct {
int ref_count;
YV12_BUFFER_CONFIG buf;
@@ -516,7 +518,7 @@
// scaled.
// Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
- diff *source_diff_var;
+ DIFF *source_diff_var;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
unsigned int source_var_thresh;
int frames_till_next_var_check;
@@ -709,7 +711,7 @@
return cpi->alt_fb_idx;
}
-static INLINE int get_ref_frame_buf_idx(const AV1_COMP *const cpi,
+static INLINE int get_ref_frame_buf_idx(const AV1_COMP *cpi,
MV_REFERENCE_FRAME ref_frame) {
const AV1_COMMON *const cm = &cpi->common;
const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
@@ -717,15 +719,15 @@
}
static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
- AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
+ const AV1_COMMON *const cm = &cpi->common;
const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
return buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf
: NULL;
}
static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(
- AV1_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) {
+ const AV1_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) {
// Use up-sampled reference frames.
const int buf_idx =
cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)];
@@ -796,7 +798,7 @@
}
#endif // CONFIG_EXT_REFS
-static INLINE void set_ref_ptrs(AV1_COMMON *cm, MACROBLOCKD *xd,
+static INLINE void set_ref_ptrs(const AV1_COMMON *cm, MACROBLOCKD *xd,
MV_REFERENCE_FRAME ref0,
MV_REFERENCE_FRAME ref1) {
xd->block_refs[0] =
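
The STAT_TYPE enum moves out of this widely included header (it reappears in encoder.c above, guarded by CONFIG_INTERNAL_STATS); the header keeps only a scoped macro to size the array. The pattern in isolation:

#include <stdio.h>

#define NUM_STAT_TYPES 4  // types of stats: Y, U, V and ALL

typedef struct IMAGE_STAT {
  double stat[NUM_STAT_TYPES];
  double worst;
} ImageStat;

#undef NUM_STAT_TYPES /* keep the macro from leaking to header users */

int main(void) {
  ImageStat s = { { 0.0, 0.0, 0.0, 0.0 }, 0.0 };
  printf("%d entries\n", (int)(sizeof(s.stat) / sizeof(s.stat[0])));
  return 0;
}
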
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 396f0d7..003ecb3 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -2712,7 +2712,6 @@
// If this is an arf frame then we don't want to read the stats file or
// advance the input pointer as we already have what we need.
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
- int target_rate;
configure_buffer_updates(cpi);
target_rate = gf_group->bit_allocation[gf_group->index];
target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
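
The firstpass.c hunk deletes a redeclaration of target_rate; since the assignments that follow survive, target_rate must also be declared at function scope, and the inner declaration was shadowing it. A minimal sketch of that bug class, with hypothetical values:

#include <stdio.h>

int main(void) {
  int target_rate = 0;
  {
    /* int target_rate;  <- a shadowing redeclaration like the one removed;
     * with it, the assignment below would update only the inner variable. */
    target_rate = 100;
  }
  printf("target_rate = %d\n", target_rate); /* prints 100 without shadow */
  return 0;
}

The mbgraph.c hunk that follows removes the same kind of redeclaration for tmp_err.
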
diff --git a/av1/encoder/mbgraph.c b/av1/encoder/mbgraph.c
index c1ccb95..9bbed2b 100644
--- a/av1/encoder/mbgraph.c
+++ b/av1/encoder/mbgraph.c
@@ -110,7 +110,6 @@
// If the current best reference mv is not centered on 0,0 then do a 0,0
// based search as well.
if (ref_mv->row != 0 || ref_mv->col != 0) {
- unsigned int tmp_err;
MV zero_ref_mv = { 0, 0 };
tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, mb_row, mb_col);
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index bd90739..3fbceab 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -861,36 +861,35 @@
const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
const int br = best_mv->row;
const int bc = best_mv->col;
- MV this_mv;
int i;
unsigned int sse;
+ const MV this_mv = { br, bc };
- this_mv.row = br;
- this_mv.col = bc;
cost_list[0] =
fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
in_what->stride, &sse) +
mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
if (check_bounds(x, br, bc, 1)) {
for (i = 0; i < 4; i++) {
- const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+ const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv),
+ get_buf_from_mv(in_what, &neighbor_mv),
in_what->stride, &sse) +
- mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
+ mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost,
x->mvcost, x->errorperbit);
}
} else {
for (i = 0; i < 4; i++) {
- const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
- if (!is_mv_in(x, &this_mv))
+ const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+ if (!is_mv_in(x, &neighbor_mv))
cost_list[i + 1] = INT_MAX;
else
- cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv),
- in_what->stride, &sse) +
- mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
- x->mvcost, x->errorperbit);
+ cost_list[i + 1] =
+ fn_ptr->vf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
+ &sse) +
+ mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
+ x->errorperbit);
}
}
}
@@ -1187,12 +1186,13 @@
// cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel
// cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel
if (cost_list) {
- const MV best_mv = { br, bc };
+ const MV best_int_mv = { br, bc };
if (last_is_4) {
- calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_mv, cost_list,
+ calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
use_mvcost, bestsad);
} else {
- calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_mv, cost_list);
+ calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
+ cost_list);
}
}
x->best_mv.as_mv.row = br;
@@ -1692,7 +1692,7 @@
const int ref_stride = xd->plane[0].pre[0].stride;
uint8_t const *ref_buf, *src_buf;
MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
- unsigned int best_sad, tmp_sad, this_sad[4];
+ unsigned int best_sad, tmp_sad, sad_arr[4];
MV this_mv;
const int norm_factor = 3 + (bw >> 5);
const YV12_BUFFER_CONFIG *scaled_ref_frame =
@@ -1762,23 +1762,23 @@
ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
};
- cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
+ cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, sad_arr);
}
for (idx = 0; idx < 4; ++idx) {
- if (this_sad[idx] < best_sad) {
- best_sad = this_sad[idx];
+ if (sad_arr[idx] < best_sad) {
+ best_sad = sad_arr[idx];
tmp_mv->row = search_pos[idx].row + this_mv.row;
tmp_mv->col = search_pos[idx].col + this_mv.col;
}
}
- if (this_sad[0] < this_sad[3])
+ if (sad_arr[0] < sad_arr[3])
this_mv.row -= 1;
else
this_mv.row += 1;
- if (this_sad[1] < this_sad[2])
+ if (sad_arr[1] < sad_arr[2])
this_mv.col -= 1;
else
this_mv.col += 1;
@@ -1805,9 +1805,9 @@
/* do_refine: If the last step (1-away) of the n-step search doesn't pick the
   center point as the best match, we will do a final 1-away diamond
   refining search. */
-static int full_pixel_diamond(AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
- int step_param, int sadpb, int further_steps,
- int do_refine, int *cost_list,
+static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, int sadpb,
+ int further_steps, int do_refine, int *cost_list,
const aom_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv) {
MV temp_mv;
@@ -1870,7 +1870,7 @@
#define MIN_INTERVAL 1
// Runs a limited-range exhaustive mesh search using a pattern set
// according to the encode speed profile.
-static int full_pixel_exhaustive(AV1_COMP *cpi, MACROBLOCK *x,
+static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
const MV *centre_mv_full, int sadpb,
int *cost_list,
const aom_variance_fn_ptr_t *fn_ptr,
@@ -2243,7 +2243,7 @@
}
#define MIN_EX_SEARCH_LIMIT 128
-static int is_exhaustive_allowed(AV1_COMP *cpi, MACROBLOCK *x) {
+static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
const SPEED_FEATURES *const sf = &cpi->sf;
const int max_ex =
AOMMAX(MIN_EX_SEARCH_LIMIT,
@@ -2254,13 +2254,13 @@
(*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
}
-int av1_full_pixel_search(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
MV *mvp_full, int step_param, int error_per_bit,
int *cost_list, const MV *ref_mv, int var_max,
int rd) {
const SPEED_FEATURES *const sf = &cpi->sf;
const SEARCH_METHODS method = sf->mv.search_method;
- aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+ const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
int var = 0;
if (cost_list) {
@@ -2530,7 +2530,7 @@
}
int av1_find_best_masked_sub_pixel_tree_up(
- AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
+ const AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
@@ -3031,7 +3031,7 @@
}
int av1_find_best_obmc_sub_pixel_tree_up(
- AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
+ const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
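
The calc_int_sad_list changes above only un-shadow this_mv, but the surrounding logic is worth spelling out: cost_list[0] is the cost at the best integer pel, and entries 1..4 come from a fixed neighbor table (left, bottom, right, top, matching the delta comments in the hunk), with out-of-range neighbors pinned to INT_MAX. A self-contained sketch, with a toy SAD in place of fn_ptr->vf and a toy bounds check in place of is_mv_in:

#include <limits.h>
#include <stdio.h>

typedef struct { int row, col; } MV;

static int demo_sad(MV mv) { return mv.row * mv.row + mv.col * mv.col; }
static int demo_is_mv_in(MV mv) {
  return mv.row >= -8 && mv.row <= 8 && mv.col >= -8 && mv.col <= 8;
}

int main(void) {
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  const MV best_mv = { 8, 0 }; /* sits on the edge of the demo range */
  int cost_list[5];
  int i;
  cost_list[0] = demo_sad(best_mv);
  for (i = 0; i < 4; i++) {
    const MV neighbor_mv = { best_mv.row + neighbors[i].row,
                             best_mv.col + neighbors[i].col };
    cost_list[i + 1] =
        demo_is_mv_in(neighbor_mv) ? demo_sad(neighbor_mv) : INT_MAX;
  }
  for (i = 0; i < 5; i++) printf("cost_list[%d] = %d\n", i, cost_list[i]);
  return 0;
}

Here the bottom neighbor { 9, 0 } falls outside the demo range, so cost_list[2] stays INT_MAX.
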
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 8c42825..e244a3f 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -114,10 +114,10 @@
struct AV1_COMP;
-int av1_full_pixel_search(struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- MV *mvp_full, int step_param, int error_per_bit,
- int *cost_list, const MV *ref_mv, int var_max,
- int rd);
+int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+ int error_per_bit, int *cost_list, const MV *ref_mv,
+ int var_max, int rd);
#if CONFIG_EXT_INTER
int av1_find_best_masked_sub_pixel_tree(
@@ -127,11 +127,11 @@
int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
int is_second);
int av1_find_best_masked_sub_pixel_tree_up(
- struct AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
- int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
- int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
- int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, int is_second, int use_upsampled_ref);
+ const struct AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask,
+ int mask_stride, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv,
+ int allow_hp, int error_per_bit, const aom_variance_fn_ptr_t *vfp,
+ int forced_stop, int iters_per_step, int *mvjcost, int *mvcost[2],
+ int *distortion, unsigned int *sse1, int is_second, int use_upsampled_ref);
int av1_masked_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
const uint8_t *mask, int mask_stride,
MV *mvp_full, int step_param, int sadpb,
@@ -147,8 +147,8 @@
const aom_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv, int is_second);
int av1_find_best_obmc_sub_pixel_tree_up(
- struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
- const MV *ref_mv, int allow_hp, int error_per_bit,
+ const struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
+ MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
int is_second, int use_upsampled_ref);
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 28bdcc3..62303b7 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -351,7 +351,6 @@
memset(A, 0, sizeof(A));
memset(B, 0, sizeof(B));
for (i = 0; i < RESTORATION_WIN; i++) {
- int j;
for (j = 0; j < RESTORATION_WIN; ++j) {
const int jj = wrap_index(j);
A[jj] += Mc[i][j] * b[i];
@@ -399,7 +398,6 @@
memset(A, 0, sizeof(A));
memset(B, 0, sizeof(B));
for (i = 0; i < RESTORATION_WIN; i++) {
- int j;
const int ii = wrap_index(i);
for (j = 0; j < RESTORATION_WIN; j++) A[ii] += Mc[i][j] * a[j];
}
diff --git a/av1/encoder/quantize.c b/av1/encoder/quantize.c
index d3b8c1c..827e6d8 100644
--- a/av1/encoder/quantize.c
+++ b/av1/encoder/quantize.c
@@ -344,7 +344,7 @@
const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc, const QUANT_PARAM *qparam) {
+ const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
// obsolete skip_block
const int skip_block = 0;
@@ -362,7 +362,7 @@
void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd, tran_low_t *dqcoeff_ptr,
- uint16_t *eob_ptr, const scan_order *sc,
+ uint16_t *eob_ptr, const SCAN_ORDER *sc,
const QUANT_PARAM *qparam) {
// obsolete skip_block
const int skip_block = 0;
@@ -382,7 +382,7 @@
const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc, const QUANT_PARAM *qparam) {
+ const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
// obsolete skip_block
const int skip_block = 0;
(void)sc;
@@ -402,7 +402,7 @@
tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc,
+ const SCAN_ORDER *sc,
const QUANT_PARAM *qparam) {
// obsolete skip_block
const int skip_block = 0;
@@ -418,7 +418,7 @@
tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc,
+ const SCAN_ORDER *sc,
const QUANT_PARAM *qparam) {
// obsolete skip_block
const int skip_block = 0;
@@ -434,7 +434,7 @@
tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc,
+ const SCAN_ORDER *sc,
const QUANT_PARAM *qparam) {
// obsolete skip_block
const int skip_block = 0;
@@ -1115,9 +1115,9 @@
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {
for (i = 0; i < COEF_BANDS; i++) {
- const int quant = cpi->y_dequant[q][i != 0];
+ const int y_quant = cpi->y_dequant[q][i != 0];
const int uvquant = cpi->uv_dequant[q][i != 0];
- av1_get_dequant_val_nuq(quant, i, cpi->y_dequant_val_nuq[dq][q][i],
+ av1_get_dequant_val_nuq(y_quant, i, cpi->y_dequant_val_nuq[dq][q][i],
quants->y_cuml_bins_nuq[dq][q][i], dq);
av1_get_dequant_val_nuq(uvquant, i, cpi->uv_dequant_val_nuq[dq][q][i],
quants->uv_cuml_bins_nuq[dq][q][i], dq);
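
The y_quant rename above sits on a compact idiom: the dequant tables hold two entries per q, indexed with [i != 0], so band 0 (DC) reads entry 0 and every AC band shares entry 1. In isolation, with demo values:

#include <stdio.h>

int main(void) {
  const int y_dequant[2] = { 40, 45 }; /* demo { DC, AC } values */
  int i;
  for (i = 0; i < 6; ++i) /* i == 0 is the DC band */
    printf("band %d -> dequant %d\n", i, y_dequant[i != 0]);
  return 0;
}
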
diff --git a/av1/encoder/quantize.h b/av1/encoder/quantize.h
index 7394887..1c32ee1 100644
--- a/av1/encoder/quantize.h
+++ b/av1/encoder/quantize.h
@@ -28,7 +28,7 @@
tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc,
+ const SCAN_ORDER *sc,
const QUANT_PARAM *qparam);
typedef struct {
@@ -80,19 +80,19 @@
const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc, const QUANT_PARAM *qparam);
+ const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd, tran_low_t *dqcoeff_ptr,
- uint16_t *eob_ptr, const scan_order *sc,
+ uint16_t *eob_ptr, const SCAN_ORDER *sc,
const QUANT_PARAM *qparam);
void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc, const QUANT_PARAM *qparam);
+ const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
#if CONFIG_NEW_QUANT
void quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -128,7 +128,7 @@
tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc,
+ const SCAN_ORDER *sc,
const QUANT_PARAM *qparam);
void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
@@ -136,7 +136,7 @@
tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc,
+ const SCAN_ORDER *sc,
const QUANT_PARAM *qparam);
void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
@@ -144,7 +144,7 @@
tran_low_t *qcoeff_ptr,
const MACROBLOCKD_PLANE *pd,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const scan_order *sc,
+ const SCAN_ORDER *sc,
const QUANT_PARAM *qparam);
void av1_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index cbc5173..aaca103 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -776,20 +776,20 @@
if (frame_is_intra_only(cm)) {
if (oxcf->rc_mode == AOM_Q) {
- int qindex = cq_level;
- double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
- int delta_qindex = av1_compute_qdelta(rc, q, q * 0.25, cm->bit_depth);
+ const int qindex = cq_level;
+ const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+ const int delta_qindex =
+ av1_compute_qdelta(rc, q_val, q_val * 0.25, cm->bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
} else if (rc->this_key_frame_forced) {
- int qindex = rc->last_boosted_qindex;
- double last_boosted_q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
- int delta_qindex = av1_compute_qdelta(
+ const int qindex = rc->last_boosted_qindex;
+ const double last_boosted_q =
+ av1_convert_qindex_to_q(qindex, cm->bit_depth);
+ const int delta_qindex = av1_compute_qdelta(
rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
- } else {
- // not first frame of one pass and kf_boost is set
+ } else { // not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
- double q_val;
active_best_quality = get_kf_active_quality(
rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth);
@@ -799,60 +799,56 @@
q_adj_factor -= 0.25;
}
- // Convert the adjustment factor to a qindex delta
- // on active_best_quality.
- q_val = av1_convert_qindex_to_q(active_best_quality, cm->bit_depth);
- active_best_quality +=
- av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+ // Convert the adjustment factor to a qindex delta on active_best_quality.
+ {
+ const double q_val =
+ av1_convert_qindex_to_q(active_best_quality, cm->bit_depth);
+ active_best_quality +=
+ av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+ }
}
} else if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
// Use the lower of active_worst_quality and the recent
// average Q as the basis for the GF/ARF best Q limit unless the last
// frame was a key frame.
- if (rc->frames_since_key > 1 &&
- rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
- q = rc->avg_frame_qindex[INTER_FRAME];
- } else {
- q = rc->avg_frame_qindex[KEY_FRAME];
- }
+ q = (rc->frames_since_key > 1 &&
+ rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
+ ? rc->avg_frame_qindex[INTER_FRAME]
+ : rc->avg_frame_qindex[KEY_FRAME];
// For constrained quality don't allow Q less than the cq level
if (oxcf->rc_mode == AOM_CQ) {
if (q < cq_level) q = cq_level;
-
active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
-
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
-
} else if (oxcf->rc_mode == AOM_Q) {
- int qindex = cq_level;
- double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
- int delta_qindex;
- if (cpi->refresh_alt_ref_frame)
- delta_qindex = av1_compute_qdelta(rc, q, q * 0.40, cm->bit_depth);
- else
- delta_qindex = av1_compute_qdelta(rc, q, q * 0.50, cm->bit_depth);
+ const int qindex = cq_level;
+ const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+ const int delta_qindex =
+ (cpi->refresh_alt_ref_frame)
+ ? av1_compute_qdelta(rc, q_val, q_val * 0.40, cm->bit_depth)
+ : av1_compute_qdelta(rc, q_val, q_val * 0.50, cm->bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
} else {
active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
}
} else {
if (oxcf->rc_mode == AOM_Q) {
- int qindex = cq_level;
- double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
- double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
- 0.70, 1.0, 0.85, 1.0 };
- int delta_qindex = av1_compute_qdelta(
- rc, q, q * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
+ const int qindex = cq_level;
+ const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+ const double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
+ 0.70, 1.0, 0.85, 1.0 };
+ const int delta_qindex = av1_compute_qdelta(
+ rc, q_val,
+ q_val * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
cm->bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
} else {
// Use the lower of active_worst_quality and recent/average Q.
- if (cm->current_video_frame > 1)
- active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
- else
- active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+ active_best_quality = (cm->current_video_frame > 1)
+ ? inter_minq[rc->avg_frame_qindex[INTER_FRAME]]
+ : inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
// For the constrained quality mode we don't want
// q to fall below the cq level.
if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) {
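
All three ratectrl.c branches above follow the same shape: convert an anchor qindex to a quantizer value, scale it by a target factor (0.25 for AOM_Q key frames, 0.75 for forced key frames, 0.40/0.50 for ARF/GF), then convert back to a qindex delta. A standalone sketch of that round trip; the linear qindex-to-q mapping is a stand-in for the codec's real table, and the linear search mirrors the usual av1_compute_qdelta approach:

#include <stdio.h>

#define QINDEX_RANGE 256

static double convert_qindex_to_q(int qindex) { return 0.25 * qindex + 1.0; }

static int compute_qdelta(double qstart, double qtarget) {
  int start_index = QINDEX_RANGE - 1, target_index = QINDEX_RANGE - 1;
  int i;
  for (i = 0; i < QINDEX_RANGE; ++i)
    if (convert_qindex_to_q(i) >= qstart) { start_index = i; break; }
  for (i = 0; i < QINDEX_RANGE; ++i)
    if (convert_qindex_to_q(i) >= qtarget) { target_index = i; break; }
  return target_index - start_index;
}

int main(void) {
  const int qindex = 120; /* demo anchor, e.g. cq_level */
  const double q_val = convert_qindex_to_q(qindex);
  /* Key frames aim much lower: factor 0.25 as in the AOM_Q branch above. */
  const int delta_qindex = compute_qdelta(q_val, q_val * 0.25);
  printf("qindex %d -> active best %d\n", qindex, qindex + delta_qindex);
  return 0;
}
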
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 2b9171f..5015837 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -619,7 +619,7 @@
get_entropy_contexts_plane(plane_bsize, tx_size, pd, t_above, t_left);
}
-void av1_mv_pred(AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
+void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
int i;
int zero_seen = 0;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 3ca4768..c9d21a8 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -398,8 +398,7 @@
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
unsigned int qstep, int *rate, int64_t *dist);
-int av1_get_switchable_rate(const struct AV1_COMP *cpi,
- const MACROBLOCKD *const xd);
+int av1_get_switchable_rate(const struct AV1_COMP *cpi, const MACROBLOCKD *xd);
int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
int stride);
@@ -438,8 +437,9 @@
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
}
-void av1_mv_pred(struct AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
- int ref_y_stride, int ref_frame, BLOCK_SIZE block_size);
+void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x,
+ uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame,
+ BLOCK_SIZE block_size);
static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
x->errorperbit = rdmult >> RD_EPB_SHIFT;
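
The rd.c/rd.h changes continue the pattern running through this patch: functions that only read encoder state take const AV1_COMP * / const AV1_COMMON *, which propagates the read-only guarantee down the call chain. A minimal sketch with stand-in types:

typedef struct { int base_qindex; } DemoCommon;
typedef struct { DemoCommon common; } DemoComp;

static int leaf(const DemoCommon *cm) { return cm->base_qindex; }

static int caller(const DemoComp *cpi) {
  const DemoCommon *const cm = &cpi->common; /* same shape as the hunks */
  /* cm->base_qindex = 0; would now fail to compile. */
  return leaf(cm);
}

int main(void) {
  DemoComp cpi = { { 32 } };
  return caller(&cpi) == 32 ? 0 : 1;
}
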
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fef5901..463570a 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -135,7 +135,7 @@
int64_t best_rd;
int exit_early;
int use_fast_coef_costing;
- const scan_order *so;
+ const SCAN_ORDER *scan_order;
uint8_t skippable;
};
@@ -1070,7 +1070,7 @@
static int rate_block(int plane, int block, int coeff_ctx, TX_SIZE tx_size,
struct rdcost_block_args *args) {
return av1_cost_coeffs(args->x, plane, block, coeff_ctx, tx_size,
- args->so->scan, args->so->neighbors,
+ args->scan_order->scan, args->scan_order->neighbors,
args->use_fast_coef_costing);
}
@@ -1130,11 +1130,11 @@
if (args->exit_early) return;
if (!is_inter_block(mbmi)) {
- struct encode_b_args intra_arg = {
+ struct encode_b_args b_args = {
x, NULL, &mbmi->skip, args->t_above, args->t_left, 1
};
av1_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
- &intra_arg);
+ &b_args);
if (args->cpi->sf.use_transform_domain_distortion) {
dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size, &dist,
@@ -1232,7 +1232,8 @@
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
- args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ args.scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
&args);
@@ -1275,7 +1276,8 @@
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
- args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ args.scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
&args);
@@ -1294,10 +1296,10 @@
}
#endif // CONFIG_SUPERTX
-static int64_t txfm_yrd(AV1_COMP *cpi, MACROBLOCK *x, int *r, int64_t *d,
- int *s, int64_t *sse, int64_t ref_best_rd,
+static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *r,
+ int64_t *d, int *s, int64_t *sse, int64_t ref_best_rd,
BLOCK_SIZE bs, TX_TYPE tx_type, int tx_size) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int64_t rd = INT64_MAX;
@@ -1371,12 +1373,12 @@
return rd;
}
-static int64_t choose_tx_size_fix_type(AV1_COMP *cpi, BLOCK_SIZE bs,
+static int64_t choose_tx_size_fix_type(const AV1_COMP *const cpi, BLOCK_SIZE bs,
MACROBLOCK *x, int *rate,
int64_t *distortion, int *skip,
int64_t *psse, int64_t ref_best_rd,
TX_TYPE tx_type, int prune) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int r, s;
@@ -1423,7 +1425,7 @@
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (evaluate_rect_tx) {
const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
- const int ext_tx_set = get_ext_tx_set(rect_tx_size, bs, 1);
+ ext_tx_set = get_ext_tx_set(rect_tx_size, bs, 1);
if (ext_tx_used_inter[ext_tx_set][tx_type]) {
rd = txfm_yrd(cpi, x, &r, &d, &s, &sse, ref_best_rd, bs, tx_type,
rect_tx_size);
@@ -1494,18 +1496,19 @@
}
#if CONFIG_EXT_INTER
-static int64_t estimate_yrd_for_sb(AV1_COMP *cpi, BLOCK_SIZE bs, MACROBLOCK *x,
- int *r, int64_t *d, int *s, int64_t *sse,
- int64_t ref_best_rd) {
+static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
+ MACROBLOCK *x, int *r, int64_t *d, int *s,
+ int64_t *sse, int64_t ref_best_rd) {
return txfm_yrd(cpi, x, r, d, s, sse, ref_best_rd, bs, DCT_DCT,
max_txsize_lookup[bs]);
}
#endif // CONFIG_EXT_INTER
-static void choose_largest_tx_size(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int64_t *distortion, int *skip, int64_t *sse,
- int64_t ref_best_rd, BLOCK_SIZE bs) {
- AV1_COMMON *const cm = &cpi->common;
+static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skip,
+ int64_t *sse, int64_t ref_best_rd,
+ BLOCK_SIZE bs) {
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
TX_TYPE tx_type, best_tx_type = DCT_DCT;
@@ -1629,8 +1632,8 @@
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}
-static void choose_smallest_tx_size(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int64_t *distortion, int *skip,
+static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skip,
int64_t *sse, int64_t ref_best_rd,
BLOCK_SIZE bs) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1643,7 +1646,8 @@
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}
-static void choose_tx_size_type_from_rd(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
+static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
+ MACROBLOCK *x, int *rate,
int64_t *distortion, int *skip,
int64_t *psse, int64_t ref_best_rd,
BLOCK_SIZE bs) {
@@ -1693,7 +1697,7 @@
#endif
}
-static void super_block_yrd(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
+static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skip, int64_t *psse,
BLOCK_SIZE bs, int64_t ref_best_rd) {
MACROBLOCKD *xd = &x->e_mbd;
@@ -1733,7 +1737,7 @@
#if CONFIG_PALETTE
static int rd_pick_palette_intra_sby(
- AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int palette_ctx,
+ const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int palette_ctx,
int dc_mode_cost, PALETTE_MODE_INFO *palette_mode_info,
uint8_t *best_palette_color_map, TX_SIZE *best_tx, TX_TYPE *best_tx_type,
PREDICTION_MODE *mode_selected, int64_t *best_rd) {
@@ -1878,13 +1882,11 @@
}
#endif // CONFIG_PALETTE
-static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
- int col, PREDICTION_MODE *best_mode,
- const int *bmode_costs, ENTROPY_CONTEXT *a,
- ENTROPY_CONTEXT *l, int *bestrate,
- int *bestratey, int64_t *bestdistortion,
- BLOCK_SIZE bsize, int *y_skip,
- int64_t rd_thresh) {
+static int64_t rd_pick_intra4x4block(
+ const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
+ PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
+ ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
+ BLOCK_SIZE bsize, int *y_skip, int64_t rd_thresh) {
PREDICTION_MODE mode;
MACROBLOCKD *const xd = &x->e_mbd;
int64_t best_rd = rd_thresh;
@@ -1946,7 +1948,7 @@
dst_stride, xd->bd);
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
- const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+ const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type, 0);
const int coeff_ctx =
combine_entropy_contexts(*(tempa + idx), *(templ + idy));
#if CONFIG_NEW_QUANT
@@ -1956,9 +1958,9 @@
av1_xform_quant(x, 0, block, row + idy, col + idx, BLOCK_8X8,
TX_4X4, AV1_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
- ratey +=
- av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
- so->neighbors, cpi->sf.use_fast_coef_costing);
+ ratey += av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4,
+ scan_order->scan, scan_order->neighbors,
+ cpi->sf.use_fast_coef_costing);
*(tempa + idx) = !(p->eobs[block] == 0);
*(templ + idy) = !(p->eobs[block] == 0);
can_skip &= (p->eobs[block] == 0);
@@ -1971,7 +1973,7 @@
int64_t dist;
unsigned int tmp;
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
- const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+ const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type, 0);
const int coeff_ctx =
combine_entropy_contexts(*(tempa + idx), *(templ + idy));
#if CONFIG_NEW_QUANT
@@ -1982,9 +1984,9 @@
TX_4X4, AV1_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
av1_optimize_b(x, 0, block, TX_4X4, coeff_ctx);
- ratey +=
- av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
- so->neighbors, cpi->sf.use_fast_coef_costing);
+ ratey += av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4,
+ scan_order->scan, scan_order->neighbors,
+ cpi->sf.use_fast_coef_costing);
*(tempa + idx) = !(p->eobs[block] == 0);
*(templ + idy) = !(p->eobs[block] == 0);
can_skip &= (p->eobs[block] == 0);
@@ -2066,7 +2068,7 @@
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
- const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+ const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type, 0);
const int coeff_ctx =
combine_entropy_contexts(*(tempa + idx), *(templ + idy));
#if CONFIG_NEW_QUANT
@@ -2076,9 +2078,9 @@
av1_xform_quant(x, 0, block, row + idy, col + idx, BLOCK_8X8, TX_4X4,
AV1_XFORM_QUANT_B);
#endif // CONFIG_NEW_QUANT
- ratey +=
- av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
- so->neighbors, cpi->sf.use_fast_coef_costing);
+ ratey += av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4,
+ scan_order->scan, scan_order->neighbors,
+ cpi->sf.use_fast_coef_costing);
*(tempa + idx) = !(p->eobs[block] == 0);
*(templ + idy) = !(p->eobs[block] == 0);
can_skip &= (p->eobs[block] == 0);
@@ -2090,7 +2092,7 @@
int64_t dist;
unsigned int tmp;
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
- const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+ const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type, 0);
const int coeff_ctx =
combine_entropy_contexts(*(tempa + idx), *(templ + idy));
#if CONFIG_NEW_QUANT
@@ -2101,9 +2103,9 @@
AV1_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
av1_optimize_b(x, 0, block, TX_4X4, coeff_ctx);
- ratey +=
- av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
- so->neighbors, cpi->sf.use_fast_coef_costing);
+ ratey += av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4,
+ scan_order->scan, scan_order->neighbors,
+ cpi->sf.use_fast_coef_costing);
*(tempa + idx) = !(p->eobs[block] == 0);
*(templ + idy) = !(p->eobs[block] == 0);
can_skip &= (p->eobs[block] == 0);
@@ -2151,10 +2153,10 @@
return best_rd;
}
-static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb,
- int *rate, int *rate_y,
- int64_t *distortion, int *y_skip,
- int64_t best_rd) {
+static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
+ MACROBLOCK *mb, int *rate,
+ int *rate_y, int64_t *distortion,
+ int *y_skip, int64_t best_rd) {
int i, j;
const MACROBLOCKD *const xd = &mb->e_mbd;
MODE_INFO *const mic = xd->mi[0];
@@ -2247,11 +2249,11 @@
#if CONFIG_EXT_INTRA
// Return 1 if an ext intra mode is selected; return 0 otherwise.
-static int rd_pick_ext_intra_sby(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, BLOCK_SIZE bsize,
- int mode_cost, int64_t *best_rd,
- uint16_t skip_mask) {
+static int rd_pick_ext_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int mode_cost,
+ int64_t *best_rd, uint16_t skip_mask) {
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *mbmi = &mic->mbmi;
@@ -2310,7 +2312,7 @@
}
static void pick_intra_angle_routine_sby(
- AV1_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
+ const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable, int *best_angle_delta,
TX_SIZE *best_tx_size, TX_TYPE *best_tx_type, INTRA_FILTER *best_filter,
BLOCK_SIZE bsize, int rate_overhead, int64_t *best_rd) {
@@ -2337,10 +2339,11 @@
}
}
-static int64_t rd_pick_intra_angle_sby(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, BLOCK_SIZE bsize,
- int rate_overhead, int64_t best_rd) {
+static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *mbmi = &mic->mbmi;
@@ -2496,7 +2499,7 @@
static void angle_estimation(const uint8_t *src, int src_stride, int rows,
int cols, uint8_t *directional_mode_skip_mask) {
- int i, r, c, index, dx, dy, temp, sn, remd, quot;
+ int i, r, c, dx, dy, temp, sn, remd, quot;
uint64_t hist[DIRECTIONAL_MODES];
uint64_t hist_sum = 0;
@@ -2504,6 +2507,7 @@
src += src_stride;
for (r = 1; r < rows; ++r) {
for (c = 1; c < cols; ++c) {
+ uint8_t index;
dx = src[c] - src[c - 1];
dy = src[c] - src[c - src_stride];
temp = dx * dx + dy * dy;
@@ -2526,7 +2530,7 @@
for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
for (i = 0; i < INTRA_MODES; ++i) {
if (i != DC_PRED && i != TM_PRED) {
- int index = mode_to_angle_bin[i];
+ const uint8_t index = mode_to_angle_bin[i];
uint64_t score = 2 * hist[index];
int weight = 2;
if (index > 0) {
@@ -2547,7 +2551,7 @@
static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
int rows, int cols,
uint8_t *directional_mode_skip_mask) {
- int i, r, c, index, dx, dy, temp, sn, remd, quot;
+ int i, r, c, dx, dy, temp, sn, remd, quot;
uint64_t hist[DIRECTIONAL_MODES];
uint64_t hist_sum = 0;
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
@@ -2556,6 +2560,7 @@
src += src_stride;
for (r = 1; r < rows; ++r) {
for (c = 1; c < cols; ++c) {
+ uint8_t index;
dx = src[c] - src[c - 1];
dy = src[c] - src[c - src_stride];
temp = dx * dx + dy * dy;
@@ -2578,7 +2583,7 @@
for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
for (i = 0; i < INTRA_MODES; ++i) {
if (i != DC_PRED && i != TM_PRED) {
- int index = mode_to_angle_bin[i];
+ const uint8_t index = mode_to_angle_bin[i];
uint64_t score = 2 * hist[index];
int weight = 2;
if (index > 0) {
@@ -2598,10 +2603,10 @@
#endif // CONFIG_EXT_INTRA
// This function is used only for intra_only frames
-static int64_t rd_pick_intra_sby_mode(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, BLOCK_SIZE bsize,
- int64_t best_rd) {
+static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int64_t best_rd) {
uint8_t mode_idx;
PREDICTION_MODE mode_selected = DC_PRED;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2625,7 +2630,7 @@
int beat_best_rd = 0;
#endif // CONFIG_EXT_INTRA
TX_TYPE best_tx_type = DCT_DCT;
- int *bmode_costs;
+ const int *bmode_costs;
#if CONFIG_PALETTE
PALETTE_MODE_INFO palette_mode_info;
PALETTE_MODE_INFO *const pmi = &mic->mbmi.palette_mode_info;
@@ -2812,7 +2817,7 @@
// Return value 0: early termination triggered, no valid rd cost available;
// 1: rd cost values are valid.
-static int super_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, int *rate,
+static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skippable, int64_t *sse,
BLOCK_SIZE bsize, int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2826,7 +2831,6 @@
if (ref_best_rd < 0) is_cost_valid = 0;
if (is_inter_block(mbmi) && is_cost_valid) {
- int plane;
for (plane = 1; plane < MAX_MB_PLANE; ++plane)
av1_subtract_plane(x, bsize, plane);
}
@@ -2879,7 +2883,7 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const scan_order *const scan_order =
+ const SCAN_ORDER *const scan_order =
get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
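The scan_order -> SCAN_ORDER rename in this hunk is not cosmetic: the locals holding the table are themselves named scan_order, and once -Wshadow is enabled (see the configure change below) a local whose name matches a file-scope typedef draws a warning, at least under gcc. Renaming the type instead of every call site keeps the code readable. In miniature:

    /* Before (warns under gcc -Wshadow): the local shadows the typedef.
     *   typedef struct { const int16_t *scan; } scan_order;
     *   const scan_order *const scan_order = get_scan(...);
     */

    #include <stdint.h>

    /* After: an all-caps type name leaves the lower-case name free. */
    typedef struct { const int16_t *scan; } SCAN_ORDER;

    static int16_t first_coeff_pos(const SCAN_ORDER *so) {
      const SCAN_ORDER *const scan_order = so; /* no shadowing */
      return scan_order->scan[0];
    }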
@@ -3109,7 +3113,6 @@
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bsl = b_height_log2_lookup[bsize];
int sub_step = num_4x4_blocks_txsize_lookup[tx_size - 1];
- int i;
int this_rate;
int64_t this_dist;
int64_t this_bsse;
@@ -3248,7 +3251,7 @@
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (is_rect_tx_allowed(xd, mbmi)) {
int rate_rect_tx, skippable_rect_tx = 0;
- int64_t dist_rect_tx, sse_rect_tx, rd, rd_rect_tx;
+ int64_t dist_rect_tx, sse_rect_tx, rd_rect_tx;
int tx_size_cat = inter_tx_size_cat_lookup[bsize];
TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
TX_SIZE var_tx_size = mbmi->tx_size;
@@ -3529,7 +3532,6 @@
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
if (is_inter_block(mbmi) && is_cost_valid) {
- int plane;
for (plane = 1; plane < MAX_MB_PLANE; ++plane)
av1_subtract_plane(x, bsize, plane);
}
@@ -3593,7 +3595,7 @@
#if CONFIG_PALETTE
static void rd_pick_palette_intra_sbuv(
- AV1_COMP *cpi, MACROBLOCK *x, int dc_mode_cost,
+ const AV1_COMP *const cpi, MACROBLOCK *x, int dc_mode_cost,
PALETTE_MODE_INFO *palette_mode_info, uint8_t *best_palette_color_map,
PREDICTION_MODE *mode_selected, int64_t *best_rd, int *rate,
int *rate_tokenonly, int64_t *distortion, int *skippable) {
@@ -3760,10 +3762,10 @@
#if CONFIG_EXT_INTRA
// Return 1 if an ext intra mode is selected; return 0 otherwise.
-static int rd_pick_ext_intra_sbuv(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, BLOCK_SIZE bsize,
- int64_t *best_rd) {
+static int rd_pick_ext_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int64_t *best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
int ext_intra_selected_flag = 0;
@@ -3813,12 +3815,10 @@
}
}
-static void pick_intra_angle_routine_sbuv(AV1_COMP *cpi, MACROBLOCK *x,
- int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable,
- int *best_angle_delta,
- BLOCK_SIZE bsize, int rate_overhead,
- int64_t *best_rd) {
+static void pick_intra_angle_routine_sbuv(
+ const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable, int *best_angle_delta,
+ BLOCK_SIZE bsize, int rate_overhead, int64_t *best_rd) {
MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse, this_rd;
@@ -3839,10 +3839,11 @@
}
}
-static int rd_pick_intra_angle_sbuv(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, BLOCK_SIZE bsize,
- int rate_overhead, int64_t best_rd) {
+static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
int this_rate_tokenonly, this_rate, s;
@@ -3909,10 +3910,10 @@
}
#endif // CONFIG_EXT_INTRA
-static int64_t rd_pick_intra_sbuv_mode(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, BLOCK_SIZE bsize,
- TX_SIZE max_tx_size) {
+static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
PREDICTION_MODE mode;
@@ -4039,7 +4040,7 @@
return best_rd;
}
-static void choose_intra_uv_mode(AV1_COMP *cpi, MACROBLOCK *const x,
+static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
TX_SIZE max_tx_size, int *rate_uv,
int *rate_uv_tokenonly, int64_t *dist_uv,
@@ -4052,7 +4053,7 @@
*mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
}
-static int cost_mv_ref(const AV1_COMP *cpi, PREDICTION_MODE mode,
+static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
#if CONFIG_REF_MV && CONFIG_EXT_INTER
int is_compound,
#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
@@ -4148,8 +4149,9 @@
2);
#endif // CONFIG_GLOBAL_MOTION
-static int set_and_cost_bmi_mvs(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
- int i, PREDICTION_MODE mode, int_mv this_mv[2],
+static int set_and_cost_bmi_mvs(const AV1_COMP *const cpi, MACROBLOCK *x,
+ MACROBLOCKD *xd, int i, PREDICTION_MODE mode,
+ int_mv this_mv[2],
int_mv frame_mv[MB_MODE_COUNT]
[TOTAL_REFS_PER_FRAME],
int_mv seg_mvs[TOTAL_REFS_PER_FRAME],
@@ -4322,7 +4324,7 @@
#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
}
-static int64_t encode_inter_mb_segment(AV1_COMP *cpi, MACROBLOCK *x,
+static int64_t encode_inter_mb_segment(const AV1_COMP *const cpi, MACROBLOCK *x,
int64_t best_yrd, int i, int *labelyrate,
int64_t *distortion, int64_t *sse,
ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
@@ -4345,7 +4347,7 @@
TX_SIZE tx_size = mi->mbmi.tx_size;
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
- const scan_order *so = get_scan(tx_size, tx_type, 1);
+ const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type, 1);
const int num_4x4_w = num_4x4_blocks_wide_txsize_lookup[tx_size];
const int num_4x4_h = num_4x4_blocks_high_txsize_lookup[tx_size];
@@ -4405,8 +4407,9 @@
&dist, &ssz);
thisdistortion += dist;
thissse += ssz;
- thisrate += av1_cost_coeffs(x, 0, block, coeff_ctx, tx_size, so->scan,
- so->neighbors, cpi->sf.use_fast_coef_costing);
+ thisrate +=
+ av1_cost_coeffs(x, 0, block, coeff_ctx, tx_size, scan_order->scan,
+ scan_order->neighbors, cpi->sf.use_fast_coef_costing);
*(ta + (k & 1)) = !(p->eobs[block] == 0);
*(tl + (k >> 1)) = !(p->eobs[block] == 0);
#if CONFIG_EXT_TX
@@ -4499,7 +4502,7 @@
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive, then clean up or remove
static int check_best_zero_mv(
- const AV1_COMP *cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
+ const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
#if CONFIG_REF_MV && CONFIG_EXT_INTER
const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
@@ -4601,8 +4604,9 @@
return 1;
}
-static void joint_motion_search(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- int_mv *frame_mv, int mi_row, int mi_col,
+static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
+ int mi_col,
#if CONFIG_EXT_INTER
int_mv *ref_mv_sub8x8[2],
#endif
@@ -4834,7 +4838,7 @@
}
static int64_t rd_pick_best_sub8x8_mode(
- AV1_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv,
+ const AV1_COMP *const cpi, MACROBLOCK *x, int_mv *best_ref_mv,
int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate,
int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
int mvthresh,
@@ -4856,7 +4860,7 @@
int k, br = 0, idx, idy;
int64_t bd = 0, block_sse = 0;
PREDICTION_MODE this_mode;
- AV1_COMMON *cm = &cpi->common;
+ const AV1_COMMON *cm = &cpi->common;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
const int label_count = 4;
@@ -4917,8 +4921,8 @@
int_mv mode_mv[MB_MODE_COUNT][2];
int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
PREDICTION_MODE mode_selected = ZEROMV;
- int64_t best_rd = INT64_MAX;
- const int i = idy * 2 + idx;
+ int64_t new_best_rd = INT64_MAX;
+ const int index = idy * 2 + idx;
int ref;
#if CONFIG_REF_MV
CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
@@ -4933,7 +4937,7 @@
const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
#if CONFIG_EXT_INTER
int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
- av1_update_mv_context(xd, mi, frame, mv_ref_list, i, mi_row, mi_col,
+ av1_update_mv_context(xd, mi, frame, mv_ref_list, index, mi_row, mi_col,
NULL);
#endif // CONFIG_EXT_INTER
#if CONFIG_GLOBAL_MOTION
@@ -4942,7 +4946,7 @@
#else // CONFIG_GLOBAL_MOTION
frame_mv[ZEROMV][frame].as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
- av1_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
+ av1_append_sub8x8_mvs_for_idx(cm, xd, index, ref, mi_row, mi_col,
#if CONFIG_REF_MV
ref_mv_stack[ref], &ref_mv_count[ref],
#endif
@@ -5017,7 +5021,7 @@
for (ref = 0; ref < 1 + has_second_rf; ++ref)
bsi->ref_mv[ref]->as_int = ref_mvs_sub8x8[mv_idx][ref].as_int;
#endif // CONFIG_EXT_INTER
- bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
+ bsi->rdstat[index][mode_idx].brdcost = INT64_MAX;
if (!(inter_mode_mask & (1 << this_mode))) continue;
#if CONFIG_REF_MV
@@ -5025,16 +5029,16 @@
#if !CONFIG_EXT_INTER
if (filter_idx > 0 && this_mode == NEWMV) {
BEST_SEG_INFO *ref_bsi = bsi_buf;
- SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[i][mode_idx];
+ SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[index][mode_idx];
if (has_second_rf) {
- if (seg_mvs[i][mbmi->ref_frame[0]].as_int ==
+ if (seg_mvs[index][mbmi->ref_frame[0]].as_int ==
ref_rdstat->mvs[0].as_int &&
ref_rdstat->mvs[0].as_int != INVALID_MV)
if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
--run_mv_search;
- if (seg_mvs[i][mbmi->ref_frame[1]].as_int ==
+ if (seg_mvs[index][mbmi->ref_frame[1]].as_int ==
ref_rdstat->mvs[1].as_int &&
ref_rdstat->mvs[1].as_int != INVALID_MV)
if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
@@ -5043,23 +5047,24 @@
if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
ref_rdstat->mvs[0].as_int != INVALID_MV) {
run_mv_search = 0;
- seg_mvs[i][mbmi->ref_frame[0]].as_int = ref_rdstat->mvs[0].as_int;
+ seg_mvs[index][mbmi->ref_frame[0]].as_int =
+ ref_rdstat->mvs[0].as_int;
}
}
if (run_mv_search != 0 && filter_idx > 1) {
ref_bsi = bsi_buf + 1;
- ref_rdstat = &ref_bsi->rdstat[i][mode_idx];
+ ref_rdstat = &ref_bsi->rdstat[index][mode_idx];
run_mv_search = 2;
if (has_second_rf) {
- if (seg_mvs[i][mbmi->ref_frame[0]].as_int ==
+ if (seg_mvs[index][mbmi->ref_frame[0]].as_int ==
ref_rdstat->mvs[0].as_int &&
ref_rdstat->mvs[0].as_int != INVALID_MV)
if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
--run_mv_search;
- if (seg_mvs[i][mbmi->ref_frame[1]].as_int ==
+ if (seg_mvs[index][mbmi->ref_frame[1]].as_int ==
ref_rdstat->mvs[1].as_int &&
ref_rdstat->mvs[1].as_int != INVALID_MV)
if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
@@ -5068,7 +5073,7 @@
if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
ref_rdstat->mvs[0].as_int != INVALID_MV) {
run_mv_search = 0;
- seg_mvs[i][mbmi->ref_frame[0]].as_int =
+ seg_mvs[index][mbmi->ref_frame[0]].as_int =
ref_rdstat->mvs[0].as_int;
}
}
@@ -5088,24 +5093,24 @@
mbmi_ext->compound_mode_context,
#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
frame_mv, this_mode, mbmi->ref_frame, bsize,
- i))
+ index))
continue;
memcpy(orig_pre, pd->pre, sizeof(orig_pre));
- memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
- sizeof(bsi->rdstat[i][mode_idx].ta));
- memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
- sizeof(bsi->rdstat[i][mode_idx].tl));
+ memcpy(bsi->rdstat[index][mode_idx].ta, t_above,
+ sizeof(bsi->rdstat[index][mode_idx].ta));
+ memcpy(bsi->rdstat[index][mode_idx].tl, t_left,
+ sizeof(bsi->rdstat[index][mode_idx].tl));
// motion search for newmv (single predictor case only)
if (!has_second_rf &&
#if CONFIG_EXT_INTER
have_newmv_in_inter_mode(this_mode) &&
- (seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+ (seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
av1_use_mv_hp(&bsi->ref_mv[0]->as_mv) == 0)
#else
this_mode == NEWMV &&
- (seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+ (seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV ||
run_mv_search)
#endif // CONFIG_EXT_INTER
) {
@@ -5122,7 +5127,7 @@
/* Is the best so far sufficiently good that we can't justify doing
* a new motion search. */
- if (best_rd < label_mv_thresh) break;
+ if (new_best_rd < label_mv_thresh) break;
if (cpi->oxcf.mode != BEST) {
#if CONFIG_EXT_INTER
@@ -5130,18 +5135,18 @@
#else
// use previous block's result as next block's MV predictor.
#if !CONFIG_REF_MV
- if (i > 0) {
- bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
- if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
+ if (index > 0) {
+ bsi->mvp.as_int = mi->bmi[index - 1].as_mv[0].as_int;
+ if (index == 2)
+ bsi->mvp.as_int = mi->bmi[index - 2].as_mv[0].as_int;
}
#endif
#endif // CONFIG_EXT_INTER
}
- if (i == 0)
- max_mv = x->max_mv_context[mbmi->ref_frame[0]];
- else
- max_mv =
- AOMMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
+ max_mv = (index == 0) ? (int)x->max_mv_context[mbmi->ref_frame[0]]
+ : AOMMAX(abs(bsi->mvp.as_mv.row),
+ abs(bsi->mvp.as_mv.col)) >>
+ 3;
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
// Take wtd average of the step_params based on the last frame's
@@ -5168,7 +5173,7 @@
}
// adjust src pointer for this block
- mi_buf_shift(x, i);
+ mi_buf_shift(x, index);
av1_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
@@ -5197,7 +5202,6 @@
const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
// Use up-sampled reference frames.
- struct macroblockd_plane *const pd = &xd->plane[0];
struct buf_2d backup_pred = pd->pre[0];
const YV12_BUFFER_CONFIG *upsampled_ref =
get_upsampled_ref(cpi, mbmi->ref_frame[0]);
@@ -5211,7 +5215,7 @@
// adjust pred pointer for this block
pd->pre[0].buf =
- &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, i,
+ &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, index,
pd->pre[0].stride))
<< 3];
@@ -5265,9 +5269,9 @@
// save motion search result for use in compound prediction
#if CONFIG_EXT_INTER
- seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
+ seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
#else
- seg_mvs[i][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
+ seg_mvs[index][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
#endif // CONFIG_EXT_INTER
}
@@ -5286,11 +5290,11 @@
if (has_second_rf) {
#if CONFIG_EXT_INTER
- if (seg_mvs[i][mv_idx][mbmi->ref_frame[1]].as_int == INVALID_MV ||
- seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
+ if (seg_mvs[index][mv_idx][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+ seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
#else
- if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
- seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
+ if (seg_mvs[index][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+ seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV)
#endif // CONFIG_EXT_INTER
continue;
}
@@ -5312,26 +5316,26 @@
#endif
{
// adjust src pointers
- mi_buf_shift(x, i);
+ mi_buf_shift(x, index);
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
int rate_mv;
joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
mi_col,
#if CONFIG_EXT_INTER
- bsi->ref_mv, seg_mvs[i][mv_idx],
+ bsi->ref_mv, seg_mvs[index][mv_idx],
#else
- seg_mvs[i],
+ seg_mvs[index],
#endif // CONFIG_EXT_INTER
- &rate_mv, i);
+ &rate_mv, index);
#if CONFIG_EXT_INTER
- compound_seg_newmvs[i][0].as_int =
+ compound_seg_newmvs[index][0].as_int =
frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
- compound_seg_newmvs[i][1].as_int =
+ compound_seg_newmvs[index][1].as_int =
frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
#else
- seg_mvs[i][mbmi->ref_frame[0]].as_int =
+ seg_mvs[index][mbmi->ref_frame[0]].as_int =
frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
- seg_mvs[i][mbmi->ref_frame[1]].as_int =
+ seg_mvs[index][mbmi->ref_frame[1]].as_int =
frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
#endif // CONFIG_EXT_INTER
}
@@ -5339,42 +5343,42 @@
mi_buf_restore(x, orig_src, orig_pre);
}
- bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs(
- cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv,
+ bsi->rdstat[index][mode_idx].brate = set_and_cost_bmi_mvs(
+ cpi, x, xd, index, this_mode, mode_mv[this_mode], frame_mv,
#if CONFIG_EXT_INTER
- seg_mvs[i][mv_idx], compound_seg_newmvs[i],
+ seg_mvs[index][mv_idx], compound_seg_newmvs[index],
#else
- seg_mvs[i],
+ seg_mvs[index],
#endif // CONFIG_EXT_INTER
bsi->ref_mv, x->nmvjointcost, x->mvcost);
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
- bsi->rdstat[i][mode_idx].mvs[ref].as_int =
+ bsi->rdstat[index][mode_idx].mvs[ref].as_int =
mode_mv[this_mode][ref].as_int;
if (num_4x4_blocks_wide > 1)
- bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
+ bsi->rdstat[index + 1][mode_idx].mvs[ref].as_int =
mode_mv[this_mode][ref].as_int;
if (num_4x4_blocks_high > 1)
- bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
+ bsi->rdstat[index + 2][mode_idx].mvs[ref].as_int =
mode_mv[this_mode][ref].as_int;
#if CONFIG_REF_MV
- bsi->rdstat[i][mode_idx].pred_mv[ref].as_int =
- mi->bmi[i].pred_mv[ref].as_int;
+ bsi->rdstat[index][mode_idx].pred_mv[ref].as_int =
+ mi->bmi[index].pred_mv[ref].as_int;
if (num_4x4_blocks_wide > 1)
- bsi->rdstat[i + 1][mode_idx].pred_mv[ref].as_int =
- mi->bmi[i].pred_mv[ref].as_int;
+ bsi->rdstat[index + 1][mode_idx].pred_mv[ref].as_int =
+ mi->bmi[index].pred_mv[ref].as_int;
if (num_4x4_blocks_high > 1)
- bsi->rdstat[i + 2][mode_idx].pred_mv[ref].as_int =
- mi->bmi[i].pred_mv[ref].as_int;
+ bsi->rdstat[index + 2][mode_idx].pred_mv[ref].as_int =
+ mi->bmi[index].pred_mv[ref].as_int;
#endif
#if CONFIG_EXT_INTER
- bsi->rdstat[i][mode_idx].ref_mv[ref].as_int =
+ bsi->rdstat[index][mode_idx].ref_mv[ref].as_int =
bsi->ref_mv[ref]->as_int;
if (num_4x4_blocks_wide > 1)
- bsi->rdstat[i + 1][mode_idx].ref_mv[ref].as_int =
+ bsi->rdstat[index + 1][mode_idx].ref_mv[ref].as_int =
bsi->ref_mv[ref]->as_int;
if (num_4x4_blocks_high > 1)
- bsi->rdstat[i + 2][mode_idx].ref_mv[ref].as_int =
+ bsi->rdstat[index + 2][mode_idx].ref_mv[ref].as_int =
bsi->ref_mv[ref]->as_int;
#endif // CONFIG_EXT_INTER
}
@@ -5393,17 +5397,18 @@
subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
#if CONFIG_EXT_INTER
if (have_newmv_in_inter_mode(this_mode))
- have_ref &= ((mode_mv[this_mode][ref].as_int ==
- ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
- (bsi->ref_mv[ref]->as_int ==
- ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+ have_ref &=
+ ((mode_mv[this_mode][ref].as_int ==
+ ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) &&
+ (bsi->ref_mv[ref]->as_int ==
+ ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int));
else
#endif // CONFIG_EXT_INTER
have_ref &= mode_mv[this_mode][ref].as_int ==
- ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
+ ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int;
}
- have_ref &= ref_bsi->rdstat[i][mode_idx].brate > 0;
+ have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0;
if (filter_idx > 1 && !subpelmv && !have_ref) {
ref_bsi = bsi_buf + 1;
@@ -5411,118 +5416,126 @@
for (ref = 0; ref < 1 + has_second_rf; ++ref)
#if CONFIG_EXT_INTER
if (have_newmv_in_inter_mode(this_mode))
- have_ref &= ((mode_mv[this_mode][ref].as_int ==
- ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
- (bsi->ref_mv[ref]->as_int ==
- ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+ have_ref &=
+ ((mode_mv[this_mode][ref].as_int ==
+ ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) &&
+ (bsi->ref_mv[ref]->as_int ==
+ ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int));
else
#endif // CONFIG_EXT_INTER
have_ref &= mode_mv[this_mode][ref].as_int ==
- ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
+ ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int;
- have_ref &= ref_bsi->rdstat[i][mode_idx].brate > 0;
+ have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0;
}
if (!subpelmv && have_ref &&
- ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
+ ref_bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) {
#if CONFIG_REF_MV
- bsi->rdstat[i][mode_idx].byrate =
- ref_bsi->rdstat[i][mode_idx].byrate;
- bsi->rdstat[i][mode_idx].bdist = ref_bsi->rdstat[i][mode_idx].bdist;
- bsi->rdstat[i][mode_idx].bsse = ref_bsi->rdstat[i][mode_idx].bsse;
- bsi->rdstat[i][mode_idx].brate +=
- ref_bsi->rdstat[i][mode_idx].byrate;
- bsi->rdstat[i][mode_idx].eobs = ref_bsi->rdstat[i][mode_idx].eobs;
+ bsi->rdstat[index][mode_idx].byrate =
+ ref_bsi->rdstat[index][mode_idx].byrate;
+ bsi->rdstat[index][mode_idx].bdist =
+ ref_bsi->rdstat[index][mode_idx].bdist;
+ bsi->rdstat[index][mode_idx].bsse =
+ ref_bsi->rdstat[index][mode_idx].bsse;
+ bsi->rdstat[index][mode_idx].brate +=
+ ref_bsi->rdstat[index][mode_idx].byrate;
+ bsi->rdstat[index][mode_idx].eobs =
+ ref_bsi->rdstat[index][mode_idx].eobs;
- bsi->rdstat[i][mode_idx].brdcost =
- RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate,
- bsi->rdstat[i][mode_idx].bdist);
+ bsi->rdstat[index][mode_idx].brdcost =
+ RDCOST(x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate,
+ bsi->rdstat[index][mode_idx].bdist);
- memcpy(bsi->rdstat[i][mode_idx].ta, ref_bsi->rdstat[i][mode_idx].ta,
- sizeof(bsi->rdstat[i][mode_idx].ta));
- memcpy(bsi->rdstat[i][mode_idx].tl, ref_bsi->rdstat[i][mode_idx].tl,
- sizeof(bsi->rdstat[i][mode_idx].tl));
+ memcpy(bsi->rdstat[index][mode_idx].ta,
+ ref_bsi->rdstat[index][mode_idx].ta,
+ sizeof(bsi->rdstat[index][mode_idx].ta));
+ memcpy(bsi->rdstat[index][mode_idx].tl,
+ ref_bsi->rdstat[index][mode_idx].tl,
+ sizeof(bsi->rdstat[index][mode_idx].tl));
#else
- memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
- sizeof(SEG_RDSTAT));
+ memcpy(&bsi->rdstat[index][mode_idx],
+ &ref_bsi->rdstat[index][mode_idx], sizeof(SEG_RDSTAT));
#endif
if (num_4x4_blocks_wide > 1)
- bsi->rdstat[i + 1][mode_idx].eobs =
- ref_bsi->rdstat[i + 1][mode_idx].eobs;
+ bsi->rdstat[index + 1][mode_idx].eobs =
+ ref_bsi->rdstat[index + 1][mode_idx].eobs;
if (num_4x4_blocks_high > 1)
- bsi->rdstat[i + 2][mode_idx].eobs =
- ref_bsi->rdstat[i + 2][mode_idx].eobs;
+ bsi->rdstat[index + 2][mode_idx].eobs =
+ ref_bsi->rdstat[index + 2][mode_idx].eobs;
- if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
+ if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) {
#if CONFIG_REF_MV
// If the NEWMV mode is using the same motion vector as the
// NEARESTMV mode, skip the rest rate-distortion calculations
// and use the inferred motion vector modes.
if (this_mode == NEWMV) {
if (has_second_rf) {
- if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+ if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
bsi->ref_mv[0]->as_int &&
- bsi->rdstat[i][mode_idx].mvs[1].as_int ==
+ bsi->rdstat[index][mode_idx].mvs[1].as_int ==
bsi->ref_mv[1]->as_int)
continue;
} else {
- if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+ if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
bsi->ref_mv[0]->as_int)
continue;
}
}
#endif
mode_selected = this_mode;
- best_rd = bsi->rdstat[i][mode_idx].brdcost;
+ new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
}
continue;
}
}
- bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment(
- cpi, x, bsi->segment_rd - this_segment_rd, i,
- &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist,
- &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta,
- bsi->rdstat[i][mode_idx].tl, idy, idx, mi_row, mi_col);
+ bsi->rdstat[index][mode_idx].brdcost = encode_inter_mb_segment(
+ cpi, x, bsi->segment_rd - this_segment_rd, index,
+ &bsi->rdstat[index][mode_idx].byrate,
+ &bsi->rdstat[index][mode_idx].bdist,
+ &bsi->rdstat[index][mode_idx].bsse, bsi->rdstat[index][mode_idx].ta,
+ bsi->rdstat[index][mode_idx].tl, idy, idx, mi_row, mi_col);
- if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
- bsi->rdstat[i][mode_idx].brdcost +=
- RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
- bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
- bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
+ if (bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) {
+ bsi->rdstat[index][mode_idx].brdcost += RDCOST(
+ x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate, 0);
+ bsi->rdstat[index][mode_idx].brate +=
+ bsi->rdstat[index][mode_idx].byrate;
+ bsi->rdstat[index][mode_idx].eobs = p->eobs[index];
if (num_4x4_blocks_wide > 1)
- bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
+ bsi->rdstat[index + 1][mode_idx].eobs = p->eobs[index + 1];
if (num_4x4_blocks_high > 1)
- bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
+ bsi->rdstat[index + 2][mode_idx].eobs = p->eobs[index + 2];
}
- if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
+ if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) {
#if CONFIG_REF_MV
// If the NEWMV mode is using the same motion vector as the
// NEARESTMV mode, skip the rest rate-distortion calculations
// and use the inferred motion vector modes.
if (this_mode == NEWMV) {
if (has_second_rf) {
- if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+ if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
bsi->ref_mv[0]->as_int &&
- bsi->rdstat[i][mode_idx].mvs[1].as_int ==
+ bsi->rdstat[index][mode_idx].mvs[1].as_int ==
bsi->ref_mv[1]->as_int)
continue;
} else {
- if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+ if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
bsi->ref_mv[0]->as_int)
continue;
}
}
#endif
mode_selected = this_mode;
- best_rd = bsi->rdstat[i][mode_idx].brdcost;
+ new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
}
} /*for each 4x4 mode*/
- if (best_rd == INT64_MAX) {
+ if (new_best_rd == INT64_MAX) {
int iy, midx;
- for (iy = i + 1; iy < 4; ++iy)
+ for (iy = index + 1; iy < 4; ++iy)
#if CONFIG_EXT_INTER
for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
#else
@@ -5534,33 +5547,33 @@
}
mode_idx = INTER_OFFSET(mode_selected);
- memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
- memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
+ memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above));
+ memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left));
#if CONFIG_EXT_INTER
mv_idx = (mode_selected == NEWFROMNEARMV) ? 1 : 0;
- bsi->ref_mv[0]->as_int = bsi->rdstat[i][mode_idx].ref_mv[0].as_int;
+ bsi->ref_mv[0]->as_int = bsi->rdstat[index][mode_idx].ref_mv[0].as_int;
if (has_second_rf)
- bsi->ref_mv[1]->as_int = bsi->rdstat[i][mode_idx].ref_mv[1].as_int;
+ bsi->ref_mv[1]->as_int = bsi->rdstat[index][mode_idx].ref_mv[1].as_int;
#endif // CONFIG_EXT_INTER
- set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
- frame_mv,
+ set_and_cost_bmi_mvs(cpi, x, xd, index, mode_selected,
+ mode_mv[mode_selected], frame_mv,
#if CONFIG_EXT_INTER
- seg_mvs[i][mv_idx], compound_seg_newmvs[i],
+ seg_mvs[index][mv_idx], compound_seg_newmvs[index],
#else
- seg_mvs[i],
+ seg_mvs[index],
#endif // CONFIG_EXT_INTER
bsi->ref_mv, x->nmvjointcost, x->mvcost);
- br += bsi->rdstat[i][mode_idx].brate;
- bd += bsi->rdstat[i][mode_idx].bdist;
- block_sse += bsi->rdstat[i][mode_idx].bsse;
- segmentyrate += bsi->rdstat[i][mode_idx].byrate;
- this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
+ br += bsi->rdstat[index][mode_idx].brate;
+ bd += bsi->rdstat[index][mode_idx].bdist;
+ block_sse += bsi->rdstat[index][mode_idx].bsse;
+ segmentyrate += bsi->rdstat[index][mode_idx].byrate;
+ this_segment_rd += bsi->rdstat[index][mode_idx].brdcost;
if (this_segment_rd > bsi->segment_rd) {
int iy, midx;
- for (iy = i + 1; iy < 4; ++iy)
+ for (iy = index + 1; iy < 4; ++iy)
#if CONFIG_EXT_INTER
for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
#else
@@ -5772,7 +5785,7 @@
ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
}
-static void setup_buffer_inter(AV1_COMP *cpi, MACROBLOCK *x,
+static void setup_buffer_inter(const AV1_COMP *const cpi, MACROBLOCK *x,
MV_REFERENCE_FRAME ref_frame,
BLOCK_SIZE block_size, int mi_row, int mi_col,
int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
@@ -5817,8 +5830,8 @@
block_size);
}
-static void single_motion_search(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- int mi_row, int mi_col,
+static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
#if CONFIG_EXT_INTER
int ref_idx, int mv_idx,
#endif // CONFIG_EXT_INTER
@@ -5908,9 +5921,9 @@
x->best_mv.as_int = INVALID_MV;
if (scaled_ref_frame) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; ++i)
- xd->plane[i].pre[ref_idx] = backup_yv12[i];
+ int j;
+ for (j = 0; j < MAX_MB_PLANE; ++j)
+ xd->plane[j].pre[ref_idx] = backup_yv12[j];
}
return;
}
@@ -6063,7 +6076,7 @@
}
#if CONFIG_EXT_INTER
-static void do_masked_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
+static void do_masked_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
const uint8_t *mask, int mask_stride,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int_mv *tmp_mv, int *rate_mv, int ref_idx,
@@ -6086,6 +6099,7 @@
const YV12_BUFFER_CONFIG *scaled_ref_frame =
av1_get_scaled_ref_frame(cpi, ref);
+ int i;
MV pred_mv[3];
pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
@@ -6097,7 +6111,6 @@
#endif
if (scaled_ref_frame) {
- int i;
// Swap out the reference frame for a version that's been scaled to
// match the resolution of the current frame, allowing the existing
// motion search code to be used without additional modifications.
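The comment above is the heart of a save/swap/restore pattern that recurs in every motion-search routine in this file. Reduced to its shape, with invented names:

    #include <string.h>

    typedef struct { unsigned char *buf; int stride; } Buf2D; /* hypothetical */
    enum { kPlanes = 3 };

    static void search_with_scaled_refs(Buf2D pre[kPlanes],
                                        const Buf2D scaled[kPlanes],
                                        void (*do_search)(Buf2D *)) {
      Buf2D backup[kPlanes];
      memcpy(backup, pre, sizeof(backup));          /* remember the originals */
      memcpy(pre, scaled, kPlanes * sizeof(*pre));  /* point at scaled frames */
      do_search(pre);                               /* unmodified search code */
      memcpy(pre, backup, sizeof(backup));          /* always restore */
    }

The early-return paths in the real functions must each repeat the restore loop, which is exactly where the shadowed inner int i declarations lived before this change renamed them to j.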
@@ -6139,7 +6152,6 @@
// prev_mv_sad is not setup for dynamically scaled frames.
if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
- int i;
for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
x->pred_mv[ref].row = 0;
@@ -6147,9 +6159,9 @@
tmp_mv->as_int = INVALID_MV;
if (scaled_ref_frame) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; ++i)
- xd->plane[i].pre[ref_idx] = backup_yv12[i];
+ int j;
+ for (j = 0; j < MAX_MB_PLANE; ++j)
+ xd->plane[j].pre[ref_idx] = backup_yv12[j];
}
return;
}
@@ -6188,18 +6200,17 @@
x->pred_mv[ref] = tmp_mv->as_mv;
if (scaled_ref_frame) {
- int i;
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[ref_idx] = backup_yv12[i];
}
}
-static void do_masked_motion_search_indexed(AV1_COMP *cpi, MACROBLOCK *x,
- int wedge_index, int wedge_sign,
- BLOCK_SIZE bsize, int mi_row,
- int mi_col, int_mv *tmp_mv,
- int *rate_mv, int mv_idx[2],
- int which) {
+static void do_masked_motion_search_indexed(const AV1_COMP *const cpi,
+ MACROBLOCK *x, int wedge_index,
+ int wedge_sign, BLOCK_SIZE bsize,
+ int mi_row, int mi_col,
+ int_mv *tmp_mv, int *rate_mv,
+ int mv_idx[2], int which) {
// NOTE: 'which' values: 0 - ref 0 only, 1 - ref 1 only, 2 - both refs
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -6228,7 +6239,7 @@
// However, once established that vector may be usable through the nearest and
// near mv modes to reduce distortion in subsequent blocks and also improve
// visual quality.
-static int discount_newmv_test(const AV1_COMP *cpi, int this_mode,
+static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
int_mv this_mv,
int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
int ref_frame) {
@@ -6594,7 +6605,7 @@
#endif // CONFIG_EXT_INTER
static int64_t handle_inter_mode(
- AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
+ const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
int64_t *distortion, int *skippable, int *rate_y, int *rate_uv,
int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
int mi_col,
@@ -6613,7 +6624,7 @@
InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME],
int (*single_skippable)[TOTAL_REFS_PER_FRAME], int64_t *psse,
const int64_t ref_best_rd) {
- AV1_COMMON *cm = &cpi->common;
+ const AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
@@ -7108,21 +7119,21 @@
#endif // CONFIG_MOTION_VAR
if (is_comp_pred && is_interinter_wedge_used(bsize)) {
- int rate_sum, rs;
+ int rate_sum, rs2;
int64_t dist_sum;
int64_t best_rd_nowedge = INT64_MAX;
int64_t best_rd_wedge = INT64_MAX;
int tmp_skip_txfm_sb;
int64_t tmp_skip_sse_sb;
- rs = av1_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+ rs2 = av1_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
mbmi->use_wedge_interinter = 0;
av1_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
av1_subtract_plane(x, bsize, 0);
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
best_rd_nowedge = rd;
// Disable wedge search if source variance is small
@@ -7135,8 +7146,8 @@
int strides[1] = { bw };
mbmi->use_wedge_interinter = 1;
- rs = av1_cost_literal(get_interinter_wedge_bits(bsize)) +
- av1_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
+ rs2 = av1_cost_literal(get_interinter_wedge_bits(bsize)) +
+ av1_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
av1_build_inter_predictors_for_planes_single_buf(
xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
@@ -7145,7 +7156,7 @@
// Choose the best wedge
best_rd_wedge = pick_interinter_wedge(cpi, x, bsize, pred0, pred1);
- best_rd_wedge += RDCOST(x->rdmult, x->rddiv, rs + rate_mv, 0);
+ best_rd_wedge += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0);
if (have_newmv_in_inter_mode(this_mode)) {
int_mv tmp_mv[2];
@@ -7176,7 +7187,8 @@
av1_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
+ rd =
+ RDCOST(x->rdmult, x->rddiv, rs2 + tmp_rate_mv + rate_sum, dist_sum);
if (rd < best_rd_wedge) {
best_rd_wedge = rd;
} else {
@@ -7191,7 +7203,7 @@
estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum,
+ rd = RDCOST(x->rdmult, x->rddiv, rs2 + tmp_rate_mv + rate_sum,
dist_sum);
best_rd_wedge = rd;
@@ -7216,7 +7228,7 @@
estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
best_rd_wedge = rd;
if (best_rd_wedge < best_rd_nowedge) {
mbmi->use_wedge_interinter = 1;
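Every rs -> rs2 substitution above feeds the same Lagrangian comparison: RDCOST collapses a rate (in bits) and a distortion into a single cost, conceptually rd = lambda * rate + dist. A sketch of the macro's shape, with the rounding and shift constants assumed from the vp9-era definition rather than read out of this diff:

    #include <stdint.h>

    /* Illustrative stand-in for RDCOST(x->rdmult, x->rddiv, rate, dist). */
    static int64_t rdcost(int rdmult, int rddiv, int rate, int64_t dist) {
      return ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
    }

Since wedge and no-wedge candidates are scored with the same rdmult/rddiv, only the rate term (rs2 plus the motion-vector cost) and the distortion decide between them.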
@@ -7464,7 +7476,6 @@
for (mbmi->motion_mode = SIMPLE_TRANSLATION;
mbmi->motion_mode < (allow_motvar ? MOTION_MODES : 1);
mbmi->motion_mode++) {
- int64_t tmp_rd;
#if CONFIG_EXT_INTER
int tmp_rate2 = mbmi->motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff
: rate2_nocoeff;
@@ -7752,10 +7763,10 @@
return 0; // The rate-distortion cost will be re-calculated by caller.
}
-void av1_rd_pick_intra_mode_sb(AV1_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd) {
- AV1_COMMON *const cm = &cpi->common;
+void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
+ RD_COST *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = xd->plane;
int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
@@ -7800,7 +7811,7 @@
}
// Do we have an internal image edge (e.g. formatting bars)?
-int av1_internal_image_edge(AV1_COMP *cpi) {
+int av1_internal_image_edge(const AV1_COMP *cpi) {
return (cpi->oxcf.pass == 2) &&
((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
(cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
@@ -7809,14 +7820,14 @@
// Checks to see if a super block is on a horizontal image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
-int av1_active_h_edge(AV1_COMP *cpi, int mi_row, int mi_step) {
+int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
int top_edge = 0;
int bottom_edge = cpi->common.mi_rows;
int is_active_h_edge = 0;
// For two-pass encoding, account for any formatting bars detected.
if (cpi->oxcf.pass == 2) {
- TWO_PASS *twopass = &cpi->twopass;
+ const TWO_PASS *const twopass = &cpi->twopass;
// The inactive region is specified in MBs not mi units.
// The image edge is in the following MB row.
@@ -7836,14 +7847,14 @@
// Checks to see if a super block is on a vertical image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
-int av1_active_v_edge(AV1_COMP *cpi, int mi_col, int mi_step) {
+int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
int left_edge = 0;
int right_edge = cpi->common.mi_cols;
int is_active_v_edge = 0;
// For two-pass encoding, account for any formatting bars detected.
if (cpi->oxcf.pass == 2) {
- TWO_PASS *twopass = &cpi->twopass;
+ const TWO_PASS *const twopass = &cpi->twopass;
// The inactive region is specified in MBs not mi units.
// The image edge is in the following MB column.
@@ -7863,13 +7874,13 @@
// Checks to see if a super block is at the edge of the active image.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
-int av1_active_edge_sb(AV1_COMP *cpi, int mi_row, int mi_col) {
+int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
}
#if CONFIG_PALETTE
-static void restore_uv_color_map(AV1_COMP *cpi, MACROBLOCK *x) {
+static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
@@ -7920,10 +7931,10 @@
#if CONFIG_EXT_INTRA
static void pick_ext_intra_interframe(
- AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
- int *rate_uv_intra, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
- PREDICTION_MODE *mode_uv, EXT_INTRA_MODE_INFO *ext_intra_mode_info_uv,
- int8_t *uv_angle_delta,
+ const AV1_COMP *const cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
+ BLOCK_SIZE bsize, int *rate_uv_intra, int *rate_uv_tokenonly,
+ int64_t *dist_uv, int *skip_uv, PREDICTION_MODE *mode_uv,
+ EXT_INTRA_MODE_INFO *ext_intra_mode_info_uv, int8_t *uv_angle_delta,
#if CONFIG_PALETTE
PALETTE_MODE_INFO *pmi_uv, int palette_ctx,
#endif // CONFIG_PALETTE
@@ -7934,7 +7945,7 @@
int *returnrate_nocoef,
#endif // CONFIG_SUPERTX
int64_t *best_pred_rd, MB_MODE_INFO *best_mbmode, RD_COST *rd_cost) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_PALETTE
@@ -8096,7 +8107,7 @@
int left_stride);
#endif // CONFIG_MOTION_VAR
-void av1_rd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
+void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
MACROBLOCK *x, int mi_row, int mi_col,
RD_COST *rd_cost,
#if CONFIG_SUPERTX
@@ -8104,9 +8115,9 @@
#endif // CONFIG_SUPERTX
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
- AV1_COMMON *const cm = &cpi->common;
- RD_OPT *const rd_opt = &cpi->rd;
- SPEED_FEATURES *const sf = &cpi->sf;
+ const AV1_COMMON *const cm = &cpi->common;
+ const RD_OPT *const rd_opt = &cpi->rd;
+ const SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_PALETTE
@@ -8160,8 +8171,8 @@
unsigned int best_pred_sse = UINT_MAX;
PREDICTION_MODE best_intra_mode = DC_PRED;
int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
- int64_t dist_uv[TX_SIZES];
- int skip_uv[TX_SIZES];
+ int64_t dist_uvs[TX_SIZES];
+ int skip_uvs[TX_SIZES];
PREDICTION_MODE mode_uv[TX_SIZES];
#if CONFIG_PALETTE
PALETTE_MODE_INFO pmi_uv[TX_SIZES];
@@ -8711,8 +8722,8 @@
[pd->subsampling_y];
if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
- &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
- &skip_uv[uv_tx], &mode_uv[uv_tx]);
+ &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
+ &skip_uvs[uv_tx], &mode_uv[uv_tx]);
#if CONFIG_PALETTE
if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
#endif // CONFIG_PALETTE
@@ -8724,8 +8735,8 @@
}
rate_uv = rate_uv_tokenonly[uv_tx];
- distortion_uv = dist_uv[uv_tx];
- skippable = skippable && skip_uv[uv_tx];
+ distortion_uv = dist_uvs[uv_tx];
+ skippable = skippable && skip_uvs[uv_tx];
mbmi->uv_mode = mode_uv[uv_tx];
#if CONFIG_PALETTE
if (cm->allow_screen_content_tools) {
@@ -8943,7 +8954,6 @@
};
int dummy_single_skippable[MB_MODE_COUNT]
[TOTAL_REFS_PER_FRAME] = { { 0 } };
- int dummy_disable_skip = 0;
#if CONFIG_EXT_INTER
int_mv dummy_single_newmvs[2][TOTAL_REFS_PER_FRAME] = { { { 0 } },
{ { 0 } } };
@@ -9325,7 +9335,7 @@
int best_rate_nocoef;
#endif
int64_t distortion2 = 0, distortion_y = 0, dummy_rd = best_rd, this_rd;
- int skippable = 0, rate_overhead = 0;
+ int skippable = 0;
TX_SIZE best_tx_size, uv_tx;
TX_TYPE best_tx_type;
PALETTE_MODE_INFO palette_mode_info;
@@ -9333,6 +9343,7 @@
x->palette_buffer->best_palette_color_map;
uint8_t *const color_map = xd->plane[0].color_index_map;
+ rate_overhead = 0;
mbmi->mode = DC_PRED;
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = INTRA_FRAME;
@@ -9358,8 +9369,8 @@
[xd->plane[1].subsampling_y];
if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
- &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
- &skip_uv[uv_tx], &mode_uv[uv_tx]);
+ &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
+ &skip_uvs[uv_tx], &mode_uv[uv_tx]);
pmi_uv[uv_tx] = *pmi;
#if CONFIG_EXT_INTRA
ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
@@ -9381,8 +9392,8 @@
ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
}
#endif // CONFIG_EXT_INTRA
- skippable = skippable && skip_uv[uv_tx];
- distortion2 = distortion_y + dist_uv[uv_tx];
+ skippable = skippable && skip_uvs[uv_tx];
+ distortion2 = distortion_y + dist_uvs[uv_tx];
rate2 = rate_y + rate_overhead + rate_uv_intra[uv_tx];
rate2 += ref_costs_single[INTRA_FRAME];
@@ -9427,8 +9438,8 @@
!dc_skipped && best_mode_index >= 0 &&
best_intra_rd < (best_rd + (best_rd >> 3))) {
pick_ext_intra_interframe(
- cpi, x, ctx, bsize, rate_uv_intra, rate_uv_tokenonly, dist_uv, skip_uv,
- mode_uv, ext_intra_mode_info_uv, uv_angle_delta,
+ cpi, x, ctx, bsize, rate_uv_intra, rate_uv_tokenonly, dist_uvs,
+ skip_uvs, mode_uv, ext_intra_mode_info_uv, uv_angle_delta,
#if CONFIG_PALETTE
pmi_uv, palette_ctx,
#endif // CONFIG_PALETTE
@@ -9466,7 +9477,6 @@
#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_REF_MV
if (!comp_pred_mode) {
- int i;
int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
: INT_MAX;
@@ -9496,7 +9506,6 @@
nearmv[1] = frame_mv[NEARMV][refs[1]];
}
#else
- int i;
int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
: INT_MAX;
@@ -9677,12 +9686,12 @@
#endif // CONFIG_PALETTE
}
-void av1_rd_pick_inter_mode_sb_seg_skip(AV1_COMP *cpi, TileDataEnc *tile_data,
- MACROBLOCK *x, RD_COST *rd_cost,
- BLOCK_SIZE bsize,
+void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
+ TileDataEnc *tile_data, MACROBLOCK *x,
+ RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
unsigned char segment_id = mbmi->segment_id;
@@ -9813,17 +9822,18 @@
store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
}
-void av1_rd_pick_inter_mode_sub8x8(struct AV1_COMP *cpi, TileDataEnc *tile_data,
- struct macroblock *x, int mi_row, int mi_col,
+void av1_rd_pick_inter_mode_sub8x8(const struct AV1_COMP *cpi,
+ TileDataEnc *tile_data, struct macroblock *x,
+ int mi_row, int mi_col,
struct RD_COST *rd_cost,
#if CONFIG_SUPERTX
int *returnrate_nocoef,
#endif // CONFIG_SUPERTX
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
- AV1_COMMON *const cm = &cpi->common;
- RD_OPT *const rd_opt = &cpi->rd;
- SPEED_FEATURES *const sf = &cpi->sf;
+ const AV1_COMMON *const cm = &cpi->common;
+ const RD_OPT *const rd_opt = &cpi->rd;
+ const SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const struct segmentation *const seg = &cm->seg;
@@ -9950,7 +9960,6 @@
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
int skippable = 0;
- int i;
int this_skip2 = 0;
int64_t total_sse = INT_MAX;
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 584c439..16afaf3 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -29,20 +29,20 @@
int av1_cost_coeffs(MACROBLOCK *x, int plane, int block, int coeff_ctx,
TX_SIZE tx_size, const int16_t *scan, const int16_t *nb,
int use_fast_coef_costing);
-void av1_rd_pick_intra_mode_sb(struct AV1_COMP *cpi, struct macroblock *x,
+void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
struct RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd);
-unsigned int av1_get_sby_perpixel_variance(AV1_COMP *cpi,
+unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs);
#if CONFIG_AOM_HIGHBITDEPTH
-unsigned int av1_high_get_sby_perpixel_variance(AV1_COMP *cpi,
+unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs, int bd);
#endif
-void av1_rd_pick_inter_mode_sb(struct AV1_COMP *cpi,
+void av1_rd_pick_inter_mode_sb(const struct AV1_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x, int mi_row, int mi_col,
struct RD_COST *rd_cost,
@@ -53,16 +53,16 @@
int64_t best_rd_so_far);
void av1_rd_pick_inter_mode_sb_seg_skip(
- struct AV1_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x,
- struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far);
+ const struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
+ struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
-int av1_internal_image_edge(struct AV1_COMP *cpi);
-int av1_active_h_edge(struct AV1_COMP *cpi, int mi_row, int mi_step);
-int av1_active_v_edge(struct AV1_COMP *cpi, int mi_col, int mi_step);
-int av1_active_edge_sb(struct AV1_COMP *cpi, int mi_row, int mi_col);
+int av1_internal_image_edge(const struct AV1_COMP *cpi);
+int av1_active_h_edge(const struct AV1_COMP *cpi, int mi_row, int mi_step);
+int av1_active_v_edge(const struct AV1_COMP *cpi, int mi_col, int mi_step);
+int av1_active_edge_sb(const struct AV1_COMP *cpi, int mi_row, int mi_col);
-void av1_rd_pick_inter_mode_sub8x8(struct AV1_COMP *cpi,
+void av1_rd_pick_inter_mode_sub8x8(const struct AV1_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x, int mi_row, int mi_col,
struct RD_COST *rd_cost,
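The signature churn in this header is one theme applied everywhere: rate-distortion search reads encoder state but must never mutate it, so AV1_COMP * becomes const AV1_COMP * (and const AV1_COMP *const inside rdopt.c). What each qualifier enforces, under a hypothetical stand-in type:

    typedef struct { int rdmult; } ENC; /* stand-in for AV1_COMP */

    static int cost(const ENC *const enc, int rate) {
      /* enc->rdmult = 0;  error: *enc is const, the callee can't mutate it */
      /* enc = 0;          error: enc itself is const, can't be reseated    */
      return enc->rdmult * rate;
    }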
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index dafb6ff..2285e46 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -336,7 +336,7 @@
((mb_rows - 1 - mb_row) * 16) + (17 - 2 * AOM_INTERP_EXTEND);
for (mb_col = 0; mb_col < mb_cols; mb_col++) {
- int i, j, k;
+ int j, k;
int stride;
memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 173c934..e95e52b 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -344,7 +344,7 @@
#endif // !CONFIG_ANS
struct tokenize_b_args {
- AV1_COMP *cpi;
+ const AV1_COMP *cpi;
ThreadData *td;
TOKENEXTRA **tp;
int this_rate;
@@ -362,11 +362,11 @@
const PLANE_TYPE type = pd->plane_type;
const int ref = is_inter_block(mbmi);
const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
- const scan_order *const so = get_scan(tx_size, tx_type, ref);
+ const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type, ref);
int pt = get_entropy_context(tx_size, pd->above_context + blk_col,
pd->left_context + blk_row);
- int rate =
- av1_cost_coeffs(x, plane, block, pt, tx_size, so->scan, so->neighbors, 0);
+ int rate = av1_cost_coeffs(x, plane, block, pt, tx_size, scan_order->scan,
+ scan_order->neighbors, 0);
args->this_rate += rate;
av1_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, blk_col,
blk_row);
@@ -409,7 +409,7 @@
}
#if CONFIG_PALETTE
-void av1_tokenize_palette_sb(AV1_COMP *cpi, struct ThreadData *const td,
+void av1_tokenize_palette_sb(const AV1_COMP *cpi, struct ThreadData *const td,
int plane, TOKENEXTRA **t, RUN_TYPE dry_run,
BLOCK_SIZE bsize, int *rate) {
MACROBLOCK *const x = &td->mb;
@@ -454,7 +454,7 @@
static void tokenize_b(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
struct tokenize_b_args *const args = arg;
- AV1_COMP *cpi = args->cpi;
+ const AV1_COMP *cpi = args->cpi;
ThreadData *const td = args->td;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -476,12 +476,13 @@
#endif // CONFIG_SUPERTX
const int16_t *scan, *nb;
const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
- const scan_order *const so = get_scan(tx_size, tx_type, is_inter_block(mbmi));
+ const SCAN_ORDER *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(mbmi));
const int ref = is_inter_block(mbmi);
unsigned int(*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref];
#if CONFIG_ENTROPY
- aom_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ const aom_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
cpi->subframe_stats.coef_probs_buf[cpi->common.coef_probs_update_idx]
[txsize_sqr_map[tx_size]][type][ref];
#else
@@ -501,8 +502,8 @@
EXTRABIT extra;
pt = get_entropy_context(tx_size, pd->above_context + blk_col,
pd->left_context + blk_row);
- scan = so->scan;
- nb = so->neighbors;
+ scan = scan_order->scan;
+ nb = scan_order->neighbors;
c = 0;
while (c < eob) {
@@ -612,8 +613,7 @@
: mbmi->inter_tx_size[tx_row][tx_col];
if (tx_size == plane_tx_size) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+ plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
if (!dry_run)
tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
else if (dry_run == DRY_RUN_NORMAL)
@@ -641,10 +641,10 @@
}
}
-void av1_tokenize_sb_vartx(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
RUN_TYPE dry_run, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *rate) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -696,9 +696,9 @@
}
#endif // CONFIG_VAR_TX
-void av1_tokenize_sb(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate) {
- AV1_COMMON *const cm = &cpi->common;
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -732,9 +732,10 @@
}
#if CONFIG_SUPERTX
-void av1_tokenize_sb_supertx(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
- RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate) {
- AV1_COMMON *const cm = &cpi->common;
+void av1_tokenize_sb_supertx(const AV1_COMP *cpi, ThreadData *td,
+ TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
+ int *rate) {
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &td->mb.e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
TOKENEXTRA *t_backup = *t;
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
index 677d041..e869a19 100644
--- a/av1/encoder/tokenize.h
+++ b/av1/encoder/tokenize.h
@@ -66,20 +66,21 @@
// with the coefficient token cost only if dry_run = DRY_RUN_COSTCOEFS,
// otherwise rate is not incremented.
#if CONFIG_VAR_TX
-void av1_tokenize_sb_vartx(struct AV1_COMP *cpi, struct ThreadData *td,
+void av1_tokenize_sb_vartx(const struct AV1_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
int mi_col, BLOCK_SIZE bsize, int *rate);
#endif
#if CONFIG_PALETTE
-void av1_tokenize_palette_sb(struct AV1_COMP *cpi, struct ThreadData *const td,
- int plane, TOKENEXTRA **t, RUN_TYPE dry_run,
- BLOCK_SIZE bsize, int *rate);
+void av1_tokenize_palette_sb(const struct AV1_COMP *cpi,
+ struct ThreadData *const td, int plane,
+ TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
+ int *rate);
#endif // CONFIG_PALETTE
-void av1_tokenize_sb(struct AV1_COMP *cpi, struct ThreadData *td,
+void av1_tokenize_sb(const struct AV1_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
int *rate);
#if CONFIG_SUPERTX
-void av1_tokenize_sb_supertx(struct AV1_COMP *cpi, struct ThreadData *td,
+void av1_tokenize_sb_supertx(const struct AV1_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
int *rate);
#endif
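The dry_run comment at the top of this hunk is how the encoder costs candidate modes without emitting anything. A hypothetical call using the av1_tokenize_sb signature declared above (the scratch-buffer size is invented; dry runs still write tokens and then rewind):

    TOKENEXTRA scratch[1024];
    TOKENEXTRA *t = scratch;
    int rate = 0;
    av1_tokenize_sb(cpi, td, &t, DRY_RUN_COSTCOEFS, bsize, &rate);
    /* rate now holds the coefficient token cost; with any other dry_run
     * value it is left untouched. */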
diff --git a/av1/encoder/variance_tree.h b/av1/encoder/variance_tree.h
index 728d7f4..6397084 100644
--- a/av1/encoder/variance_tree.h
+++ b/av1/encoder/variance_tree.h
@@ -31,12 +31,12 @@
int64_t sum_error;
int log2_count;
int variance;
-} var;
+} VAR;
typedef struct {
- var none;
- var horz[2];
- var vert[2];
+ VAR none;
+ VAR horz[2];
+ VAR vert[2];
} partition_variance;
typedef struct VAR_TREE {
@@ -59,7 +59,7 @@
void av1_free_var_tree(struct ThreadData *td);
// Set variance values given sum square error, sum error, count.
-static INLINE void fill_variance(int64_t s2, int64_t s, int c, var *v) {
+static INLINE void fill_variance(int64_t s2, int64_t s, int c, VAR *v) {
v->sum_square_error = s2;
v->sum_error = s;
v->log2_count = c;
@@ -69,7 +69,7 @@
v->log2_count);
}
-static INLINE void sum_2_variances(const var *a, const var *b, var *r) {
+static INLINE void sum_2_variances(const VAR *a, const VAR *b, VAR *r) {
assert(a->log2_count == b->log2_count);
fill_variance(a->sum_square_error + b->sum_square_error,
a->sum_error + b->sum_error, a->log2_count + 1, r);
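For orientation: fill_variance derives a fixed-point variance from raw sums via var(X) = E[X^2] - E[X]^2, and sum_2_variances merges two sibling tree nodes by adding raw sums and doubling the sample count. The same arithmetic, self-contained (the 256 scale factor is an assumption carried over from the analogous vp9 code, not visible in this hunk):

    #include <stdint.h>

    typedef struct {
      int64_t sum_square_error;
      int64_t sum_error;
      int log2_count; /* log2 of the sample count */
      int variance;
    } Var;

    static void fill_var(int64_t s2, int64_t s, int c, Var *v) {
      v->sum_square_error = s2;
      v->sum_error = s;
      v->log2_count = c;
      /* n = 2^c, so (s2 - s*s/n)/n is done with shifts. */
      v->variance = (int)((256 * (s2 - ((s * s) >> c))) >> c);
    }

    static void merge_var(const Var *a, const Var *b, Var *r) {
      fill_var(a->sum_square_error + b->sum_square_error,
               a->sum_error + b->sum_error, a->log2_count + 1, r);
    }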
diff --git a/configure b/configure
index f7fb2ca..611756f 100755
--- a/configure
+++ b/configure
@@ -610,12 +610,15 @@
check_add_cflags -Wfloat-conversion
check_add_cflags -Wpointer-arith
check_add_cflags -Wtype-limits
- check_add_cflags -Wcast-qual
check_add_cflags -Wvla
check_add_cflags -Wimplicit-function-declaration
check_add_cflags -Wuninitialized
check_add_cflags -Wunused-variable
check_add_cflags -Wsign-compare
+ # Enabling the following warning for C++ generates some useless warnings
+ # about some function parameters shadowing class member function names.
+ # So, only enable this warning for C code.
+ check_cflags "-Wshadow" && add_cflags_only "-Wshadow"
case ${CC} in
*clang*) ;;
*) check_add_cflags -Wunused-but-set-variable ;;
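The C case that -Wshadow exists to catch is the pattern most of the mechanical renames in this change remove (i -> index, i -> j, var -> VAR, scan_order -> SCAN_ORDER); the C++ false positive the comment describes is a parameter sharing its name with a member function, which is noisy but harmless, hence enabling the flag for C only. To reproduce the C warning with gcc -Wshadow:

    void zero_planes(int *plane_data[3], int have_extra) {
      int i;
      for (i = 0; i < 3; ++i) plane_data[i][0] = 0;
      if (have_extra) {
        int i; /* -Wshadow: declaration of 'i' shadows the outer 'i' */
        for (i = 0; i < 3; ++i) plane_data[i][1] = 0;
      }
    }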
diff --git a/test/av1_fwd_txfm1d_test.cc b/test/av1_fwd_txfm1d_test.cc
index 03bed19..f671097 100644
--- a/test/av1_fwd_txfm1d_test.cc
+++ b/test/av1_fwd_txfm1d_test.cc
@@ -23,10 +23,15 @@
const TYPE_TXFM txfm_type_ls[2] = { TYPE_DCT, TYPE_ADST };
const int txfm_size_num = 5;
-const int txfm_size_ls[5] = { 4, 8, 16, 32 };
+const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
const TxfmFunc fwd_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+ { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new,
+ av1_fdct64_new },
+#else
{ av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL },
+#endif
{ av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL }
};
diff --git a/test/av1_inv_txfm1d_test.cc b/test/av1_inv_txfm1d_test.cc
index 110d4c3..8470fc0 100644
--- a/test/av1_inv_txfm1d_test.cc
+++ b/test/av1_inv_txfm1d_test.cc
@@ -18,15 +18,25 @@
namespace {
const int txfm_type_num = 2;
const int txfm_size_num = 5;
-const int txfm_size_ls[5] = { 4, 8, 16, 32 };
+const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
const TxfmFunc fwd_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+ { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new,
+ av1_fdct64_new },
+#else
{ av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL },
+#endif
{ av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL }
};
const TxfmFunc inv_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+ { av1_idct4_new, av1_idct8_new, av1_idct16_new, av1_idct32_new,
+ av1_idct64_new },
+#else
{ av1_idct4_new, av1_idct8_new, av1_idct16_new, av1_idct32_new, NULL },
+#endif
{ av1_iadst4_new, av1_iadst8_new, av1_iadst16_new, av1_iadst32_new, NULL }
};
diff --git a/test/av1_inv_txfm_test.cc b/test/av1_inv_txfm_test.cc
index 83a7680..f637d51 100644
--- a/test/av1_inv_txfm_test.cc
+++ b/test/av1_inv_txfm_test.cc
@@ -138,7 +138,6 @@
};
TEST_P(AV1PartialIDctTest, RunQuantCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
int size;
switch (tx_size_) {
case TX_4X4: size = 4; break;
@@ -159,7 +158,7 @@
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
int max_error = 0;
- for (int i = 0; i < count_test_block; ++i) {
+ for (int m = 0; m < count_test_block; ++m) {
// clear out destination buffer
memset(dst1, 0, sizeof(*dst1) * block_size);
memset(dst2, 0, sizeof(*dst2) * block_size);
@@ -168,11 +167,11 @@
ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int i = 0; i < count_test_block; ++i) {
+ for (int n = 0; n < count_test_block; ++n) {
// Initialize a test block with input range [-255, 255].
- if (i == 0) {
+ if (n == 0) {
for (int j = 0; j < block_size; ++j) input_extreme_block[j] = 255;
- } else if (i == 1) {
+ } else if (n == 1) {
for (int j = 0; j < block_size; ++j) input_extreme_block[j] = -255;
} else {
for (int j = 0; j < block_size; ++j) {
diff --git a/test/av1_quantize_test.cc b/test/av1_quantize_test.cc
index db1c969..4e1aabd 100644
--- a/test/av1_quantize_test.cc
+++ b/test/av1_quantize_test.cc
@@ -70,7 +70,7 @@
QuantizeFpFunc quanFunc = params_.qFunc;
QuantizeFpFunc quanFuncRef = params_.qFuncRef;
- const scan_order scanOrder = av1_default_scan_orders[txSize];
+ const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize];
for (int i = 0; i < numTests; i++) {
int err_count = 0;
ref_eob = eob = -1;
@@ -137,7 +137,7 @@
int log_scale = (txSize == TX_32X32);
QuantizeFpFunc quanFunc = params_.qFunc;
QuantizeFpFunc quanFuncRef = params_.qFuncRef;
- const scan_order scanOrder = av1_default_scan_orders[txSize];
+ const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize];
for (int i = 0; i < numTests; i++) {
ref_eob = eob = -1;
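
This rename and the var -> VAR one in variance_tree.h share a motive: a
lower-case typedef name invites collisions with ordinary variables of the same
name. A contrived standalone illustration:

    #include <stdio.h>

    typedef struct { int x; } var;  /* lower-case type name (old style) */

    int main(void) {
      var v = { 7 };
      {
        int var = 3;  /* legal: hides the type name in this scope... */
        (void)var;    /* ...so `var w;` would no longer compile here */
      }
      printf("%d\n", v.x);
      return 0;
    }
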
diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc
index 1000f58..c0e6d48 100644
--- a/test/boolcoder_test.cc
+++ b/test/boolcoder_test.cc
@@ -68,11 +68,13 @@
aom_stop_encode(&bw);
+#if !CONFIG_DAALA_EC
// First bit should be zero
GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
+#endif
aom_reader br;
- aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+ aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
bit_rnd.Reset(random_seed);
for (int i = 0; i < kBitsToTest; ++i) {
if (bit_method == 2) {
@@ -110,15 +112,15 @@
aom_stop_encode(&bw);
aom_reader br;
aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
- ptrdiff_t last_tell = aom_reader_tell(&br);
- ptrdiff_t last_tell_frac = aom_reader_tell_frac(&br);
+ uint32_t last_tell = aom_reader_tell(&br);
+ uint32_t last_tell_frac = aom_reader_tell_frac(&br);
double frac_diff_total = 0;
- GTEST_ASSERT_GE(aom_reader_tell(&br), 0);
- GTEST_ASSERT_LE(aom_reader_tell(&br), 1);
+ GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+ GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
for (int i = 0; i < kSymbols; i++) {
aom_read(&br, p, NULL);
- ptrdiff_t tell = aom_reader_tell(&br);
- ptrdiff_t tell_frac = aom_reader_tell_frac(&br);
+ uint32_t tell = aom_reader_tell(&br);
+ uint32_t tell_frac = aom_reader_tell_frac(&br);
GTEST_ASSERT_GE(tell, last_tell) << "tell: " << tell
<< ", last_tell: " << last_tell;
GTEST_ASSERT_GE(tell_frac, last_tell_frac)
@@ -131,7 +133,7 @@
fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability));
last_tell_frac = tell_frac;
}
- const int expected = (int)(-kSymbols * log2(probability));
+ const uint32_t expected = (uint32_t)(-kSymbols * log2(probability));
// Last tell should be close to the expected value.
GTEST_ASSERT_LE(last_tell - expected, 20) << " last_tell: " << last_tell;
// The average frac_diff error should be pretty small.
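
The bound that final assertion checks, sketched standalone (the symbol count
here is assumed, purely illustrative): an arithmetic coder spends about
-log2(p) bits per symbol of probability p, so after kSymbols reads the tell
should land near -kSymbols * log2(p).

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      const int kSymbols = 1024;  /* assumed count for illustration */
      for (int k = 1; k <= 9; k += 2) {
        const double p = k / 10.0;
        printf("p=%.1f -> ~%.0f bits total\n", p, -kSymbols * log2(p));
      }
      return 0;
    }
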
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index de1ae04..837b282 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -239,23 +239,24 @@
// Vertical pass (transposed intermediate -> dst).
{
- uint16_t *src_ptr = intermediate_buffer;
+ const uint16_t *interm_ptr = intermediate_buffer;
const int dst_next_row_stride = dst_stride - output_width;
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
// Apply filter...
- const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
- (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
- (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
- (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
- (AV1_FILTER_WEIGHT >> 1); // Rounding
+ const int temp =
+ (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
+ (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
+ (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
+ (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
+ (AV1_FILTER_WEIGHT >> 1); // Rounding
// Normalize back to 0-255...
*dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
- src_ptr += intermediate_height;
+ interm_ptr += intermediate_height;
}
- src_ptr += intermediate_next_stride;
+ interm_ptr += intermediate_next_stride;
dst_ptr += dst_next_row_stride;
}
}
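
The (AV1_FILTER_WEIGHT >> 1) term above is the usual round-to-nearest trick for
fixed-point filters; a minimal sketch assuming a 7-bit weight (macro names here
are placeholders, not the library's):

    #include <stdio.h>

    #define WEIGHT 128  /* assumed: 1 << SHIFT */
    #define SHIFT 7

    /* Adding half the weight before shifting rounds to nearest
     * instead of truncating toward zero. */
    static int normalize(int acc) { return (acc + (WEIGHT >> 1)) >> SHIFT; }

    int main(void) {
      /* 191/128 = 1.49 -> 1, 192/128 = 1.50 -> 2 */
      printf("%d %d\n", normalize(191), normalize(192));
      return 0;
    }
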
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc
index 5d814f4..318df19 100644
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -160,7 +160,7 @@
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
#endif // CONFIG_AOM_HIGHBITDEPTH
- for (int j = 0; j < kNumCoeffs; ++j) {
+ for (j = 0; j < kNumCoeffs; ++j) {
err_count += ref_s[j] != s[j];
}
if (err_count && !err_count_total) {
@@ -324,7 +324,7 @@
ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
thresh0, blimit1, limit1, thresh1));
#endif // CONFIG_AOM_HIGHBITDEPTH
- for (int j = 0; j < kNumCoeffs; ++j) {
+ for (j = 0; j < kNumCoeffs; ++j) {
err_count += ref_s[j] != s[j];
}
if (err_count && !err_count_total) {
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index c18f48b..7eedfaf 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -55,7 +55,6 @@
};
TEST_P(PartialIDctTest, RunQuantCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
int size;
switch (tx_size_) {
case TX_4X4: size = 4; break;
@@ -76,7 +75,7 @@
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
int max_error = 0;
- for (int i = 0; i < count_test_block; ++i) {
+ for (int m = 0; m < count_test_block; ++m) {
// clear out destination buffer
memset(dst1, 0, sizeof(*dst1) * block_size);
memset(dst2, 0, sizeof(*dst2) * block_size);
@@ -85,11 +84,11 @@
ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int i = 0; i < count_test_block; ++i) {
+ for (int n = 0; n < count_test_block; ++n) {
// Initialize a test block with input range [-255, 255].
- if (i == 0) {
+ if (n == 0) {
for (int j = 0; j < block_size; ++j) input_extreme_block[j] = 255;
- } else if (i == 1) {
+ } else if (n == 1) {
for (int j = 0; j < block_size; ++j) input_extreme_block[j] = -255;
} else {
for (int j = 0; j < block_size; ++j) {
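
The m/n renames in both RunQuantCheck hunks resolve the same -Wshadow report;
the hazard they remove, sketched standalone (hypothetical code, not the test):

    #include <stdio.h>

    int main(void) {
      int last = -1;
      for (int i = 0; i < 3; ++i) {
        for (int i = 0; i < 2; ++i) {  /* inner i hides the outer i */
          last = i;                    /* always the inner i */
        }
      }
      printf("%d\n", last);  /* 1; distinct names keep the intent readable */
      return 0;
    }
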