Merge changes I082959ab,Ib6932640 * changes: vp9/decoder: threaded row-based loop filter vp9/decoder: add thread worker
diff --git a/README b/README index 92cc074..d7cb11a 100644 --- a/README +++ b/README
@@ -1,7 +1,7 @@ vpx Multi-Format Codec SDK -README - 21 June 2012 +README - 1 August 2013 -Welcome to the WebM VP8 Codec SDK! +Welcome to the WebM VP8/VP9 Codec SDK! COMPILING THE APPLICATIONS/LIBRARIES: The build system used is similar to autotools. Building generally consists of @@ -53,33 +53,63 @@ armv5te-android-gcc armv5te-linux-rvct armv5te-linux-gcc + armv5te-none-rvct armv6-darwin-gcc armv6-linux-rvct armv6-linux-gcc + armv6-none-rvct armv7-android-gcc + armv7-darwin-gcc armv7-linux-rvct armv7-linux-gcc + armv7-none-rvct + armv7-win32-vs11 mips32-linux-gcc ppc32-darwin8-gcc ppc32-darwin9-gcc + ppc32-linux-gcc ppc64-darwin8-gcc ppc64-darwin9-gcc ppc64-linux-gcc + sparc-solaris-gcc + x86-android-gcc x86-darwin8-gcc x86-darwin8-icc x86-darwin9-gcc x86-darwin9-icc + x86-darwin10-gcc + x86-darwin11-gcc + x86-darwin12-gcc + x86-darwin13-gcc x86-linux-gcc x86-linux-icc + x86-os2-gcc x86-solaris-gcc + x86-win32-gcc x86-win32-vs7 x86-win32-vs8 + x86-win32-vs9 + x86-win32-vs10 + x86-win32-vs11 x86_64-darwin9-gcc + x86_64-darwin10-gcc + x86_64-darwin11-gcc + x86_64-darwin12-gcc + x86_64-darwin13-gcc x86_64-linux-gcc + x86_64-linux-icc x86_64-solaris-gcc + x86_64-win64-gcc x86_64-win64-vs8 + x86_64-win64-vs9 + x86_64-win64-vs10 + x86_64-win64-vs11 universal-darwin8-gcc universal-darwin9-gcc + universal-darwin10-gcc + universal-darwin11-gcc + universal-darwin12-gcc + universal-darwin13-gcc generic-gnu The generic-gnu target, in conjunction with the CROSS environment variable,
diff --git a/configure b/configure index 3651334..c45f908 100755 --- a/configure +++ b/configure
@@ -257,6 +257,7 @@ install_bins install_libs install_srcs + force_x86inc debug gprof gcov
diff --git a/libs.mk b/libs.mk index 4aa7dc4..ac2e73a 100644 --- a/libs.mk +++ b/libs.mk
@@ -57,6 +57,19 @@ RTCD += $$(BUILD_PFX)$(1).h endef +# x86inc.asm is not compatible with pic 32bit builds. Restrict +# files which use it to 64bit builds or 32bit without pic +USE_X86INC = no +ifneq ($(CONFIG_PIC),yes) + USE_X86INC = yes +endif +ifeq ($(ARCH_X86_64),yes) + USE_X86INC = yes +endif +ifeq ($(CONFIG_FORCE_X86INC),yes) + USE_X86INC = yes +endif + CODEC_SRCS-yes += CHANGELOG CODEC_SRCS-yes += libs.mk
diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 3b72129..b1510c6 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc
@@ -527,9 +527,9 @@ #if HAVE_SSSE3 const ConvolveFunctions convolve8_ssse3( - vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c, - vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c, - vp9_convolve8_ssse3, vp9_convolve8_avg_c); + vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3, + vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3, + vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3); INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( make_tuple(4, 4, &convolve8_ssse3),
diff --git a/test/vp9_subtract_test.cc b/test/vp9_subtract_test.cc index 3e5fe8d..4e53e77 100644 --- a/test/vp9_subtract_test.cc +++ b/test/vp9_subtract_test.cc
@@ -39,7 +39,7 @@ ACMRandom rnd(ACMRandom::DeterministicSeed()); // FIXME(rbultje) split in its own file - for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES; + for (BLOCK_SIZE_TYPE bsize = BLOCK_4X4; bsize < BLOCK_SIZE_TYPES; bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) { const int block_width = 4 << b_width_log2(bsize); const int block_height = 4 << b_height_log2(bsize);
diff --git a/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm b/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm index 15039e2..110a56c 100644 --- a/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm +++ b/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
@@ -52,15 +52,15 @@ ; sp[]int h |vp9_convolve8_avg_horiz_neon| PROC + ldr r12, [sp, #4] ; x_step_q4 + cmp r12, #16 + bne vp9_convolve8_avg_horiz_c + push {r4-r10, lr} sub r0, r0, #3 ; adjust for taps - ldr r4, [sp, #36] ; x_step_q4 ldr r5, [sp, #32] ; filter_x - cmp r4, #16 - bne call_horiz_c_convolve ; x_step_q4 != 16 - ldr r6, [sp, #48] ; w ldr r7, [sp, #52] ; h @@ -82,22 +82,22 @@ mov r10, r6 ; w loop counter loop_horiz - vld4.u8 {d24[0], d25[0], d26[0], d27[0]}, [r0]! - vld4.u8 {d24[4], d25[4], d26[4], d27[4]}, [r0]! + vld1.8 {d24}, [r0]! vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9 - vld4.u8 {d24[1], d25[1], d26[1], d27[1]}, [r0]! - vld4.u8 {d24[5], d25[5], d26[5], d27[5]}, [r0]! + vld1.8 {d25}, [r0]! vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9 - vld4.u8 {d24[2], d25[2], d26[2], d27[2]}, [r0]! - vld4.u8 {d24[6], d25[6], d26[6], d27[6]}, [r0]! + vld1.8 {d26}, [r0]! vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9 - vld4.u8 {d24[3], d25[3], d26[3], d27[3]}, [r0]! - vld4.u8 {d24[7], d25[7], d26[7], d27[7]}, [r0]! + vld1.8 {d27}, [r0]! 
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8 + vtrn.16 q12, q13 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + ; extract to s16 vmovl.u8 q8, d24 vmovl.u8 q9, d25 @@ -128,8 +128,8 @@ vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; transpose vtrn.16 d2, d3 @@ -137,10 +137,7 @@ vtrn.8 d2, d3 ; average the new value and the dst value - vaddl.u8 q8, d2, d6 - vaddl.u8 q9, d3, d7 - vqrshrn.u16 d2, q8, #1 - vqrshrn.u16 d3, q9, #1 + vrhadd.u8 q1, q1, q3 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d3[0]}, [r2], r3 @@ -159,26 +156,20 @@ pop {r4-r10, pc} -call_horiz_c_convolve - pop {r4-r10, lr} - add r0, r0, #3 ; un-adjust for taps - b vp9_convolve8_avg_horiz_c - - ENDP |vp9_convolve8_avg_vert_neon| PROC + ldr r12, [sp, #12] + cmp r12, #16 + bne vp9_convolve8_avg_vert_c + push {r4-r10, lr} ; adjust for taps sub r0, r0, r1 sub r0, r0, r1, lsl #1 - ldr r6, [sp, #44] ; y_step_q4 ldr r7, [sp, #40] ; filter_y - cmp r6, #16 - bne call_vert_c_convolve ; y_step_q4 != 16 - ldr r8, [sp, #48] ; w ldr r9, [sp, #52] ; h @@ -240,14 +231,11 @@ vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; average the new value and the dst value - vaddl.u8 q8, d2, d6 - vaddl.u8 q9, d3, d7 - vqrshrn.u16 d2, q8, #1 - vqrshrn.u16 d3, q9, #1 + vrhadd.u8 q1, q1, q3 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d2[1]}, [r2], r3 @@ -266,12 +254,5 @@ pop {r4-r10, pc} -call_vert_c_convolve - pop {r4-r10, lr} - ; un-adjust for taps - add r0, r0, r1 - add r0, r0, r1, lsl #1 - b vp9_convolve8_avg_vert_c - ENDP END
diff --git a/vp9/common/arm/neon/vp9_convolve8_neon.asm b/vp9/common/arm/neon/vp9_convolve8_neon.asm index 842c73c..845e4a8 100644 --- a/vp9/common/arm/neon/vp9_convolve8_neon.asm +++ b/vp9/common/arm/neon/vp9_convolve8_neon.asm
@@ -52,15 +52,15 @@ ; sp[]int h |vp9_convolve8_horiz_neon| PROC + ldr r12, [sp, #4] ; x_step_q4 + cmp r12, #16 + bne vp9_convolve8_horiz_c + push {r4-r10, lr} sub r0, r0, #3 ; adjust for taps - ldr r4, [sp, #36] ; x_step_q4 ldr r5, [sp, #32] ; filter_x - cmp r4, #16 - bne call_horiz_c_convolve ; x_step_q4 != 16 - ldr r6, [sp, #48] ; w ldr r7, [sp, #52] ; h @@ -82,22 +82,22 @@ mov r10, r6 ; w loop counter loop_horiz - vld4.u8 {d24[0], d25[0], d26[0], d27[0]}, [r0]! - vld4.u8 {d24[4], d25[4], d26[4], d27[4]}, [r0]! + vld1.8 {d24}, [r0]! vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9 - vld4.u8 {d24[1], d25[1], d26[1], d27[1]}, [r0]! - vld4.u8 {d24[5], d25[5], d26[5], d27[5]}, [r0]! + vld1.8 {d25}, [r0]! vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9 - vld4.u8 {d24[2], d25[2], d26[2], d27[2]}, [r0]! - vld4.u8 {d24[6], d25[6], d26[6], d27[6]}, [r0]! + vld1.8 {d26}, [r0]! vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9 - vld4.u8 {d24[3], d25[3], d26[3], d27[3]}, [r0]! - vld4.u8 {d24[7], d25[7], d26[7], d27[7]}, [r0]! + vld1.8 {d27}, [r0]! 
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8 + vtrn.16 q12, q13 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + ; extract to s16 vmovl.u8 q8, d24 vmovl.u8 q9, d25 @@ -120,8 +120,8 @@ vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 ; transpose vtrn.16 d2, d3 @@ -145,26 +145,20 @@ pop {r4-r10, pc} -call_horiz_c_convolve - pop {r4-r10, lr} - add r0, r0, #3 ; un-adjust for taps - b vp9_convolve8_horiz_c - - ENDP |vp9_convolve8_vert_neon| PROC + ldr r12, [sp, #12] + cmp r12, #16 + bne vp9_convolve8_vert_c + push {r4-r10, lr} ; adjust for taps sub r0, r0, r1 sub r0, r0, r1, lsl #1 - ldr r6, [sp, #44] ; y_step_q4 ldr r7, [sp, #40] ; filter_y - cmp r6, #16 - bne call_vert_c_convolve ; y_step_q4 != 16 - ldr r8, [sp, #48] ; w ldr r9, [sp, #52] ; h @@ -219,8 +213,8 @@ vqrshrun.s32 d5, q15, #7 ; saturate - vqshrn.u16 d2, q1, #0 - vqshrn.u16 d3, q2, #0 + vqmovn.u16 d2, q1 + vqmovn.u16 d3, q2 vst1.u32 {d2[0]}, [r2], r3 vst1.u32 {d2[1]}, [r2], r3 @@ -239,12 +233,5 @@ pop {r4-r10, pc} -call_vert_c_convolve - pop {r4-r10, lr} - ; un-adjust for taps - add r0, r0, r1 - add r0, r0, r1, lsl #1 - b vp9_convolve8_vert_c - ENDP END
diff --git a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm new file mode 100644 index 0000000..edf5786 --- /dev/null +++ b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
@@ -0,0 +1,618 @@ +; +; Copyright (c) 2013 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + EXPORT |vp9_mb_lpf_horizontal_edge_w_neon| + EXPORT |vp9_mb_lpf_vertical_edge_w_neon| + ARM + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh +; int count) +; r0 uint8_t *s, +; r1 int p, /* pitch */ +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh, +|vp9_mb_lpf_horizontal_edge_w_neon| PROC + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] ; load thresh + ldr r12, [sp, #92] ; load count + +h_count + vld1.8 {d16[]}, [r2] ; load *blimit + vld1.8 {d17[]}, [r3] ; load *limit + vld1.8 {d18[]}, [r4] ; load *thresh + + sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines + + vld1.u8 {d0}, [r8@64], r1 ; p7 + vld1.u8 {d1}, [r8@64], r1 ; p6 + vld1.u8 {d2}, [r8@64], r1 ; p5 + vld1.u8 {d3}, [r8@64], r1 ; p4 + vld1.u8 {d4}, [r8@64], r1 ; p3 + vld1.u8 {d5}, [r8@64], r1 ; p2 + vld1.u8 {d6}, [r8@64], r1 ; p1 + vld1.u8 {d7}, [r8@64], r1 ; p0 + vld1.u8 {d8}, [r8@64], r1 ; q0 + vld1.u8 {d9}, [r8@64], r1 ; q1 + vld1.u8 {d10}, [r8@64], r1 ; q2 + vld1.u8 {d11}, [r8@64], r1 ; q3 + vld1.u8 {d12}, [r8@64], r1 ; q4 + vld1.u8 {d13}, [r8@64], r1 ; q5 + vld1.u8 {d14}, [r8@64], r1 ; q6 + vld1.u8 {d15}, [r8@64], r1 ; q7 + + bl vp9_wide_mbfilter_neon + + tst r7, #1 + beq h_mbfilter + + ; flat && mask were not set for any of the channels. Just store the values + ; from filter. 
+ sub r8, r0, r1, lsl #1 + + vst1.u8 {d25}, [r8@64], r1 ; store op1 + vst1.u8 {d24}, [r8@64], r1 ; store op0 + vst1.u8 {d23}, [r8@64], r1 ; store oq0 + vst1.u8 {d26}, [r8@64], r1 ; store oq1 + + b h_next + +h_mbfilter + tst r7, #2 + beq h_wide_mbfilter + + ; flat2 was not set for any of the channels. Just store the values from + ; mbfilter. + sub r8, r0, r1, lsl #1 + sub r8, r8, r1 + + vst1.u8 {d18}, [r8@64], r1 ; store op2 + vst1.u8 {d19}, [r8@64], r1 ; store op1 + vst1.u8 {d20}, [r8@64], r1 ; store op0 + vst1.u8 {d21}, [r8@64], r1 ; store oq0 + vst1.u8 {d22}, [r8@64], r1 ; store oq1 + vst1.u8 {d23}, [r8@64], r1 ; store oq2 + + b h_next + +h_wide_mbfilter + sub r8, r0, r1, lsl #3 + add r8, r8, r1 + + vst1.u8 {d16}, [r8@64], r1 ; store op6 + vst1.u8 {d24}, [r8@64], r1 ; store op5 + vst1.u8 {d25}, [r8@64], r1 ; store op4 + vst1.u8 {d26}, [r8@64], r1 ; store op3 + vst1.u8 {d27}, [r8@64], r1 ; store op2 + vst1.u8 {d18}, [r8@64], r1 ; store op1 + vst1.u8 {d19}, [r8@64], r1 ; store op0 + vst1.u8 {d20}, [r8@64], r1 ; store oq0 + vst1.u8 {d21}, [r8@64], r1 ; store oq1 + vst1.u8 {d22}, [r8@64], r1 ; store oq2 + vst1.u8 {d23}, [r8@64], r1 ; store oq3 + vst1.u8 {d1}, [r8@64], r1 ; store oq4 + vst1.u8 {d2}, [r8@64], r1 ; store oq5 + vst1.u8 {d3}, [r8@64], r1 ; store oq6 + +h_next + add r0, r0, #8 + subs r12, r12, #1 + bne h_count + + vpop {d8-d15} + pop {r4-r8, pc} + + ENDP ; |vp9_mb_lpf_horizontal_edge_w_neon| + +; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t *s, +; r1 int p, /* pitch */ +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh, +|vp9_mb_lpf_vertical_edge_w_neon| PROC + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] ; load thresh + + vld1.8 {d16[]}, [r2] ; load *blimit + vld1.8 {d17[]}, [r3] ; load *limit + vld1.8 {d18[]}, [r4] ; load *thresh + + sub r8, r0, #8 + + vld1.8 {d0}, [r8@64], r1 + vld1.8 {d8}, [r0@64], r1 + vld1.8 
{d1}, [r8@64], r1 + vld1.8 {d9}, [r0@64], r1 + vld1.8 {d2}, [r8@64], r1 + vld1.8 {d10}, [r0@64], r1 + vld1.8 {d3}, [r8@64], r1 + vld1.8 {d11}, [r0@64], r1 + vld1.8 {d4}, [r8@64], r1 + vld1.8 {d12}, [r0@64], r1 + vld1.8 {d5}, [r8@64], r1 + vld1.8 {d13}, [r0@64], r1 + vld1.8 {d6}, [r8@64], r1 + vld1.8 {d14}, [r0@64], r1 + vld1.8 {d7}, [r8@64], r1 + vld1.8 {d15}, [r0@64], r1 + + sub r0, r0, r1, lsl #3 + + vtrn.32 q0, q2 + vtrn.32 q1, q3 + vtrn.32 q4, q6 + vtrn.32 q5, q7 + + vtrn.16 q0, q1 + vtrn.16 q2, q3 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + + vtrn.8 d0, d1 + vtrn.8 d2, d3 + vtrn.8 d4, d5 + vtrn.8 d6, d7 + + vtrn.8 d8, d9 + vtrn.8 d10, d11 + vtrn.8 d12, d13 + vtrn.8 d14, d15 + + bl vp9_wide_mbfilter_neon + + tst r7, #1 + beq v_mbfilter + + ; flat && mask were not set for any of the channels. Just store the values + ; from filter. + sub r8, r0, #2 + + vswp d23, d25 + + vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 + vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 + vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 + vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 + vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 + vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 + vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 + vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 + + b v_end + +v_mbfilter + tst r7, #2 + beq v_wide_mbfilter + + ; flat2 was not set for any of the channels. Just store the values from + ; mbfilter. 
+ sub r8, r0, #3 + + vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 + vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 + vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 + vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 + vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 + vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 + vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 + vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 + vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 + vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 + vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 + vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 + vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 + vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 + vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 + vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 + + b v_end + +v_wide_mbfilter + sub r8, r0, #8 + + vtrn.32 d0, d26 + vtrn.32 d16, d27 + vtrn.32 d24, d18 + vtrn.32 d25, d19 + + vtrn.16 d0, d24 + vtrn.16 d16, d25 + vtrn.16 d26, d18 + vtrn.16 d27, d19 + + vtrn.8 d0, d16 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + vtrn.8 d18, d19 + + vtrn.32 d20, d1 + vtrn.32 d21, d2 + vtrn.32 d22, d3 + vtrn.32 d23, d15 + + vtrn.16 d20, d22 + vtrn.16 d21, d23 + vtrn.16 d1, d3 + vtrn.16 d2, d15 + + vtrn.8 d20, d21 + vtrn.8 d22, d23 + vtrn.8 d1, d2 + vtrn.8 d3, d15 + + vst1.8 {d0}, [r8@64], r1 + vst1.8 {d20}, [r0@64], r1 + vst1.8 {d16}, [r8@64], r1 + vst1.8 {d21}, [r0@64], r1 + vst1.8 {d24}, [r8@64], r1 + vst1.8 {d22}, [r0@64], r1 + vst1.8 {d25}, [r8@64], r1 + vst1.8 {d23}, [r0@64], r1 + vst1.8 {d26}, [r8@64], r1 + vst1.8 {d1}, [r0@64], r1 + vst1.8 {d27}, [r8@64], r1 + vst1.8 {d2}, [r0@64], r1 + vst1.8 {d18}, [r8@64], r1 + vst1.8 {d3}, [r0@64], r1 + vst1.8 {d19}, [r8@64], r1 + vst1.8 {d15}, [r0@64], r1 + +v_end + vpop {d8-d15} + pop {r4-r8, pc} + + ENDP ; |vp9_mb_lpf_vertical_edge_w_neon| + +; void vp9_wide_mbfilter_neon(); +; This is a helper function for the loopfilters. The individual functions do the +; necessary load, transpose (if necessary) and store. 
+; +; r0-r3 PRESERVE +; d16 blimit +; d17 limit +; d18 thresh +; d0 p7 +; d1 p6 +; d2 p5 +; d3 p4 +; d4 p3 +; d5 p2 +; d6 p1 +; d7 p0 +; d8 q0 +; d9 q1 +; d10 q2 +; d11 q3 +; d12 q4 +; d13 q5 +; d14 q6 +; d15 q7 +|vp9_wide_mbfilter_neon| PROC + mov r7, #0 + + ; filter_mask + vabd.u8 d19, d4, d5 ; abs(p3 - p2) + vabd.u8 d20, d5, d6 ; abs(p2 - p1) + vabd.u8 d21, d6, d7 ; abs(p1 - p0) + vabd.u8 d22, d9, d8 ; abs(q1 - q0) + vabd.u8 d23, d10, d9 ; abs(q2 - q1) + vabd.u8 d24, d11, d10 ; abs(q3 - q2) + + ; only compare the largest value to limit + vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1)) + vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0)) + vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2)) + vmax.u8 d19, d19, d20 + + vabd.u8 d24, d7, d8 ; abs(p0 - q0) + + vmax.u8 d19, d19, d23 + + vabd.u8 d23, d6, d9 ; a = abs(p1 - q1) + vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 + + ; abs () > limit + vcge.u8 d19, d17, d19 + + ; flatmask4 + vabd.u8 d25, d7, d5 ; abs(p0 - p2) + vabd.u8 d26, d8, d10 ; abs(q0 - q2) + vabd.u8 d27, d4, d7 ; abs(p3 - p0) + vabd.u8 d28, d11, d8 ; abs(q3 - q0) + + ; only compare the largest value to thresh + vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2)) + vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0)) + vmax.u8 d25, d25, d26 + vmax.u8 d20, d20, d25 + + vshr.u8 d23, d23, #1 ; a = a / 2 + vqadd.u8 d24, d24, d23 ; a = b + a + + vmov.u8 d30, #1 + vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1 + + vcge.u8 d20, d30, d20 ; flat + + vand d19, d19, d24 ; mask + + ; hevmask + vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1 + vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1 + vorr d21, d21, d22 ; hev + + vand d16, d20, d19 ; flat && mask + vmov r5, r6, d16 + orrs r5, r5, r6 ; Check for 0 + orreq r7, r7, #1 ; Only do filter branch + + ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) + vabd.u8 d22, d3, d7 ; abs(p4 - p0) + vabd.u8 d23, d12, d8 ; abs(q4 - q0) + vabd.u8 d24, d7, d2 ; abs(p0 - p5) + vabd.u8 d25, 
d8, d13 ; abs(q0 - q5) + vabd.u8 d26, d1, d7 ; abs(p6 - p0) + vabd.u8 d27, d14, d8 ; abs(q6 - q0) + vabd.u8 d28, d0, d7 ; abs(p7 - p0) + vabd.u8 d29, d15, d8 ; abs(q7 - q0) + + ; only compare the largest value to thresh + vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) + vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5)) + vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0)) + vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0)) + + vmax.u8 d26, d22, d23 + vmax.u8 d27, d24, d25 + vmax.u8 d23, d26, d27 + + vcge.u8 d18, d30, d23 ; flat2 + + vmov.u8 d22, #0x80 + + vand d17, d18, d16 ; flat2 && flat && mask + vmov r5, r6, d17 + orrs r5, r5, r6 ; Check for 0 + orreq r7, r7, #2 ; Only do mbfilter branch + + ; mbfilter() function + + ; filter() function + ; convert to signed + veor d23, d8, d22 ; qs0 + veor d24, d7, d22 ; ps0 + veor d25, d6, d22 ; ps1 + veor d26, d9, d22 ; qs1 + + vmov.u8 d27, #3 + + vsub.s8 d28, d23, d24 ; ( qs0 - ps0) + + vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) + + vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) + + vand d29, d29, d21 ; filter &= hev + + vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) + + vmov.u8 d29, #4 + + ; filter = clamp(filter + 3 * ( qs0 - ps0)) + vqmovn.s16 d28, q15 + + vand d28, d28, d19 ; filter &= mask + + vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) + vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) + vshr.s8 d30, d30, #3 ; filter2 >>= 3 + vshr.s8 d29, d29, #3 ; filter1 >>= 3 + + + vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) + vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1) + + ; outer tap adjustments: ++filter1 >> 1 + vrshr.s8 d29, d29, #1 + vbic d29, d29, d21 ; filter &= ~hev + + vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) + vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) + + veor d24, d24, d22 ; *f_op0 = u^0x80 + veor d23, d23, d22 ; *f_oq0 = u^0x80 + veor d25, d25, d22 ; *f_op1 = u^0x80 + veor d26, d26, d22 ; *f_oq1 = u^0x80 + + tst r7, #1 + bxne lr + + ; mbfilter flat 
&& mask branch + ; TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's + ; and using vbit on the q's? + vmov.u8 d29, #2 + vaddl.u8 q15, d7, d8 ; op2 = p0 + q0 + vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3 + vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2 + vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2 + vqrshrn.u16 d18, q15, #3 ; r_op2 + + vsubw.u8 q15, d4 ; op1 = op2 - p3 + vsubw.u8 q15, d5 ; op1 -= p2 + vaddw.u8 q15, d6 ; op1 += p1 + vaddw.u8 q15, d9 ; op1 += q1 + vqrshrn.u16 d19, q15, #3 ; r_op1 + + vsubw.u8 q15, d4 ; op0 = op1 - p3 + vsubw.u8 q15, d6 ; op0 -= p1 + vaddw.u8 q15, d7 ; op0 += p0 + vaddw.u8 q15, d10 ; op0 += q2 + vqrshrn.u16 d20, q15, #3 ; r_op0 + + vsubw.u8 q15, d4 ; oq0 = op0 - p3 + vsubw.u8 q15, d7 ; oq0 -= p0 + vaddw.u8 q15, d8 ; oq0 += q0 + vaddw.u8 q15, d11 ; oq0 += q3 + vqrshrn.u16 d21, q15, #3 ; r_oq0 + + vsubw.u8 q15, d5 ; oq1 = oq0 - p2 + vsubw.u8 q15, d8 ; oq1 -= q0 + vaddw.u8 q15, d9 ; oq1 += q1 + vaddw.u8 q15, d11 ; oq1 += q3 + vqrshrn.u16 d22, q15, #3 ; r_oq1 + + vsubw.u8 q15, d6 ; oq2 = oq1 - p1 + vsubw.u8 q15, d9 ; oq2 -= q1 + vaddw.u8 q15, d10 ; oq2 += q2 + vaddw.u8 q15, d11 ; oq2 += q3 + vqrshrn.u16 d27, q15, #3 ; r_oq2 + + ; Filter does not set op2 or oq2, so use p2 and q2. 
+ vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask) + vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask) + vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask) + vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask) + vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask) + + vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask) + vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask) + + tst r7, #2 + bxne lr + + ; wide_mbfilter flat2 && flat && mask branch + vmov.u8 d16, #7 + vaddl.u8 q15, d7, d8 ; op6 = p0 + q0 + vmlal.u8 q15, d0, d16 ; op6 += p7 * 3 + vmlal.u8 q15, d1, d29 ; op6 += p6 * 2 + vaddw.u8 q15, d2 ; op6 += p5 + vaddw.u8 q15, d3 ; op6 += p4 + vaddw.u8 q15, d4 ; op6 += p3 + vaddw.u8 q15, d5 ; op6 += p2 + vaddw.u8 q15, d6 ; op6 += p1 + vqrshrn.u16 d16, q15, #4 ; w_op6 + + vsubw.u8 q15, d0 ; op5 = op6 - p7 + vsubw.u8 q15, d1 ; op5 -= p6 + vaddw.u8 q15, d2 ; op5 += p5 + vaddw.u8 q15, d9 ; op5 += q1 + vqrshrn.u16 d24, q15, #4 ; w_op5 + + vsubw.u8 q15, d0 ; op4 = op5 - p7 + vsubw.u8 q15, d2 ; op4 -= p5 + vaddw.u8 q15, d3 ; op4 += p4 + vaddw.u8 q15, d10 ; op4 += q2 + vqrshrn.u16 d25, q15, #4 ; w_op4 + + vsubw.u8 q15, d0 ; op3 = op4 - p7 + vsubw.u8 q15, d3 ; op3 -= p4 + vaddw.u8 q15, d4 ; op3 += p3 + vaddw.u8 q15, d11 ; op3 += q3 + vqrshrn.u16 d26, q15, #4 ; w_op3 + + vsubw.u8 q15, d0 ; op2 = op3 - p7 + vsubw.u8 q15, d4 ; op2 -= p3 + vaddw.u8 q15, d5 ; op2 += p2 + vaddw.u8 q15, d12 ; op2 += q4 + vqrshrn.u16 d27, q15, #4 ; w_op2 + + vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m) + + vsubw.u8 q15, d0 ; op1 = op2 - p7 + vsubw.u8 q15, d5 ; op1 -= p2 + vaddw.u8 q15, d6 ; op1 += p1 + vaddw.u8 q15, d13 ; op1 += q5 + vqrshrn.u16 d18, q15, #4 ; w_op1 + + vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m) + + vsubw.u8 q15, d0 ; op0 = op1 - p7 + vsubw.u8 q15, d6 ; op0 -= p1 + vaddw.u8 q15, d7 ; op0 += p0 + vaddw.u8 q15, d14 ; op0 += q6 + vqrshrn.u16 d19, q15, #4 ; w_op0 + + vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m) + + vsubw.u8 q15, d0 ; oq0 = op0 - p7 + vsubw.u8 q15, d7 
; oq0 -= p0 + vaddw.u8 q15, d8 ; oq0 += q0 + vaddw.u8 q15, d15 ; oq0 += q7 + vqrshrn.u16 d20, q15, #4 ; w_oq0 + + vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m) + + vsubw.u8 q15, d1 ; oq1 = oq0 - p6 + vsubw.u8 q15, d8 ; oq1 -= q0 + vaddw.u8 q15, d9 ; oq1 += q1 + vaddw.u8 q15, d15 ; oq1 += q7 + vqrshrn.u16 d21, q15, #4 ; w_oq1 + + vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m) + + vsubw.u8 q15, d2 ; oq2 = oq1 - p5 + vsubw.u8 q15, d9 ; oq2 -= q1 + vaddw.u8 q15, d10 ; oq2 += q2 + vaddw.u8 q15, d15 ; oq2 += q7 + vqrshrn.u16 d22, q15, #4 ; w_oq2 + + vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m) + + vsubw.u8 q15, d3 ; oq3 = oq2 - p4 + vsubw.u8 q15, d10 ; oq3 -= q2 + vaddw.u8 q15, d11 ; oq3 += q3 + vaddw.u8 q15, d15 ; oq3 += q7 + vqrshrn.u16 d23, q15, #4 ; w_oq3 + + vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m) + + vsubw.u8 q15, d4 ; oq4 = oq3 - p3 + vsubw.u8 q15, d11 ; oq4 -= q3 + vaddw.u8 q15, d12 ; oq4 += q4 + vaddw.u8 q15, d15 ; oq4 += q7 + vqrshrn.u16 d1, q15, #4 ; w_oq4 + + vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m) + + vsubw.u8 q15, d5 ; oq5 = oq4 - p2 + vsubw.u8 q15, d12 ; oq5 -= q4 + vaddw.u8 q15, d13 ; oq5 += q5 + vaddw.u8 q15, d15 ; oq5 += q7 + vqrshrn.u16 d2, q15, #4 ; w_oq5 + + vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m) + + vsubw.u8 q15, d6 ; oq6 = oq5 - p1 + vsubw.u8 q15, d13 ; oq6 -= q5 + vaddw.u8 q15, d14 ; oq6 += q6 + vaddw.u8 q15, d15 ; oq6 += q7 + vqrshrn.u16 d3, q15, #4 ; w_oq6 + + vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m) + vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m) + vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m) + vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m) + vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) + + bx lr + ENDP ; |vp9_wide_mbfilter_neon| + + END
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 70e3032..f68c5c6 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h
@@ -161,6 +161,11 @@ union b_mode_info bmi[4]; } MODE_INFO; +static int is_inter_block(const MB_MODE_INFO *mbmi) { + return mbmi->ref_frame[0] > INTRA_FRAME; +} + + enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 @@ -283,22 +288,22 @@ static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) { switch (subsize) { - case BLOCK_SIZE_SB64X64: - case BLOCK_SIZE_SB64X32: - case BLOCK_SIZE_SB32X64: - case BLOCK_SIZE_SB32X32: + case BLOCK_64X64: + case BLOCK_64X32: + case BLOCK_32X64: + case BLOCK_32X32: return &xd->sb_index; - case BLOCK_SIZE_SB32X16: - case BLOCK_SIZE_SB16X32: - case BLOCK_SIZE_MB16X16: + case BLOCK_32X16: + case BLOCK_16X32: + case BLOCK_16X16: return &xd->mb_index; - case BLOCK_SIZE_SB16X8: - case BLOCK_SIZE_SB8X16: - case BLOCK_SIZE_SB8X8: + case BLOCK_16X8: + case BLOCK_8X16: + case BLOCK_8X8: return &xd->b_index; - case BLOCK_SIZE_SB8X4: - case BLOCK_SIZE_SB4X8: - case BLOCK_SIZE_AB4X4: + case BLOCK_8X4: + case BLOCK_4X8: + case BLOCK_4X4: return &xd->ab_index; default: assert(0); @@ -312,7 +317,7 @@ const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2; const int bwl = b_width_log2(sb_type); const int bhl = b_height_log2(sb_type); - const int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl; + const int boffset = b_width_log2(BLOCK_64X64) - bsl; const char pcval0 = ~(0xe << boffset); const char pcval1 = ~(0xf << boffset); const char pcvalue[2] = {pcval0, pcval1}; @@ -330,7 +335,7 @@ BLOCK_SIZE_TYPE sb_type) { int bsl = mi_width_log2(sb_type), bs = 1 << bsl; int above = 0, left = 0, i; - int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl; + int boffset = mi_width_log2(BLOCK_64X64) - bsl; assert(mi_width_log2(sb_type) == mi_height_log2(sb_type)); assert(bsl >= 0); @@ -363,10 +368,10 @@ if (plane_type != PLANE_TYPE_Y_WITH_DC || xd->lossless || - mbmi->ref_frame[0] != INTRA_FRAME) + is_inter_block(mbmi)) return DCT_DCT; - return mode2txfm_map[mbmi->sb_type < BLOCK_SIZE_SB8X8 ? 
+ return mode2txfm_map[mbmi->sb_type < BLOCK_8X8 ? mi->bmi[ib].as_mode : mbmi->mode]; } @@ -560,8 +565,8 @@ // size of the predictor to use. int pred_w, pred_h; - if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) { - assert(bsize == BLOCK_SIZE_SB8X8); + if (xd->mode_info_context->mbmi.sb_type < BLOCK_8X8) { + assert(bsize == BLOCK_8X8); pred_w = 0; pred_h = 0; } else { @@ -686,46 +691,39 @@ } } static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, - int plane, int ss_tx_size, int eob, int aoff, - int loff, ENTROPY_CONTEXT *A, - ENTROPY_CONTEXT *L) { - const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); - const int sw = bw - xd->plane[plane].subsampling_x; - const int sh = bh - xd->plane[plane].subsampling_y; - int mi_blocks_wide = 1 << sw; - int mi_blocks_high = 1 << sh; - int tx_size_in_blocks = (1 << ss_tx_size); + int plane, int tx_size_in_blocks, + int eob, int aoff, int loff, + ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) { + struct macroblockd_plane *pd = &xd->plane[plane]; int above_contexts = tx_size_in_blocks; int left_contexts = tx_size_in_blocks; + int mi_blocks_wide = 1 << plane_block_width_log2by4(bsize, pd); + int mi_blocks_high = 1 << plane_block_height_log2by4(bsize, pd); int pt; // xd->mb_to_right_edge is in units of pixels * 8. This converts // it to 4x4 block sizes. - if (xd->mb_to_right_edge < 0) { - mi_blocks_wide += (xd->mb_to_right_edge - >> (5 + xd->plane[plane].subsampling_x)); - } + if (xd->mb_to_right_edge < 0) + mi_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); // this code attempts to avoid copying into contexts that are outside // our border. Any blocks that do are set to 0... 
if (above_contexts + aoff > mi_blocks_wide) above_contexts = mi_blocks_wide - aoff; - if (xd->mb_to_bottom_edge < 0) { - mi_blocks_high += (xd->mb_to_bottom_edge - >> (5 + xd->plane[plane].subsampling_y)); - } - if (left_contexts + loff > mi_blocks_high) { + if (xd->mb_to_bottom_edge < 0) + mi_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + if (left_contexts + loff > mi_blocks_high) left_contexts = mi_blocks_high - loff; - } for (pt = 0; pt < above_contexts; pt++) A[pt] = eob > 0; - for (pt = above_contexts; pt < (1 << ss_tx_size); pt++) + for (pt = above_contexts; pt < tx_size_in_blocks; pt++) A[pt] = 0; for (pt = 0; pt < left_contexts; pt++) L[pt] = eob > 0; - for (pt = left_contexts; pt < (1 << ss_tx_size); pt++) + for (pt = left_contexts; pt < tx_size_in_blocks; pt++) L[pt] = 0; }
diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c index 7cd302a..fdf37e4 100644 --- a/vp9/common/vp9_common_data.c +++ b/vp9/common/vp9_common_data.c
@@ -35,6 +35,9 @@ const int size_group_lookup[BLOCK_SIZE_TYPES] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; +const int num_pels_log2_lookup[BLOCK_SIZE_TYPES] = + {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; + const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = { { // 4X4 @@ -45,25 +48,25 @@ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 8X8 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 16X16 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 32X32 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 64X64 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, @@ -73,29 +76,29 @@ const BLOCK_SIZE_TYPE 
subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = { { // PARTITION_NONE - BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB8X4, - BLOCK_SIZE_SB8X8, BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB16X8, - BLOCK_SIZE_MB16X16, BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB32X16, - BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64, BLOCK_SIZE_SB64X32, - BLOCK_SIZE_SB64X64, + BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, + BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, + BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, + BLOCK_64X64, }, { // PARTITION_HORZ BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB64X32, + BLOCK_8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_64X32, }, { // PARTITION_VERT BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB32X64, + BLOCK_4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_32X64, }, { // PARTITION_SPLIT BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_AB4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_MB16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, - BLOCK_SIZE_SB32X32, + BLOCK_4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_32X32, } }; @@ -113,14 +116,9 @@ }; const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = { - {BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8, - BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8}, - 
{BLOCK_SIZE_SB8X4, BLOCK_SIZE_SB8X8, BLOCK_SIZE_SB8X16, - BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB8X16}, - {BLOCK_SIZE_SB16X8, BLOCK_SIZE_SB16X8, BLOCK_SIZE_MB16X16, - BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB16X32}, - {BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16, - BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64}, - {BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32, - BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X64} + { BLOCK_4X4, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8 }, + { BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X16, BLOCK_8X16 }, + { BLOCK_16X8, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, BLOCK_16X32 }, + { BLOCK_32X16, BLOCK_32X16, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64 }, + { BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X64 } };
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h index 44ec7ae..bc8c01a 100644 --- a/vp9/common/vp9_common_data.h +++ b/vp9/common/vp9_common_data.h
@@ -22,10 +22,8 @@ extern const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES]; extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES]; extern const int size_group_lookup[BLOCK_SIZE_TYPES]; - +extern const int num_pels_log2_lookup[BLOCK_SIZE_TYPES]; extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES]; - - extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES]; extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES]; extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES];
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index c7733ad..df3a9fe 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c
@@ -419,7 +419,7 @@ init_bit_tree(cat6, 14); } -vp9_extra_bit vp9_extra_bits[12] = { +const vp9_extra_bit vp9_extra_bits[12] = { { 0, 0, 0, 0}, { 0, 0, 0, 1}, { 0, 0, 0, 2},
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 9fa1fc6..861c078 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h
@@ -50,7 +50,7 @@ int base_val; } vp9_extra_bit; -extern vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */ +extern const vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */ #define MAX_PROB 255 #define DCT_MAX_VALUE 16384
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 9ec3f4f..768e5f5 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c
@@ -356,29 +356,6 @@ vp9_inter_mode_tree, NEARESTMV); } -void vp9_accum_mv_refs(VP9_COMMON *pc, - MB_PREDICTION_MODE m, - const int context) { - unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] = - pc->counts.inter_mode; - - if (m == ZEROMV) { - ++inter_mode_counts[context][0][0]; - } else { - ++inter_mode_counts[context][0][1]; - if (m == NEARESTMV) { - ++inter_mode_counts[context][1][0]; - } else { - ++inter_mode_counts[context][1][1]; - if (m == NEARMV) { - ++inter_mode_counts[context][2][0]; - } else { - ++inter_mode_counts[context][2][1]; - } - } - } -} - #define COUNT_SAT 20 #define MAX_UPDATE_FACTOR 128 @@ -425,10 +402,11 @@ fc->single_ref_prob[i][j] = update_ct2(pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); - for (j = 0; j < INTER_MODE_CONTEXTS; j++) - for (i = 0; i < VP9_INTER_MODES - 1; i++) - fc->inter_mode_probs[j][i] = update_ct2(pre_fc->inter_mode_probs[j][i], - counts->inter_mode[j][i]); + for (i = 0; i < INTER_MODE_CONTEXTS; i++) + update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree, + counts->inter_mode[i], pre_fc->inter_mode_probs[i], + fc->inter_mode_probs[i], NEARESTMV); + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, counts->y_mode[i], pre_fc->y_mode_prob[i], @@ -484,22 +462,24 @@ counts->mbskip[i]); } -static void set_default_lf_deltas(MACROBLOCKD *xd) { - xd->lf.mode_ref_delta_enabled = 1; - xd->lf.mode_ref_delta_update = 1; +static void set_default_lf_deltas(struct loopfilter *lf) { + lf->mode_ref_delta_enabled = 1; + lf->mode_ref_delta_update = 1; - xd->lf.ref_deltas[INTRA_FRAME] = 1; - xd->lf.ref_deltas[LAST_FRAME] = 0; - xd->lf.ref_deltas[GOLDEN_FRAME] = -1; - xd->lf.ref_deltas[ALTREF_FRAME] = -1; + lf->ref_deltas[INTRA_FRAME] = 1; + lf->ref_deltas[LAST_FRAME] = 0; + lf->ref_deltas[GOLDEN_FRAME] = -1; + lf->ref_deltas[ALTREF_FRAME] = -1; - xd->lf.mode_deltas[0] = 0; - xd->lf.mode_deltas[1] = 0; + lf->mode_deltas[0] = 0; + lf->mode_deltas[1] = 0; } void 
vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) { // Reset the segment feature data to the default stats: // Features disabled, 0, with delta coding (Default state). + struct loopfilter *const lf = &xd->lf; + int i; vp9_clearall_segfeatures(&xd->seg); xd->seg.abs_delta = SEGMENT_DELTADATA; @@ -507,12 +487,12 @@ vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); // Reset the mode ref deltas for loop filter - vp9_zero(xd->lf.last_ref_deltas); - vp9_zero(xd->lf.last_mode_deltas); - set_default_lf_deltas(xd); + vp9_zero(lf->last_ref_deltas); + vp9_zero(lf->last_mode_deltas); + set_default_lf_deltas(lf); // To force update of the sharpness - xd->lf.last_sharpness_level = -1; + lf->last_sharpness_level = -1; vp9_default_coef_probs(cm); vp9_init_mbmode_probs(cm);
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index da152cd..17a7c26 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h
@@ -67,10 +67,6 @@ void vp9_adapt_mode_probs(struct VP9Common *); -void vp9_accum_mv_refs(struct VP9Common *pc, - MB_PREDICTION_MODE m, - const int context); - void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p, unsigned int (*ct_32x32p)[2]); void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p,
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c index 4080f8c..6cfc346 100644 --- a/vp9/common/vp9_entropymv.c +++ b/vp9/common/vp9_entropymv.c
@@ -198,7 +198,7 @@ } -void vp9_adapt_mv_probs(VP9_COMMON *cm, int usehp) { +void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { int i, j; FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; @@ -207,7 +207,7 @@ nmv_context *pre_ctx = &pre_fc->nmvc; nmv_context_counts *cts = &cm->counts.mv; - vp9_counts_process(cts, usehp); + vp9_counts_process(cts, allow_hp); adapt_probs(0, vp9_mv_joint_tree, ctx->joints, pre_ctx->joints, cts->joints); @@ -229,7 +229,7 @@ adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].fp, pre_ctx->comps[i].fp, cts->comps[i].fp); - if (usehp) { + if (allow_hp) { ctx->comps[i].class0_hp = adapt_prob(pre_ctx->comps[i].class0_hp, cts->comps[i].class0_hp); ctx->comps[i].hp = adapt_prob(pre_ctx->comps[i].hp, cts->comps[i].hp);
diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 643b229..3af8b8d 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c
@@ -14,8 +14,9 @@ #include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_sadmxn.h" -static void lower_mv_precision(int_mv *mv, int usehp) { - if (!usehp || !vp9_use_mv_hp(&mv->as_mv)) { +static void lower_mv_precision(int_mv *mv, int allow_hp) { + const int use_hp = allow_hp && vp9_use_mv_hp(&mv->as_mv); + if (!use_hp) { if (mv->as_mv.row & 1) mv->as_mv.row += (mv->as_mv.row > 0 ? -1 : 1); if (mv->as_mv.col & 1) @@ -32,7 +33,7 @@ // Make sure all the candidates are properly clamped etc for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { lower_mv_precision(&mvlist[i], xd->allow_high_precision_mv); - clamp_mv2(&mvlist[i], xd); + clamp_mv2(&mvlist[i].as_mv, xd); } *nearest = mvlist[0]; *near = mvlist[1]; @@ -41,7 +42,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int_mv *dst_nearest, int_mv *dst_near, - int block_idx, int ref_idx) { + int block_idx, int ref_idx, + int mi_row, int mi_col) { int_mv dst_list[MAX_MV_REF_CANDIDATES]; int_mv mv_list[MAX_MV_REF_CANDIDATES]; MODE_INFO *mi = xd->mode_info_context; @@ -53,7 +55,8 @@ vp9_find_mv_refs_idx(cm, xd, xd->mode_info_context, xd->prev_mode_info_context, mbmi->ref_frame[ref_idx], - mv_list, cm->ref_frame_sign_bias, block_idx); + mv_list, cm->ref_frame_sign_bias, block_idx, + mi_row, mi_col); dst_list[1].as_int = 0; if (block_idx == 0) {
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h index b0fa505..e5221ed 100644 --- a/vp9/common/vp9_findnearmv.h +++ b/vp9/common/vp9_findnearmv.h
@@ -29,31 +29,19 @@ int_mv *near); // TODO(jingning): this mv clamping function should be block size dependent. -static void clamp_mv(int_mv *mv, - int mb_to_left_edge, - int mb_to_right_edge, - int mb_to_top_edge, - int mb_to_bottom_edge) { - mv->as_mv.col = clamp(mv->as_mv.col, mb_to_left_edge, mb_to_right_edge); - mv->as_mv.row = clamp(mv->as_mv.row, mb_to_top_edge, mb_to_bottom_edge); -} - -static int clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) { - int_mv tmp_mv; - tmp_mv.as_int = mv->as_int; - clamp_mv(mv, - xd->mb_to_left_edge - LEFT_TOP_MARGIN, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); - return tmp_mv.as_int != mv->as_int; +static void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { + clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, + xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, + xd->mb_to_top_edge - LEFT_TOP_MARGIN, + xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); } void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *pc, MACROBLOCKD *xd, int_mv *dst_nearest, int_mv *dst_near, - int block_idx, int ref_idx); + int block_idx, int ref_idx, + int mi_row, int mi_col); static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { // FIXME(rbultje, jingning): temporary hack because jenkins doesn't @@ -62,7 +50,7 @@ /* On L edge, get from MB to left of us */ --cur_mb; - if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) { + if (is_inter_block(&cur_mb->mbmi)) { return DC_PRED; } else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) { return ((cur_mb->bmi + 1 + b)->as_mode); @@ -80,7 +68,7 @@ /* On top edge, get from MB above us */ cur_mb -= mi_stride; - if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) { + if (is_inter_block(&cur_mb->mbmi)) { return DC_PRED; } else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) { return ((cur_mb->bmi + 2 + b)->as_mode);
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index 38fec3e..a224525 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c
@@ -1274,29 +1274,3 @@ out = dct_const_round_shift(out * cospi_16_64); output[0] = ROUND_POWER_OF_TWO(out, 6); } - -void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest, - int dest_stride) { - int16_t out[32 * 32] = { 0 }; - int16_t *outptr = out; - int i, j; - int16_t temp_in[32], temp_out[32]; - - // First transform rows. Since all non-zero dct coefficients are in - // upper-left 4x4 area, we only need to calculate first 4 rows here. - for (i = 0; i < 4; ++i) { - idct32_1d(input, outptr); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - idct32_1d(temp_in, temp_out); - for (j = 0; j < 32; ++j) - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) - + dest[j * dest_stride + i]); - } -}
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index c57f0a5..66df627 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c
@@ -16,6 +16,12 @@ #include "vp9/common/vp9_seg_common.h" +struct loop_filter_info { + const uint8_t *mblim; + const uint8_t *lim; + const uint8_t *hev_thr; +}; + static void lf_init_lut(loop_filter_info_n *lfi) { lfi->mode_lf_lut[DC_PRED] = 0; lfi->mode_lf_lut[D45_PRED] = 0; @@ -73,13 +79,14 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd, int default_filt_lvl) { - int seg; + int seg_id; // n_shift is the a multiplier for lf_deltas // the multiplier is 1 for when filter_lvl is between 0 and 31; // 2 when filter_lvl is between 32 and 63 const int n_shift = default_filt_lvl >> 5; loop_filter_info_n *const lfi = &cm->lf_info; - struct loopfilter *lf = &xd->lf; + struct loopfilter *const lf = &xd->lf; + struct segmentation *const seg = &xd->seg; // update limits if sharpness has changed if (lf->last_sharpness_level != lf->sharpness_level) { @@ -87,13 +94,13 @@ lf->last_sharpness_level = lf->sharpness_level; } - for (seg = 0; seg < MAX_SEGMENTS; seg++) { + for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { int lvl_seg = default_filt_lvl, ref, mode, intra_lvl; // Set the baseline filter values for each segment - if (vp9_segfeature_active(&xd->seg, seg, SEG_LVL_ALT_LF)) { - const int data = vp9_get_segdata(&xd->seg, seg, SEG_LVL_ALT_LF); - lvl_seg = xd->seg.abs_delta == SEGMENT_ABSDATA + if (vp9_segfeature_active(&xd->seg, seg_id, SEG_LVL_ALT_LF)) { + const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF); + lvl_seg = seg->abs_delta == SEGMENT_ABSDATA ? 
data : clamp(default_filt_lvl + data, 0, MAX_LOOP_FILTER); } @@ -101,18 +108,18 @@ if (!lf->mode_ref_delta_enabled) { // we could get rid of this if we assume that deltas are set to // zero when not in use; encoder always uses deltas - vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4); + vpx_memset(lfi->lvl[seg_id][0], lvl_seg, 4 * 4); continue; } intra_lvl = lvl_seg + (lf->ref_deltas[INTRA_FRAME] << n_shift); - lfi->lvl[seg][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); + lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { const int inter_lvl = lvl_seg + (lf->ref_deltas[ref] << n_shift) + (lf->mode_deltas[mode] << n_shift); - lfi->lvl[seg][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); + lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); } } } @@ -256,7 +263,7 @@ // Determine the vertical edges that need filtering for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { const int skip_this = mi[c].mbmi.mb_skip_coeff - && mi[c].mbmi.ref_frame[0] != INTRA_FRAME; + && is_inter_block(&mi[c].mbmi); // left edge of current unit is block/partition edge -> no skip const int block_edge_left = b_width_log2(mi[c].mbmi.sb_type) ? !(c & ((1 << (b_width_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1;
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index c6fe112..5fc9094 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h
@@ -35,13 +35,6 @@ uint8_t mode_lf_lut[MB_MODE_COUNT]; } loop_filter_info_n; -struct loop_filter_info { - const uint8_t *mblim; - const uint8_t *lim; - const uint8_t *hev_thr; -}; - - /* assorted loopfilter functions which get used elsewhere */ struct VP9Common; struct macroblockd;
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h index a095258..31a79b9 100644 --- a/vp9/common/vp9_mv.h +++ b/vp9/common/vp9_mv.h
@@ -13,6 +13,8 @@ #include "vpx/vpx_integer.h" +#include "vp9/common/vp9_common.h" + typedef struct { int16_t row; int16_t col; @@ -28,4 +30,10 @@ int32_t col; } MV32; +static void clamp_mv(MV *mv, int min_col, int max_col, + int min_row, int max_row) { + mv->col = clamp(mv->col, min_col, max_col); + mv->row = clamp(mv->row, min_row, max_row); +} + #endif // VP9_COMMON_VP9_MV_H_
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index ae009b0..3b72f41 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c
@@ -11,6 +11,65 @@ #include "vp9/common/vp9_mvref_common.h" #define MVREF_NEIGHBOURS 8 + +typedef enum { + BOTH_ZERO = 0, + ZERO_PLUS_PREDICTED = 1, + BOTH_PREDICTED = 2, + NEW_PLUS_NON_INTRA = 3, + BOTH_NEW = 4, + INTRA_PLUS_NON_INTRA = 5, + BOTH_INTRA = 6, + INVALID_CASE = 9 +} motion_vector_context; + +// This is used to figure out a context for the ref blocks. The code flattens +// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by +// adding 9 for each intra block, 3 for each zero mv and 1 for each new +// motion vector. This single number is then converted into a context +// with a single lookup ( counter_to_context ). +static const int mode_2_counter[MB_MODE_COUNT] = { + 9, // DC_PRED + 9, // V_PRED + 9, // H_PRED + 9, // D45_PRED + 9, // D135_PRED + 9, // D117_PRED + 9, // D153_PRED + 9, // D27_PRED + 9, // D63_PRED + 9, // TM_PRED + 0, // NEARESTMV + 0, // NEARMV + 3, // ZEROMV + 1, // NEWMV +}; + +// There are 3^3 different combinations of 3 counts that can be either 0,1 or +// 2. However the actual count can never be greater than 2 so the highest +// counter we need is 18. 9 is an invalid counter that's never used. 
+static const int counter_to_context[19] = { + BOTH_PREDICTED, // 0 + NEW_PLUS_NON_INTRA, // 1 + BOTH_NEW, // 2 + ZERO_PLUS_PREDICTED, // 3 + NEW_PLUS_NON_INTRA, // 4 + INVALID_CASE, // 5 + BOTH_ZERO, // 6 + INVALID_CASE, // 7 + INVALID_CASE, // 8 + INTRA_PLUS_NON_INTRA, // 9 + INTRA_PLUS_NON_INTRA, // 10 + INVALID_CASE, // 11 + INTRA_PLUS_NON_INTRA, // 12 + INVALID_CASE, // 13 + INVALID_CASE, // 14 + INVALID_CASE, // 15 + INVALID_CASE, // 16 + INVALID_CASE, // 17 + BOTH_INTRA // 18 +}; + static const int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = { // SB4X4 {{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}}, @@ -39,263 +98,212 @@ // SB64X64 {{3, -1}, {-1, 3}, {4, -1}, {-1, 4}, {-1, -1}, {0, -1}, {-1, 0}, {6, -1}} }; + +static const int idx_n_column_to_subblock[4][2] = { + {1, 2}, + {1, 3}, + {3, 2}, + {3, 3} +}; + // clamp_mv_ref #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units static void clamp_mv_ref(const MACROBLOCKD *xd, int_mv *mv) { - mv->as_mv.col = clamp(mv->as_mv.col, xd->mb_to_left_edge - MV_BORDER, - xd->mb_to_right_edge + MV_BORDER); - mv->as_mv.row = clamp(mv->as_mv.row, xd->mb_to_top_edge - MV_BORDER, - xd->mb_to_bottom_edge + MV_BORDER); + clamp_mv(&mv->as_mv, xd->mb_to_left_edge - MV_BORDER, + xd->mb_to_right_edge + MV_BORDER, + xd->mb_to_top_edge - MV_BORDER, + xd->mb_to_bottom_edge + MV_BORDER); } -// Gets a candidate reference motion vector from the given mode info -// structure if one exists that matches the given reference frame. 
-static int get_matching_candidate(const MODE_INFO *candidate_mi, - MV_REFERENCE_FRAME ref_frame, - int_mv *c_mv, int block_idx) { - if (ref_frame == candidate_mi->mbmi.ref_frame[0]) { - if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) - c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[0].as_int; - else - c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; - } else if (ref_frame == candidate_mi->mbmi.ref_frame[1]) { - if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) - c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[1].as_int; - else - c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; - } else { - return 0; - } - - return 1; -} - -// Gets candidate reference motion vector(s) from the given mode info -// structure if they exists and do NOT match the given reference frame. -static void get_non_matching_candidates(const MODE_INFO *candidate_mi, - MV_REFERENCE_FRAME ref_frame, - MV_REFERENCE_FRAME *c_ref_frame, - int_mv *c_mv, - MV_REFERENCE_FRAME *c2_ref_frame, - int_mv *c2_mv) { - - c_mv->as_int = 0; - c2_mv->as_int = 0; - *c_ref_frame = INTRA_FRAME; - *c2_ref_frame = INTRA_FRAME; - - // If first candidate not valid neither will be. - if (candidate_mi->mbmi.ref_frame[0] > INTRA_FRAME) { - // First candidate - if (candidate_mi->mbmi.ref_frame[0] != ref_frame) { - *c_ref_frame = candidate_mi->mbmi.ref_frame[0]; - c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; - } - - // Second candidate - if ((candidate_mi->mbmi.ref_frame[1] > INTRA_FRAME) && - (candidate_mi->mbmi.ref_frame[1] != ref_frame) && - (candidate_mi->mbmi.mv[1].as_int != candidate_mi->mbmi.mv[0].as_int)) { - *c2_ref_frame = candidate_mi->mbmi.ref_frame[1]; - c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; - } - } +// This function returns either the appropriate sub block or block's mv +// on whether the block_size < 8x8 and we have check_sub_blocks set. 
+static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, + int check_sub_blocks, int which_mv, + int search_col, int block_idx) { + return (check_sub_blocks && candidate->mbmi.sb_type < BLOCK_SIZE_SB8X8 + ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] + .as_mv[which_mv] + : candidate->mbmi.mv[which_mv]); } // Performs mv sign inversion if indicated by the reference frame combination. -static void scale_mv(MACROBLOCKD *xd, MV_REFERENCE_FRAME this_ref_frame, - MV_REFERENCE_FRAME candidate_ref_frame, - int_mv *candidate_mv, int *ref_sign_bias) { +static INLINE int_mv scale_mv(const MODE_INFO *candidate, const int which_mv, + const MV_REFERENCE_FRAME this_ref_frame, + const int *ref_sign_bias) { + int_mv return_mv = candidate->mbmi.mv[which_mv]; // Sign inversion where appropriate. - if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) { - candidate_mv->as_mv.row = -candidate_mv->as_mv.row; - candidate_mv->as_mv.col = -candidate_mv->as_mv.col; + if (ref_sign_bias[candidate->mbmi.ref_frame[which_mv]] != + ref_sign_bias[this_ref_frame]) { + return_mv.as_mv.row *= -1; + return_mv.as_mv.col *= -1; } + return return_mv; } -// Add a candidate mv. -// Discard if it has already been seen. -static void add_candidate_mv(int_mv *mv_list, int *mv_scores, - int *candidate_count, int_mv candidate_mv, - int weight) { - if (*candidate_count == 0) { - mv_list[0].as_int = candidate_mv.as_int; - mv_scores[0] = weight; - *candidate_count += 1; - } else if ((*candidate_count == 1) && - (candidate_mv.as_int != mv_list[0].as_int)) { - mv_list[1].as_int = candidate_mv.as_int; - mv_scores[1] = weight; - *candidate_count += 1; +// This macro is used to add a motion vector mv_ref list if it isn't +// already in the list. If it's the second motion vector it will also +// skip all additional processing and jump to done! 
+#define ADD_MV_REF_LIST(MV) \ + if (refmv_count) { \ + if ((MV).as_int != mv_ref_list[0].as_int) { \ + mv_ref_list[refmv_count] = (MV); \ + goto Done; \ + } \ + } else { \ + mv_ref_list[refmv_count++] = (MV); \ } + +// If either reference frame is different, not INTRA, and they +// are different from each other scale and add the mv to our list. +#define IF_DIFF_REF_FRAME_ADD_MV(CANDIDATE) \ + if ((CANDIDATE)->mbmi.ref_frame[0] != ref_frame) { \ + ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \ + } \ + if ((CANDIDATE)->mbmi.ref_frame[1] != ref_frame && \ + (CANDIDATE)->mbmi.ref_frame[1] > INTRA_FRAME && \ + (CANDIDATE)->mbmi.mv[1].as_int != (CANDIDATE)->mbmi.mv[0].as_int) { \ + ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \ + } + +// Checks that the given mi_row, mi_col and search point +// are inside the borders of the tile. +static INLINE int is_inside(const int mi_col, const int mi_row, + const int cur_tile_mi_col_start, + const int cur_tile_mi_col_end, const int mi_rows, + const int (*mv_ref_search)[2], int idx) { + int mi_search_col; + const int mi_search_row = mi_row + mv_ref_search[idx][1];; + + // Check that the candidate is within the border. We only need to check + // the left side because all the positive right side ones are for blocks that + // are large enough to support the + value they have within their border. + if (mi_search_row < 0) + return 0; + + mi_search_col = mi_col + mv_ref_search[idx][0]; + if (mi_search_col < cur_tile_mi_col_start) + return 0; + + return 1; } // This function searches the neighbourhood of a given MB/SB // to try and find candidate reference vectors. 
-// void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, - MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, int *ref_sign_bias, - int block_idx) { - int i; - MODE_INFO *candidate_mi; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - int_mv c_refmv; - int_mv c2_refmv; - MV_REFERENCE_FRAME c_ref_frame; - MV_REFERENCE_FRAME c2_ref_frame; - int candidate_scores[MAX_MV_REF_CANDIDATES] = { 0 }; + const MODE_INFO *lf_here, + const MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, const int *ref_sign_bias, + const int block_idx, + const int mi_row, const int mi_col) { + int idx; + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int refmv_count = 0; const int (*mv_ref_search)[2] = mv_ref_blocks[mbmi->sb_type]; - const int mi_col = get_mi_col(xd); - const int mi_row = get_mi_row(xd); - int intra_count = 0; - int zero_count = 0; - int newmv_count = 0; - int x_idx = 0, y_idx = 0; + const MODE_INFO *candidate; + const int check_sub_blocks = block_idx >= 0; + int different_ref_found = 0; + int context_counter = 0; - // Blank the reference vector lists and other local structures. - vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES); + // Blank the reference vector list + vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); - if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { - x_idx = block_idx & 1; - y_idx = block_idx >> 1; - } + // The nearest 2 blocks are treated differently + // if the size < 8x8 we get the mv from the bmi substructure, + // and we also need to keep a mode count. 
+ for (idx = 0; idx < 2; ++idx) { + if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, + cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx)) + continue; - // We first scan for candidate vectors that match the current reference frame - // Look at nearest neigbours - for (i = 0; i < 2; ++i) { - const int mi_search_col = mi_col + mv_ref_search[i][0]; - const int mi_search_row = mi_row + mv_ref_search[i][1]; - if ((mi_search_col >= cm->cur_tile_mi_col_start) && - (mi_search_col < cm->cur_tile_mi_col_end) && - (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) { - int b; + candidate = here + mv_ref_search[idx][0] + + mv_ref_search[idx][1] * xd->mode_info_stride; - candidate_mi = here + mv_ref_search[i][0] + - (mv_ref_search[i][1] * xd->mode_info_stride); + // Keep counts for entropy encoding. + context_counter += mode_2_counter[candidate->mbmi.mode]; - if (block_idx >= 0) { - if (mv_ref_search[i][0]) - b = 1 + y_idx * 2; - else - b = 2 + x_idx; - } else { - b = -1; - } - if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, b)) { - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 16); - } - - // Count number of neihgbours coded intra and zeromv - intra_count += (candidate_mi->mbmi.mode < NEARESTMV); - zero_count += (candidate_mi->mbmi.mode == ZEROMV); - newmv_count += (candidate_mi->mbmi.mode >= NEWMV); - } - } - - // More distant neigbours - for (i = 2; (i < MVREF_NEIGHBOURS) && - (refmv_count < MAX_MV_REF_CANDIDATES); ++i) { - const int mi_search_col = mi_col + mv_ref_search[i][0]; - const int mi_search_row = mi_row + mv_ref_search[i][1]; - if ((mi_search_col >= cm->cur_tile_mi_col_start) && - (mi_search_col < cm->cur_tile_mi_col_end) && - (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) { - candidate_mi = here + mv_ref_search[i][0] + - (mv_ref_search[i][1] * xd->mode_info_stride); - - if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) { - add_candidate_mv(mv_ref_list, candidate_scores, - 
&refmv_count, c_refmv, 16); - } - } - } - - // Look in the last frame if it exists - if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) { - candidate_mi = lf_here; - if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) { - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 16); - } - } - - // If we have not found enough candidates consider ones where the - // reference frame does not match. Break out when we have - // MAX_MV_REF_CANDIDATES candidates. - // Look first at spatial neighbours - for (i = 0; (i < MVREF_NEIGHBOURS) && - (refmv_count < MAX_MV_REF_CANDIDATES); ++i) { - const int mi_search_col = mi_col + mv_ref_search[i][0]; - const int mi_search_row = mi_row + mv_ref_search[i][1]; - if ((mi_search_col >= cm->cur_tile_mi_col_start) && - (mi_search_col < cm->cur_tile_mi_col_end) && - (mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) { - candidate_mi = here + mv_ref_search[i][0] + - (mv_ref_search[i][1] * xd->mode_info_stride); - - get_non_matching_candidates(candidate_mi, ref_frame, - &c_ref_frame, &c_refmv, - &c2_ref_frame, &c2_refmv); - - if (c_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 1); - } - - if (c2_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias); - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c2_refmv, 1); - } - } - } - - // Look at the last frame if it exists - if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) { - candidate_mi = lf_here; - get_non_matching_candidates(candidate_mi, ref_frame, - &c_ref_frame, &c_refmv, - &c2_ref_frame, &c2_refmv); - - if (c_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c_refmv, 1); - } - - if (c2_ref_frame != INTRA_FRAME) { - scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, 
ref_sign_bias); - add_candidate_mv(mv_ref_list, candidate_scores, - &refmv_count, c2_refmv, 1); - } - } - - if (!intra_count) { - if (!newmv_count) { - // 0 = both zero mv - // 1 = one zero mv + one a predicted mv - // 2 = two predicted mvs - mbmi->mb_mode_context[ref_frame] = 2 - zero_count; + // Check if the candidate comes from the same reference frame. + if (candidate->mbmi.ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 0, + mv_ref_search[idx][0], block_idx)); + different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame; } else { - // 3 = one predicted/zero and one new mv - // 4 = two new mvs - mbmi->mb_mode_context[ref_frame] = 2 + newmv_count; + different_ref_found = 1; + if (candidate->mbmi.ref_frame[1] == ref_frame) { + // Add second motion vector if it has the same ref_frame. + ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 1, + mv_ref_search[idx][0], block_idx)); + } } - } else { - // 5 = one intra neighbour + x - // 6 = two intra neighbours - mbmi->mb_mode_context[ref_frame] = 4 + intra_count; } + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. + for (; idx < MVREF_NEIGHBOURS; ++idx) { + if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, + cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx)) + continue; + + candidate = here + mv_ref_search[idx][0] + + mv_ref_search[idx][1] * xd->mode_info_stride; + + if (candidate->mbmi.ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(candidate->mbmi.mv[0]); + different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame; + } else { + different_ref_found = 1; + if (candidate->mbmi.ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST(candidate->mbmi.mv[1]); + } + } + } + + // Check the last frame's mode and mv info. 
+ if (lf_here != NULL) { + if (lf_here->mbmi.ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(lf_here->mbmi.mv[0]); + } else if (lf_here->mbmi.ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST(lf_here->mbmi.mv[1]); + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found) { + for (idx = 0; idx < MVREF_NEIGHBOURS; ++idx) { + if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, + cm->cur_tile_mi_col_end, cm->mi_rows, mv_ref_search, idx)) + continue; + + candidate = here + mv_ref_search[idx][0] + + mv_ref_search[idx][1] * xd->mode_info_stride; + + // If the candidate is INTRA we don't want to consider its mv. + if (candidate->mbmi.ref_frame[0] == INTRA_FRAME) + continue; + + IF_DIFF_REF_FRAME_ADD_MV(candidate); + } + } + + // Since we still don't have a candidate we'll try the last frame. + if (lf_here != NULL && lf_here->mbmi.ref_frame[0] != INTRA_FRAME) { + IF_DIFF_REF_FRAME_ADD_MV(lf_here); + } + + Done: + + mbmi->mb_mode_context[ref_frame] = counter_to_context[context_counter]; + // Clamp vectors - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { - clamp_mv_ref(xd, &mv_ref_list[i]); + for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) { + clamp_mv_ref(xd, &mv_ref_list[idx]); } } + +#undef ADD_MV_REF_LIST +#undef IF_DIFF_REF_FRAME_ADD_MV
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h index 7290f10..c5f89eb 100644 --- a/vp9/common/vp9_mvref_common.h +++ b/vp9/common/vp9_mvref_common.h
@@ -17,11 +17,13 @@ void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here, - MODE_INFO *lf_here, - MV_REFERENCE_FRAME ref_frame, + const MODE_INFO *lf_here, + const MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, - int *ref_sign_bias, - int block_idx); + const int *ref_sign_bias, + const int block_idx, + const int mi_row, + const int mi_col); static INLINE void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, @@ -29,9 +31,10 @@ MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, - int *ref_sign_bias) { + int *ref_sign_bias, + int mi_row, int mi_col) { vp9_find_mv_refs_idx(cm, xd, here, lf_here, ref_frame, - mv_ref_list, ref_sign_bias, -1); + mv_ref_list, ref_sign_bias, -1, mi_row, mi_col); } #endif // VP9_COMMON_VP9_MVREF_COMMON_H_
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 878ea31..152a932 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h
@@ -64,7 +64,7 @@ [COEF_BANDS][PREV_COEF_CONTEXTS]; unsigned int switchable_interp[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS]; - unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2]; + unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES]; unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; unsigned int single_ref[REF_CONTEXTS][2][2]; @@ -240,8 +240,7 @@ xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); } -static int check_bsize_coverage(VP9_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, +static int check_bsize_coverage(VP9_COMMON *cm, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize) { int bsl = mi_width_log2(bsize), bs = 1 << bsl; int ms = bs / 2; @@ -278,14 +277,6 @@ xd->right_available = (mi_col + bw < cm->cur_tile_mi_col_end); } -static int get_mi_row(const MACROBLOCKD *xd) { - return ((-xd->mb_to_top_edge) >> (3 + LOG2_MI_SIZE)); -} - -static int get_mi_col(const MACROBLOCKD *xd) { - return ((-xd->mb_to_left_edge) >> (3 + LOG2_MI_SIZE)); -} - static int get_token_alloc(int mb_rows, int mb_cols) { return mb_rows * mb_cols * (48 * 16 + 4); }
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index e8bcdea..795962a 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c
@@ -55,34 +55,28 @@ } // Returns a context number for the given MB prediction signal unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) { - int pred_context; const MODE_INFO *const mi = xd->mode_info_context; const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi; const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi; const int left_in_image = xd->left_available && left_mbmi->mb_in_image; const int above_in_image = xd->up_available && above_mbmi->mb_in_image; - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. - if (above_in_image && left_in_image) { // both edges available - if (left_mbmi->ref_frame[0] == INTRA_FRAME && - above_mbmi->ref_frame[0] == INTRA_FRAME) { // intra/intra (3) - pred_context = 3; - } else { // intra/inter (1) or inter/inter (0) - pred_context = left_mbmi->ref_frame[0] == INTRA_FRAME || - above_mbmi->ref_frame[0] == INTRA_FRAME; - } - } else if (above_in_image || left_in_image) { // one edge available - const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; + const int left_intra = !is_inter_block(left_mbmi); + const int above_intra = !is_inter_block(above_mbmi); - // inter: 0, intra: 2 - pred_context = 2 * (edge_mbmi->ref_frame[0] == INTRA_FRAME); - } else { - pred_context = 0; - } - assert(pred_context >= 0 && pred_context < INTRA_INTER_CONTEXTS); - return pred_context; + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + // 0 - inter/inter, inter/--, --/inter, --/-- + // 1 - intra/inter, inter/intra + // 2 - intra/--, --/intra + // 3 - intra/intra + if (above_in_image && left_in_image) // both edges available + return left_intra && above_intra ? 
3 + : left_intra || above_intra; + else if (above_in_image || left_in_image) // one edge available + return 2 * (above_in_image ? above_intra : left_intra); + else + return 0; } // Returns a context number for the given MB prediction signal unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h index e4b6575..238290b 100644 --- a/vp9/common/vp9_pred_common.h +++ b/vp9/common/vp9_pred_common.h
@@ -110,9 +110,9 @@ static const vp9_prob *get_tx_probs(BLOCK_SIZE_TYPE bsize, uint8_t context, const struct tx_probs *tx_probs) { - if (bsize < BLOCK_SIZE_MB16X16) + if (bsize < BLOCK_16X16) return tx_probs->p8x8[context]; - else if (bsize < BLOCK_SIZE_SB32X32) + else if (bsize < BLOCK_32X32) return tx_probs->p16x16[context]; else return tx_probs->p32x32[context]; @@ -127,9 +127,9 @@ static void update_tx_counts(BLOCK_SIZE_TYPE bsize, uint8_t context, TX_SIZE tx_size, struct tx_counts *tx_counts) { - if (bsize >= BLOCK_SIZE_SB32X32) + if (bsize >= BLOCK_32X32) tx_counts->p32x32[context][tx_size]++; - else if (bsize >= BLOCK_SIZE_MB16X16) + else if (bsize >= BLOCK_16X16) tx_counts->p16x16[context][tx_size]++; else tx_counts->p8x8[context][tx_size]++;
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 8c49393..0b65e06 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c
@@ -197,14 +197,14 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, - const int_mv *src_mv, + const MV *src_mv, const struct scale_factors *scale, int w, int h, int weight, const struct subpix_fn_table *subpix, enum mv_precision precision) { const MV32 mv = precision == MV_PRECISION_Q4 - ? scale->scale_mv_q4(&src_mv->as_mv, scale) - : scale->scale_mv_q3_to_q4(&src_mv->as_mv, scale); + ? scale->scale_mv_q4(src_mv, scale) + : scale->scale_mv_q3_to_q4(src_mv, scale); const int subpel_x = mv.col & 15; const int subpel_y = mv.row & 15; @@ -220,43 +220,44 @@ return (value < 0 ? value - 2 : value + 2) / 4; } -static int mi_mv_pred_row_q4(const MODE_INFO *mi, int idx) { - return round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + - mi->bmi[1].as_mv[idx].as_mv.row + - mi->bmi[2].as_mv[idx].as_mv.row + - mi->bmi[3].as_mv[idx].as_mv.row); +static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { + MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + + mi->bmi[1].as_mv[idx].as_mv.row + + mi->bmi[2].as_mv[idx].as_mv.row + + mi->bmi[3].as_mv[idx].as_mv.row), + round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + + mi->bmi[1].as_mv[idx].as_mv.col + + mi->bmi[2].as_mv[idx].as_mv.col + + mi->bmi[3].as_mv[idx].as_mv.col) }; + return res; } -static int mi_mv_pred_col_q4(const MODE_INFO *mi, int idx) { - return round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + - mi->bmi[1].as_mv[idx].as_mv.col + - mi->bmi[2].as_mv[idx].as_mv.col + - mi->bmi[3].as_mv[idx].as_mv.col); -} + // TODO(jkoleszar): yet another mv clamping function :-( MV clamp_mv_to_umv_border_sb(const MV *src_mv, int bwl, int bhl, int ss_x, int ss_y, int mb_to_left_edge, int mb_to_top_edge, int mb_to_right_edge, int mb_to_bottom_edge) { - /* If the MV points so far into the UMV border that no visible pixels - * are used for reconstruction, the subpel part of the MV can be - * discarded and the MV limited to 16 pixels with equivalent results. 
- */ + // If the MV points so far into the UMV border that no visible pixels + // are used for reconstruction, the subpel part of the MV can be + // discarded and the MV limited to 16 pixels with equivalent results. const int spel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 4; const int spel_right = spel_left - (1 << 4); const int spel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 4; const int spel_bottom = spel_top - (1 << 4); - MV clamped_mv; - + MV clamped_mv = { + src_mv->row << (1 - ss_y), + src_mv->col << (1 - ss_x) + }; assert(ss_x <= 1); assert(ss_y <= 1); - clamped_mv.col = clamp(src_mv->col << (1 - ss_x), - (mb_to_left_edge << (1 - ss_x)) - spel_left, - (mb_to_right_edge << (1 - ss_x)) + spel_right); - clamped_mv.row = clamp(src_mv->row << (1 - ss_y), - (mb_to_top_edge << (1 - ss_y)) - spel_top, - (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom); + + clamp_mv(&clamped_mv, (mb_to_left_edge << (1 - ss_x)) - spel_left, + (mb_to_right_edge << (1 - ss_x)) + spel_right, + (mb_to_top_edge << (1 - ss_y)) - spel_top, + (mb_to_bottom_edge << (1 - ss_y)) + spel_bottom); + return clamped_mv; } @@ -298,44 +299,30 @@ // dest uint8_t *const dst = arg->dst[plane] + arg->dst_stride[plane] * y + x; - // motion vector - const MV *mv; - MV split_chroma_mv; - int_mv clamped_mv; + // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the + // same MV (the average of the 4 luma MVs) but we could do something + // smarter for non-4:2:0. Just punt for now, pending the changes to get + // rid of SPLITMV mode entirely. + const MV mv = mi->mbmi.sb_type < BLOCK_SIZE_SB8X8 + ? (plane == 0 ? mi->bmi[block].as_mv[which_mv].as_mv + : mi_mv_pred_q4(mi, which_mv)) + : mi->mbmi.mv[which_mv].as_mv; - if (mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) { - if (plane == 0) { - mv = &mi->bmi[block].as_mv[which_mv].as_mv; - } else { - // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the - // same MV (the average of the 4 luma MVs) but we could do something - // smarter for non-4:2:0. 
Just punt for now, pending the changes to get - // rid of SPLITMV mode entirely. - split_chroma_mv.row = mi_mv_pred_row_q4(mi, which_mv); - split_chroma_mv.col = mi_mv_pred_col_q4(mi, which_mv); - mv = &split_chroma_mv; - } - } else { - mv = &mi->mbmi.mv[which_mv].as_mv; - } - - /* TODO(jkoleszar): This clamping is done in the incorrect place for the - * scaling case. It needs to be done on the scaled MV, not the pre-scaling - * MV. Note however that it performs the subsampling aware scaling so - * that the result is always q4. - */ - clamped_mv.as_mv = clamp_mv_to_umv_border_sb(mv, bwl, bhl, - xd->plane[plane].subsampling_x, - xd->plane[plane].subsampling_y, - xd->mb_to_left_edge, - xd->mb_to_top_edge, - xd->mb_to_right_edge, - xd->mb_to_bottom_edge); + // TODO(jkoleszar): This clamping is done in the incorrect place for the + // scaling case. It needs to be done on the scaled MV, not the pre-scaling + // MV. Note however that it performs the subsampling aware scaling so + // that the result is always q4. + const MV res_mv = clamp_mv_to_umv_border_sb(&mv, bwl, bhl, + xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y, + xd->mb_to_left_edge, + xd->mb_to_top_edge, + xd->mb_to_right_edge, + xd->mb_to_bottom_edge); scale->set_scaled_offsets(scale, arg->y + y, arg->x + x); - vp9_build_inter_predictor(pre, pre_stride, dst, arg->dst_stride[plane], - &clamped_mv, &xd->scale_factor[which_mv], + &res_mv, &xd->scale_factor[which_mv], 4 << pred_w, 4 << pred_h, which_mv, &xd->subpix, MV_PRECISION_Q4); }
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index e37750d..6ec7323 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h
@@ -39,7 +39,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, - const int_mv *mv_q3, + const MV *mv_q3, const struct scale_factors *scale, int w, int h, int do_avg, const struct subpix_fn_table *subpix,
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index f004d1c..2b16cfa 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh
@@ -7,9 +7,7 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_enums.h" -struct loop_filter_info; struct macroblockd; -struct loop_filter_info; /* Encoder forward decls */ struct macroblock; @@ -22,7 +20,11 @@ } forward_decls vp9_common_forward_decls -[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 +# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. +[ $arch = "x86_64" -o ! "$CONFIG_PIC" = "yes" -o "$CONFIG_FORCE_X86INC" == "yes" ] && mmx_x86inc=mmx && sse2_x86inc=sse2 && ssse3_x86inc=ssse3 + +# this variable is for functions that are 64 bit only. +[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && ssse3_x86_64=ssse3 # # Dequant @@ -214,7 +216,7 @@ # Loopfilter # prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_mb_lpf_vertical_edge_w sse2 +specialize vp9_mb_lpf_vertical_edge_w sse2 neon prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" specialize vp9_mbloop_filter_vertical_edge sse2 neon @@ -223,7 +225,7 @@ specialize vp9_loop_filter_vertical_edge mmx neon prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_mb_lpf_horizontal_edge_w sse2 +specialize vp9_mb_lpf_horizontal_edge_w sse2 neon prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" specialize vp9_mbloop_filter_horizontal_edge sse2 neon @@ -265,10 +267,10 @@ # Sub Pixel Filters # prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_copy sse2 +specialize vp9_convolve_copy 
$sse2_x86inc prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_avg sse2 +specialize vp9_convolve_avg $sse2_x86inc prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" specialize vp9_convolve8 ssse3 neon @@ -321,9 +323,6 @@ prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output" specialize vp9_short_idct1_32x32 -prototype void vp9_short_idct10_32x32_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_idct10_32x32_add - prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type" specialize vp9_short_iht4x4_add sse2 @@ -705,9 +704,7 @@ specialize vp9_block_error sse2 prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride" -specialize vp9_subtract_block sse2 - -[ $arch = "x86_64" ] && ssse3_x86_64=ssse3 +specialize vp9_subtract_block $sse2_x86inc prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" specialize vp9_quantize_b $ssse3_x86_64 @@ -719,13 +716,11 @@ # Structured Similarity (SSIM) # if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then - [ $arch = "x86_64" ] && sse2_on_x86_64=sse2 - prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - 
specialize vp9_ssim_parms_8x8 $sse2_on_x86_64 + specialize vp9_ssim_parms_8x8 $sse2_x86_64 prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_16x16 $sse2_on_x86_64 + specialize vp9_ssim_parms_16x16 $sse2_x86_64 fi # fdct functions
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index e175fd2..8f740f4 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -2813,6 +2813,12 @@ } } +#define LOAD_DQCOEFF(reg, input) \ + { \ + reg = _mm_load_si128((__m128i *) input); \ + input += 8; \ + } \ + void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); @@ -2880,48 +2886,126 @@ stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, stp2_30, stp2_31; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i, j; + int i, j, i32; + __m128i zero_idx[16]; + int zero_flag[2]; // We work on a 8x32 block each time, and loop 8 times for 2-D 32x32 idct. for (i = 0; i < 8; i++) { + i32 = (i << 5); if (i < 4) { // First 1-D idct // Load input data. - in0 = _mm_load_si128((__m128i *)input); - in8 = _mm_load_si128((__m128i *)(input + 8 * 1)); - in16 = _mm_load_si128((__m128i *)(input + 8 * 2)); - in24 = _mm_load_si128((__m128i *)(input + 8 * 3)); - in1 = _mm_load_si128((__m128i *)(input + 8 * 4)); - in9 = _mm_load_si128((__m128i *)(input + 8 * 5)); - in17 = _mm_load_si128((__m128i *)(input + 8 * 6)); - in25 = _mm_load_si128((__m128i *)(input + 8 * 7)); - in2 = _mm_load_si128((__m128i *)(input + 8 * 8)); - in10 = _mm_load_si128((__m128i *)(input + 8 * 9)); - in18 = _mm_load_si128((__m128i *)(input + 8 * 10)); - in26 = _mm_load_si128((__m128i *)(input + 8 * 11)); - in3 = _mm_load_si128((__m128i *)(input + 8 * 12)); - in11 = _mm_load_si128((__m128i *)(input + 8 * 13)); - in19 = _mm_load_si128((__m128i *)(input + 8 * 14)); - in27 = _mm_load_si128((__m128i *)(input + 8 * 15)); + LOAD_DQCOEFF(in0, input); + LOAD_DQCOEFF(in8, input); + LOAD_DQCOEFF(in16, input); + LOAD_DQCOEFF(in24, input); + LOAD_DQCOEFF(in1, input); + LOAD_DQCOEFF(in9, input); + LOAD_DQCOEFF(in17, input); + LOAD_DQCOEFF(in25, input); + LOAD_DQCOEFF(in2, input); + LOAD_DQCOEFF(in10, input); + LOAD_DQCOEFF(in18, input); + LOAD_DQCOEFF(in26, input); + LOAD_DQCOEFF(in3, input); + LOAD_DQCOEFF(in11, input); + LOAD_DQCOEFF(in19, 
input); + LOAD_DQCOEFF(in27, input); - in4 = _mm_load_si128((__m128i *)(input + 8 * 16)); - in12 = _mm_load_si128((__m128i *)(input + 8 * 17)); - in20 = _mm_load_si128((__m128i *)(input + 8 * 18)); - in28 = _mm_load_si128((__m128i *)(input + 8 * 19)); - in5 = _mm_load_si128((__m128i *)(input + 8 * 20)); - in13 = _mm_load_si128((__m128i *)(input + 8 * 21)); - in21 = _mm_load_si128((__m128i *)(input + 8 * 22)); - in29 = _mm_load_si128((__m128i *)(input + 8 * 23)); - in6 = _mm_load_si128((__m128i *)(input + 8 * 24)); - in14 = _mm_load_si128((__m128i *)(input + 8 * 25)); - in22 = _mm_load_si128((__m128i *)(input + 8 * 26)); - in30 = _mm_load_si128((__m128i *)(input + 8 * 27)); - in7 = _mm_load_si128((__m128i *)(input + 8 * 28)); - in15 = _mm_load_si128((__m128i *)(input + 8 * 29)); - in23 = _mm_load_si128((__m128i *)(input + 8 * 30)); - in31 = _mm_load_si128((__m128i *)(input + 8 * 31)); + LOAD_DQCOEFF(in4, input); + LOAD_DQCOEFF(in12, input); + LOAD_DQCOEFF(in20, input); + LOAD_DQCOEFF(in28, input); + LOAD_DQCOEFF(in5, input); + LOAD_DQCOEFF(in13, input); + LOAD_DQCOEFF(in21, input); + LOAD_DQCOEFF(in29, input); + LOAD_DQCOEFF(in6, input); + LOAD_DQCOEFF(in14, input); + LOAD_DQCOEFF(in22, input); + LOAD_DQCOEFF(in30, input); + LOAD_DQCOEFF(in7, input); + LOAD_DQCOEFF(in15, input); + LOAD_DQCOEFF(in23, input); + LOAD_DQCOEFF(in31, input); - input += 256; + // checking if all entries are zero + zero_idx[0] = _mm_or_si128(in0, in1); + zero_idx[1] = _mm_or_si128(in2, in3); + zero_idx[2] = _mm_or_si128(in4, in5); + zero_idx[3] = _mm_or_si128(in6, in7); + zero_idx[4] = _mm_or_si128(in8, in9); + zero_idx[5] = _mm_or_si128(in10, in11); + zero_idx[6] = _mm_or_si128(in12, in13); + zero_idx[7] = _mm_or_si128(in14, in15); + zero_idx[8] = _mm_or_si128(in16, in17); + zero_idx[9] = _mm_or_si128(in18, in19); + zero_idx[10] = _mm_or_si128(in20, in21); + zero_idx[11] = _mm_or_si128(in22, in23); + zero_idx[12] = _mm_or_si128(in24, in25); + zero_idx[13] = _mm_or_si128(in26, in27); + 
zero_idx[14] = _mm_or_si128(in28, in29); + zero_idx[15] = _mm_or_si128(in30, in31); + + zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]); + zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]); + zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]); + zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]); + zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]); + zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]); + zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]); + zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]); + + zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]); + zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]); + zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]); + zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]); + zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]); + zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]); + zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]); + + zero_idx[0] = _mm_unpackhi_epi64(zero_idx[14], zero_idx[14]); + zero_idx[1] = _mm_or_si128(zero_idx[0], zero_idx[14]); + zero_idx[2] = _mm_srli_epi64(zero_idx[1], 32); + zero_flag[0] = _mm_cvtsi128_si32(zero_idx[1]); + zero_flag[1] = _mm_cvtsi128_si32(zero_idx[2]); + + if (!zero_flag[0] && !zero_flag[1]) { + col[i32 + 0] = _mm_setzero_si128(); + col[i32 + 1] = _mm_setzero_si128(); + col[i32 + 2] = _mm_setzero_si128(); + col[i32 + 3] = _mm_setzero_si128(); + col[i32 + 4] = _mm_setzero_si128(); + col[i32 + 5] = _mm_setzero_si128(); + col[i32 + 6] = _mm_setzero_si128(); + col[i32 + 7] = _mm_setzero_si128(); + col[i32 + 8] = _mm_setzero_si128(); + col[i32 + 9] = _mm_setzero_si128(); + col[i32 + 10] = _mm_setzero_si128(); + col[i32 + 11] = _mm_setzero_si128(); + col[i32 + 12] = _mm_setzero_si128(); + col[i32 + 13] = _mm_setzero_si128(); + col[i32 + 14] = _mm_setzero_si128(); + col[i32 + 15] = _mm_setzero_si128(); + col[i32 + 16] = _mm_setzero_si128(); + col[i32 + 17] = _mm_setzero_si128(); + col[i32 + 18] = _mm_setzero_si128(); + col[i32 + 
19] = _mm_setzero_si128(); + col[i32 + 20] = _mm_setzero_si128(); + col[i32 + 21] = _mm_setzero_si128(); + col[i32 + 22] = _mm_setzero_si128(); + col[i32 + 23] = _mm_setzero_si128(); + col[i32 + 24] = _mm_setzero_si128(); + col[i32 + 25] = _mm_setzero_si128(); + col[i32 + 26] = _mm_setzero_si128(); + col[i32 + 27] = _mm_setzero_si128(); + col[i32 + 28] = _mm_setzero_si128(); + col[i32 + 29] = _mm_setzero_si128(); + col[i32 + 30] = _mm_setzero_si128(); + col[i32 + 31] = _mm_setzero_si128(); + continue; + } // Transpose 32x8 block to 8x32 block TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, @@ -3292,38 +3376,38 @@ // final stage if (i < 4) { // 1_D: Store 32 intermediate results for each 8x32 block. - col[i * 32 + 0] = _mm_add_epi16(stp1_0, stp1_31); - col[i * 32 + 1] = _mm_add_epi16(stp1_1, stp1_30); - col[i * 32 + 2] = _mm_add_epi16(stp1_2, stp1_29); - col[i * 32 + 3] = _mm_add_epi16(stp1_3, stp1_28); - col[i * 32 + 4] = _mm_add_epi16(stp1_4, stp1_27); - col[i * 32 + 5] = _mm_add_epi16(stp1_5, stp1_26); - col[i * 32 + 6] = _mm_add_epi16(stp1_6, stp1_25); - col[i * 32 + 7] = _mm_add_epi16(stp1_7, stp1_24); - col[i * 32 + 8] = _mm_add_epi16(stp1_8, stp1_23); - col[i * 32 + 9] = _mm_add_epi16(stp1_9, stp1_22); - col[i * 32 + 10] = _mm_add_epi16(stp1_10, stp1_21); - col[i * 32 + 11] = _mm_add_epi16(stp1_11, stp1_20); - col[i * 32 + 12] = _mm_add_epi16(stp1_12, stp1_19); - col[i * 32 + 13] = _mm_add_epi16(stp1_13, stp1_18); - col[i * 32 + 14] = _mm_add_epi16(stp1_14, stp1_17); - col[i * 32 + 15] = _mm_add_epi16(stp1_15, stp1_16); - col[i * 32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); - col[i * 32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); - col[i * 32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); - col[i * 32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); - col[i * 32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); - col[i * 32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); - col[i * 32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); - col[i * 32 + 23] = _mm_sub_epi16(stp1_8, 
stp1_23); - col[i * 32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); - col[i * 32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); - col[i * 32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); - col[i * 32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); - col[i * 32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); - col[i * 32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); - col[i * 32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); - col[i * 32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); + col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31); + col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30); + col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29); + col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28); + col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27); + col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26); + col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25); + col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24); + col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23); + col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22); + col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21); + col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20); + col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19); + col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18); + col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17); + col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16); + col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); + col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); + col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); + col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); + col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); + col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); + col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); + col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); + col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); + col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); + col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); + col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); + col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); + col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); + col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); + col[i32 + 
31] = _mm_sub_epi16(stp1_0, stp1_31); } else { const __m128i zero = _mm_setzero_si128();
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index b01d26f..a3e2ad3 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c
@@ -30,8 +30,12 @@ return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p); } -static MB_PREDICTION_MODE read_inter_mode(vp9_reader *r, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(r, vp9_inter_mode_tree, p); +static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, + uint8_t context) { + MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree, + cm->fc.inter_mode_probs[context]); + ++cm->counts.inter_mode[context][inter_mode_offset(mode)]; + return mode; } static int read_segment_id(vp9_reader *r, const struct segmentation *seg) { @@ -43,9 +47,9 @@ const uint8_t context = vp9_get_pred_context_tx_size(xd); const vp9_prob *tx_probs = get_tx_probs(bsize, context, &cm->fc.tx_probs); TX_SIZE tx_size = vp9_read(r, tx_probs[0]); - if (tx_size != TX_4X4 && bsize >= BLOCK_SIZE_MB16X16) { + if (tx_size != TX_4X4 && bsize >= BLOCK_16X16) { tx_size += vp9_read(r, tx_probs[1]); - if (tx_size != TX_8X8 && bsize >= BLOCK_SIZE_SB32X32) + if (tx_size != TX_8X8 && bsize >= BLOCK_32X32) tx_size += vp9_read(r, tx_probs[2]); } @@ -59,13 +63,13 @@ VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_SIZE_SB8X8) + if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) return read_selected_tx_size(cm, xd, bsize, r); - else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_SIZE_SB32X32) + else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_32X32) return TX_32X32; - else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_SIZE_MB16X16) + else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_16X16) return TX_16X16; - else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_SIZE_SB8X8) + else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_8X8) return TX_8X8; else return TX_4X4; @@ -158,6 +162,7 @@ mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize, 1, r); mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = 
NONE; if (bsize >= BLOCK_SIZE_SB8X8) { const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis); @@ -228,16 +233,16 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, - nmv_context_counts *counts, int usehp) { + nmv_context_counts *counts, int allow_hp) { const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, ctx->joints); + const int use_hp = allow_hp && vp9_use_mv_hp(ref); MV diff = {0, 0}; - usehp = usehp && vp9_use_mv_hp(ref); if (mv_joint_vertical(j)) - diff.row = read_mv_component(r, &ctx->comps[0], usehp); + diff.row = read_mv_component(r, &ctx->comps[0], use_hp); if (mv_joint_horizontal(j)) - diff.col = read_mv_component(r, &ctx->comps[1], usehp); + diff.col = read_mv_component(r, &ctx->comps[1], use_hp); vp9_inc_mv(&diff, counts); @@ -245,29 +250,30 @@ mv->col = ref->col + diff.col; } -static void update_mv(vp9_reader *r, vp9_prob *p, vp9_prob upd_p) { - if (vp9_read(r, upd_p)) +static void update_mv(vp9_reader *r, vp9_prob *p) { + if (vp9_read(r, VP9_NMV_UPDATE_PROB)) *p = (vp9_read_literal(r, 7) << 1) | 1; } -static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) { +static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) { int i, j, k; for (j = 0; j < MV_JOINTS - 1; ++j) - update_mv(r, &mvc->joints[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &mvc->joints[j]); for (i = 0; i < 2; ++i) { nmv_component *const comp = &mvc->comps[i]; - update_mv(r, &comp->sign, VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->sign); + for (j = 0; j < MV_CLASSES - 1; ++j) - update_mv(r, &comp->classes[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->classes[j]); for (j = 0; j < CLASS0_SIZE - 1; ++j) - update_mv(r, &comp->class0[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->class0[j]); for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(r, &comp->bits[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->bits[j]); } for (i = 0; i < 2; ++i) { @@ -275,16 +281,16 @@ for (j = 0; j < CLASS0_SIZE; ++j) for (k = 0; k < 3; ++k) 
- update_mv(r, &comp->class0_fp[j][k], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->class0_fp[j][k]); for (j = 0; j < 3; ++j) - update_mv(r, &comp->fp[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->fp[j]); } - if (usehp) { + if (allow_hp) { for (i = 0; i < 2; ++i) { - update_mv(r, &mvc->comps[i].class0_hp, VP9_NMV_UPDATE_PROB); - update_mv(r, &mvc->comps[i].hp, VP9_NMV_UPDATE_PROB); + update_mv(r, &mvc->comps[i].class0_hp); + update_mv(r, &mvc->comps[i].hp); } } } @@ -320,18 +326,19 @@ ref_frame[fix_ref_idx] = cm->comp_fixed_ref; ref_frame[!fix_ref_idx] = cm->comp_var_ref[b]; } else { - const int ref1_ctx = vp9_get_pred_context_single_ref_p1(xd); - ref_frame[1] = NONE; - if (vp9_read(r, fc->single_ref_prob[ref1_ctx][0])) { - const int ref2_ctx = vp9_get_pred_context_single_ref_p2(xd); - const int b = vp9_read(r, fc->single_ref_prob[ref2_ctx][1]); - ref_frame[0] = b ? ALTREF_FRAME : GOLDEN_FRAME; - counts->single_ref[ref1_ctx][0][1]++; - counts->single_ref[ref2_ctx][1][b]++; + const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); + const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); + ++counts->single_ref[ctx0][0][bit0]; + if (bit0) { + const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); + const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]); + ref_frame[0] = bit1 ? 
ALTREF_FRAME : GOLDEN_FRAME; + ++counts->single_ref[ctx1][1][bit1]; } else { ref_frame[0] = LAST_FRAME; - counts->single_ref[ref1_ctx][0][0]++; } + + ref_frame[1] = NONE; } } } @@ -359,16 +366,6 @@ return mode; } -static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src, - int mb_to_left_edge, - int mb_to_right_edge, - int mb_to_top_edge, - int mb_to_bottom_edge) { - dst->as_int = src->as_int; - clamp_mv(dst, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, - mb_to_bottom_edge); -} - static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( VP9D_COMP *pbi, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; @@ -435,7 +432,7 @@ } static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, - vp9_reader *r) { + int mi_row, int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; nmv_context *const nmvc = &cm->fc.nmvc; @@ -443,10 +440,11 @@ int_mv *const mv0 = &mbmi->mv[0]; int_mv *const mv1 = &mbmi->mv[1]; const BLOCK_SIZE_TYPE bsize = mbmi->sb_type; + const int allow_hp = xd->allow_high_precision_mv; int_mv nearest, nearby, best_mv; int_mv nearest_second, nearby_second, best_mv_second; - vp9_prob *mv_ref_p; + uint8_t inter_mode_ctx; MV_REFERENCE_FRAME ref0, ref1; read_ref_frames(pbi, r, mbmi->segment_id, mbmi->ref_frame); @@ -454,16 +452,16 @@ ref1 = mbmi->ref_frame[1]; vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, - ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias); + ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias, + mi_row, mi_col); - mv_ref_p = cm->fc.inter_mode_probs[mbmi->mb_mode_context[ref0]]; + inter_mode_ctx = mbmi->mb_mode_context[ref0]; - if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) mbmi->mode = ZEROMV; - } else if (bsize >= BLOCK_SIZE_SB8X8) { - mbmi->mode = read_inter_mode(r, mv_ref_p); - vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref0]); - } + else if 
(bsize >= BLOCK_SIZE_SB8X8) + mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx); + mbmi->uv_mode = DC_PRED; // nearest, nearby @@ -478,7 +476,8 @@ if (ref1 > INTRA_FRAME) { vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, - ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias); + ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias, + mi_row, mi_col); if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], @@ -495,24 +494,26 @@ for (idx = 0; idx < 2; idx += num_4x4_w) { int_mv blockmv, secondmv; const int j = idy * 2 + idx; - const int blockmode = read_inter_mode(r, mv_ref_p); + const int b_mode = read_inter_mode(cm, r, inter_mode_ctx); - vp9_accum_mv_refs(cm, blockmode, mbmi->mb_mode_context[ref0]); - if (blockmode == NEARESTMV || blockmode == NEARMV) { - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0); + if (b_mode == NEARESTMV || b_mode == NEARMV) { + vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0, + mi_row, mi_col); + if (ref1 > 0) vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second, - &nearby_second, j, 1); + &nearby_second, j, 1, + mi_row, mi_col); } - switch (blockmode) { + switch (b_mode) { case NEWMV: read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); + &cm->counts.mv, allow_hp); if (ref1 > 0) read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); + &cm->counts.mv, allow_hp); break; case NEARESTMV: blockmv.as_int = nearest.as_int; @@ -540,43 +541,32 @@ mi->bmi[j + 2] = mi->bmi[j]; if (num_4x4_w == 2) mi->bmi[j + 1] = mi->bmi[j]; - mi->mbmi.mode = blockmode; + mi->mbmi.mode = b_mode; } } mv0->as_int = mi->bmi[3].as_mv[0].as_int; mv1->as_int = mi->bmi[3].as_mv[1].as_int; } else { - const int mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - const int mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; - const int mb_to_left_edge = xd->mb_to_left_edge - 
LEFT_TOP_MARGIN; - const int mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - switch (mbmi->mode) { case NEARMV: - // Clip "next_nearest" so that it does not extend to far out of image - assign_and_clamp_mv(mv0, &nearby, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - if (ref1 > 0) - assign_and_clamp_mv(mv1, &nearby_second, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); + mv0->as_int = nearby.as_int; + clamp_mv2(&mv0->as_mv, xd); + + if (ref1 > 0) { + mv1->as_int = nearby_second.as_int; + clamp_mv2(&mv1->as_mv, xd); + } break; case NEARESTMV: - // Clip "next_nearest" so that it does not extend to far out of image - assign_and_clamp_mv(mv0, &nearest, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - if (ref1 > 0) - assign_and_clamp_mv(mv1, &nearest_second, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); + mv0->as_int = nearest.as_int; + clamp_mv2(&mv0->as_mv, xd); + + if (ref1 > 0) { + mv1->as_int = nearest_second.as_int; + clamp_mv2(&mv1->as_mv, xd); + } break; case ZEROMV: @@ -586,11 +576,10 @@ break; case NEWMV: - read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, - xd->allow_high_precision_mv); + read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, allow_hp); if (ref1 > 0) - read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); + read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, &cm->counts.mv, + allow_hp); break; default: assert(!"Invalid inter mode value"); @@ -613,7 +602,7 @@ !mbmi->mb_skip_coeff || !inter_block, r); if (inter_block) - read_inter_block_mode_info(pbi, mi, r); + read_inter_block_mode_info(pbi, mi, mi_row, mi_col, r); else read_intra_block_mode_info(pbi, mi, r); }
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 2b6f5a9..feb6024 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c
@@ -141,8 +141,8 @@ const int mode = plane == 0 ? mi->mbmi.mode : mi->mbmi.uv_mode; - if (plane == 0 && mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) { - assert(bsize == BLOCK_SIZE_SB8X8); + if (plane == 0 && mi->mbmi.sb_type < BLOCK_8X8) { + assert(bsize == BLOCK_8X8); b_mode = mi->bmi[raster_block].as_mode; } else { b_mode = mode; @@ -226,7 +226,7 @@ vp9_reader *r, BLOCK_SIZE_TYPE bsize) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - const int less8x8 = bsize < BLOCK_SIZE_SB8X8; + const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi; if (less8x8) @@ -237,12 +237,12 @@ vp9_read_mode_info(pbi, mi_row, mi_col, r); if (less8x8) - bsize = BLOCK_SIZE_SB8X8; + bsize = BLOCK_8X8; // Has to be called after set_offsets mbmi = &xd->mode_info_context->mbmi; - if (mbmi->ref_frame[0] == INTRA_FRAME) { + if (!is_inter_block(mbmi)) { // Intra reconstruction decode_tokens(pbi, bsize, r); foreach_transformed_block(xd, bsize, decode_block_intra, xd); @@ -283,12 +283,12 @@ if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols) return; - if (bsize < BLOCK_SIZE_SB8X8) { + if (bsize < BLOCK_8X8) { if (xd->ab_index != 0) return; } else { int pl; - const int idx = check_bsize_coverage(pc, xd, mi_row, mi_col, bsize); + const int idx = check_bsize_coverage(pc, mi_row, mi_col, bsize); set_partition_seg_context(pc, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); @@ -335,8 +335,8 @@ } // update partition context - if (bsize >= BLOCK_SIZE_SB8X8 && - (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) { + if (bsize >= BLOCK_8X8 && + (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) { set_partition_seg_context(pc, xd, mi_row, mi_col); update_partition_context(xd, subsize, bsize); } @@ -502,7 +502,7 @@ : vp9_rb_read_literal(rb, 2); } -static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb, +static void read_frame_size(struct vp9_read_bit_buffer *rb, int *width, int *height) { const int w = vp9_rb_read_literal(rb, 16) + 1; const 
int h = vp9_rb_read_literal(rb, 16) + 1; @@ -510,12 +510,11 @@ *height = h; } -static void setup_display_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; +static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { cm->display_width = cm->width; cm->display_height = cm->height; if (vp9_rb_read_bit(rb)) - read_frame_size(cm, rb, &cm->display_width, &cm->display_height); + read_frame_size(rb, &cm->display_width, &cm->display_height); } static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { @@ -551,10 +550,9 @@ static void setup_frame_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; int width, height; - read_frame_size(cm, rb, &width, &height); - setup_display_size(pbi, rb); + read_frame_size(rb, &width, &height); + setup_display_size(&pbi->common, rb); apply_frame_size(pbi, width, height); } @@ -575,13 +573,13 @@ } if (!found) - read_frame_size(cm, rb, &width, &height); + read_frame_size(rb, &width, &height); if (!width || !height) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame with invalid size"); - setup_display_size(pbi, rb); + setup_display_size(cm, rb); apply_frame_size(pbi, width, height); } @@ -608,7 +606,7 @@ vpx_memset(pc->left_seg_context, 0, sizeof(pc->left_seg_context)); for (mi_col = pc->cur_tile_mi_col_start; mi_col < pc->cur_tile_mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_SIZE_SB64X64); + decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64); } if (pbi->do_loopfilter_inline) {
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index e004c08..0021643 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c
@@ -95,7 +95,7 @@ FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; ENTROPY_CONTEXT above_ec, left_ec; - const int ref = xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME; + const int ref = is_inter_block(&xd->mode_info_context->mbmi); int band, pt, c = 0; vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; @@ -269,7 +269,7 @@ const int mod = bw - ss_tx_size - pd->subsampling_x; const int aoff = (off & ((1 << mod) - 1)) << ss_tx_size; const int loff = (off >> mod) << ss_tx_size; - + const int tx_size_in_blocks = 1 << ss_tx_size; ENTROPY_CONTEXT *A = pd->above_context + aoff; ENTROPY_CONTEXT *L = pd->left_context + loff; const int eob = decode_coefs(&arg->pbi->common, xd, arg->r, block, @@ -278,10 +278,11 @@ ss_tx_size, pd->dequant, A, L); if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { - set_contexts_on_border(xd, bsize, plane, ss_tx_size, eob, aoff, loff, A, L); + set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, eob, aoff, loff, + A, L); } else { int pt; - for (pt = 0; pt < (1 << ss_tx_size); pt++) + for (pt = 0; pt < tx_size_in_blocks; pt++) A[pt] = L[pt] = eob > 0; } pd->eobs[block] = eob;
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 5f21d15..98ef420 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c
@@ -169,7 +169,6 @@ static void update_mode( vp9_writer *w, int n, - const struct vp9_token tok[/* n */], vp9_tree tree, vp9_prob Pnew[/* n-1 */], vp9_prob Pcur[/* n-1 */], @@ -194,8 +193,7 @@ unsigned int bct[VP9_INTRA_MODES - 1][2]; for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_encodings, - vp9_intra_mode_tree, pnew, + update_mode(bc, VP9_INTRA_MODES, vp9_intra_mode_tree, pnew, cm->fc.y_mode_prob[j], bct, (unsigned int *)cpi->y_mode_count[j]); } @@ -205,9 +203,9 @@ const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const vp9_prob *tx_probs = get_tx_probs2(xd, &cpi->common.fc.tx_probs); vp9_write(w, tx_size != TX_4X4, tx_probs[0]); - if (bsize >= BLOCK_SIZE_MB16X16 && tx_size != TX_4X4) { + if (bsize >= BLOCK_16X16 && tx_size != TX_4X4) { vp9_write(w, tx_size != TX_8X8, tx_probs[1]); - if (bsize >= BLOCK_SIZE_SB32X32 && tx_size != TX_8X8) + if (bsize >= BLOCK_32X32 && tx_size != TX_8X8) vp9_write(w, tx_size != TX_16X16, tx_probs[2]); } } @@ -265,12 +263,17 @@ static void update_inter_mode_probs(VP9_COMMON *pc, vp9_writer* const bc) { int i, j; - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < VP9_INTER_MODES - 1; j++) { + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { + unsigned int branch_ct[VP9_INTER_MODES - 1][2]; + vp9_prob new_prob[VP9_INTER_MODES - 1]; + + vp9_tree_probs_from_distribution(vp9_inter_mode_tree, + new_prob, branch_ct, + pc->counts.inter_mode[i], NEARESTMV); + + for (j = 0; j < VP9_INTER_MODES - 1; ++j) vp9_cond_prob_diff_update(bc, &pc->fc.inter_mode_probs[i][j], - VP9_MODE_UPDATE_PROB, - pc->counts.inter_mode[i][j]); - } + VP9_MODE_UPDATE_PROB, branch_ct[j]); } } @@ -393,8 +396,7 @@ // the reference frame is fully coded by the segment } -static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, - vp9_writer *bc, int mi_row, int mi_col) { +static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { VP9_COMMON *const pc = &cpi->common; const nmv_context *nmvc = &pc->fc.nmvc; 
MACROBLOCK *const x = &cpi->mb; @@ -406,6 +408,7 @@ const int segment_id = mi->segment_id; int skip_coeff; const BLOCK_SIZE_TYPE bsize = mi->sb_type; + const int allow_hp = xd->allow_high_precision_mv; x->partition_info = x->pi + (m - pc->mi); @@ -446,13 +449,14 @@ write_intra_mode(bc, mode, pc->fc.y_mode_prob[size_group_lookup[bsize]]); } else { int idx, idy; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { const MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode; write_intra_mode(bc, bm, pc->fc.y_mode_prob[0]); } + } } write_intra_mode(bc, mi->uv_mode, pc->fc.uv_mode_prob[mode]); } else { @@ -468,7 +472,8 @@ if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { if (bsize >= BLOCK_SIZE_SB8X8) { write_sb_mv_ref(bc, mode, mv_ref_p); - vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]); + ++pc->counts.inter_mode[mi->mb_mode_context[rf]] + [inter_mode_offset(mode)]; } } @@ -485,8 +490,8 @@ int j; MB_PREDICTION_MODE blockmode; int_mv blockmv; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { @@ -494,19 +499,21 @@ blockmode = x->partition_info->bmi[j].mode; blockmv = m->bmi[j].as_mv[0]; write_sb_mv_ref(bc, blockmode, mv_ref_p); - vp9_accum_mv_refs(&cpi->common, blockmode, mi->mb_mode_context[rf]); + ++pc->counts.inter_mode[mi->mb_mode_context[rf]] + 
[inter_mode_offset(blockmode)]; + if (blockmode == NEWMV) { #ifdef ENTROPY_STATS active_section = 11; #endif vp9_encode_mv(cpi, bc, &blockmv.as_mv, &mi->best_mv.as_mv, - nmvc, xd->allow_high_precision_mv); + nmvc, allow_hp); if (mi->ref_frame[1] > INTRA_FRAME) vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv, &mi->best_second_mv.as_mv, - nmvc, xd->allow_high_precision_mv); + nmvc, allow_hp); } } } @@ -514,21 +521,18 @@ #ifdef ENTROPY_STATS active_section = 5; #endif - vp9_encode_mv(cpi, bc, - &mi->mv[0].as_mv, &mi->best_mv.as_mv, - nmvc, xd->allow_high_precision_mv); + vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv.as_mv, + nmvc, allow_hp); if (mi->ref_frame[1] > INTRA_FRAME) - vp9_encode_mv(cpi, bc, - &mi->mv[1].as_mv, &mi->best_second_mv.as_mv, - nmvc, xd->allow_high_precision_mv); + vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv.as_mv, + nmvc, allow_hp); } } } -static void write_mb_modes_kf(const VP9_COMP *cpi, - MODE_INFO *m, - vp9_writer *bc, int mi_row, int mi_col) { +static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO *m, + vp9_writer *bc) { const VP9_COMMON *const c = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const int ym = m->mbmi.mode; @@ -550,11 +554,11 @@ write_intra_mode(bc, ym, vp9_kf_y_mode_prob[A][L]); } else { int idx, idy; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type]; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - int i = idy * 2 + idx; + const int i = idy * 2 + idx; const MB_PREDICTION_MODE A = above_block_mode(m, i, mis); const MB_PREDICTION_MODE L = (xd->left_available || idx) ? 
left_block_mode(m, i) : DC_PRED; @@ -584,12 +588,12 @@ 1 << mi_height_log2(m->mbmi.sb_type), mi_col, 1 << mi_width_log2(m->mbmi.sb_type)); if ((cm->frame_type == KEY_FRAME) || cm->intra_only) { - write_mb_modes_kf(cpi, m, bc, mi_row, mi_col); + write_mb_modes_kf(cpi, m, bc); #ifdef ENTROPY_STATS active_section = 8; #endif } else { - pack_inter_mode_mvs(cpi, m, bc, mi_row, mi_col); + pack_inter_mode_mvs(cpi, m, bc); #ifdef ENTROPY_STATS active_section = 1; #endif @@ -623,7 +627,7 @@ if (bsize >= BLOCK_SIZE_SB8X8) { int pl; - const int idx = check_bsize_coverage(cm, xd, mi_row, mi_col, bsize); + const int idx = check_bsize_coverage(cm, mi_row, mi_col, bsize); set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); // encode the partition information @@ -690,8 +694,7 @@ vp9_zero(c->left_seg_context); for (mi_col = c->cur_tile_mi_col_start; mi_col < c->cur_tile_mi_col_end; mi_col += MI_BLOCK_SIZE, m += MI_BLOCK_SIZE) - write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col, - BLOCK_SIZE_SB64X64); + write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col, BLOCK_64X64); } } @@ -1420,7 +1423,7 @@ for (i = 0; i < NUM_PARTITION_CONTEXTS; ++i) { vp9_prob pnew[PARTITION_TYPES - 1]; unsigned int bct[PARTITION_TYPES - 1][2]; - update_mode(&header_bc, PARTITION_TYPES, vp9_partition_encodings, + update_mode(&header_bc, PARTITION_TYPES, vp9_partition_tree, pnew, fc->partition_prob[cm->frame_type][i], bct, (unsigned int *)cpi->partition_count[i]);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 634c0b4..800e3ba 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h
@@ -47,7 +47,7 @@ int hybrid_pred_diff; int comp_pred_diff; int single_pred_diff; - int64_t txfm_rd_diff[TX_MODES]; + int64_t tx_rd_diff[TX_MODES]; int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]; // Bit flag for each mode whether it has high error in comparison to others.
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index abdf071..f4490ae 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c
@@ -60,11 +60,28 @@ * Eventually this should be replaced by custom no-reference routines, * which will be faster. */ -static const uint8_t VP9_VAR_OFFS[16] = {128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128}; +static const uint8_t VP9_VAR_OFFS[64] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128 +}; + +static unsigned int get_sb_variance(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE_TYPE bs) { + unsigned int var, sse; + var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, + x->plane[0].src.stride, + VP9_VAR_OFFS, 0, &sse); + return var >> num_pels_log2_lookup[bs]; +} // Original activity measure from Tim T's code. -static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) { +static unsigned int tt_activity_measure(MACROBLOCK *x) { unsigned int act; unsigned int sse; /* TODO: This could also be done over smaller areas (8x8), but that would @@ -106,7 +123,7 @@ mb_activity = alt_activity_measure(cpi, x, use_dc_pred); } else { // Original activity measure from Tim T's code. - mb_activity = tt_activity_measure(cpi, x); + mb_activity = tt_activity_measure(x); } if (mb_activity < VP9_ACTIVITY_AVG_MIN) @@ -323,8 +340,8 @@ int mb_mode_index = ctx->best_mode_index; const int mis = cpi->common.mode_info_stride; - const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; assert(mi->mbmi.mode < MB_MODE_COUNT); assert(mb_mode_index < MAX_MODES); @@ -345,13 +362,13 @@ } // FIXME(rbultje) I'm pretty sure this should go to the end of this block // (i.e. 
after the output_enabled) - if (bsize < BLOCK_SIZE_SB32X32) { - if (bsize < BLOCK_SIZE_MB16X16) - ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8]; - ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16]; + if (bsize < BLOCK_32X32) { + if (bsize < BLOCK_16X16) + ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8]; + ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16]; } - if (mbmi->ref_frame[0] != INTRA_FRAME && mbmi->sb_type < BLOCK_SIZE_SB8X8) { + if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) { *x->partition_info = ctx->partition_info; mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; @@ -363,7 +380,7 @@ if (!vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) { for (i = 0; i < TX_MODES; i++) - cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i]; + cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i]; } if (cpi->common.frame_type == KEY_FRAME) { @@ -394,7 +411,7 @@ } else { // Note how often each mode chosen as best cpi->mode_chosen_counts[mb_mode_index]++; - if (mbmi->ref_frame[0] != INTRA_FRAME + if (is_inter_block(mbmi) && (mbmi->sb_type < BLOCK_SIZE_SB8X8 || mbmi->mode == NEWMV)) { int_mv best_mv, best_second_mv; const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0]; @@ -464,6 +481,7 @@ const int mb_row = mi_row >> 1; const int mb_col = mi_col >> 1; const int idx_map = mb_row * cm->mb_cols + mb_col; + const struct segmentation *const seg = &xd->seg; int i; // entropy context structures @@ -513,16 +531,16 @@ x->rdmult = cpi->RDMULT; /* segment ID */ - if (xd->seg.enabled) { - uint8_t *map = xd->seg.update_map ? cpi->segmentation_map - : cm->last_frame_seg_map; + if (seg->enabled) { + uint8_t *map = seg->update_map ? 
cpi->segmentation_map + : cm->last_frame_seg_map; mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); vp9_mb_init_quantizer(cpi, x); - if (xd->seg.enabled && cpi->seg0_cnt > 0 - && !vp9_segfeature_active(&xd->seg, 0, SEG_LVL_REF_FRAME) - && vp9_segfeature_active(&xd->seg, 1, SEG_LVL_REF_FRAME)) { + if (seg->enabled && cpi->seg0_cnt > 0 + && !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) + && vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) { cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; } else { const int y = mb_row & ~3; @@ -554,9 +572,12 @@ x->rd_search = 1; - if (bsize < BLOCK_SIZE_SB8X8) + if (bsize < BLOCK_SIZE_SB8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. if (xd->ab_index != 0) return; + } set_offsets(cpi, mi_row, mi_col, bsize); xd->mode_info_context->mbmi.sb_type = bsize; @@ -573,12 +594,12 @@ bsize, ctx, best_rd); } -static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) { - VP9_COMMON * const cm = &cpi->common; - MACROBLOCK * const x = &cpi->mb; - MACROBLOCKD * const xd = &x->e_mbd; +static void update_stats(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi = xd->mode_info_context; - MB_MODE_INFO * const mbmi = &mi->mbmi; + MB_MODE_INFO *const mbmi = &mi->mbmi; if (cm->frame_type != KEY_FRAME) { const int seg_ref_active = vp9_segfeature_active(&xd->seg, mbmi->segment_id, @@ -614,38 +635,38 @@ } // TODO(jingning): the variables used here are little complicated. need further -// refactoring on organizing the the temporary buffers, when recursive +// refactoring on organizing the temporary buffers, when recursive // partition down to 4x4 block size is enabled. 
static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { MACROBLOCKD * const xd = &x->e_mbd; switch (bsize) { - case BLOCK_SIZE_SB64X64: + case BLOCK_64X64: return &x->sb64_context; - case BLOCK_SIZE_SB64X32: + case BLOCK_64X32: return &x->sb64x32_context[xd->sb_index]; - case BLOCK_SIZE_SB32X64: + case BLOCK_32X64: return &x->sb32x64_context[xd->sb_index]; - case BLOCK_SIZE_SB32X32: + case BLOCK_32X32: return &x->sb32_context[xd->sb_index]; - case BLOCK_SIZE_SB32X16: + case BLOCK_32X16: return &x->sb32x16_context[xd->sb_index][xd->mb_index]; - case BLOCK_SIZE_SB16X32: + case BLOCK_16X32: return &x->sb16x32_context[xd->sb_index][xd->mb_index]; - case BLOCK_SIZE_MB16X16: + case BLOCK_16X16: return &x->mb_context[xd->sb_index][xd->mb_index]; - case BLOCK_SIZE_SB16X8: + case BLOCK_16X8: return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_SB8X16: + case BLOCK_8X16: return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_SB8X8: + case BLOCK_8X8: return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_SB8X4: + case BLOCK_8X4: return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_SB4X8: + case BLOCK_4X8: return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index]; - case BLOCK_SIZE_AB4X4: + case BLOCK_4X4: return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index]; default: assert(0); @@ -657,13 +678,13 @@ BLOCK_SIZE_TYPE bsize) { MACROBLOCKD *xd = &x->e_mbd; switch (bsize) { - case BLOCK_SIZE_SB64X64: + case BLOCK_64X64: return &x->sb64_partitioning; - case BLOCK_SIZE_SB32X32: + case BLOCK_32X32: return &x->sb_partitioning[xd->sb_index]; - case BLOCK_SIZE_MB16X16: + case BLOCK_16X16: return &x->mb_partitioning[xd->sb_index][xd->mb_index]; - case BLOCK_SIZE_SB8X8: + case BLOCK_8X8: return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index]; default: assert(0); @@ -676,12 +697,12 @@ ENTROPY_CONTEXT l[16 
* MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], BLOCK_SIZE_TYPE bsize) { - VP9_COMMON * const cm = &cpi->common; - MACROBLOCK * const x = &cpi->mb; - MACROBLOCKD * const xd = &x->e_mbd; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; int p; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int mi_width = num_8x8_blocks_wide_lookup[bsize]; int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { @@ -707,12 +728,12 @@ ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], BLOCK_SIZE_TYPE bsize) { - VP9_COMMON * const cm = &cpi->common; - MACROBLOCK * const x = &cpi->mb; - MACROBLOCKD * const xd = &x->e_mbd; + const VP9_COMMON *const cm = &cpi->common; + const MACROBLOCK *const x = &cpi->mb; + const MACROBLOCKD *const xd = &x->e_mbd; int p; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int mi_width = num_8x8_blocks_wide_lookup[bsize]; int mi_height = num_8x8_blocks_high_lookup[bsize]; @@ -748,15 +769,18 @@ if (sub_index != -1) *(get_sb_index(xd, bsize)) = sub_index; - if (bsize < BLOCK_SIZE_SB8X8) + if (bsize < BLOCK_SIZE_SB8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. 
if (xd->ab_index > 0) return; + } set_offsets(cpi, mi_row, mi_col, bsize); update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); if (output_enabled) { - update_stats(cpi, mi_row, mi_col); + update_stats(cpi); (*tp)->token = EOSB_TOKEN; (*tp)++; @@ -778,7 +802,7 @@ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - c1 = BLOCK_SIZE_AB4X4; + c1 = BLOCK_4X4; if (bsize >= BLOCK_SIZE_SB8X8) { set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); @@ -908,28 +932,28 @@ static void tree_to_node(void *data, BLOCK_SIZE_TYPE block_size, vt_node *node) { int i; switch (block_size) { - case BLOCK_SIZE_SB64X64: { + case BLOCK_64X64: { v64x64 *vt = (v64x64 *) data; node->vt = &vt->vt; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].vt.none; break; } - case BLOCK_SIZE_SB32X32: { + case BLOCK_32X32: { v32x32 *vt = (v32x32 *) data; node->vt = &vt->vt; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].vt.none; break; } - case BLOCK_SIZE_MB16X16: { + case BLOCK_16X16: { v16x16 *vt = (v16x16 *) data; node->vt = &vt->vt; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].vt.none; break; } - case BLOCK_SIZE_SB8X8: { + case BLOCK_8X8: { v8x8 *vt = (v8x8 *) data; node->vt = &vt->vt; for (i = 0; i < 4; i++) @@ -1088,7 +1112,8 @@ dp = 64; if (cm->frame_type != KEY_FRAME) { int_mv nearest_mv, near_mv; - YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[0]; + const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)]; + YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx]; YV12_BUFFER_CONFIG *second_ref_fb = NULL; setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col, @@ -1104,7 +1129,6 @@ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_SB64X64); d = xd->plane[0].dst.buf; dp = xd->plane[0].dst.stride; - } // Fill in the entire tree of 8x8 variances for splits. @@ -1131,32 +1155,32 @@ // values. 
for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - fill_variance_tree(&vt.split[i].split[j], BLOCK_SIZE_MB16X16); + fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); } - fill_variance_tree(&vt.split[i], BLOCK_SIZE_SB32X32); + fill_variance_tree(&vt.split[i], BLOCK_32X32); } - fill_variance_tree(&vt, BLOCK_SIZE_SB64X64); + fill_variance_tree(&vt, BLOCK_64X64); // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold, or we // hit 8x8. - if (!set_vt_partitioning(cpi, &vt, m, BLOCK_SIZE_SB64X64, mi_row, mi_col, + if (!set_vt_partitioning(cpi, &vt, m, BLOCK_64X64, mi_row, mi_col, 4)) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); - if (!set_vt_partitioning(cpi, &vt.split[i], m, BLOCK_SIZE_SB32X32, + if (!set_vt_partitioning(cpi, &vt.split[i], m, BLOCK_32X32, (mi_row + y32_idx), (mi_col + x32_idx), 2)) { for (j = 0; j < 4; ++j) { const int x16_idx = ((j & 1) << 1); const int y16_idx = ((j >> 1) << 1); if (!set_vt_partitioning(cpi, &vt.split[i].split[j], m, - BLOCK_SIZE_MB16X16, + BLOCK_16X16, (mi_row + y32_idx + y16_idx), (mi_col + x32_idx + x16_idx), 1)) { for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); - set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis, + set_block_size(cm, m, BLOCK_8X8, mis, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx)); } @@ -1166,6 +1190,7 @@ } } } + static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, int *rate, int64_t *dist, int do_recon) { @@ -1174,8 +1199,8 @@ MACROBLOCKD *xd = &cpi->mb.e_mbd; const int mis = cm->mode_info_stride; int bsl = b_width_log2(bsize); - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int 
num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int ms = num_4x4_blocks_wide / 2; int mh = num_4x4_blocks_high / 2; int bss = (1 << bsl) / 4; @@ -1192,7 +1217,7 @@ int64_t none_dist = INT_MAX; int chosen_rate = INT_MAX; int64_t chosen_dist = INT_MAX; - BLOCK_SIZE_TYPE sub_subsize = BLOCK_SIZE_AB4X4; + BLOCK_SIZE_TYPE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE_TYPE bs_type = m->mbmi.sb_type; @@ -1204,6 +1229,8 @@ subsize = get_subsize(bsize, partition); if (bsize < BLOCK_SIZE_SB8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. if (xd->ab_index != 0) { *rate = 0; *dist = 0; @@ -1437,7 +1464,7 @@ BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 }; -// Look at neighbouring blocks and set a min and max partition size based on +// Look at neighboring blocks and set a min and max partition size based on // what they chose. static void rd_auto_partition_range(VP9_COMP *cpi, BLOCK_SIZE_TYPE * min_block_size, @@ -1479,7 +1506,7 @@ } // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are -// unlikely to be selected depending on previously rate-distortion optimization +// unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, int *rate, @@ -1499,12 +1526,15 @@ (void) *tp_orig; - if (bsize < BLOCK_SIZE_SB8X8) + if (bsize < BLOCK_SIZE_SB8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. if (xd->ab_index != 0) { *rate = 0; *dist = 0; return; } + } assert(mi_height_log2(bsize) == mi_width_log2(bsize)); save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); @@ -1554,26 +1584,29 @@ } } + // Use 4 subblocks' motion estimation results to speed up current + // partition's checking. 
x->fast_ms = 0; x->pred_mv.as_int = 0; x->subblock_ref = 0; - // Use 4 subblocks' motion estimation results to speed up current - // partition's checking. - if (cpi->sf.using_small_partition_info) { + if (cpi->sf.using_small_partition_info && + (!cpi->sf.auto_min_max_partition_size || + (bsize <= cpi->sf.max_partition_size && + bsize >= cpi->sf.min_partition_size))) { // Only use 8x8 result for non HD videos. // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0; int use_8x8 = 1; if (cm->frame_type && !cpi->is_src_frame_alt_ref && - ((use_8x8 && bsize == BLOCK_SIZE_MB16X16) || - bsize == BLOCK_SIZE_SB32X32 || bsize == BLOCK_SIZE_SB64X64)) { + ((use_8x8 && bsize == BLOCK_16X16) || + bsize == BLOCK_32X32 || bsize == BLOCK_64X64)) { int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0; PICK_MODE_CONTEXT *block_context = NULL; - if (bsize == BLOCK_SIZE_MB16X16) { + if (bsize == BLOCK_16X16) { block_context = x->sb8x8_context[xd->sb_index][xd->mb_index]; - } else if (bsize == BLOCK_SIZE_SB32X32) { + } else if (bsize == BLOCK_32X32) { block_context = x->mb_context[xd->sb_index]; } else if (bsize == BLOCK_SIZE_SB64X64) { block_context = x->sb32_context; @@ -1652,7 +1685,7 @@ } } - if (!cpi->sf.use_max_partition_size || + if (!cpi->sf.auto_min_max_partition_size || bsize <= cpi->sf.max_partition_size) { int larger_is_better = 0; // PARTITION_NONE @@ -1824,8 +1857,7 @@ } // Examines 64x64 block and chooses a best reference frame -static void rd_pick_reference_frame(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, - int mi_col, int *rate, int64_t *dist) { +static void rd_pick_reference_frame(VP9_COMP *cpi, int mi_row, int mi_col) { VP9_COMMON * const cm = &cpi->common; MACROBLOCK * const x = &cpi->mb; MACROBLOCKD * const xd = &x->e_mbd; @@ -1856,23 +1888,7 @@ cpi->set_ref_frame_mask = 0; } - *rate = r; - *dist = d; - // RDCOST(x->rdmult, x->rddiv, r, d) - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_SIZE_SB64X64); - - /*if (srate < INT_MAX && sdist < 
INT_MAX) - encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64); - - if (bsize == BLOCK_SIZE_SB64X64) { - assert(tp_orig < *tp); - assert(srate < INT_MAX); - assert(sdist < INT_MAX); - } else { - assert(tp_orig == *tp); - } - */ } static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, @@ -1897,10 +1913,8 @@ else cpi->unused_mode_skip_mask = 0xFFFFFFFFFFFFFE00; - if (cpi->sf.reference_masking) { - rd_pick_reference_frame(cpi, tp, mi_row, mi_col, - &dummy_rate, &dummy_dist); - } + if (cpi->sf.reference_masking) + rd_pick_reference_frame(cpi, mi_row, mi_col); if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning || cpi->sf.use_one_partition_size_always ) { @@ -1941,8 +1955,7 @@ } else { // If required set upper and lower partition size limits if (cpi->sf.auto_min_max_partition_size) { - rd_auto_partition_range(cpi, - &cpi->sf.min_partition_size, + rd_auto_partition_range(cpi, &cpi->sf.min_partition_size, &cpi->sf.max_partition_size); } @@ -2236,13 +2249,13 @@ int n; assert(bwl < bsl && bhl < bsl); - if (bsize == BLOCK_SIZE_SB64X64) { - subsize = BLOCK_SIZE_SB32X32; - } else if (bsize == BLOCK_SIZE_SB32X32) { - subsize = BLOCK_SIZE_MB16X16; + if (bsize == BLOCK_64X64) { + subsize = BLOCK_32X32; + } else if (bsize == BLOCK_32X32) { + subsize = BLOCK_16X16; } else { - assert(bsize == BLOCK_SIZE_MB16X16); - subsize = BLOCK_SIZE_SB8X8; + assert(bsize == BLOCK_16X16); + subsize = BLOCK_8X8; } for (n = 0; n < 4; n++) { @@ -2560,7 +2573,7 @@ // Increase zbin size to suppress noise cpi->zbin_mode_boost = 0; if (cpi->zbin_mode_boost_enabled) { - if (mbmi->ref_frame[0] != INTRA_FRAME) { + if (is_inter_block(mbmi)) { if (mbmi->mode == ZEROMV) { if (mbmi->ref_frame[0] != LAST_FRAME) cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; @@ -2633,7 +2646,7 @@ if (output_enabled) { if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_SIZE_SB8X8 && - !(mbmi->ref_frame[0] != INTRA_FRAME && + !(is_inter_block(mbmi) && (mbmi->mb_skip_coeff || 
vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP)))) { const uint8_t context = vp9_get_pred_context_tx_size(xd); @@ -2642,14 +2655,14 @@ int x, y; TX_SIZE sz = (cm->tx_mode == TX_MODE_SELECT) ? TX_32X32 : cm->tx_mode; // The new intra coding scheme requires no change of transform size - if (mi->mbmi.ref_frame[0] != INTRA_FRAME) { - if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32) + if (is_inter_block(&mi->mbmi)) { + if (sz == TX_32X32 && bsize < BLOCK_32X32) sz = TX_16X16; - if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16) + if (sz == TX_16X16 && bsize < BLOCK_16X16) sz = TX_8X8; - if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8) + if (sz == TX_8X8 && bsize < BLOCK_8X8) sz = TX_4X4; - } else if (bsize >= BLOCK_SIZE_SB8X8) { + } else if (bsize >= BLOCK_8X8) { sz = mbmi->txfm_size; } else { sz = TX_4X4;
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index d49e532..edbd2d9 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c
@@ -21,7 +21,7 @@ x->skip_encode = 0; mbmi->mode = DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; - mbmi->txfm_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_SIZE_MB16X16 ? + mbmi->txfm_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4; vp9_encode_intra_block_y(&cpi->common, x, mbmi->sb_type); return vp9_get_mb_ss(x->plane[0].src_diff);
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index a92ecf2..40b0a4e 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c
@@ -47,7 +47,7 @@ xd->inv_txm4x4_add(dqcoeff, dest, stride); } -static void inverse_transform_b_8x8_add(MACROBLOCKD *xd, int eob, +static void inverse_transform_b_8x8_add(int eob, int16_t *dqcoeff, uint8_t *dest, int stride) { if (eob <= 1) @@ -58,7 +58,7 @@ vp9_short_idct8x8_add(dqcoeff, dest, stride); } -static void inverse_transform_b_16x16_add(MACROBLOCKD *xd, int eob, +static void inverse_transform_b_16x16_add(int eob, int16_t *dqcoeff, uint8_t *dest, int stride) { if (eob <= 1) @@ -141,12 +141,12 @@ return pt; } -static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, +static void optimize_b(MACROBLOCK *mb, int plane, int block, BLOCK_SIZE_TYPE bsize, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, TX_SIZE tx_size) { - const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; + const int ref = is_inter_block(&xd->mode_info_context->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, @@ -372,7 +372,7 @@ } void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, - int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb, + int ss_txfrm_size, MACROBLOCK *mb, struct optimize_ctx *ctx) { MACROBLOCKD *const xd = &mb->e_mbd; int x, y; @@ -380,15 +380,14 @@ // find current entropy context txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y); - optimize_b(cm, mb, plane, block, bsize, + optimize_b(mb, plane, block, bsize, &ctx->ta[plane][x], &ctx->tl[plane][y], ss_txfrm_size / 2); } static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize, int ss_txfrm_size, void *arg) { const struct encode_b_args* const args = arg; - vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, args->x, - args->ctx); + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->x, args->ctx); } void optimize_init_b(int plane, BLOCK_SIZE_TYPE bsize, void *arg) { @@ -539,7 +538,7 @@ xform_quant(plane, block, bsize, 
ss_txfrm_size, arg); if (x->optimize) - vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx); + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, x, args->ctx); if (x->skip_encode) return; @@ -551,12 +550,12 @@ vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); break; case TX_16X16: - inverse_transform_b_16x16_add(xd, pd->eobs[block], dqcoeff, - dst, pd->dst.stride); + inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst, + pd->dst.stride); break; case TX_8X8: - inverse_transform_b_8x8_add(xd, pd->eobs[block], dqcoeff, - dst, pd->dst.stride); + inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst, + pd->dst.stride); break; case TX_4X4: // this is like vp9_short_idct4x4 but has a special case around eob<=1 @@ -654,7 +653,7 @@ // if (x->optimize) // vp9_optimize_b(plane, block, bsize, ss_txfrm_size, - // args->cm, x, args->ctx); + // x, args->ctx); switch (tx_size) { case TX_32X32: @@ -705,7 +704,7 @@ pd->dequant, p->zbin_extra, eob, scan, iscan); if (!x->skip_encode && *eob) { if (tx_type == DCT_DCT) - inverse_transform_b_16x16_add(xd, *eob, dqcoeff, dst, pd->dst.stride); + inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride); else vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type); } @@ -734,7 +733,7 @@ pd->dequant, p->zbin_extra, eob, scan, iscan); if (!x->skip_encode && *eob) { if (tx_type == DCT_DCT) - inverse_transform_b_8x8_add(xd, *eob, dqcoeff, dst, pd->dst.stride); + inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride); else vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type); } @@ -743,11 +742,11 @@ tx_type = get_tx_type_4x4(pd->plane_type, xd, block); scan = get_scan_4x4(tx_type); iscan = get_iscan_4x4(tx_type); - if (mbmi->sb_type < BLOCK_SIZE_SB8X8 && plane == 0) { + if (mbmi->sb_type < BLOCK_8X8 && plane == 0) mode = xd->mode_info_context->bmi[block].as_mode; - } else { + else mode = plane == 0 ? 
mbmi->mode : mbmi->uv_mode; - } + xoff = 4 * (block & twmask); yoff = 4 * (block >> twl); dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 6101450..f647fd9 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h
@@ -34,7 +34,7 @@ }; void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, - int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *x, + int ss_txfrm_size, MACROBLOCK *x, struct optimize_ctx *ctx); void vp9_optimize_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_optimize_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index 2f5e16c..1c6fa3a 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c
@@ -478,7 +478,7 @@ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; int idx, idy; - if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { + if (mbmi->sb_type < BLOCK_8X8) { PARTITION_INFO *pi = x->partition_info; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index ff336d7..6ba2a4f 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c
@@ -347,17 +347,17 @@ xd->plane[0].pre[0].buf = recon_buffer->y_buffer + recon_yoffset; switch (xd->mode_info_context->mbmi.sb_type) { - case BLOCK_SIZE_SB8X8: + case BLOCK_8X8: vp9_mse8x8(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, (unsigned int *)(best_motion_err)); break; - case BLOCK_SIZE_SB16X8: + case BLOCK_16X8: vp9_mse16x8(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, (unsigned int *)(best_motion_err)); break; - case BLOCK_SIZE_SB8X16: + case BLOCK_8X16: vp9_mse8x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, (unsigned int *)(best_motion_err)); @@ -403,13 +403,13 @@ // override the default variance function to use MSE switch (xd->mode_info_context->mbmi.sb_type) { - case BLOCK_SIZE_SB8X8: + case BLOCK_8X8: v_fn_ptr.vf = vp9_mse8x8; break; - case BLOCK_SIZE_SB16X8: + case BLOCK_16X8: v_fn_ptr.vf = vp9_mse16x8; break; - case BLOCK_SIZE_SB8X16: + case BLOCK_8X16: v_fn_ptr.vf = vp9_mse8x16; break; default: @@ -549,15 +549,15 @@ if (mb_col * 2 + 1 < cm->mi_cols) { if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16; + xd->mode_info_context->mbmi.sb_type = BLOCK_16X16; } else { - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB16X8; + xd->mode_info_context->mbmi.sb_type = BLOCK_16X8; } } else { if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB8X16; + xd->mode_info_context->mbmi.sb_type = BLOCK_8X16; } else { - xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB8X8; + xd->mode_info_context->mbmi.sb_type = BLOCK_8X8; } } xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME; @@ -1282,7 +1282,6 @@ // Update the motion related elements to the GF arf boost calculation static void accumulate_frame_motion_stats( - VP9_COMP *cpi, FIRSTPASS_STATS *this_frame, double *this_frame_mv_in_out, double *mv_in_out_accumulator, @@ -1377,7 
+1376,7 @@ break; // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &this_frame, + accumulate_frame_motion_stats(&this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); @@ -1413,7 +1412,7 @@ break; // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &this_frame, + accumulate_frame_motion_stats(&this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); @@ -1665,7 +1664,7 @@ flash_detected = detect_flash(cpi, 0); // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &next_frame, + accumulate_frame_motion_stats(&next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index c6736fe..154d31a 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c
@@ -63,7 +63,7 @@ } vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); - vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16); + vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); best_err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, INT_MAX); @@ -77,9 +77,7 @@ return best_err; } -static int do_16x16_motion_search(VP9_COMP *cpi, - int_mv *ref_mv, int_mv *dst_mv, - int buf_mb_y_offset, int mb_y_offset, +static int do_16x16_motion_search(VP9_COMP *cpi, int_mv *ref_mv, int_mv *dst_mv, int mb_row, int mb_col) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -118,9 +116,7 @@ return err; } -static int do_16x16_zerozero_search(VP9_COMP *cpi, - int_mv *dst_mv, - int buf_mb_y_offset, int mb_y_offset) { +static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err; @@ -210,7 +206,6 @@ g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, &stats->ref[GOLDEN_FRAME].m.mv, - mb_y_offset, gld_y_offset, mb_row, mb_col); stats->ref[GOLDEN_FRAME].err = g_motion_error; } else { @@ -224,8 +219,7 @@ xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset; xd->plane[0].pre[0].stride = alt_ref->y_stride; a_motion_error = do_16x16_zerozero_search(cpi, - &stats->ref[ALTREF_FRAME].m.mv, - mb_y_offset, arf_y_offset); + &stats->ref[ALTREF_FRAME].m.mv); stats->ref[ALTREF_FRAME].err = a_motion_error; } else { @@ -261,7 +255,7 @@ xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; xd->mode_info_context = &mi_local; - mi_local.mbmi.sb_type = BLOCK_SIZE_MB16X16; + mi_local.mbmi.sb_type = BLOCK_16X16; mi_local.mbmi.ref_frame[0] = LAST_FRAME; mi_local.mbmi.ref_frame[1] = NONE;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 0be9891..88beee7 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c
@@ -58,7 +58,7 @@ } int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], - int weight, int ishp) { + int weight) { MV v; v.row = mv->as_mv.row - ref->as_mv.row; v.col = mv->as_mv.col - ref->as_mv.col; @@ -68,7 +68,7 @@ } static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], - int error_per_bit, int ishp) { + int error_per_bit) { if (mvcost) { MV v; v.row = mv->as_mv.row - ref->as_mv.row; @@ -269,7 +269,6 @@ int maxc, minc, maxr, minr; int y_stride; int offset; - int usehp = xd->allow_high_precision_mv; uint8_t *y = xd->plane[0].pre[0].buf + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + @@ -300,8 +299,7 @@ // calculate central point error besterr = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); // TODO: Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) @@ -371,13 +369,7 @@ tc = bc; } - if (xd->allow_high_precision_mv) { - usehp = vp9_use_mv_hp(&ref_mv->as_mv); - } else { - usehp = 0; - } - - if (usehp) { + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) { hstep >>= 1; while (--eighthiters) { CHECK_BETTER(left, tr, tc - hstep); @@ -451,7 +443,6 @@ int maxc, minc, maxr, minr; int y_stride; int offset; - int usehp = xd->allow_high_precision_mv; DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); uint8_t *y = xd->plane[0].pre[0].buf + @@ -490,8 +481,7 @@ comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); // Each subsequent iteration checks at least one point in // common with 
the last iteration could be 2 ( if diag selected) @@ -561,13 +551,7 @@ tc = bc; } - if (xd->allow_high_precision_mv) { - usehp = vp9_use_mv_hp(&ref_mv->as_mv); - } else { - usehp = 0; - } - - if (usehp) { + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) { hstep >>= 1; while (--eighthiters) { CHECK_BETTER(left, tr, tc - hstep); @@ -638,7 +622,6 @@ int thismse; int y_stride; MACROBLOCKD *xd = &x->e_mbd; - int usehp = xd->allow_high_precision_mv; uint8_t *y = xd->plane[0].pre[0].buf + (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + @@ -654,15 +637,14 @@ // calculate central point error bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (left < bestmse) { *bestmv = this_mv; @@ -674,7 +656,7 @@ this_mv.as_mv.col += 8; thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + error_per_bit); if (right < bestmse) { *bestmv = this_mv; @@ -687,8 +669,7 @@ this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; @@ 
-699,8 +680,8 @@ this_mv.as_mv.row += 8; thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (down < bestmse) { *bestmv = this_mv; @@ -742,8 +723,8 @@ break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (diag < bestmse) { *bestmv = this_mv; @@ -784,8 +765,8 @@ src_stride, &sse); } - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (left < bestmse) { *bestmv = this_mv; @@ -799,7 +780,7 @@ SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + error_per_bit); if (right < bestmse) { *bestmv = this_mv; @@ -822,8 +803,7 @@ z, src_stride, &sse); } - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; @@ -835,8 +815,9 @@ this_mv.as_mv.row += 4; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); + if (down < bestmse) { *bestmv = this_mv; @@ -923,8 +904,8 @@ break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, 
mvjcost, mvcost, + error_per_bit); if (diag < bestmse) { *bestmv = this_mv; @@ -933,12 +914,7 @@ *sse1 = sse; } - if (x->e_mbd.allow_high_precision_mv) { - usehp = vp9_use_mv_hp(&ref_mv->as_mv); - } else { - usehp = 0; - } - if (!usehp) + if (!(xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv))) return bestmse; /* Now do 1/8th pixel */ @@ -968,8 +944,8 @@ z, src_stride, &sse); } - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (left < bestmse) { *bestmv = this_mv; @@ -982,7 +958,7 @@ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + error_per_bit); if (right < bestmse) { *bestmv = this_mv; @@ -1005,8 +981,7 @@ SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); } - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; @@ -1019,8 +994,8 @@ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (down < bestmse) { *bestmv = this_mv; @@ -1107,8 +1082,8 @@ break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (diag < bestmse) { *bestmv = this_mv; @@ -1153,15 +1128,14 @@ // calculate central point error bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); *distortion = bestmse; - bestmse 
+= mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (left < bestmse) { *bestmv = this_mv; @@ -1173,7 +1147,7 @@ this_mv.as_mv.col += 8; thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, xd->allow_high_precision_mv); + error_per_bit); if (right < bestmse) { *bestmv = this_mv; @@ -1186,8 +1160,7 @@ this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; @@ -1198,8 +1171,8 @@ this_mv.as_mv.row += 8; thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (down < bestmse) { *bestmv = this_mv; @@ -1238,8 +1211,8 @@ break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, - xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit); if (diag < bestmse) { *bestmv = this_mv; @@ -1326,7 +1299,8 @@ fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; // 
adjust ref_mv to make sure it is within MV range - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + clamp_mv(&ref_mv->as_mv, + x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); br = ref_mv->as_mv.row; bc = ref_mv->as_mv.col; @@ -1482,7 +1456,8 @@ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + clamp_mv(&ref_mv->as_mv, + x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->as_mv.row; ref_col = ref_mv->as_mv.col; *num00 = 0; @@ -1580,11 +1555,9 @@ if (bestsad == INT_MAX) return INT_MAX; - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, + (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, + mvcost, x->errorperbit); } int vp9_diamond_search_sadx4(MACROBLOCK *x, @@ -1624,7 +1597,8 @@ fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + clamp_mv(&ref_mv->as_mv, + x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->as_mv.row; ref_col = ref_mv->as_mv.col; *num00 = 0; @@ -1754,11 +1728,9 @@ if (bestsad == INT_MAX) return INT_MAX; - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, + (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, + center_mv, mvjcost, mvcost, x->errorperbit); } /* do_refine: If last step (1-away) of n-step search doesn't pick the center @@ -1914,8 
+1886,7 @@ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2042,8 +2013,7 @@ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2197,8 +2167,7 @@ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2274,8 +2243,7 @@ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2381,8 +2349,7 @@ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2472,12 +2439,10 @@ if (bestsad < INT_MAX) { // FIXME(rbultje, yunqing): add full-pixel averaging variance functions // so we don't have to use the subpixel with xoff=0,yoff=0 here. 
- int besterr = fn_ptr->svaf(best_address, in_what_stride, 0, 0, + return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, (unsigned int *)(&thissad), second_pred) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, - xd->allow_high_precision_mv); - return besterr; + mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); } else { return INT_MAX; }
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index c13ea75..097d33c 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h
@@ -25,7 +25,7 @@ void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv); int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, - int *mvcost[2], int weight, int ishp); + int *mvcost[2], int weight); void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride); void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 78992cf..f2fa552 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c
@@ -243,16 +243,17 @@ static void setup_features(VP9_COMP *cpi) { MACROBLOCKD *xd = &cpi->mb.e_mbd; - struct loopfilter *lf = &xd->lf; + struct loopfilter *const lf = &xd->lf; + struct segmentation *const seg = &xd->seg; // Set up default state for MB feature flags - xd->seg.enabled = 0; + seg->enabled = 0; - xd->seg.update_map = 0; - xd->seg.update_data = 0; - vpx_memset(xd->seg.tree_probs, 255, sizeof(xd->seg.tree_probs)); + seg->update_map = 0; + seg->update_data = 0; + vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); - vp9_clearall_segfeatures(&xd->seg); + vp9_clearall_segfeatures(seg); lf->mode_ref_delta_enabled = 0; lf->mode_ref_delta_update = 0; @@ -324,6 +325,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &cpi->mb.e_mbd; + struct segmentation *seg = &xd->seg; int high_q = (int)(cpi->avg_q > 48.0); int qi_delta; @@ -332,26 +334,26 @@ if (cm->frame_type == KEY_FRAME) { // Clear down the global segmentation map vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - xd->seg.update_map = 0; - xd->seg.update_data = 0; + seg->update_map = 0; + seg->update_data = 0; cpi->static_mb_pct = 0; // Disable segmentation vp9_disable_segmentation((VP9_PTR)cpi); // Clear down the segment features. - vp9_clearall_segfeatures(&xd->seg); + vp9_clearall_segfeatures(seg); } else if (cpi->refresh_alt_ref_frame) { // If this is an alt ref frame // Clear down the global segmentation map vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - xd->seg.update_map = 0; - xd->seg.update_data = 0; + seg->update_map = 0; + seg->update_data = 0; cpi->static_mb_pct = 0; // Disable segmentation and individual segment features by default vp9_disable_segmentation((VP9_PTR)cpi); - vp9_clearall_segfeatures(&xd->seg); + vp9_clearall_segfeatures(seg); // Scan frames from current to arf frame. // This function re-enables segmentation if appropriate. 
@@ -359,45 +361,45 @@ // If segmentation was enabled set those features needed for the // arf itself. - if (xd->seg.enabled) { - xd->seg.update_map = 1; - xd->seg.update_data = 1; + if (seg->enabled) { + seg->update_map = 1; + seg->update_data = 1; qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 0.875)); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_LF, -2); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_Q); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_LF); + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF); // Where relevant assume segment data is delta data - xd->seg.abs_delta = SEGMENT_DELTADATA; + seg->abs_delta = SEGMENT_DELTADATA; } - } else if (xd->seg.enabled) { + } else if (seg->enabled) { // All other frames if segmentation has been enabled // First normal frame in a valid gf or alt ref group if (cpi->frames_since_golden == 0) { // Set up segment features for normal frames in an arf group if (cpi->source_alt_ref_active) { - xd->seg.update_map = 0; - xd->seg.update_data = 1; - xd->seg.abs_delta = SEGMENT_DELTADATA; + seg->update_map = 0; + seg->update_data = 1; + seg->abs_delta = SEGMENT_DELTADATA; qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 1.125)); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_ALT_LF, -2); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_ALT_LF); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF); // Segment coding disabled for compred testing if (high_q || (cpi->static_mb_pct == 100)) { - vp9_set_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); - 
vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_REF_FRAME); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_SKIP); + vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); + vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); + vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP); } } else { // Disable segmentation and clear down features if alt ref @@ -407,10 +409,10 @@ vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - xd->seg.update_map = 0; - xd->seg.update_data = 0; + seg->update_map = 0; + seg->update_data = 0; - vp9_clearall_segfeatures(&xd->seg); + vp9_clearall_segfeatures(seg); } } else if (cpi->is_src_frame_alt_ref) { // Special case where we are coding over the top of a previous @@ -418,28 +420,28 @@ // Segment coding disabled for compred testing // Enable ref frame features for segment 0 as well - vp9_enable_segfeature(&xd->seg, 0, SEG_LVL_REF_FRAME); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_REF_FRAME); + vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME); + vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); // All mbs should use ALTREF_FRAME - vp9_clear_segdata(&xd->seg, 0, SEG_LVL_REF_FRAME); - vp9_set_segdata(&xd->seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME); - vp9_clear_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME); - vp9_set_segdata(&xd->seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); + vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME); + vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME); + vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME); + vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); // Skip all MBs if high Q (0,0 mv and skip coeffs) if (high_q) { - vp9_enable_segfeature(&xd->seg, 0, SEG_LVL_SKIP); - vp9_enable_segfeature(&xd->seg, 1, SEG_LVL_SKIP); + vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP); + vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP); } // Enable data update - xd->seg.update_data = 1; + seg->update_data = 1; } else { // All other frames. // No updates.. leave things as they are. 
- xd->seg.update_map = 0; - xd->seg.update_data = 0; + seg->update_map = 0; + seg->update_data = 0; } } } @@ -718,7 +720,7 @@ sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->comp_inter_joint_search_thresh = BLOCK_SIZE_AB4X4; + sf->comp_inter_joint_search_thresh = BLOCK_4X4; sf->adaptive_rd_thresh = 0; sf->use_lastframe_partitioning = 0; sf->tx_size_search_method = USE_FULL_RD; @@ -734,9 +736,9 @@ sf->auto_min_max_partition_size = 0; sf->auto_min_max_partition_interval = 0; sf->auto_min_max_partition_count = 0; - sf->use_max_partition_size = 0; + // sf->use_max_partition_size = 0; sf->max_partition_size = BLOCK_64X64; - sf->use_min_partition_size = 0; + // sf->use_min_partition_size = 0; sf->min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; sf->last_partitioning_redo_frequency = 4; @@ -748,8 +750,8 @@ sf->use_uv_intra_rd_estimate = 0; sf->using_small_partition_info = 0; // Skip any mode not chosen at size < X for all sizes > X - // Hence BLOCK_SIZE_SB64X64 (skip is off) - sf->unused_mode_skip_lvl = BLOCK_SIZE_SB64X64; + // Hence BLOCK_64X64 (skip is off) + sf->unused_mode_skip_lvl = BLOCK_64X64; #if CONFIG_MULTIPLE_ARF // Switch segmentation off. @@ -785,7 +787,7 @@ cpi->common.show_frame == 0); sf->disable_splitmv = (MIN(cpi->common.width, cpi->common.height) >= 720)? 
1 : 0; - sf->unused_mode_skip_lvl = BLOCK_SIZE_SB32X32; + sf->unused_mode_skip_lvl = BLOCK_32X32; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA; @@ -795,8 +797,8 @@ sf->auto_mv_step_size = 1; sf->auto_min_max_partition_size = 1; - sf->use_max_partition_size = 1; - sf->use_min_partition_size = 1; + // sf->use_max_partition_size = 1; + // sf->use_min_partition_size = 1; sf->auto_min_max_partition_interval = 1; } if (speed == 2) { @@ -807,7 +809,7 @@ sf->use_lastframe_partitioning = 1; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; - sf->unused_mode_skip_lvl = BLOCK_SIZE_SB32X32; + sf->unused_mode_skip_lvl = BLOCK_32X32; sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || cpi->common.intra_only || cpi->common.show_frame == 0) ? @@ -846,7 +848,7 @@ if (speed == 4) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; sf->use_one_partition_size_always = 1; - sf->always_this_block_size = BLOCK_SIZE_MB16X16; + sf->always_this_block_size = BLOCK_16X16; sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME || cpi->common.intra_only || cpi->common.show_frame == 0) ? 
@@ -867,15 +869,15 @@ /* if (speed == 2) { sf->first_step = 0; - sf->comp_inter_joint_search_thresh = BLOCK_SIZE_SB8X8; + sf->comp_inter_joint_search_thresh = BLOCK_8X8; sf->use_max_partition_size = 1; - sf->max_partition_size = BLOCK_SIZE_MB16X16; + sf->max_partition_size = BLOCK_16X16; } if (speed == 3) { sf->first_step = 0; - sf->comp_inter_joint_search_thresh = BLOCK_SIZE_SB8X8; + sf->comp_inter_joint_search_thresh = BLOCK_B8X8; sf->use_min_partition_size = 1; - sf->min_partition_size = BLOCK_SIZE_SB8X8; + sf->min_partition_size = BLOCK_8X8; } */ @@ -2415,8 +2417,9 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { MACROBLOCKD *xd = &cpi->mb.e_mbd; + struct loopfilter *lf = &xd->lf; if (xd->lossless) { - xd->lf.filter_level = 0; + lf->filter_level = 0; } else { struct vpx_usec_timer timer; @@ -2430,9 +2433,9 @@ cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); } - if (xd->lf.filter_level > 0) { - vp9_set_alt_lf_level(cpi, xd->lf.filter_level); - vp9_loop_filter_frame(cm, xd, xd->lf.filter_level, 0); + if (lf->filter_level > 0) { + vp9_set_alt_lf_level(cpi, lf->filter_level); + vp9_loop_filter_frame(cm, xd, lf->filter_level, 0); } vp9_extend_frame_inner_borders(cm->frame_to_show, @@ -2522,6 +2525,7 @@ SPEED_FEATURES *sf = &cpi->sf; unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height); + struct segmentation *seg = &xd->seg; #if RESET_FOREACH_FILTER int q_low0; int q_high0; @@ -2621,9 +2625,9 @@ setup_features(cpi); // If segmentation is enabled force a map update for key frames - if (xd->seg.enabled) { - xd->seg.update_map = 1; - xd->seg.update_data = 1; + if (seg->enabled) { + seg->update_map = 1; + seg->update_data = 1; } // The alternate reference frame cannot be active for a key frame @@ -4002,7 +4006,7 @@ unsigned int threshold[MAX_SEGMENTS]) { VP9_COMP *cpi = (VP9_COMP *) comp; signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS]; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + struct segmentation *seg = &cpi->mb.e_mbd.seg; int i; if 
(cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) @@ -4029,14 +4033,14 @@ // Enable the loop and quant changes in the feature mask for (i = 0; i < MAX_SEGMENTS; i++) { if (delta_q[i]) - vp9_enable_segfeature(&xd->seg, i, SEG_LVL_ALT_Q); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); else - vp9_disable_segfeature(&xd->seg, i, SEG_LVL_ALT_Q); + vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q); if (delta_lf[i]) - vp9_enable_segfeature(&xd->seg, i, SEG_LVL_ALT_LF); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF); else - vp9_disable_segfeature(&xd->seg, i, SEG_LVL_ALT_LF); + vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF); } // Initialise the feature data structure
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index f4d8db4..4529ff0 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h
@@ -261,10 +261,10 @@ int auto_min_max_partition_size; int auto_min_max_partition_interval; int auto_min_max_partition_count; - int use_min_partition_size; BLOCK_SIZE_TYPE min_partition_size; - int use_max_partition_size; BLOCK_SIZE_TYPE max_partition_size; + // int use_min_partition_size; // not used in code + // int use_max_partition_size; int adjust_partitioning_from_last_frame; int last_partitioning_redo_frequency; int disable_splitmv;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 4e96210..f9c8f6e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c
@@ -453,7 +453,7 @@ int *out_rate_sum, int64_t *out_dist_sum, int *out_skip) { int t = 4, j, k; - BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4; + BLOCK_SIZE_TYPE bs = BLOCK_4X4; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; const int width = plane_block_width(bsize, pd); @@ -525,7 +525,7 @@ { 1, 2, 3, 4, 11, 1024 - 21, 0 }, }; -static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, +static INLINE int cost_coeffs(MACROBLOCK *mb, int plane, int block, PLANE_TYPE type, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, TX_SIZE tx_size, @@ -646,7 +646,7 @@ txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx, &y_idx); - args->rate += cost_coeffs(args->cm, args->x, plane, block, + args->rate += cost_coeffs(args->x, plane, block, xd->plane[plane].plane_type, args->t_above + x_idx, args->t_left + y_idx, args->tx_size, args->scan, args->nb); @@ -838,7 +838,7 @@ int64_t ref_best_rd, BLOCK_SIZE_TYPE bs) { const TX_SIZE max_txfm_size = TX_32X32 - - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16); + - (bs < BLOCK_32X32) - (bs < BLOCK_16X16); VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; @@ -866,10 +866,10 @@ int (*r)[2], int *rate, int64_t *d, int64_t *distortion, int *s, int *skip, - int64_t txfm_cache[TX_MODES], + int64_t tx_cache[TX_MODES], BLOCK_SIZE_TYPE bs) { - const TX_SIZE max_txfm_size = TX_32X32 - - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16); + const TX_SIZE max_tx_size = TX_32X32 + - (bs < BLOCK_32X32) - (bs < BLOCK_16X16); VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; @@ -880,11 +880,11 @@ const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs); - for (n = TX_4X4; n <= max_txfm_size; n++) { + for (n = TX_4X4; n <= max_tx_size; n++) { r[n][1] = r[n][0]; if (r[n][0] == INT_MAX) continue; - for (m = 0; m <= n 
- (n == max_txfm_size); m++) { + for (m = 0; m <= n - (n == max_tx_size); m++) { if (m == n) r[n][1] += vp9_cost_zero(tx_probs[m]); else @@ -896,7 +896,7 @@ s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); - for (n = TX_4X4; n <= max_txfm_size; n++) { + for (n = TX_4X4; n <= max_tx_size; n++) { if (d[n] == INT64_MAX) { rd[n][0] = rd[n][1] = INT64_MAX; continue; @@ -909,13 +909,13 @@ } } - if (max_txfm_size == TX_32X32 && + if (max_tx_size == TX_32X32 && (cm->tx_mode == ALLOW_32X32 || (cm->tx_mode == TX_MODE_SELECT && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_32X32; - } else if (max_txfm_size >= TX_16X16 && + } else if (max_tx_size >= TX_16X16 && (cm->tx_mode == ALLOW_16X16 || cm->tx_mode == ALLOW_32X32 || (cm->tx_mode == TX_MODE_SELECT && @@ -935,34 +935,34 @@ *rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT]; *skip = s[mbmi->txfm_size]; - txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; - txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; - txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0]; - txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0]; - if (max_txfm_size == TX_32X32 && + tx_cache[ONLY_4X4] = rd[TX_4X4][0]; + tx_cache[ALLOW_8X8] = rd[TX_8X8][0]; + tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0]; + tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0]; + if (max_tx_size == TX_32X32 && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) - txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; - else if (max_txfm_size >= TX_16X16 && + tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; + else if (max_tx_size >= TX_16X16 && rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) - txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; + tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; else - txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? 
+ tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? rd[TX_4X4][1] : rd[TX_8X8][1]; - if (max_txfm_size == TX_32X32 && + if (max_tx_size == TX_32X32 && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) { cpi->txfm_stepdown_count[0]++; - } else if (max_txfm_size >= TX_16X16 && + } else if (max_tx_size >= TX_16X16 && rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++; + cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++; } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++; + cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++; } else { - cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++; + cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++; } } @@ -974,7 +974,7 @@ BLOCK_SIZE_TYPE bs, int *model_used) { const TX_SIZE max_txfm_size = TX_32X32 - - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16); + - (bs < BLOCK_32X32) - (bs < BLOCK_16X16); VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; @@ -1098,48 +1098,46 @@ if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER && mbmi->ref_frame[0] > INTRA_FRAME) { int model_used[TX_SIZES] = {1, 1, 1, 1}; - if (bs >= BLOCK_SIZE_SB32X32) { - if (model_used[TX_32X32]) { + if (bs >= BLOCK_32X32) { + if (model_used[TX_32X32]) model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); - } else { + else super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], &sse[TX_32X32], INT64_MAX, bs, TX_32X32); - } } - if (bs >= BLOCK_SIZE_MB16X16) { - if (model_used[TX_16X16]) { + if (bs >= BLOCK_16X16) { + if (model_used[TX_16X16]) model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); - } else { + else super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 
&sse[TX_16X16], INT64_MAX, bs, TX_16X16); - } } - if (model_used[TX_8X8]) { + if (model_used[TX_8X8]) model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); - } else { + else super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], &sse[TX_8X8], INT64_MAX, bs, TX_8X8); - } - if (model_used[TX_4X4]) { + + if (model_used[TX_4X4]) model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]); - } else { + else super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], &sse[TX_4X4], INT64_MAX, bs, TX_4X4); - } + choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s, skip, sse, ref_best_rd, bs, model_used); } else { - if (bs >= BLOCK_SIZE_SB32X32) + if (bs >= BLOCK_32X32) super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], &sse[TX_32X32], ref_best_rd, bs, TX_32X32); - if (bs >= BLOCK_SIZE_MB16X16) + if (bs >= BLOCK_16X16) super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], &sse[TX_16X16], ref_best_rd, bs, TX_16X16); @@ -1188,7 +1186,6 @@ int64_t best_rd = rd_thresh; int rate = 0; int64_t distortion; - VP9_COMMON *const cm = &cpi->common; struct macroblock_plane *p = &x->plane[0]; struct macroblockd_plane *pd = &xd->plane[0]; const int src_stride = p->src.stride; @@ -1202,8 +1199,8 @@ ENTROPY_CONTEXT ta[2], tempa[2]; ENTROPY_CONTEXT tl[2], templ[2]; TX_TYPE tx_type = DCT_DCT; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy, block; uint8_t best_dst[8 * 8]; @@ -1260,7 +1257,7 @@ } scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block)); - ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC, + ratey += cost_coeffs(x, 0, block, PLANE_TYPE_Y_WITH_DC, tempa + idx, templ + idy, TX_4X4, scan, 
vp9_get_coef_neighbors_handle(scan)); distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, @@ -1307,17 +1304,20 @@ return best_rd; } -static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, - int *Rate, int *rate_y, - int64_t *Distortion, int64_t best_rd) { +static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, + MACROBLOCK * const mb, + int * const rate, + int * const rate_y, + int64_t * const distortion, + int64_t best_rd) { int i, j; MACROBLOCKD *const xd = &mb->e_mbd; BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; int cost = 0; - int64_t distortion = 0; + int64_t total_distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; @@ -1355,7 +1355,7 @@ total_rd += this_rd; cost += r; - distortion += d; + total_distortion += d; tot_rate_y += ry; mic->bmi[i].as_mode = best_mode; @@ -1369,19 +1369,19 @@ } } - *Rate = cost; + *rate = cost; *rate_y = tot_rate_y; - *Distortion = distortion; + *distortion = total_distortion; xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode; - return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); + return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); } static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE_TYPE bsize, - int64_t txfm_cache[TX_MODES], + int64_t tx_cache[TX_MODES], int64_t best_rd) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); @@ -1392,14 +1392,13 @@ int i; int *bmode_costs = x->mbmode_cost; - if (cpi->sf.tx_size_search_method == USE_FULL_RD) { + if (cpi->sf.tx_size_search_method == USE_FULL_RD) for (i = 0; i < 
TX_MODES; i++) - txfm_cache[i] = INT64_MAX; - } + tx_cache[i] = INT64_MAX; /* Y Search for intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int64_t local_txfm_cache[TX_MODES]; + int64_t local_tx_cache[TX_MODES]; MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; @@ -1413,7 +1412,7 @@ x->e_mbd.mode_info_context->mbmi.mode = mode; super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, - bsize, local_txfm_cache, best_rd); + bsize, local_tx_cache, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -1433,10 +1432,10 @@ if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) { for (i = 0; i < TX_MODES; i++) { - int64_t adj_rd = this_rd + local_txfm_cache[i] - - local_txfm_cache[cpi->common.tx_mode]; - if (adj_rd < txfm_cache[i]) { - txfm_cache[i] = adj_rd; + const int64_t adj_rd = this_rd + local_tx_cache[i] - + local_tx_cache[cpi->common.tx_mode]; + if (adj_rd < tx_cache[i]) { + tx_cache[i] = adj_rd; } } } @@ -1599,8 +1598,8 @@ MB_MODE_INFO * mbmi = &mic->mbmi; int cost = 0, thismvcost = 0; int idx, idy; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; /* We have to be careful retrieving previously-encoded motion vectors. 
Ones from this macroblock have to be pulled from the BLOCKD array @@ -1613,12 +1612,11 @@ case NEWMV: this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, - 102, xd->allow_high_precision_mv); + 102); if (mbmi->ref_frame[1] > 0) { this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int; thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, - mvjcost, mvcost, 102, - xd->allow_high_precision_mv); + mvjcost, mvcost, 102); } break; case NEARESTMV: @@ -1668,11 +1666,12 @@ ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl) { int k; - VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; - const int width = plane_block_width(bsize, &xd->plane[0]); - const int height = plane_block_height(bsize, &xd->plane[0]); + struct macroblockd_plane *const pd = &xd->plane[0]; + MODE_INFO *const mi = xd->mode_info_context; + const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type; + const int width = plane_block_width(bsize, pd); + const int height = plane_block_height(bsize, pd); int idx, idy; const int src_stride = x->plane[0].src.stride; uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, @@ -1682,39 +1681,33 @@ x->plane[0].src_diff); int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].pre[0].buf, - xd->plane[0].pre[0].stride); + pd->pre[0].buf, + pd->pre[0].stride); uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); + pd->dst.buf, + pd->dst.stride); int64_t thisdistortion = 0, thissse = 0; int thisrate = 0; - vp9_build_inter_predictor(pre, - xd->plane[0].pre[0].stride, - dst, - xd->plane[0].dst.stride, - &xd->mode_info_context->bmi[i].as_mv[0], + vp9_build_inter_predictor(pre, pd->pre[0].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[0].as_mv, 
&xd->scale_factor[0], - width, height, 0, &xd->subpix, - MV_PRECISION_Q3); + width, height, 0, &xd->subpix, MV_PRECISION_Q3); - if (xd->mode_info_context->mbmi.ref_frame[1] > 0) { + if (mi->mbmi.ref_frame[1] > 0) { uint8_t* const second_pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].pre[1].buf, - xd->plane[0].pre[1].stride); - vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, - dst, xd->plane[0].dst.stride, - &xd->mode_info_context->bmi[i].as_mv[1], + pd->pre[1].buf, pd->pre[1].stride); + vp9_build_inter_predictor(second_pre, pd->pre[1].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[1].as_mv, &xd->scale_factor[1], - width, height, 1, - &xd->subpix, MV_PRECISION_Q3); + width, height, 1, &xd->subpix, MV_PRECISION_Q3); } - vp9_subtract_block(height, width, src_diff, 8, - src, src_stride, - dst, xd->plane[0].dst.stride); + vp9_subtract_block(height, width, src_diff, 8, src, src_stride, + dst, pd->dst.stride); k = i; for (idy = 0; idy < height / 4; ++idy) { @@ -1727,11 +1720,10 @@ coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); x->fwd_txm4x4(src_diff, coeff, 16); x->quantize_b_4x4(x, k, DCT_DCT, 16); - thisdistortion += vp9_block_error(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, - k, 16), 16, &ssz); + thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k, 16), + 16, &ssz); thissse += ssz; - thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, + thisrate += cost_coeffs(x, 0, k, PLANE_TYPE_Y_WITH_DC, ta + (k & 1), tl + (k >> 1), TX_4X4, vp9_default_scan_4x4, @@ -1826,8 +1818,8 @@ int label_mv_thresh; int segmentyrate = 0; BLOCK_SIZE_TYPE bsize = mbmi->sb_type; - int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; vp9_variance_fn_ptr_t *v_fn_ptr; ENTROPY_CONTEXT t_above[2], t_left[2]; BEST_SEG_INFO 
*bsi = bsi_buf + filter_idx; @@ -1861,12 +1853,12 @@ vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, &frame_mv[NEARESTMV][mbmi->ref_frame[0]], &frame_mv[NEARMV][mbmi->ref_frame[0]], - i, 0); + i, 0, mi_row, mi_col); if (mbmi->ref_frame[1] > 0) vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, &frame_mv[NEARESTMV][mbmi->ref_frame[1]], &frame_mv[NEARMV][mbmi->ref_frame[1]], - i, 1); + i, 1, mi_row, mi_col); // search for the best motion vector on this segment for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { @@ -1974,7 +1966,7 @@ // Should we do a full search (best quality only) if (cpi->compressor_speed == 0) { /* Check if mvp_full is within the range. */ - clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, + clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, &mvp_full, @@ -2348,7 +2340,7 @@ int_mv *ref_mv, int_mv *second_ref_mv, int64_t comp_pred_diff[NB_PREDICTION_TYPES], - int64_t txfm_size_diff[TX_MODES], + int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) { MACROBLOCKD *const xd = &x->e_mbd; @@ -2370,7 +2362,7 @@ // FIXME(rbultje) does this memcpy the whole array? 
I believe sizeof() // doesn't actually work this way - memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); + memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); memcpy(ctx->best_filter_diff, best_filter_diff, sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1)); } @@ -2434,7 +2426,7 @@ xd->prev_mode_info_context, frame_type, mbmi->ref_mvs[frame_type], - cpi->common.ref_frame_sign_bias); + cpi->common.ref_frame_sign_bias, mi_row, mi_col); // Candidate refinement carried out at encoder and decoder vp9_find_best_ref_mvs(xd, @@ -2459,7 +2451,7 @@ return scaled_ref_frame; } -static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) { +static INLINE int get_switchable_rate(MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; @@ -2565,7 +2557,7 @@ } *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, x->nmvjointcost, x->mvcost, - 96, xd->allow_high_precision_mv); + 96); if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) @@ -2653,7 +2645,7 @@ vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, - &frame_mv[refs[!id]], + &frame_mv[refs[!id]].as_mv, &xd->scale_factor[!id], pw, ph, 0, &xd->subpix, MV_PRECISION_Q3); @@ -2720,12 +2712,10 @@ } *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], &mbmi->ref_mvs[refs[0]][0], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); + x->nmvjointcost, x->mvcost, 96); *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], &mbmi->ref_mvs[refs[1]][0], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); + x->nmvjointcost, x->mvcost, 96); vpx_free(second_pred); } @@ -2778,12 +2768,10 @@ } else { rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], &mbmi->ref_mvs[refs[0]][0], - x->nmvjointcost, x->mvcost, 96, - x->e_mbd.allow_high_precision_mv); + x->nmvjointcost, x->mvcost, 96); rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], &mbmi->ref_mvs[refs[1]][0], - x->nmvjointcost, x->mvcost, 96, - 
x->e_mbd.allow_high_precision_mv); + x->nmvjointcost, x->mvcost, 96); } if (frame_mv[refs[0]].as_int == INVALID_MV || frame_mv[refs[1]].as_int == INVALID_MV) @@ -2838,10 +2826,8 @@ for (i = 0; i < num_refs; ++i) { cur_mv[i] = frame_mv[refs[i]]; // Clip "next_nearest" so that it does not extend to far out of image - if (this_mode == NEWMV) - assert(!clamp_mv2(&cur_mv[i], xd)); - else - clamp_mv2(&cur_mv[i], xd); + if (this_mode != NEWMV) + clamp_mv2(&cur_mv[i].as_mv, xd); if (mv_check_bounds(x, &cur_mv[i])) return INT64_MAX; @@ -2900,7 +2886,7 @@ const int is_intpel_interp = intpel_mv; mbmi->interp_filter = filter; vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - rs = get_switchable_rate(cm, x); + rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (interpolating_intpel_seen && is_intpel_interp) { @@ -2981,7 +2967,7 @@ mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? cm->mcomp_filter_type : *best_filter; vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0); + rs = cm->mcomp_filter_type == SWITCHABLE ? 
get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -3015,7 +3001,7 @@ } if (cpi->common.mcomp_filter_type == SWITCHABLE) - *rate2 += get_switchable_rate(cm, x); + *rate2 += get_switchable_rate(x); if (!is_comp_pred) { if (cpi->active_map_enabled && x->active_ptr[0] == 0) @@ -3142,15 +3128,13 @@ MACROBLOCKD *const xd = &x->e_mbd; int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; int y_skip = 0, uv_skip; - int64_t dist_y = 0, dist_uv = 0, txfm_cache[TX_MODES]; - + int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 }; x->skip_encode = 0; - vpx_memset(&txfm_cache, 0, sizeof(txfm_cache)); ctx->skip = 0; xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME; if (bsize >= BLOCK_SIZE_SB8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip, bsize, txfm_cache, + &dist_y, &y_skip, bsize, tx_cache, best_rd) >= best_rd) { *returnrate = INT_MAX; return; @@ -3159,8 +3143,8 @@ &dist_uv, &uv_skip, bsize); } else { y_skip = 0; - if (rd_pick_intra4x4mby_modes(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, best_rd) >= best_rd) { + if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, + &dist_y, best_rd) >= best_rd) { *returnrate = INT_MAX; return; } @@ -3172,17 +3156,15 @@ *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); *returndist = dist_y + (dist_uv >> 2); - memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); + vp9_zero(ctx->tx_rd_diff); } else { int i; *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0); *returndist = dist_y + (dist_uv >> 2); - if (cpi->sf.tx_size_search_method == USE_FULL_RD) { - for (i = 0; i < TX_MODES; i++) { - ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->tx_mode]; - } - } + if (cpi->sf.tx_size_search_method == USE_FULL_RD) + for (i = 0; i < TX_MODES; i++) + ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode]; } ctx->mic = 
*xd->mode_info_context; @@ -3198,6 +3180,7 @@ VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const struct segmentation *seg = &xd->seg; const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; @@ -3214,8 +3197,8 @@ cpi->alt_fb_idx}; int64_t best_rd = best_rd_so_far; int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise - int64_t best_txfm_rd[TX_MODES]; - int64_t best_txfm_diff[TX_MODES]; + int64_t best_tx_rd[TX_MODES]; + int64_t best_tx_diff[TX_MODES]; int64_t best_pred_diff[NB_PREDICTION_TYPES]; int64_t best_pred_rd[NB_PREDICTION_TYPES]; int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1]; @@ -3271,7 +3254,7 @@ for (i = 0; i < NB_PREDICTION_TYPES; ++i) best_pred_rd[i] = INT64_MAX; for (i = 0; i < TX_MODES; i++) - best_txfm_rd[i] = INT64_MAX; + best_tx_rd[i] = INT64_MAX; for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) best_filter_rd[i] = INT64_MAX; for (i = 0; i < TX_SIZES; i++) @@ -3330,14 +3313,14 @@ int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; int skippable; - int64_t txfm_cache[TX_MODES]; + int64_t tx_cache[TX_MODES]; int i; int this_skip2 = 0; int64_t total_sse = INT_MAX; int early_term = 0; for (i = 0; i < TX_MODES; ++i) - txfm_cache[i] = INT64_MAX; + tx_cache[i] = INT64_MAX; x->skip = 0; this_mode = vp9_mode_order[mode_index].mode; @@ -3363,7 +3346,7 @@ // Do not allow compound prediction if the segment level reference // frame feature is in use as in this case there can only be one reference. if ((second_ref_frame > INTRA_FRAME) && - vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) + vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; // Skip some checking based on small partitions' result. @@ -3453,7 +3436,7 @@ } } - // Select predictors + // Select prediction reference frames. 
for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; if (comp_pred) @@ -3462,19 +3445,19 @@ // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. - if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME) && - vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) != + if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && + vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; // If the segment skip feature is enabled.... // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP) && + } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) && (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) { continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. - } else if (!vp9_segfeature_active(&xd->seg, segment_id, + } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, // unless ARNR filtering is enabled in which case we want @@ -3510,9 +3493,10 @@ continue; */ + // I4X4_PRED is only considered for block sizes less than 8x8. 
mbmi->txfm_size = TX_4X4; - if (rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, - &distortion_y, best_rd) >= best_rd) + if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, + &distortion_y, best_rd) >= best_rd) continue; rate2 += rate; rate2 += intra_cost_penalty; @@ -3529,9 +3513,9 @@ distortion2 += dist_uv[TX_4X4]; distortion_uv = dist_uv[TX_4X4]; mbmi->uv_mode = mode_uv[TX_4X4]; - txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); for (i = 0; i < TX_MODES; ++i) - txfm_cache[i] = txfm_cache[ONLY_4X4]; + tx_cache[i] = tx_cache[ONLY_4X4]; } else if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; // Only search the oblique modes if the best so far is @@ -3546,7 +3530,7 @@ continue; } super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, - bsize, txfm_cache, best_rd); + bsize, tx_cache, best_rd); if (rate_y == INT_MAX) continue; @@ -3626,7 +3610,7 @@ if (tmp_rd == INT64_MAX) continue; cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; - rs = get_switchable_rate(cm, x); + rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd); @@ -3688,7 +3672,7 @@ continue; } else { if (cpi->common.mcomp_filter_type == SWITCHABLE) { - int rs = get_switchable_rate(cm, x); + int rs = get_switchable_rate(x); tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); } tmp_rd = tmp_best_rdu; @@ -3707,7 +3691,7 @@ distortion2 += distortion; if (cpi->common.mcomp_filter_type == SWITCHABLE) - rate2 += get_switchable_rate(cm, x); + rate2 += get_switchable_rate(x); if (!mode_excluded) { if (is_comp_pred) @@ -3732,14 +3716,14 @@ skippable = skippable && uv_skippable; total_sse += uv_sse; - txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); for (i = 0; i < TX_MODES; ++i) 
- txfm_cache[i] = txfm_cache[ONLY_4X4]; + tx_cache[i] = tx_cache[ONLY_4X4]; } } else { compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME); this_rd = handle_inter_mode(cpi, x, bsize, - txfm_cache, + tx_cache, &rate2, &distortion2, &skippable, &rate_y, &distortion_y, &rate_uv, &distortion_uv, @@ -3769,7 +3753,7 @@ // necessary adjustment for rate. Ignore if skip is coded at // segment level as the cost wont have been added in. // Is Mb level skip allowed (i.e. not coded at segment level). - const int mb_skip_allowed = !vp9_segfeature_active(&xd->seg, segment_id, + const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP); if (skippable && bsize >= BLOCK_SIZE_SB8X8) { @@ -3960,25 +3944,25 @@ } /* keep record of best txfm size */ - if (bsize < BLOCK_SIZE_SB32X32) { - if (bsize < BLOCK_SIZE_MB16X16) { + if (bsize < BLOCK_32X32) { + if (bsize < BLOCK_16X16) { if (this_mode == SPLITMV || this_mode == I4X4_PRED) - txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4]; - txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8]; + tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4]; + tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8]; } - txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16]; + tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16]; } if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < TX_MODES; i++) { int64_t adj_rd = INT64_MAX; if (this_mode != I4X4_PRED) { - adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->tx_mode]; + adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode]; } else { adj_rd = this_rd; } - if (adj_rd < best_txfm_rd[i]) - best_txfm_rd[i] = adj_rd; + if (adj_rd < best_tx_rd[i]) + best_tx_rd[i] = adj_rd; } } @@ -4047,7 +4031,7 @@ (cm->mcomp_filter_type == best_mbmode.interp_filter) || (best_mbmode.ref_frame[0] == INTRA_FRAME)); - // Updating rd_thresh_freq_fact[] here means that the differnt + // Updating rd_thresh_freq_fact[] here means that the different // partition/block sizes are handled independently based on the best // choice 
for the current partition. It may well be better to keep a scaled // best rd so far value and update rd_thresh_freq_fact based on the mode/size @@ -4130,13 +4114,13 @@ if (!x->skip) { for (i = 0; i < TX_MODES; i++) { - if (best_txfm_rd[i] == INT64_MAX) - best_txfm_diff[i] = 0; + if (best_tx_rd[i] == INT64_MAX) + best_tx_diff[i] = 0; else - best_txfm_diff[i] = best_rd - best_txfm_rd[i]; + best_tx_diff[i] = best_rd - best_tx_rd[i]; } } else { - vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); + vpx_memset(best_tx_diff, 0, sizeof(best_tx_diff)); } set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], @@ -4146,7 +4130,7 @@ &mbmi->ref_mvs[mbmi->ref_frame[0]][0], &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]][0], - best_pred_diff, best_txfm_diff, best_filter_diff); + best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; }
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index ef84cc5..9564edc 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c
@@ -57,8 +57,7 @@ } // Based on set of segment counts calculate a probability tree -static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts, - vp9_prob *segment_tree_probs) { +static void calc_segtree_probs(int *segcounts, vp9_prob *segment_tree_probs) { // Work out probabilities of each segment const int c01 = segcounts[0] + segcounts[1]; const int c23 = segcounts[2] + segcounts[3]; @@ -75,7 +74,7 @@ } // Based on set of segment counts and probabilities calculate a cost estimate -static int cost_segmap(MACROBLOCKD *xd, int *segcounts, vp9_prob *probs) { +static int cost_segmap(int *segcounts, vp9_prob *probs) { const int c01 = segcounts[0] + segcounts[1]; const int c23 = segcounts[2] + segcounts[3]; const int c45 = segcounts[4] + segcounts[5]; @@ -189,13 +188,13 @@ int n; assert(bwl < bsl && bhl < bsl); - if (bsize == BLOCK_SIZE_SB64X64) { - subsize = BLOCK_SIZE_SB32X32; - } else if (bsize == BLOCK_SIZE_SB32X32) { - subsize = BLOCK_SIZE_MB16X16; + if (bsize == BLOCK_64X64) { + subsize = BLOCK_32X32; + } else if (bsize == BLOCK_32X32) { + subsize = BLOCK_16X16; } else { - assert(bsize == BLOCK_SIZE_MB16X16); - subsize = BLOCK_SIZE_SB8X8; + assert(bsize == BLOCK_16X16); + subsize = BLOCK_8X8; } for (n = 0; n < 4; n++) { @@ -211,7 +210,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + struct segmentation *seg = &cpi->mb.e_mbd.seg; int no_pred_cost; int t_pred_cost = INT_MAX; @@ -231,8 +230,8 @@ // Set default state for the segment tree probabilities and the // temporal coding probabilities - vpx_memset(xd->seg.tree_probs, 255, sizeof(xd->seg.tree_probs)); - vpx_memset(xd->seg.pred_probs, 255, sizeof(xd->seg.pred_probs)); + vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); + vpx_memset(seg->pred_probs, 255, sizeof(seg->pred_probs)); vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts)); vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts)); @@ 
-249,21 +248,21 @@ for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; mi_col += 8, mi += 8) count_segs_sb(cpi, mi, no_pred_segcounts, temporal_predictor_count, - t_unpred_seg_counts, mi_row, mi_col, BLOCK_SIZE_SB64X64); + t_unpred_seg_counts, mi_row, mi_col, BLOCK_64X64); } } // Work out probability tree for coding segments without prediction // and the cost. - calc_segtree_probs(xd, no_pred_segcounts, no_pred_tree); - no_pred_cost = cost_segmap(xd, no_pred_segcounts, no_pred_tree); + calc_segtree_probs(no_pred_segcounts, no_pred_tree); + no_pred_cost = cost_segmap(no_pred_segcounts, no_pred_tree); // Key frames cannot use temporal prediction if (cm->frame_type != KEY_FRAME) { // Work out probability tree for coding those segments not // predicted using the temporal method and the cost. - calc_segtree_probs(xd, t_unpred_seg_counts, t_pred_tree); - t_pred_cost = cost_segmap(xd, t_unpred_seg_counts, t_pred_tree); + calc_segtree_probs(t_unpred_seg_counts, t_pred_tree); + t_pred_cost = cost_segmap(t_unpred_seg_counts, t_pred_tree); // Add in the cost of the signalling for each prediction context for (i = 0; i < PREDICTION_PROBS; i++) { @@ -280,11 +279,11 @@ // Now choose which coding method to use. if (t_pred_cost < no_pred_cost) { - xd->seg.temporal_update = 1; - vpx_memcpy(xd->seg.tree_probs, t_pred_tree, sizeof(t_pred_tree)); - vpx_memcpy(xd->seg.pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); + seg->temporal_update = 1; + vpx_memcpy(seg->tree_probs, t_pred_tree, sizeof(t_pred_tree)); + vpx_memcpy(seg->pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); } else { - xd->seg.temporal_update = 0; - vpx_memcpy(xd->seg.tree_probs, no_pred_tree, sizeof(no_pred_tree)); + seg->temporal_update = 0; + vpx_memcpy(seg->tree_probs, no_pred_tree, sizeof(no_pred_tree)); } }
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 821b7c6..a692c01 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c
@@ -40,10 +40,7 @@ int mv_col, uint8_t *pred) { const int which_mv = 0; - int_mv mv; - - mv.as_mv.row = mv_row; - mv.as_mv.col = mv_col; + MV mv = { mv_row, mv_col }; vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16,
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 27831aa..caa89b2 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c
@@ -40,7 +40,7 @@ static void fill_value_tokens() { TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; - vp9_extra_bit *const e = vp9_extra_bits; + const vp9_extra_bit *const e = vp9_extra_bits; int i = -DCT_MAX_VALUE; int sign = 1; @@ -69,7 +69,7 @@ // initialize the cost for extra bits for all possible coefficient value. { int cost = 0; - vp9_extra_bit *p = vp9_extra_bits + t[i].token; + const vp9_extra_bit *p = vp9_extra_bits + t[i].token; if (p->base_val) { const int extra = t[i].extra; @@ -110,12 +110,14 @@ ENTROPY_CONTEXT *A = xd->plane[plane].above_context + aoff; ENTROPY_CONTEXT *L = xd->plane[plane].left_context + loff; const int eob = xd->plane[plane].eobs[block]; + const int tx_size_in_blocks = 1 << tx_size; if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { - set_contexts_on_border(xd, bsize, plane, tx_size, eob, aoff, loff, A, L); + set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, eob, aoff, loff, + A, L); } else { - vpx_memset(A, eob > 0, sizeof(ENTROPY_CONTEXT) * (1 << tx_size)); - vpx_memset(L, eob > 0, sizeof(ENTROPY_CONTEXT) * (1 << tx_size)); + vpx_memset(A, eob > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + vpx_memset(L, eob > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); } } @@ -125,7 +127,8 @@ VP9_COMP *cpi = args->cpi; MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; - TX_SIZE tx_size = ss_txfrm_size >> 1; + const TX_SIZE tx_size = ss_txfrm_size >> 1; + const int tx_size_in_blocks = 1 << tx_size; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; /* near block/prev token context index */ int c = 0, rc = 0; @@ -145,7 +148,7 @@ const int16_t *scan, *nb; vp9_coeff_count *counts; vp9_coeff_probs_model *coef_probs; - const int ref = mbmi->ref_frame[0] != INTRA_FRAME; + const int ref = is_inter_block(mbmi); ENTROPY_CONTEXT above_ec, left_ec; uint8_t token_cache[1024]; const uint8_t *band_translate; @@ -225,10 +228,11 @@ *tp = t; if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { - 
set_contexts_on_border(xd, bsize, plane, tx_size, c, aoff, loff, A, L); + set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, c, aoff, loff, + A, L); } else { - vpx_memset(A, c > 0, sizeof(ENTROPY_CONTEXT) * (1 << tx_size)); - vpx_memset(L, c > 0, sizeof(ENTROPY_CONTEXT) * (1 << tx_size)); + vpx_memset(A, c > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + vpx_memset(L, c > 0, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); } }
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 5a0c1c9..b2b2a80 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk
@@ -74,7 +74,6 @@ VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm @@ -83,6 +82,10 @@ VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm endif +ifeq ($(USE_X86INC),yes) +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm +endif + VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c @@ -91,5 +94,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM) $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))