Implement ERP tool in a single patch

STATS_CHANGED for some changes outside config flags.

Change-Id: Ia41604d01d6dcc95d8c010b2bca5c368ed05fdec
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index 9e6b5b1..d1d2b9a 100755 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -107,6 +107,7 @@ specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/; specialize qw/aom_dc_left_predictor_8x16 sse2/; specialize qw/aom_dc_left_predictor_8x32 sse2/; + specialize qw/aom_dc_left_predictor_16x4 sse2/; specialize qw/aom_dc_left_predictor_16x8 sse2/; specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/; @@ -195,9 +196,6 @@ specialize qw/aom_paeth_predictor_64x32 ssse3 avx2/; specialize qw/aom_paeth_predictor_64x64 ssse3 avx2/; specialize qw/aom_paeth_predictor_64x16 ssse3 avx2/; -specialize qw/aom_paeth_predictor_16x8 ssse3/; -specialize qw/aom_paeth_predictor_16x16 ssse3/; -specialize qw/aom_paeth_predictor_16x32 ssse3/; specialize qw/aom_paeth_predictor_32x16 ssse3/; specialize qw/aom_paeth_predictor_32x32 ssse3/; specialize qw/aom_smooth_predictor_4x4 neon ssse3/; @@ -322,13 +320,13 @@ specialize qw/aom_highbd_dc_left_predictor_4x8 sse2/; specialize qw/aom_highbd_dc_top_predictor_4x8 sse2/; specialize qw/aom_highbd_dc_128_predictor_4x8 sse2/; -specialize qw/aom_highbd_dc_left_predictor_8x4 sse2/; specialize qw/aom_highbd_dc_top_predictor_8x4 sse2/; specialize qw/aom_highbd_dc_128_predictor_8x4 sse2/; +specialize qw/aom_highbd_dc_left_predictor_8x4 sse2/; specialize qw/aom_highbd_dc_left_predictor_8x8 sse2/; +specialize qw/aom_highbd_dc_left_predictor_8x16 sse2/; specialize qw/aom_highbd_dc_top_predictor_8x8 sse2/; specialize qw/aom_highbd_dc_128_predictor_8x8 sse2/; -specialize qw/aom_highbd_dc_left_predictor_8x16 sse2/; specialize qw/aom_highbd_dc_top_predictor_8x16 sse2/; specialize qw/aom_highbd_dc_128_predictor_8x16 sse2/; specialize qw/aom_highbd_dc_left_predictor_16x8 sse2/; @@ -666,6 +664,7 @@ specialize qw/aom_sad16x32 msa sse2/; specialize qw/aom_sad16x16 neon msa sse2/; specialize qw/aom_sad16x8 neon msa sse2/; + specialize qw/aom_sad16x4 sse2/; specialize qw/aom_sad8x16 neon msa sse2/; specialize qw/aom_sad8x8 neon msa sse2/; specialize qw/aom_sad8x4 msa sse2/; @@ -673,7 +672,6 @@ specialize qw/aom_sad4x4 neon msa sse2/; 
specialize qw/aom_sad4x16 sse2/; - specialize qw/aom_sad16x4 sse2/; specialize qw/aom_sad8x32 sse2/; specialize qw/aom_sad32x8 sse2/; specialize qw/aom_sad16x64 sse2/; @@ -711,6 +709,7 @@ specialize qw/aom_sad16x32_avg msa sse2/; specialize qw/aom_sad16x16_avg msa sse2/; specialize qw/aom_sad16x8_avg msa sse2/; + specialize qw/aom_sad16x4_avg sse2/; specialize qw/aom_sad8x16_avg msa sse2/; specialize qw/aom_sad8x8_avg msa sse2/; specialize qw/aom_sad8x4_avg msa sse2/; @@ -718,7 +717,6 @@ specialize qw/aom_sad4x4_avg msa sse2/; specialize qw/aom_sad4x16_avg sse2/; - specialize qw/aom_sad16x4_avg sse2/; specialize qw/aom_sad8x32_avg sse2/; specialize qw/aom_sad32x8_avg sse2/; specialize qw/aom_sad16x64_avg sse2/; @@ -894,9 +892,9 @@ specialize qw/aom_sad32x16x4d avx2 msa sse2/; specialize qw/aom_sad32x8x4d avx2 sse2/; specialize qw/aom_sad16x64x4d sse2/; - specialize qw/aom_sad16x32x4d msa sse2/; - specialize qw/aom_sad16x16x4d neon msa sse2/; - specialize qw/aom_sad16x8x4d msa sse2/; + specialize qw/aom_sad16x32x4d msa sse2/; + specialize qw/aom_sad16x8x4d msa sse2/; + specialize qw/aom_sad16x16x4d neon msa sse2/; specialize qw/aom_sad8x16x4d msa sse2/; specialize qw/aom_sad8x8x4d msa sse2/;
diff --git a/aom_dsp/x86/aom_convolve_copy_avx2.c b/aom_dsp/x86/aom_convolve_copy_avx2.c index 39c6a40..d3ac810 100644 --- a/aom_dsp/x86/aom_convolve_copy_avx2.c +++ b/aom_dsp/x86/aom_convolve_copy_avx2.c
@@ -27,7 +27,6 @@ void aom_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h) { if (w >= 16) { - assert(!((intptr_t)dst % 16)); assert(!(dst_stride % 16)); } @@ -71,9 +70,9 @@ src += src_stride; s[1] = _mm_loadu_si128((__m128i *)src); src += src_stride; - _mm_store_si128((__m128i *)dst, s[0]); + _mm_storeu_si128((__m128i *)dst, s[0]); dst += dst_stride; - _mm_store_si128((__m128i *)dst, s[1]); + _mm_storeu_si128((__m128i *)dst, s[1]); dst += dst_stride; h -= 2; } while (h); @@ -157,7 +156,6 @@ uint16_t *dst, ptrdiff_t dst_stride, int w, int h) { if (w >= 16) { - assert(!((intptr_t)dst % 16)); assert(!(dst_stride % 16)); } @@ -191,9 +189,9 @@ src += src_stride; s[1] = _mm_loadu_si128((__m128i *)src); src += src_stride; - _mm_store_si128((__m128i *)dst, s[0]); + _mm_storeu_si128((__m128i *)dst, s[0]); dst += dst_stride; - _mm_store_si128((__m128i *)dst, s[1]); + _mm_storeu_si128((__m128i *)dst, s[1]); dst += dst_stride; h -= 2; } while (h);
diff --git a/aom_dsp/x86/aom_convolve_copy_sse2.c b/aom_dsp/x86/aom_convolve_copy_sse2.c index f7b468a..9db91c3 100644 --- a/aom_dsp/x86/aom_convolve_copy_sse2.c +++ b/aom_dsp/x86/aom_convolve_copy_sse2.c
@@ -35,7 +35,6 @@ void aom_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h) { if (w >= 16) { - assert(!((intptr_t)dst % 16)); assert(!(dst_stride % 16)); } @@ -79,9 +78,9 @@ src += src_stride; s[1] = _mm_loadu_si128((__m128i *)src); src += src_stride; - _mm_store_si128((__m128i *)dst, s[0]); + _mm_storeu_si128((__m128i *)dst, s[0]); dst += dst_stride; - _mm_store_si128((__m128i *)dst, s[1]); + _mm_storeu_si128((__m128i *)dst, s[1]); dst += dst_stride; h -= 2; } while (h); @@ -200,7 +199,6 @@ uint16_t *dst, ptrdiff_t dst_stride, int w, int h) { if (w >= 16) { - assert(!((intptr_t)dst % 16)); assert(!(dst_stride % 16)); } @@ -236,9 +234,9 @@ src += src_stride; s[1] = _mm_loadu_si128((__m128i *)src); src += src_stride; - _mm_store_si128((__m128i *)dst, s[0]); + _mm_storeu_si128((__m128i *)dst, s[0]); dst += dst_stride; - _mm_store_si128((__m128i *)dst, s[1]); + _mm_storeu_si128((__m128i *)dst, s[1]); dst += dst_stride; h -= 2; } while (h); @@ -251,11 +249,11 @@ s[2] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); s[3] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); src += src_stride; - _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]); + _mm_storeu_si128((__m128i *)(dst + 0 * 8), s[0]); + _mm_storeu_si128((__m128i *)(dst + 1 * 8), s[1]); dst += dst_stride; - _mm_store_si128((__m128i *)(dst + 0 * 8), s[2]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[3]); + _mm_storeu_si128((__m128i *)(dst + 0 * 8), s[2]); + _mm_storeu_si128((__m128i *)(dst + 1 * 8), s[3]); dst += dst_stride; h -= 2; } while (h); @@ -272,15 +270,15 @@ s[6] = _mm_loadu_si128((__m128i *)(src + 2 * 8)); s[7] = _mm_loadu_si128((__m128i *)(src + 3 * 8)); src += src_stride; - _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]); - _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]); - _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]); + _mm_storeu_si128((__m128i 
*)(dst + 0 * 8), s[0]); + _mm_storeu_si128((__m128i *)(dst + 1 * 8), s[1]); + _mm_storeu_si128((__m128i *)(dst + 2 * 8), s[2]); + _mm_storeu_si128((__m128i *)(dst + 3 * 8), s[3]); dst += dst_stride; - _mm_store_si128((__m128i *)(dst + 0 * 8), s[4]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[5]); - _mm_store_si128((__m128i *)(dst + 2 * 8), s[6]); - _mm_store_si128((__m128i *)(dst + 3 * 8), s[7]); + _mm_storeu_si128((__m128i *)(dst + 0 * 8), s[4]); + _mm_storeu_si128((__m128i *)(dst + 1 * 8), s[5]); + _mm_storeu_si128((__m128i *)(dst + 2 * 8), s[6]); + _mm_storeu_si128((__m128i *)(dst + 3 * 8), s[7]); dst += dst_stride; h -= 2; } while (h);
diff --git a/aom_dsp/x86/highbd_intrapred_asm_sse2.asm b/aom_dsp/x86/highbd_intrapred_asm_sse2.asm index 91b3d12..a7aa55e 100644 --- a/aom_dsp/x86/highbd_intrapred_asm_sse2.asm +++ b/aom_dsp/x86/highbd_intrapred_asm_sse2.asm
@@ -65,15 +65,15 @@ psrlw m0, 4 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 - mova [dstq ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+stride3q*2], m0 + movu [dstq ], m0 + movu [dstq+strideq*2 ], m0 + movu [dstq+strideq*4 ], m0 + movu [dstq+stride3q*2], m0 lea dstq, [dstq+strideq*8] - mova [dstq ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+stride3q*2], m0 + movu [dstq ], m0 + movu [dstq+strideq*2 ], m0 + movu [dstq+strideq*4 ], m0 + movu [dstq+stride3q*2], m0 RESTORE_GOT RET @@ -195,15 +195,15 @@ mova m0, [aboveq] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] - mova [dstq ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+stride3q*2], m0 + movu [dstq ], m0 + movu [dstq+strideq*2 ], m0 + movu [dstq+strideq*4 ], m0 + movu [dstq+stride3q*2], m0 lea dstq, [dstq+strideq*8] - mova [dstq ], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+stride3q*2], m0 + movu [dstq ], m0 + movu [dstq+strideq*2 ], m0 + movu [dstq+strideq*4 ], m0 + movu [dstq+stride3q*2], m0 RET INIT_XMM sse2
diff --git a/aom_dsp/x86/highbd_intrapred_sse2.c b/aom_dsp/x86/highbd_intrapred_sse2.c index 5a55736..e577190 100644 --- a/aom_dsp/x86/highbd_intrapred_sse2.c +++ b/aom_dsp/x86/highbd_intrapred_sse2.c
@@ -54,13 +54,13 @@ const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff); (void)above; (void)bd; - _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0)); + _mm_storeu_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1)); + _mm_storeu_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2)); + _mm_storeu_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3)); + _mm_storeu_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3)); } void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride, @@ -77,21 +77,21 @@ const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff); (void)above; (void)bd; - _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0)); + _mm_storeu_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1)); + _mm_storeu_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2)); + _mm_storeu_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3)); + _mm_storeu_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row4, row4)); + _mm_storeu_si128((__m128i *)dst, _mm_unpackhi_epi64(row4, row4)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row5, row5)); + _mm_storeu_si128((__m128i *)dst, _mm_unpackhi_epi64(row5, row5)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row6, row6)); + _mm_storeu_si128((__m128i *)dst, _mm_unpackhi_epi64(row6, row6)); dst += stride; - _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row7, row7)); + 
_mm_storeu_si128((__m128i *)dst, _mm_unpackhi_epi64(row7, row7)); } void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride, @@ -357,7 +357,7 @@ const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo); int i; for (i = 0; i < height; ++i, dst += stride) { - _mm_store_si128((__m128i *)dst, dc_dup); + _mm_storeu_si128((__m128i *)dst, dc_dup); } } @@ -707,10 +707,10 @@ (void)left; (void)bd; const __m128i above_u16 = _mm_load_si128((const __m128i *)above); - _mm_store_si128((__m128i *)dst, above_u16); - _mm_store_si128((__m128i *)(dst + stride), above_u16); - _mm_store_si128((__m128i *)(dst + 2 * stride), above_u16); - _mm_store_si128((__m128i *)(dst + 3 * stride), above_u16); + _mm_storeu_si128((__m128i *)dst, above_u16); + _mm_storeu_si128((__m128i *)(dst + stride), above_u16); + _mm_storeu_si128((__m128i *)(dst + 2 * stride), above_u16); + _mm_storeu_si128((__m128i *)(dst + 3 * stride), above_u16); } void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride, @@ -721,10 +721,10 @@ const __m128i above_u16 = _mm_load_si128((const __m128i *)above); int i; for (i = 0; i < 4; ++i) { - _mm_store_si128((__m128i *)dst, above_u16); - _mm_store_si128((__m128i *)(dst + stride), above_u16); - _mm_store_si128((__m128i *)(dst + 2 * stride), above_u16); - _mm_store_si128((__m128i *)(dst + 3 * stride), above_u16); + _mm_storeu_si128((__m128i *)dst, above_u16); + _mm_storeu_si128((__m128i *)(dst + stride), above_u16); + _mm_storeu_si128((__m128i *)(dst + 2 * stride), above_u16); + _mm_storeu_si128((__m128i *)(dst + 3 * stride), above_u16); dst += stride << 2; } } @@ -848,13 +848,13 @@ sum32 /= 12; const __m128i row = _mm_set1_epi16((uint16_t)sum32); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; - 
_mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); } void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride, @@ -873,13 +873,13 @@ const __m128i row = _mm_set1_epi16((uint16_t)sum32); int i; for (i = 0; i < 4; ++i) { - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; } }
diff --git a/aom_dsp/x86/highbd_sad4d_sse2.asm b/aom_dsp/x86/highbd_sad4d_sse2.asm index 7ae1ca1..14ea419 100644 --- a/aom_dsp/x86/highbd_sad4d_sse2.asm +++ b/aom_dsp/x86/highbd_sad4d_sse2.asm
@@ -98,7 +98,7 @@ ; PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro HIGH_PROCESS_8x2x4 5-6 0 ; 1st 8 px - mova m0, [srcq +%2*2] + movu m0, [srcq +%2*2] %if %1 == 1 movu m4, [ref1q+%3*2] movu m5, [ref2q+%3*2] @@ -156,7 +156,7 @@ %endif ; 2nd 8 px - mova m0, [srcq +(%4)*2] + movu m0, [srcq +(%4)*2] mova m3, m0 movu m2, [ref1q+(%5)*2] psubusw m3, m2
diff --git a/aom_dsp/x86/highbd_sad_sse2.asm b/aom_dsp/x86/highbd_sad_sse2.asm index 58f1ac9..a2510d5 100644 --- a/aom_dsp/x86/highbd_sad_sse2.asm +++ b/aom_dsp/x86/highbd_sad_sse2.asm
@@ -20,20 +20,21 @@ ; Arg 2: Height ; Arg 3: Number of general purpose registers: 5 for 32-bit build, 6 for 64-bit ; Arg 4: Type of function: if 0, normal sad; if 1, avg; if 2, skip rows -%macro HIGH_SAD_FN 4 +; Arg 5: Number of xmm registers. 8xh needs 8, others only need 7 +%macro HIGH_SAD_FN 4-5 7 %if %4 == 0 %if %3 == 5 -cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, n_rows +cglobal highbd_sad%1x%2, 4, %3, %5, src, src_stride, ref, ref_stride, n_rows %else ; %3 == 7 -cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, \ +cglobal highbd_sad%1x%2, 4, %3, %5, src, src_stride, ref, ref_stride, \ src_stride3, ref_stride3, n_rows %endif ; %3 == 5/7 %elif %4 == 1 ; avg %if %3 == 5 -cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \ +cglobal highbd_sad%1x%2_avg, 5, 1 + %3, %5, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 -cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \ +cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, %5, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 @@ -356,7 +357,7 @@ ; unsigned int aom_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD8XN 1-2 0 - HIGH_SAD_FN 8, %1, 7, %2 + HIGH_SAD_FN 8, %1, 7, %2, 8 %if %2 == 2 ; skip rows, so divide number of rows by 2 mov n_rowsd, %1/8 %else @@ -377,22 +378,30 @@ pavgw m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif - mova m5, [srcq] - psubusw m5, m1 - psubusw m1, [srcq] + mova m7, m1 + movu m5, [srcq] + psubusw m1, m5 + psubusw m5, m7 por m1, m5 - mova m5, [srcq+src_strideq*2] - psubusw m5, m2 - psubusw m2, [srcq+src_strideq*2] + + mova m7, m2 + movu m5, [srcq+src_strideq*2] + psubusw m2, m5 + psubusw m5, m7 por m2, m5 - mova m5, [srcq+src_strideq*4] - psubusw m5, m3 - psubusw m3, [srcq+src_strideq*4] + + mova m7, m3 + movu m5, [srcq+src_strideq*4] + psubusw m3, m5 + psubusw m5, m7 
por m3, m5 - mova m5, [srcq+src_stride3q*2] - psubusw m5, m4 - psubusw m4, [srcq+src_stride3q*2] + + mova m7, m4 + movu m5, [srcq+src_stride3q*2] + psubusw m4, m5 + psubusw m5, m7 por m4, m5 + paddw m1, m2 paddw m3, m4 movhlps m2, m1
diff --git a/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm index 5c78933..c0b39ce 100644 --- a/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm +++ b/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -338,8 +338,8 @@ movu m1, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*4] mova m4, m1 - mova m2, [dstq] - mova m3, [dstq+dst_strideq*2] + movu m2, [dstq] + movu m3, [dstq+dst_strideq*2] pmullw m1, filter_y_a pmullw m5, filter_y_b paddw m1, filter_rnd @@ -404,8 +404,8 @@ movu m1, [srcq + src_strideq*2] movu m4, [srcq + 2] movu m5, [srcq + src_strideq*2 + 2] - mova m2, [dstq] - mova m3, [dstq + dst_strideq*2] + movu m2, [dstq] + movu m3, [dstq + dst_strideq*2] pavgw m0, m4 pavgw m1, m5 %if %2 == 1 ; avg @@ -476,8 +476,8 @@ pavgw m3, m5 pavgw m0, m2 pavgw m2, m3 - mova m4, [dstq] - mova m5, [dstq + dst_strideq*2] + movu m4, [dstq] + movu m5, [dstq + dst_strideq*2] %if %2 == 1 ; avg pavgw m0, [secq] add secq, sec_str @@ -591,9 +591,9 @@ paddw m0, filter_rnd psrlw m4, 4 paddw m0, m2 - mova m2, [dstq] + movu m2, [dstq] psrlw m0, 4 - mova m3, [dstq+dst_strideq*2] + movu m3, [dstq+dst_strideq*2] %if %2 == 1 ; avg pavgw m0, [secq] add secq, sec_str @@ -682,8 +682,8 @@ movu m1, [srcq+src_strideq*2] movu m2, [srcq+2] movu m3, [srcq+src_strideq*2+2] - mova m4, [dstq] - mova m5, [dstq+dst_strideq*2] + movu m4, [dstq] + movu m5, [dstq+dst_strideq*2] pmullw m1, filter_x_a pmullw m3, filter_x_b paddw m1, filter_rnd @@ -817,8 +817,8 @@ paddw m3, filter_rnd paddw m2, m4 paddw m3, m5 - mova m4, [dstq] - mova m5, [dstq+dst_strideq*2] + movu m4, [dstq] + movu m5, [dstq+dst_strideq*2] psrlw m2, 4 psrlw m3, 4 pavgw m0, m2 @@ -986,11 +986,11 @@ pmullw m3, filter_y_b paddw m0, m2 paddw m4, filter_rnd - mova m2, [dstq] + movu m2, [dstq] paddw m4, m3 psrlw m0, 4 psrlw m4, 4 - mova m3, [dstq+dst_strideq*2] + movu m3, [dstq+dst_strideq*2] %if %2 == 1 ; avg pavgw m0, [secq] add secq, sec_str
diff --git a/aom_dsp/x86/intrapred_asm_sse2.asm b/aom_dsp/x86/intrapred_asm_sse2.asm index 0eb6323..e9182b1 100644 --- a/aom_dsp/x86/intrapred_asm_sse2.asm +++ b/aom_dsp/x86/intrapred_asm_sse2.asm
@@ -226,10 +226,10 @@ punpcklqdq m0, m0 packuswb m0, m0 .loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 + movu [dstq ], m0 + movu [dstq+strideq ], m0 + movu [dstq+strideq*2], m0 + movu [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop @@ -285,10 +285,10 @@ punpcklqdq m0, m0 packuswb m0, m0 .loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 + movu [dstq ], m0 + movu [dstq+strideq ], m0 + movu [dstq+strideq*2], m0 + movu [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop @@ -486,10 +486,10 @@ lea stride3q, [strideq*3] mov nlines4d, 4 .loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 + movu [dstq ], m0 + movu [dstq+strideq ], m0 + movu [dstq+strideq*2], m0 + movu [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] dec nlines4d jnz .loop @@ -567,12 +567,12 @@ punpcklbw m0, m0 ; l1 to l4 each repeated 4 times pshufd m1, m0, 0x0 ; l1 repeated 16 times pshufd m2, m0, 0x55 ; l2 repeated 16 times - mova [dstq ], m1 - mova [dstq+strideq ], m2 + movu [dstq ], m1 + movu [dstq+strideq ], m2 pshufd m1, m0, 0xaa pshufd m2, m0, 0xff - mova [dstq+strideq*2], m1 - mova [dstq+stride3q ], m2 + movu [dstq+strideq*2], m1 + movu [dstq+stride3q ], m2 inc lineq lea leftq, [leftq+4 ] lea dstq, [dstq+strideq*4]
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c index 23c5b2b..a7daefc 100644 --- a/aom_dsp/x86/intrapred_avx2.c +++ b/aom_dsp/x86/intrapred_avx2.c
@@ -763,7 +763,7 @@ const __m256i l16 = _mm256_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm256_add_epi16(rep, one); } @@ -787,7 +787,7 @@ const __m256i l16 = _mm256_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm256_add_epi16(rep, one); } @@ -806,7 +806,7 @@ const __m256i l16 = _mm256_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm256_add_epi16(rep, one); } @@ -817,7 +817,7 @@ const __m256i l16 = _mm256_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm256_add_epi16(rep, one); }
diff --git a/aom_dsp/x86/intrapred_sse2.c b/aom_dsp/x86/intrapred_sse2.c index 5afef68..39a1d78 100644 --- a/aom_dsp/x86/intrapred_sse2.c +++ b/aom_dsp/x86/intrapred_sse2.c
@@ -36,7 +36,7 @@ ptrdiff_t stride) { int i; for (i = 0; i < height; ++i) { - _mm_store_si128((__m128i *)dst, *row); + _mm_storeu_si128((__m128i *)dst, *row); dst += stride; } } @@ -1171,7 +1171,7 @@ ptrdiff_t stride) { int i; for (i = 0; i < h; ++i) { - _mm_store_si128((__m128i *)dst, row[i]); + _mm_storeu_si128((__m128i *)dst, row[i]); dst += stride; } }
diff --git a/aom_dsp/x86/intrapred_ssse3.c b/aom_dsp/x86/intrapred_ssse3.c index 5a34ea0..1094fdc 100644 --- a/aom_dsp/x86/intrapred_ssse3.c +++ b/aom_dsp/x86/intrapred_ssse3.c
@@ -212,7 +212,7 @@ const __m128i l16 = _mm_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm_add_epi16(rep, one); } @@ -234,7 +234,7 @@ const __m128i l16 = _mm_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm_add_epi16(rep, one); } @@ -257,7 +257,7 @@ const __m128i l16 = _mm_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm_add_epi16(rep, one); } @@ -281,7 +281,7 @@ l16 = _mm_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm_add_epi16(rep, one); } @@ -292,7 +292,7 @@ l16 = _mm_shuffle_epi8(l, rep); const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16); - _mm_store_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)dst, row); dst += stride; rep = _mm_add_epi16(rep, one); }
diff --git a/aom_dsp/x86/sad4d_sse2.asm b/aom_dsp/x86/sad4d_sse2.asm index b5a17c5..82b5125 100644 --- a/aom_dsp/x86/sad4d_sse2.asm +++ b/aom_dsp/x86/sad4d_sse2.asm
@@ -210,7 +210,7 @@ %macro PROCESS_16x2x4 9 ; 1st 16 px HANDLE_FIRST_OFFSET %7, %2 - mova m0, [srcq + first_offset] + movu m0, [srcq + first_offset] HANDLE_SECOND_OFFSET %7, %8, %4 %if %1 == 1 movu m4, [ref1q+%3] @@ -256,7 +256,7 @@ %endif ; %1 == 1 ; 2nd 16 px - mova m0, [srcq + second_offset] + movu m0, [srcq + second_offset] movu m1, [ref1q+%5] movu m2, [ref2q+%5]
diff --git a/aom_dsp/x86/sad_sse2.asm b/aom_dsp/x86/sad_sse2.asm index de9845a..86cf70a 100644 --- a/aom_dsp/x86/sad_sse2.asm +++ b/aom_dsp/x86/sad_sse2.asm
@@ -270,25 +270,33 @@ pxor m0, m0 .loop: +; Handle the first two rows movu m1, [refq] movu m2, [refq+ref_strideq] - movu m3, [refq+ref_strideq*2] - movu m4, [refq+ref_stride3q] + movu m3, [srcq] + movu m4, [srcq+src_strideq] %if %2 == 1 pavgb m1, [second_predq+mmsize*0] pavgb m2, [second_predq+mmsize*1] +%endif + psadbw m1, m3 + psadbw m2, m4 +; Then the next two rows + movu m3, [refq+ref_strideq*2] + paddd m2, m1 + movu m4, [refq+ref_stride3q] + paddd m0, m2 + movu m1, [srcq+src_strideq*2] + movu m2, [srcq+src_stride3q] +%if %2 == 1 pavgb m3, [second_predq+mmsize*2] pavgb m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif - psadbw m1, [srcq] - psadbw m2, [srcq+src_strideq] - psadbw m3, [srcq+src_strideq*2] - psadbw m4, [srcq+src_stride3q] - paddd m1, m2 + psadbw m3, m1 + psadbw m4, m2 paddd m3, m4 lea refq, [refq+ref_strideq*4] - paddd m0, m1 lea srcq, [srcq+src_strideq*4] paddd m0, m3 dec n_rowsd
diff --git a/aom_dsp/x86/subpel_variance_sse2.asm b/aom_dsp/x86/subpel_variance_sse2.asm index cbf2890..ce3592e 100644 --- a/aom_dsp/x86/subpel_variance_sse2.asm +++ b/aom_dsp/x86/subpel_variance_sse2.asm
@@ -210,7 +210,7 @@ .x_zero_y_zero_loop: %if %1 == 16 movu m0, [srcq] - mova m1, [dstq] + movu m1, [dstq] %if %2 == 1 ; avg pavgb m0, [secq] punpckhbw m3, m1, m5 @@ -286,7 +286,7 @@ %if %1 == 16 movu m0, [srcq] movu m4, [srcq+src_strideq] - mova m1, [dstq] + movu m1, [dstq] pavgb m0, m4 punpckhbw m3, m1, m5 %if %2 == 1 ; avg @@ -389,7 +389,7 @@ %if %1 == 16 movu m0, [srcq] movu m4, [srcq+src_strideq] - mova m1, [dstq] + movu m1, [dstq] %if cpuflag(ssse3) punpckhbw m2, m0, m4 punpcklbw m0, m4 @@ -507,7 +507,7 @@ %if %1 == 16 movu m0, [srcq] movu m4, [srcq+1] - mova m1, [dstq] + movu m1, [dstq] pavgb m0, m4 punpckhbw m3, m1, m5 %if %2 == 1 ; avg @@ -586,7 +586,7 @@ .x_half_y_half_loop: movu m4, [srcq] movu m3, [srcq+1] - mova m1, [dstq] + movu m1, [dstq] pavgb m4, m3 punpckhbw m3, m1, m5 pavgb m0, m4 @@ -716,7 +716,7 @@ .x_half_y_other_loop: movu m4, [srcq] movu m2, [srcq+1] - mova m1, [dstq] + movu m1, [dstq] pavgb m4, m2 %if cpuflag(ssse3) punpckhbw m2, m0, m4 @@ -870,7 +870,7 @@ %if %1 == 16 movu m0, [srcq] movu m4, [srcq+1] - mova m1, [dstq] + movu m1, [dstq] %if cpuflag(ssse3) punpckhbw m2, m0, m4 punpcklbw m0, m4 @@ -1040,7 +1040,7 @@ movu m4, [srcq] movu m3, [srcq+1] %if cpuflag(ssse3) - mova m1, [dstq] + movu m1, [dstq] punpckhbw m2, m4, m3 punpcklbw m4, m3 pmaddubsw m2, filter_x_a @@ -1066,7 +1066,7 @@ paddw m2, filter_rnd paddw m4, m3 paddw m2, m1 - mova m1, [dstq] + movu m1, [dstq] psraw m4, 4 psraw m2, 4 punpckhbw m3, m1, m5 @@ -1257,7 +1257,7 @@ %if cpuflag(ssse3) movu m4, [srcq] movu m3, [srcq+1] - mova m1, [dstq] + movu m1, [dstq] punpckhbw m2, m4, m3 punpcklbw m4, m3 pmaddubsw m2, filter_x_a @@ -1303,7 +1303,7 @@ pmullw m0, filter_y_a pmullw m3, filter_y_b paddw m2, m1 - mova m1, [dstq] + movu m1, [dstq] paddw m0, filter_rnd psraw m2, 4 paddw m0, m3
diff --git a/aom_dsp/x86/subtract_sse2.asm b/aom_dsp/x86/subtract_sse2.asm index af38022..a3fd3d5 100644 --- a/aom_dsp/x86/subtract_sse2.asm +++ b/aom_dsp/x86/subtract_sse2.asm
@@ -38,10 +38,10 @@ je .case_64 %macro loop16 6 - mova m0, [srcq+%1] - mova m4, [srcq+%2] - mova m1, [predq+%3] - mova m5, [predq+%4] + movu m0, [srcq+%1] + movu m4, [srcq+%2] + movu m1, [predq+%3] + movu m5, [predq+%4] punpckhbw m2, m0, m7 punpckhbw m3, m1, m7 punpcklbw m0, m7
diff --git a/apps/aomenc.c b/apps/aomenc.c index 88ba906..580ebe0 100644 --- a/apps/aomenc.c +++ b/apps/aomenc.c
@@ -581,7 +581,11 @@ config->enable_rect_partitions = 1; config->enable_1to4_partitions = 1; config->disable_ml_transform_speed_features = 0; +#if CONFIG_EXT_RECUR_PARTITIONS + config->disable_ml_partition_speed_features = 1; +#else config->disable_ml_partition_speed_features = 0; +#endif // CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_SDP config->enable_sdp = 1; #endif
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c index 6400e19..dabd2e5 100644 --- a/av1/av1_cx_iface.c +++ b/av1/av1_cx_iface.c
@@ -386,11 +386,15 @@ 0, // film_grain_table_filename 0, // motion_vector_unit_test 1, // CDF update mode - 0, // disable ML based partition speed up features - 1, // enable rectangular partitions - 1, // enable ab shape partitions - 1, // enable 1:4 and 4:1 partitions - 0, // disable ml based transform speed features +#if CONFIG_EXT_RECUR_PARTITIONS + 1, // disable ML based partition speed up features +#else + 0, // disable ML based partition speed up features +#endif + 1, // enable rectangular partitions + 1, // enable ab shape partitions + 1, // enable 1:4 and 4:1 partitions + 0, // disable ml based transform speed features #if CONFIG_SDP 1, // enable semi-decoupled partitioning #endif // CONFIG_SDP @@ -1364,6 +1368,10 @@ #if CONFIG_SDP part_cfg->enable_sdp = extra_cfg->enable_sdp; #endif +#if CONFIG_EXT_RECUR_PARTITIONS + part_cfg->disable_ml_partition_speed_features = + extra_cfg->disable_ml_partition_speed_features; +#endif // CONFIG_EXT_RECUR_PARTITIONS part_cfg->min_partition_size = extra_cfg->min_partition_size; part_cfg->max_partition_size = extra_cfg->max_partition_size; @@ -3976,7 +3984,13 @@ 0, // use_fixed_qp_offsets { -1, -1, -1, -1, -1, -1 }, // fixed_qp_offsets { - 0, 128, 128, 4, 1, 1, 1, 0, 0, + 0, 128, 128, 4, 1, 1, 1, +#if CONFIG_EXT_RECUR_PARTITIONS + 1, +#else // CONFIG_EXT_RECUR_PARTITIONS + 0, +#endif // CONFIG_EXT_RECUR_PARTITIONS + 0, #if CONFIG_SDP 1, #endif // CONFIG_SDP
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c index d11f8f7..f588e7d 100644 --- a/av1/common/alloccommon.c +++ b/av1/common/alloccommon.c
@@ -174,8 +174,24 @@ above_contexts->num_planes = 0; } +static void free_sbi(CommonSBInfoParams *sbi_params) { + for (int i = 0; i < sbi_params->sbi_alloc_size; ++i) { +#if CONFIG_SDP + av1_free_ptree_recursive(sbi_params->sbi_grid_base[i].ptree_root[0]); + av1_free_ptree_recursive(sbi_params->sbi_grid_base[i].ptree_root[1]); +#else + av1_free_ptree_recursive(sbi_params->sbi_grid_base[i].ptree_root); +#endif // CONFIG_SDP + } + + aom_free(sbi_params->sbi_grid_base); + sbi_params->sbi_grid_base = NULL; + sbi_params->sbi_alloc_size = 0; +} + void av1_free_context_buffers(AV1_COMMON *cm) { cm->mi_params.free_mi(&cm->mi_params); + free_sbi(&cm->sbi_params); av1_free_above_context_buffers(&cm->above_contexts); @@ -274,10 +290,49 @@ return 0; } +static void set_sb_si(AV1_COMMON *cm) { + CommonSBInfoParams *const sbi_params = &cm->sbi_params; + const int mib_size_log2 = cm->seq_params.mib_size_log2; + sbi_params->sb_cols = + ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, mib_size_log2) >> mib_size_log2; + sbi_params->sb_rows = + ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, mib_size_log2) >> mib_size_log2; + sbi_params->sbi_stride = cm->mi_params.mi_stride >> mib_size_log2; +} + +static int alloc_sbi(CommonSBInfoParams *sbi_params) { + const int sbi_size = + sbi_params->sbi_stride * calc_mi_size(sbi_params->sb_rows); + + if (sbi_params->sbi_alloc_size < sbi_size) { + free_sbi(sbi_params); + sbi_params->sbi_grid_base = aom_calloc(sbi_size, sizeof(SB_INFO)); + + if (!sbi_params->sbi_grid_base) return 1; + + sbi_params->sbi_alloc_size = sbi_size; + for (int i = 0; i < sbi_size; ++i) { +#if CONFIG_SDP + sbi_params->sbi_grid_base[i].ptree_root[0] = NULL; + sbi_params->sbi_grid_base[i].ptree_root[1] = NULL; +#else + sbi_params->sbi_grid_base[i].ptree_root = NULL; +#endif + } + } + + return 0; +} + int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) { CommonModeInfoParams *const mi_params = &cm->mi_params; mi_params->set_mb_mi(mi_params, width, height); if 
(alloc_mi(mi_params)) goto fail; + + CommonSBInfoParams *const sbi_params = &cm->sbi_params; + set_sb_si(cm); + if (alloc_sbi(sbi_params)) goto fail; + return 0; fail:
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h index 0781160..96364ed 100644 --- a/av1/common/av1_common_int.h +++ b/av1/common/av1_common_int.h
@@ -641,6 +641,33 @@ /**@}*/ }; +typedef struct CommonSBInfoParams CommonSBInfoParams; +/*! + * \brief Params related to SB_INFO arrays and related info. + */ +struct CommonSBInfoParams { + /*! + * Grid of pointers to SB_INFO structs. + */ + SB_INFO *sbi_grid_base; + /*! + * Stride for 'sbi_grid_base'. + */ + int sbi_stride; + /*! + * Number of superblocks in the vertical direction. + */ + int sb_rows; + /*! + * Number of superblocks in the horizontal direction. + */ + int sb_cols; + /*! + * Number of SB_INFO structs that are currently allocated. + */ + int sbi_alloc_size; +}; + typedef struct CommonQuantParams CommonQuantParams; /*! * \brief Parameters related to quantization at the frame level. @@ -962,6 +989,11 @@ */ CommonModeInfoParams mi_params; + /*! + * Params related to SB_INFO arrays and related info. + */ + CommonSBInfoParams sbi_params; + #if CONFIG_ENTROPY_STATS /*! * Context type used by token CDFs, in the range 0 .. (TOKEN_CDF_Q_CTXS - 1). @@ -1382,28 +1414,23 @@ } static INLINE void set_entropy_context(MACROBLOCKD *xd, int mi_row, int mi_col, - const int num_planes) { - int i; - int row_offset = mi_row; - int col_offset = mi_col; + const int num_planes, + const CHROMA_REF_INFO *chr_ref_info) { #if CONFIG_SDP - for (i = (xd->tree_type == CHROMA_PART); i < num_planes; ++i) { + for (int i = (xd->tree_type == CHROMA_PART); i < num_planes; ++i) { #else - for (i = 0; i < num_planes; ++i) { + for (int i = 0; i < num_planes; ++i) { #endif struct macroblockd_plane *const pd = &xd->plane[i]; // Offset the buffer pointer -#if CONFIG_SDP - const BLOCK_SIZE bsize = xd->mi[0]->sb_type[xd->tree_type == CHROMA_PART]; -#else - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; -#endif - if (pd->subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1)) - row_offset = mi_row - 1; - if (pd->subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1)) - col_offset = mi_col - 1; - int above_idx = col_offset; - int left_idx = row_offset & MAX_MIB_MASK; + const int 
row_offset = + i && chr_ref_info ? chr_ref_info->mi_row_chroma_base : mi_row; + const int col_offset = + i && chr_ref_info ? chr_ref_info->mi_col_chroma_base : mi_col; + assert(row_offset >= 0); + assert(col_offset >= 0); + const int above_idx = col_offset; + const int left_idx = row_offset & MAX_MIB_MASK; pd->above_entropy_context = &xd->above_entropy_context[i][above_idx >> pd->subsampling_x]; pd->left_entropy_context = @@ -1417,15 +1444,26 @@ } static INLINE void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, - const int num_planes) { + const int num_planes, + const CHROMA_REF_INFO *chr_ref_info) { int i; #if CONFIG_SDP for (i = (xd->tree_type == CHROMA_PART); i < num_planes; i++) { #else for (i = 0; i < num_planes; i++) { #endif - xd->plane[i].width = (bw * MI_SIZE) >> xd->plane[i].subsampling_x; - xd->plane[i].height = (bh * MI_SIZE) >> xd->plane[i].subsampling_y; + if (chr_ref_info && i > 0) { + const BLOCK_SIZE plane_bsize = chr_ref_info->bsize_base; + assert(plane_bsize < BLOCK_SIZES_ALL); + + xd->plane[i].width = + block_size_wide[plane_bsize] >> xd->plane[i].subsampling_x; + xd->plane[i].height = + block_size_high[plane_bsize] >> xd->plane[i].subsampling_y; + } else { + xd->plane[i].width = (bw * MI_SIZE) >> xd->plane[i].subsampling_x; + xd->plane[i].height = (bh * MI_SIZE) >> xd->plane[i].subsampling_y; + } xd->plane[i].width = AOMMAX(xd->plane[i].width, 4); xd->plane[i].height = AOMMAX(xd->plane[i].height, 4); @@ -1434,7 +1472,8 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, int mi_row, int bh, int mi_col, int bw, - int mi_rows, int mi_cols) { + int mi_rows, int mi_cols, + const CHROMA_REF_INFO *chr_ref_info) { xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE); xd->mb_to_bottom_edge = GET_MV_SUBPEL((mi_rows - bh - mi_row) * MI_SIZE); xd->mb_to_left_edge = -GET_MV_SUBPEL((mi_col * MI_SIZE)); @@ -1445,17 +1484,9 @@ // Are edges available for intra prediction? 
xd->up_available = (mi_row > tile->mi_row_start); - - const int ss_x = xd->plane[1].subsampling_x; - const int ss_y = xd->plane[1].subsampling_y; - xd->left_available = (mi_col > tile->mi_col_start); xd->chroma_up_available = xd->up_available; xd->chroma_left_available = xd->left_available; - if (ss_x && bw < mi_size_wide[BLOCK_8X8]) - xd->chroma_left_available = (mi_col - 1) > tile->mi_col_start; - if (ss_y && bh < mi_size_high[BLOCK_8X8]) - xd->chroma_up_available = (mi_row - 1) > tile->mi_row_start; if (xd->up_available) { xd->above_mbmi = xd->mi[-xd->mi_stride]; } else { @@ -1468,28 +1499,38 @@ xd->left_mbmi = NULL; } - const int chroma_ref = ((mi_row & 0x01) || !(bh & 0x01) || !ss_y) && - ((mi_col & 0x01) || !(bw & 0x01) || !ss_x); - xd->is_chroma_ref = chroma_ref; - if (chroma_ref) { - // To help calculate the "above" and "left" chroma blocks, note that the - // current block may cover multiple luma blocks (eg, if partitioned into - // 4x4 luma blocks). - // First, find the top-left-most luma block covered by this chroma block - MB_MODE_INFO **base_mi = - &xd->mi[-(mi_row & ss_y) * xd->mi_stride - (mi_col & ss_x)]; + if (chr_ref_info) { + xd->is_chroma_ref = chr_ref_info->is_chroma_ref; + xd->chroma_left_available = + chr_ref_info->mi_col_chroma_base > tile->mi_col_start; + xd->chroma_up_available = + chr_ref_info->mi_row_chroma_base > tile->mi_row_start; + if (xd->is_chroma_ref) { + // To help calculate the "above" and "left" chroma blocks, note that the + // current block may cover multiple luma blocks (eg, if partitioned into + // 4x4 luma blocks). 
+ // First, find the top-left-most luma block covered by this chroma block + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + const int mi_row_offset = mi_row - chr_ref_info->mi_row_chroma_base; + const int mi_col_offset = mi_col - chr_ref_info->mi_col_chroma_base; + MB_MODE_INFO **base_mi = + &xd->mi[-mi_row_offset * xd->mi_stride - mi_col_offset]; - // Then, we consider the luma region covered by the left or above 4x4 chroma - // prediction. We want to point to the chroma reference block in that - // region, which is the bottom-right-most mi unit. - // This leads to the following offsets: - MB_MODE_INFO *chroma_above_mi = - xd->chroma_up_available ? base_mi[-xd->mi_stride + ss_x] : NULL; - xd->chroma_above_mbmi = chroma_above_mi; + // Then, we consider the luma region covered by the left or above 4x4 + // chroma prediction. We want to point to the chroma reference block in + // that region, which is the bottom-right-most mi unit. This leads to the + // following offsets: + MB_MODE_INFO *chroma_above_mi = + xd->chroma_up_available ? base_mi[-xd->mi_stride + ss_x] : NULL; + xd->chroma_above_mbmi = chroma_above_mi; - MB_MODE_INFO *chroma_left_mi = - xd->chroma_left_available ? base_mi[ss_y * xd->mi_stride - 1] : NULL; - xd->chroma_left_mbmi = chroma_left_mi; + MB_MODE_INFO *chroma_left_mi = + xd->chroma_left_available ? base_mi[ss_y * xd->mi_stride - 1] : NULL; + xd->chroma_left_mbmi = chroma_left_mi; + } + } else { + xd->is_chroma_ref = 1; } xd->height = bh; @@ -1497,9 +1538,16 @@ xd->is_last_vertical_rect = 0; if (xd->width < xd->height) { +#if CONFIG_EXT_RECUR_PARTITIONS + // For PARTITION_VERT_3, it would be (0, 1, 1), because 2nd subpartition has + // ratio 1:2, so not enough top-right pixels are available. + // For other partitions, it would be (0, 1). 
+ if (mi_col & (xd->height - 1)) xd->is_last_vertical_rect = 1; +#else if (!((mi_col + xd->width) & (xd->height - 1))) { xd->is_last_vertical_rect = 1; } +#endif // CONFIG_EXT_RECUR_PARTITIONS } xd->is_first_horizontal_rect = 0; @@ -1531,6 +1579,7 @@ PARTITION_CONTEXT *const left_ctx = xd->left_partition_context + (mi_row & MAX_MIB_MASK); #endif + assert(bsize < BLOCK_SIZES_ALL); const int bw = mi_size_wide[bsize]; const int bh = mi_size_high[bsize]; @@ -1561,10 +1610,14 @@ out[0] = CDF_PROB_TOP; out[0] -= cdf_element_prob(in, PARTITION_HORZ); out[0] -= cdf_element_prob(in, PARTITION_SPLIT); +#if CONFIG_EXT_RECUR_PARTITIONS + if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_HORZ_3); +#else // CONFIG_EXT_RECUR_PARTITIONS out[0] -= cdf_element_prob(in, PARTITION_HORZ_A); out[0] -= cdf_element_prob(in, PARTITION_HORZ_B); out[0] -= cdf_element_prob(in, PARTITION_VERT_A); if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_HORZ_4); +#endif // CONFIG_EXT_RECUR_PARTITIONS out[0] = AOM_ICDF(out[0]); out[1] = AOM_ICDF(CDF_PROB_TOP); } @@ -1576,10 +1629,14 @@ out[0] = CDF_PROB_TOP; out[0] -= cdf_element_prob(in, PARTITION_VERT); out[0] -= cdf_element_prob(in, PARTITION_SPLIT); +#if CONFIG_EXT_RECUR_PARTITIONS + if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_VERT_3); +#else // CONFIG_EXT_RECUR_PARTITIONS out[0] -= cdf_element_prob(in, PARTITION_HORZ_A); out[0] -= cdf_element_prob(in, PARTITION_VERT_A); out[0] -= cdf_element_prob(in, PARTITION_VERT_B); if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_VERT_4); +#endif // CONFIG_EXT_RECUR_PARTITIONS out[0] = AOM_ICDF(out[0]); out[1] = AOM_ICDF(CDF_PROB_TOP); } @@ -1588,9 +1645,13 @@ int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize, PARTITION_TYPE partition) { - if (bsize >= BLOCK_8X8) { + if (is_partition_point(bsize)) { const int hbs = mi_size_wide[bsize] / 2; - BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); +#if 
CONFIG_EXT_RECUR_PARTITIONS + const int quarter_step = hbs / 2; +#else + const BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); +#endif // CONFIG_EXT_RECUR_PARTITIONS switch (partition) { case PARTITION_SPLIT: if (bsize != BLOCK_8X8) break; @@ -1598,10 +1659,28 @@ case PARTITION_NONE: case PARTITION_HORZ: case PARTITION_VERT: - case PARTITION_HORZ_4: - case PARTITION_VERT_4: update_partition_context(xd, mi_row, mi_col, subsize, bsize); break; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_3: { + const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_HORZ); + update_partition_context(xd, mi_row, mi_col, subsize, subsize); + update_partition_context(xd, mi_row + quarter_step, mi_col, bsize3, + bsize3); + update_partition_context(xd, mi_row + 3 * quarter_step, mi_col, subsize, + subsize); + break; + } + case PARTITION_VERT_3: { + const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_VERT); + update_partition_context(xd, mi_row, mi_col, subsize, subsize); + update_partition_context(xd, mi_row, mi_col + quarter_step, bsize3, + bsize3); + update_partition_context(xd, mi_row, mi_col + 3 * quarter_step, subsize, + subsize); + break; + } +#else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_HORZ_A: update_partition_context(xd, mi_row, mi_col, bsize2, subsize); update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize); @@ -1618,6 +1697,11 @@ update_partition_context(xd, mi_row, mi_col, subsize, subsize); update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize); break; + case PARTITION_HORZ_4: + case PARTITION_VERT_4: + update_partition_context(xd, mi_row, mi_col, subsize, bsize); + break; +#endif // CONFIG_EXT_RECUR_PARTITIONS default: assert(0 && "Invalid partition type"); } } @@ -1636,27 +1720,68 @@ const PARTITION_CONTEXT *left_ctx = xd->left_partition_context + (mi_row & MAX_MIB_MASK); #endif - // Minimum partition point is 8x8. Offset the bsl accordingly. 
- const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8]; - int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1; +#if CONFIG_EXT_RECUR_PARTITIONS + if (is_square_block(bsize)) { +#endif // CONFIG_EXT_RECUR_PARTITIONS + // Minimum partition point is 8x8. Offset the bsl accordingly. + const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8]; + int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1; - assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]); - assert(bsl >= 0); + assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]); + assert(bsl >= 0); - return (left * 2 + above) + bsl * PARTITION_PLOFFSET; + return (left * 2 + above) + bsl * PARTITION_PLOFFSET; +#if CONFIG_EXT_RECUR_PARTITIONS + } else { + const int bsl_w = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8]; + const int bsl_h = mi_size_high_log2[bsize] - mi_size_high_log2[BLOCK_8X8]; + + const int above = (*above_ctx >> AOMMAX(bsl_w, 0)) & 1; + const int left = (*left_ctx >> AOMMAX(bsl_h, 0)) & 1; + + return (left * 2 + above) + + AOMMIN(bsl_w + 1, bsl_h + 1) * PARTITION_PLOFFSET; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS } // Return the number of elements in the partition CDF when // partitioning the (square) block with luma block size of bsize. 
static INLINE int partition_cdf_length(BLOCK_SIZE bsize) { +#if CONFIG_EXT_RECUR_PARTITIONS + if (bsize <= BLOCK_8X8 || bsize == BLOCK_128X128) return PARTITION_TYPES; +#else // CONFIG_EXT_RECUR_PARTITIONS if (bsize <= BLOCK_8X8) return PARTITION_TYPES; else if (bsize == BLOCK_128X128) return EXT_PARTITION_TYPES - 2; +#endif // CONFIG_EXT_RECUR_PARTITIONS else return EXT_PARTITION_TYPES; } +#if CONFIG_EXT_RECUR_PARTITIONS +static INLINE int partition_rec_cdf_length(BLOCK_SIZE bsize) { + assert(block_size_wide[bsize] != block_size_high[bsize]); + + switch (bsize) { + case BLOCK_4X8: + case BLOCK_8X4: return (PARTITION_LONG_SIDE_2_REC + 1); + case BLOCK_64X128: + case BLOCK_128X64: return (PARTITION_LONG_SIDE_3_REC + 1); + case BLOCK_8X16: + case BLOCK_16X8: + case BLOCK_16X32: + case BLOCK_32X16: + case BLOCK_32X64: + case BLOCK_64X32: return PARTITION_TYPES_REC; + default: + assert(0 && "Invalid splittable rectangular bsize"); + return PARTITION_INVALID_REC; + } +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + static INLINE int max_block_wide(const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane) { assert(bsize < BLOCK_SIZES_ALL); @@ -2130,30 +2255,52 @@ // Smaller height but same width. Is PARTITION_HORZ_4, PARTITION_HORZ or // PARTITION_HORZ_B. To distinguish the latter two, check if the lower // half was split. 
- if (sshigh * 4 == bhigh) return PARTITION_HORZ_4; + if (sshigh * 4 == bhigh) { +#if CONFIG_EXT_RECUR_PARTITIONS + return PARTITION_HORZ_3; +#else // CONFIG_EXT_RECUR_PARTITIONS + return PARTITION_HORZ_4; +#endif // CONFIG_EXT_RECUR_PARTITIONS + } +#if !CONFIG_EXT_RECUR_PARTITIONS assert(sshigh * 2 == bhigh); +#endif // !CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_SDP if (mbmi_below->sb_type[plane_type] == subsize) #else if (mbmi_below->sb_type == subsize) -#endif +#endif // CONFIG_SDP return PARTITION_HORZ; + +#if !CONFIG_EXT_RECUR_PARTITIONS else return PARTITION_HORZ_B; +#endif // !CONFIG_EXT_RECUR_PARTITIONS } else if (sshigh == bhigh) { // Smaller width but same height. Is PARTITION_VERT_4, PARTITION_VERT or // PARTITION_VERT_B. To distinguish the latter two, check if the right // half was split. - if (sswide * 4 == bwide) return PARTITION_VERT_4; + if (sswide * 4 == bwide) { +#if CONFIG_EXT_RECUR_PARTITIONS + return PARTITION_VERT_3; +#else // CONFIG_EXT_RECUR_PARTITIONS + return PARTITION_VERT_4; +#endif // CONFIG_EXT_RECUR_PARTITIONS + } +#if !CONFIG_EXT_RECUR_PARTITIONS assert(sswide * 2 == bhigh); +#endif // !CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_SDP if (mbmi_right->sb_type[plane_type] == subsize) #else if (mbmi_right->sb_type == subsize) #endif return PARTITION_VERT; + +#if !CONFIG_EXT_RECUR_PARTITIONS else return PARTITION_VERT_B; +#endif // !CONFIG_EXT_RECUR_PARTITIONS } else { // Smaller width and smaller height. Might be PARTITION_SPLIT or could be // PARTITION_HORZ_A or PARTITION_VERT_A. If subsize isn't halved in both @@ -2162,6 +2309,18 @@ // PARTITION_VERT_A, the right block will have height bhigh; with // PARTITION_HORZ_A, the lower block with have width bwide. Otherwise // it's PARTITION_SPLIT. 
+#if CONFIG_EXT_RECUR_PARTITIONS + if (sswide * 2 != bwide || sshigh * 2 != bhigh) { +#if CONFIG_SDP + if (mi_size_wide[mbmi_below->sb_type[plane_type]] < bwide && + mi_size_high[mbmi_right->sb_type[plane_type]] < bhigh) +#else + if (mi_size_wide[mbmi_below->sb_type] < bwide && + mi_size_high[mbmi_right->sb_type] < bhigh) +#endif // CONFIG_SDP + return PARTITION_SPLIT; + } +#else // CONFIG_EXT_RECUR_PARTITIONS if (sswide * 2 != bwide || sshigh * 2 != bhigh) return PARTITION_SPLIT; #if CONFIG_SDP if (mi_size_wide[mbmi_below->sb_type[plane_type]] == bwide) @@ -2172,6 +2331,7 @@ if (mi_size_wide[mbmi_below->sb_type] == bwide) return PARTITION_HORZ_A; if (mi_size_high[mbmi_right->sb_type] == bhigh) return PARTITION_VERT_A; #endif +#endif // CONFIG_EXT_RECUR_PARTITIONS return PARTITION_SPLIT; } } @@ -2194,6 +2354,22 @@ seq_params->mib_size_log2 = mi_size_wide_log2[seq_params->sb_size]; } +static INLINE SB_INFO *av1_get_sb_info(const AV1_COMMON *cm, int mi_row, + int mi_col) { + const int sb_row = mi_row >> cm->seq_params.mib_size_log2; + const int sb_col = mi_col >> cm->seq_params.mib_size_log2; + return cm->sbi_params.sbi_grid_base + sb_row * cm->sbi_params.sbi_stride + + sb_col; +} + +static INLINE void av1_set_sb_info(AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, + int mi_col) { + xd->sbi = av1_get_sb_info(cm, mi_row, mi_col); + + xd->sbi->mi_row = mi_row; + xd->sbi->mi_col = mi_col; +} + // Returns true if the frame is fully lossless at the coded resolution. // Note: If super-resolution is used, such a frame will still NOT be lossless at // the upscaled resolution.
diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c index 6bbb854..5be219d 100644 --- a/av1/common/av1_loopfilter.c +++ b/av1/common/av1_loopfilter.c
@@ -196,29 +196,36 @@ } static TX_SIZE get_transform_size(const MACROBLOCKD *const xd, -#if CONFIG_SDP - const AV1_COMMON *const cm, -#endif const MB_MODE_INFO *const mbmi, const EDGE_DIR edge_dir, const int mi_row, const int mi_col, const int plane, +#if CONFIG_SDP + const TREE_TYPE tree_type, +#endif const struct macroblockd_plane *plane_ptr) { assert(mbmi != NULL); if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4; #if CONFIG_SDP - const int plane_type = - (frame_is_intra_only(cm) && plane > 0 && cm->seq_params.enable_sdp); + const int plane_type = av1_get_sdp_idx(tree_type); #endif - TX_SIZE tx_size = (plane == AOM_PLANE_Y) - ? mbmi->tx_size -#if CONFIG_SDP - : av1_get_max_uv_txsize(mbmi->sb_type[plane_type], - plane_ptr->subsampling_x, - plane_ptr->subsampling_y); +#if CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + const BLOCK_SIZE bsize_base = + get_bsize_base_from_tree_type(mbmi, tree_type, plane); +#endif // CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + + TX_SIZE tx_size = + (plane == AOM_PLANE_Y) + ? mbmi->tx_size +#if CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + : av1_get_max_uv_txsize(bsize_base, plane_ptr->subsampling_x, + plane_ptr->subsampling_y); +#elif CONFIG_SDP + : av1_get_max_uv_txsize(mbmi->sb_type[plane_type], + plane_ptr->subsampling_x, + plane_ptr->subsampling_y); #else - : av1_get_max_uv_txsize(mbmi->sb_type, - plane_ptr->subsampling_x, - plane_ptr->subsampling_y); + : av1_get_max_uv_txsize(mbmi->sb_type, plane_ptr->subsampling_x, + plane_ptr->subsampling_y); #endif assert(tx_size < TX_SIZES_ALL); #if CONFIG_SDP @@ -266,9 +273,15 @@ params->filter_length = 0; #if CONFIG_SDP - const int plane_type = - (frame_is_intra_only(cm) && plane > 0 && cm->seq_params.enable_sdp); -#endif + TREE_TYPE tree_type = SHARED_PART; + const bool is_sdp_eligible = frame_is_intra_only(cm) && + !cm->seq_params.monochrome && + cm->seq_params.enable_sdp; + if (is_sdp_eligible) { + tree_type = (plane == AOM_PLANE_Y) ? 
LUMA_PART : CHROMA_PART; + } + const int plane_type = is_sdp_eligible && plane > 0; +#endif // CONFIG_SDP // no deblocking is required const uint32_t width = plane_ptr->dst.width; @@ -296,7 +309,7 @@ const TX_SIZE ts = #if CONFIG_SDP - get_transform_size(xd, cm, mi[0], edge_dir, mi_row, mi_col, plane, + get_transform_size(xd, mi[0], edge_dir, mi_row, mi_col, plane, tree_type, plane_ptr); #else get_transform_size(xd, mi[0], edge_dir, mi_row, mi_col, plane, plane_ptr); @@ -316,7 +329,7 @@ av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi); #if CONFIG_SDP const int curr_skipped = - mbmi->skip_txfm[plane_type] && is_inter_block(mbmi, xd->tree_type); + mbmi->skip_txfm[plane_type] && is_inter_block(mbmi, tree_type); #else const int curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi); #endif @@ -331,7 +344,8 @@ (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col); const TX_SIZE pv_ts = get_transform_size( #if CONFIG_SDP - xd, cm, mi_prev, edge_dir, pv_row, pv_col, plane, plane_ptr); + xd, mi_prev, edge_dir, pv_row, pv_col, plane, tree_type, + plane_ptr); #else xd, mi_prev, edge_dir, pv_row, pv_col, plane, plane_ptr); #endif @@ -342,15 +356,24 @@ const int pv_skip_txfm = #if CONFIG_SDP mi_prev->skip_txfm[plane_type] && - is_inter_block(mi_prev, xd->tree_type); + is_inter_block(mi_prev, tree_type); #else mi_prev->skip_txfm && is_inter_block(mi_prev); #endif #if CONFIG_SDP - const BLOCK_SIZE bsize = get_plane_block_size( - mbmi->sb_type[plane > 0], plane_ptr->subsampling_x, + const BLOCK_SIZE bsize = get_mb_plane_block_size_from_tree_type( + mbmi, tree_type, plane, plane_ptr->subsampling_x, plane_ptr->subsampling_y); -#else +#if !CONFIG_EXT_RECUR_PARTITIONS + assert(bsize == get_plane_block_size(mbmi->sb_type[plane_type], + plane_ptr->subsampling_x, + plane_ptr->subsampling_y)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS +#elif CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize = + get_mb_plane_block_size(xd, mbmi, plane, plane_ptr->subsampling_x, + 
plane_ptr->subsampling_y); +#else // !CONFIG_EXT_RECUR_PARTITIONS && !CONFIG_SDP const BLOCK_SIZE bsize = get_plane_block_size(mbmi->sb_type, plane_ptr->subsampling_x, plane_ptr->subsampling_y); @@ -681,8 +704,7 @@ else if (plane == 2 && !(cm->lf.filter_level_v)) continue; - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, 0, 0, - plane, plane + 1); + av1_setup_dst_planes(pd, frame_buffer, 0, 0, plane, plane + 1, NULL); av1_build_bitmask_vert_info(cm, &pd[plane], plane); av1_build_bitmask_horz_info(cm, &pd[plane], plane); @@ -690,19 +712,20 @@ // apply loop filtering which only goes through buffer once for (mi_row = start; mi_row < stop; mi_row += MI_SIZE_64X64) { for (mi_col = col_start; mi_col < col_end; mi_col += MI_SIZE_64X64) { - av1_setup_dst_planes(pd, BLOCK_64X64, frame_buffer, mi_row, mi_col, - plane, plane + 1); + av1_setup_dst_planes(pd, frame_buffer, mi_row, mi_col, plane, + plane + 1, NULL); av1_filter_block_plane_bitmask_vert(cm, &pd[plane], plane, mi_row, mi_col); if (mi_col - MI_SIZE_64X64 >= 0) { - av1_setup_dst_planes(pd, BLOCK_64X64, frame_buffer, mi_row, - mi_col - MI_SIZE_64X64, plane, plane + 1); + av1_setup_dst_planes(pd, frame_buffer, mi_row, + mi_col - MI_SIZE_64X64, plane, plane + 1, + NULL); av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row, mi_col - MI_SIZE_64X64); } } - av1_setup_dst_planes(pd, BLOCK_64X64, frame_buffer, mi_row, - mi_col - MI_SIZE_64X64, plane, plane + 1); + av1_setup_dst_planes(pd, frame_buffer, mi_row, mi_col - MI_SIZE_64X64, + plane, plane + 1, NULL); av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row, mi_col - MI_SIZE_64X64); } @@ -724,22 +747,21 @@ for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { // filter vertical edges - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); + av1_setup_dst_planes(pd, frame_buffer, mi_row, mi_col, plane, + 
plane + 1, NULL); av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, mi_col); // filter horizontal edges if (mi_col - MAX_MIB_SIZE >= 0) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, - mi_row, mi_col - MAX_MIB_SIZE, plane, - plane + 1); + av1_setup_dst_planes(pd, frame_buffer, mi_row, + mi_col - MAX_MIB_SIZE, plane, plane + 1, NULL); av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, mi_col - MAX_MIB_SIZE); } } // filter horizontal edges - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col - MAX_MIB_SIZE, plane, plane + 1); + av1_setup_dst_planes(pd, frame_buffer, mi_row, mi_col - MAX_MIB_SIZE, + plane, plane + 1, NULL); av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, mi_col - MAX_MIB_SIZE); } @@ -747,8 +769,8 @@ // filter all vertical edges in every 128x128 super block for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); + av1_setup_dst_planes(pd, frame_buffer, mi_row, mi_col, plane, + plane + 1, NULL); av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, mi_col); } @@ -757,8 +779,8 @@ // filter all horizontal edges in every 128x128 super block for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); + av1_setup_dst_planes(pd, frame_buffer, mi_row, mi_col, plane, + plane + 1, NULL); av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, mi_col); }
diff --git a/av1/common/blockd.c b/av1/common/blockd.c index e8612f2..01e745b 100644 --- a/av1/common/blockd.c +++ b/av1/common/blockd.c
@@ -15,6 +15,7 @@ #include "av1/common/av1_common_int.h" #include "av1/common/blockd.h" +#include "av1/common/enums.h" PREDICTION_MODE av1_left_block_mode(const MB_MODE_INFO *left_mi) { if (!left_mi) return DC_PRED; @@ -38,6 +39,113 @@ return above_mi->mode; } +void av1_reset_is_mi_coded_map(MACROBLOCKD *xd, int stride) { + av1_zero(xd->is_mi_coded); + xd->is_mi_coded_stride = stride; +} + +void av1_mark_block_as_coded(MACROBLOCKD *xd, BLOCK_SIZE bsize, + BLOCK_SIZE sb_size) { + const int mi_row = xd->mi_row; + const int mi_col = xd->mi_col; + const int sb_mi_size = mi_size_wide[sb_size]; + const int mi_row_offset = mi_row & (sb_mi_size - 1); + const int mi_col_offset = mi_col & (sb_mi_size - 1); + + for (int r = 0; r < mi_size_high[bsize]; ++r) + for (int c = 0; c < mi_size_wide[bsize]; ++c) { + const int pos = + (mi_row_offset + r) * xd->is_mi_coded_stride + mi_col_offset + c; +#if CONFIG_SDP + switch (xd->tree_type) { + case SHARED_PART: + xd->is_mi_coded[0][pos] = 1; + xd->is_mi_coded[1][pos] = 1; + break; + case LUMA_PART: xd->is_mi_coded[0][pos] = 1; break; + case CHROMA_PART: xd->is_mi_coded[1][pos] = 1; break; + default: assert(0 && "Invalid tree type"); + } +#else + xd->is_mi_coded[pos] = 1; +#endif // CONFIG_SDP + } +} + +void av1_mark_block_as_not_coded(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE sb_size) { + const int sb_mi_size = mi_size_wide[sb_size]; + const int mi_row_offset = mi_row & (sb_mi_size - 1); + const int mi_col_offset = mi_col & (sb_mi_size - 1); + + for (int r = 0; r < mi_size_high[bsize]; ++r) { + const int pos = + (mi_row_offset + r) * xd->is_mi_coded_stride + mi_col_offset; +#if CONFIG_SDP + uint8_t *row_ptr_luma = &xd->is_mi_coded[0][pos]; + uint8_t *row_ptr_chroma = &xd->is_mi_coded[1][pos]; + switch (xd->tree_type) { + case SHARED_PART: + av1_zero_array(row_ptr_luma, mi_size_wide[bsize]); + av1_zero_array(row_ptr_chroma, mi_size_wide[bsize]); + break; + case LUMA_PART: av1_zero_array(row_ptr_luma, 
mi_size_wide[bsize]); break; + case CHROMA_PART: + av1_zero_array(row_ptr_chroma, mi_size_wide[bsize]); + break; + default: assert(0 && "Invalid tree type"); + } +#else + uint8_t *row_ptr = &xd->is_mi_coded[pos]; + av1_zero_array(row_ptr, mi_size_wide[bsize]); +#endif // CONFIG_SDP + } +} + +PARTITION_TREE *av1_alloc_ptree_node(PARTITION_TREE *parent, int index) { + PARTITION_TREE *ptree = NULL; + struct aom_internal_error_info error; + + AOM_CHECK_MEM_ERROR(&error, ptree, aom_calloc(1, sizeof(*ptree))); + + ptree->parent = parent; + ptree->index = index; + ptree->partition = PARTITION_NONE; + ptree->is_settled = 0; + for (int i = 0; i < 4; ++i) ptree->sub_tree[i] = NULL; + + return ptree; +} + +void av1_free_ptree_recursive(PARTITION_TREE *ptree) { + if (ptree == NULL) return; + + for (int i = 0; i < 4; ++i) { + av1_free_ptree_recursive(ptree->sub_tree[i]); + ptree->sub_tree[i] = NULL; + } + + aom_free(ptree); +} + +void av1_reset_ptree_in_sbi(SB_INFO *sbi +#if CONFIG_SDP + , + TREE_TYPE tree_type +#endif // CONFIG_SDP +) { +#if CONFIG_SDP + const int idx = av1_get_sdp_idx(tree_type); + if (sbi->ptree_root[idx]) av1_free_ptree_recursive(sbi->ptree_root[idx]); + + sbi->ptree_root[idx] = av1_alloc_ptree_node(NULL, 0); +#else + if (sbi->ptree_root) av1_free_ptree_recursive(sbi->ptree_root); + + sbi->ptree_root = av1_alloc_ptree_node(NULL, 0); +#endif // CONFIG_SDP +} + void av1_set_entropy_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, int plane, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, @@ -67,14 +175,42 @@ memset(l, has_eob, sizeof(*l) * txs_high); } } + void av1_reset_entropy_context(MACROBLOCKD *xd, BLOCK_SIZE bsize, const int num_planes) { - assert(bsize < BLOCK_SIZES_ALL); +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + // TODO(chiyotsai): This part is needed to avoid encoder/decoder mismatch. + // Investigate why this is the case. 
It seems like on the decoder side, the + // decoder is failing to clear the context after encoding a skip_txfm chroma + // block. + const int plane_start = (xd->tree_type == CHROMA_PART); + int plane_end = 0; + switch (xd->tree_type) { + case LUMA_PART: plane_end = 1; break; + case CHROMA_PART: plane_end = num_planes; break; + case SHARED_PART: + plane_end = 1 + (num_planes - 1) * xd->is_chroma_ref; + break; + default: assert(0); + } + for (int i = plane_start; i < plane_end; ++i) { +#else const int nplanes = 1 + (num_planes - 1) * xd->is_chroma_ref; for (int i = 0; i < nplanes; i++) { +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS struct macroblockd_plane *const pd = &xd->plane[i]; +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = get_mb_plane_block_size( + xd, xd->mi[0], i, pd->subsampling_x, pd->subsampling_y); +#if !CONFIG_EXT_RECUR_PARTITIONS + assert(plane_bsize == + get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS + (void)bsize; +#else const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); +#endif // CONFIG_SDP const int txs_wide = mi_size_wide[plane_bsize]; const int txs_high = mi_size_high[plane_bsize]; memset(pd->above_entropy_context, 0, sizeof(ENTROPY_CONTEXT) * txs_wide);
diff --git a/av1/common/blockd.h b/av1/common/blockd.h index 97be296..63b40e0 100644 --- a/av1/common/blockd.h +++ b/av1/common/blockd.h
@@ -237,6 +237,15 @@ COMPOUND_TYPE type; } INTERINTER_COMPOUND_DATA; +typedef struct CHROMA_REF_INFO { + int is_chroma_ref; + int offset_started; + int mi_row_chroma_base; + int mi_col_chroma_base; + BLOCK_SIZE bsize; + BLOCK_SIZE bsize_base; +} CHROMA_REF_INFO; + #define INTER_TX_SIZE_BUF_LEN 16 #define TXK_TYPE_BUF_LEN 64 /*!\endcond */ @@ -377,6 +386,8 @@ uint8_t use_wedge_interintra : 1; /*! \brief CDEF strength per BLOCK_64X64 */ int8_t cdef_strength : 4; + /*! \brief chroma block info for sub-8x8 cases */ + CHROMA_REF_INFO chroma_ref_info; #if CONFIG_CCSO /*! \brief Whether to use cross-component sample offset for the U plane. */ uint8_t ccso_blk_u : 2; @@ -401,6 +412,38 @@ /*!\cond */ +typedef struct PARTITION_TREE { + struct PARTITION_TREE *parent; + struct PARTITION_TREE *sub_tree[4]; + PARTITION_TYPE partition; + BLOCK_SIZE bsize; + int is_settled; + int mi_row; + int mi_col; + int index; + CHROMA_REF_INFO chroma_ref_info; +} PARTITION_TREE; + +PARTITION_TREE *av1_alloc_ptree_node(PARTITION_TREE *parent, int index); +void av1_free_ptree_recursive(PARTITION_TREE *ptree); + +typedef struct SB_INFO { + int mi_row; + int mi_col; +#if CONFIG_SDP + PARTITION_TREE *ptree_root[2]; +#else + PARTITION_TREE *ptree_root; +#endif // CONFIG_SDP +} SB_INFO; + +void av1_reset_ptree_in_sbi(SB_INFO *sbi +#if CONFIG_SDP + , + TREE_TYPE tree_type +#endif // CONFIG_SDP +); + #if CONFIG_SDP static INLINE int is_intrabc_block(const MB_MODE_INFO *mbmi, int tree_type) { return mbmi->use_intrabc[tree_type == CHROMA_PART]; @@ -444,6 +487,30 @@ } #endif +#if CONFIG_EXT_RECUR_PARTITIONS +static INLINE PARTITION_TYPE get_partition_from_symbol_rec_block( + BLOCK_SIZE bsize, PARTITION_TYPE_REC partition_rec) { + if (block_size_wide[bsize] > block_size_high[bsize]) + return partition_map_from_symbol_block_wgth[partition_rec]; + else if (block_size_high[bsize] > block_size_wide[bsize]) + return partition_map_from_symbol_block_hgtw[partition_rec]; + else + return PARTITION_INVALID; +} + 
+static INLINE PARTITION_TYPE_REC get_symbol_from_partition_rec_block( + BLOCK_SIZE bsize, PARTITION_TYPE partition) { + assert(bsize < BLOCK_SIZES_ALL); + assert(partition < EXT_PARTITION_TYPES); + if (block_size_wide[bsize] > block_size_high[bsize]) + return symbol_map_from_partition_block_wgth[partition]; + else if (block_size_high[bsize] > block_size_wide[bsize]) + return symbol_map_from_partition_block_hgtw[partition]; + else + return PARTITION_INVALID_REC; +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { return mbmi->ref_frame[1] > INTRA_FRAME; } @@ -503,6 +570,356 @@ block_size_allowed; } +static INLINE int is_square_block(BLOCK_SIZE bsize) { + return block_size_high[bsize] == block_size_wide[bsize]; +} + +static INLINE int is_partition_point(BLOCK_SIZE bsize) { +#if CONFIG_EXT_RECUR_PARTITIONS + return bsize != BLOCK_4X4 && bsize < BLOCK_SIZES; +#else + return is_square_block(bsize) && bsize >= BLOCK_8X8 && bsize < BLOCK_SIZES; +#endif // CONFIG_EXT_RECUR_PARTITIONS +} + +static INLINE int get_sqr_bsize_idx(BLOCK_SIZE bsize) { + switch (bsize) { + case BLOCK_4X4: return 0; + case BLOCK_8X8: return 1; + case BLOCK_16X16: return 2; + case BLOCK_32X32: return 3; + case BLOCK_64X64: return 4; + case BLOCK_128X128: return 5; + default: return SQR_BLOCK_SIZES; + } +} + +// For a square block size 'bsize', returns the size of the sub-blocks used by +// the given partition type. If the partition produces sub-blocks of different +// sizes, then the function returns the largest sub-block size. +// Implements the Partition_Subsize lookup table in the spec (Section 9.3. +// Conversion tables). +// Note: the input block size should be square. +// Otherwise it's considered invalid. 
+static INLINE BLOCK_SIZE get_partition_subsize(BLOCK_SIZE bsize, + PARTITION_TYPE partition) { + if (partition == PARTITION_INVALID) { + return BLOCK_INVALID; + } else { +#if CONFIG_EXT_RECUR_PARTITIONS + if (is_partition_point(bsize)) + return subsize_lookup[partition][bsize]; + else + return partition == PARTITION_NONE ? bsize : BLOCK_INVALID; +#else // CONFIG_EXT_RECUR_PARTITIONS + const int sqr_bsize_idx = get_sqr_bsize_idx(bsize); + return sqr_bsize_idx >= SQR_BLOCK_SIZES + ? BLOCK_INVALID + : subsize_lookup[partition][sqr_bsize_idx]; +#endif // CONFIG_EXT_RECUR_PARTITIONS + } +} + +static INLINE int is_partition_valid(BLOCK_SIZE bsize, PARTITION_TYPE p) { +#if CONFIG_EXT_RECUR_PARTITIONS + if (p == PARTITION_SPLIT) return 0; +#endif // CONFIG_EXT_RECUR_PARTITIONS + if (is_partition_point(bsize)) + return get_partition_subsize(bsize, p) < BLOCK_SIZES_ALL; + else + return p == PARTITION_NONE; +} + +static INLINE void initialize_chr_ref_info(int mi_row, int mi_col, + BLOCK_SIZE bsize, + CHROMA_REF_INFO *info) { + info->is_chroma_ref = 1; + info->offset_started = 0; + info->mi_row_chroma_base = mi_row; + info->mi_col_chroma_base = mi_col; + info->bsize = bsize; + info->bsize_base = bsize; +} + +// Decide whether a block needs coding multiple chroma coding blocks in it at +// once to get around sub-4x4 coding. 
+static INLINE int have_nz_chroma_ref_offset(BLOCK_SIZE bsize, + PARTITION_TYPE partition, + int subsampling_x, + int subsampling_y) { + const int bw = block_size_wide[bsize] >> subsampling_x; + const int bh = block_size_high[bsize] >> subsampling_y; + const int bw_less_than_4 = bw < 4; + const int bh_less_than_4 = bh < 4; + const int hbw_less_than_4 = bw < 8; + const int hbh_less_than_4 = bh < 8; + const int qbw_less_than_4 = bw < 16; + const int qbh_less_than_4 = bh < 16; + switch (partition) { + case PARTITION_NONE: return bw_less_than_4 || bh_less_than_4; + case PARTITION_HORZ: return bw_less_than_4 || hbh_less_than_4; + case PARTITION_VERT: return hbw_less_than_4 || bh_less_than_4; + case PARTITION_SPLIT: return hbw_less_than_4 || hbh_less_than_4; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_3: return bw_less_than_4 || qbh_less_than_4; + case PARTITION_VERT_3: return qbw_less_than_4 || bh_less_than_4; +#else // CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_A: + case PARTITION_HORZ_B: + case PARTITION_VERT_A: + case PARTITION_VERT_B: return hbw_less_than_4 || hbh_less_than_4; + case PARTITION_HORZ_4: return bw_less_than_4 || qbh_less_than_4; + case PARTITION_VERT_4: return qbw_less_than_4 || bh_less_than_4; +#endif // CONFIG_EXT_RECUR_PARTITIONS + default: + assert(0 && "Invalid partition type!"); + return 0; + break; + } +} + +// Decide whether a subblock is the main chroma reference when its parent block +// needs coding multiple chroma coding blocks at once. The function returns a +// flag indicating whether the mode info used for the combined chroma block is +// located in the subblock. 
+static INLINE int is_sub_partition_chroma_ref(PARTITION_TYPE partition, + int index, BLOCK_SIZE bsize, + BLOCK_SIZE parent_bsize, int ss_x, + int ss_y, int is_offset_started) { + (void)is_offset_started; + (void)parent_bsize; + const int bw = block_size_wide[bsize]; + const int bh = block_size_high[bsize]; + const int pw = bw >> ss_x; + const int ph = bh >> ss_y; + const int pw_less_than_4 = pw < 4; + const int ph_less_than_4 = ph < 4; + switch (partition) { + case PARTITION_NONE: return 1; + case PARTITION_HORZ: + case PARTITION_VERT: return index == 1; + case PARTITION_SPLIT: + if (is_offset_started) { + return index == 3; + } else { + if (pw_less_than_4 && ph_less_than_4) + return index == 3; + else if (pw_less_than_4) + return index == 1 || index == 3; + else if (ph_less_than_4) + return index == 2 || index == 3; + else + return 1; + } +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_VERT_3: + case PARTITION_HORZ_3: return index == 2; +#else // CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_A: + case PARTITION_HORZ_B: + case PARTITION_VERT_A: + case PARTITION_VERT_B: + if (is_offset_started) { + return index == 2; + } else { + const int smallest_w = block_size_wide[parent_bsize] >> (ss_x + 1); + const int smallest_h = block_size_high[parent_bsize] >> (ss_y + 1); + const int smallest_w_less_than_4 = smallest_w < 4; + const int smallest_h_less_than_4 = smallest_h < 4; + if (smallest_w_less_than_4 && smallest_h_less_than_4) { + return index == 2; + } else if (smallest_w_less_than_4) { + if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) { + return index == 2; + } else if (partition == PARTITION_HORZ_A) { + return index == 1 || index == 2; + } else { + return index == 0 || index == 2; + } + } else if (smallest_h_less_than_4) { + if (partition == PARTITION_HORZ_A || partition == PARTITION_HORZ_B) { + return index == 2; + } else if (partition == PARTITION_VERT_A) { + return index == 1 || index == 2; + } else { + return index == 0 || index == 2; 
+ } + } else { + return 1; + } + } + case PARTITION_HORZ_4: + case PARTITION_VERT_4: + if (is_offset_started) { + return index == 3; + } else { + if ((partition == PARTITION_HORZ_4 && ph_less_than_4) || + (partition == PARTITION_VERT_4 && pw_less_than_4)) { + return index == 1 || index == 3; + } else { + return 1; + } + } +#endif // CONFIG_EXT_RECUR_PARTITIONS + default: + assert(0 && "Invalid partition type!"); + return 0; + break; + } +} + +static INLINE void set_chroma_ref_offset_size( + int mi_row, int mi_col, PARTITION_TYPE partition, BLOCK_SIZE bsize, + BLOCK_SIZE parent_bsize, int ss_x, int ss_y, CHROMA_REF_INFO *info, + const CHROMA_REF_INFO *parent_info) { + const int pw = block_size_wide[bsize] >> ss_x; + const int ph = block_size_high[bsize] >> ss_y; + const int pw_less_than_4 = pw < 4; + const int ph_less_than_4 = ph < 4; +#if !CONFIG_EXT_RECUR_PARTITIONS + const int hppw = block_size_wide[parent_bsize] >> (ss_x + 1); + const int hpph = block_size_high[parent_bsize] >> (ss_y + 1); + const int hppw_less_than_4 = hppw < 4; + const int hpph_less_than_4 = hpph < 4; + const int mi_row_mid_point = + parent_info->mi_row_chroma_base + (mi_size_high[parent_bsize] >> 1); + const int mi_col_mid_point = + parent_info->mi_col_chroma_base + (mi_size_wide[parent_bsize] >> 1); +#endif // !CONFIG_EXT_RECUR_PARTITIONS + assert(parent_info->offset_started == 0); + switch (partition) { + case PARTITION_NONE: + case PARTITION_HORZ: + case PARTITION_VERT: +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_VERT_3: + case PARTITION_HORZ_3: +#endif // CONFIG_EXT_RECUR_PARTITIONS + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + info->bsize_base = parent_bsize; + break; + case PARTITION_SPLIT: + if (pw_less_than_4 && ph_less_than_4) { + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + info->bsize_base = parent_bsize; + } else if 
(pw_less_than_4) { + info->bsize_base = get_partition_subsize(parent_bsize, PARTITION_HORZ); + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + if (mi_row == parent_info->mi_row_chroma_base) { + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + } else { + info->mi_row_chroma_base = + parent_info->mi_row_chroma_base + mi_size_high[bsize]; + } + } else { + assert(ph_less_than_4); + info->bsize_base = get_partition_subsize(parent_bsize, PARTITION_VERT); + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + if (mi_col == parent_info->mi_col_chroma_base) { + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + } else { + info->mi_col_chroma_base = + parent_info->mi_col_chroma_base + mi_size_wide[bsize]; + } + } + break; +#if !CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_A: + case PARTITION_HORZ_B: + case PARTITION_VERT_A: + case PARTITION_VERT_B: + if ((hppw_less_than_4 && hpph_less_than_4) || + (hppw_less_than_4 && + (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B)) || + (hpph_less_than_4 && + (partition == PARTITION_HORZ_A || partition == PARTITION_HORZ_B))) { + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + info->bsize_base = parent_bsize; + } else if (hppw_less_than_4) { + info->bsize_base = get_partition_subsize(parent_bsize, PARTITION_HORZ); + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + if (mi_row == parent_info->mi_row_chroma_base) { + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + } else { + info->mi_row_chroma_base = parent_info->mi_row_chroma_base + + (mi_size_high[parent_bsize] >> 1); + } + } else { + assert(hpph_less_than_4); + info->bsize_base = get_partition_subsize(parent_bsize, PARTITION_VERT); + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + if (mi_col == parent_info->mi_col_chroma_base) { + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + } else { + 
info->mi_col_chroma_base = parent_info->mi_col_chroma_base + + (mi_size_wide[parent_bsize] >> 1); + } + } + break; + case PARTITION_HORZ_4: + info->bsize_base = get_partition_subsize(parent_bsize, PARTITION_HORZ); + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + if (mi_row < mi_row_mid_point) { + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + } else { + info->mi_row_chroma_base = mi_row_mid_point; + } + break; + case PARTITION_VERT_4: + info->bsize_base = get_partition_subsize(parent_bsize, PARTITION_VERT); + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + if (mi_col < mi_col_mid_point) { + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + } else { + info->mi_col_chroma_base = mi_col_mid_point; + } + break; +#endif // !CONFIG_EXT_RECUR_PARTITIONS + default: assert(0 && "Invalid partition type!"); break; + } +} + +static INLINE void set_chroma_ref_info(int mi_row, int mi_col, int index, + BLOCK_SIZE bsize, CHROMA_REF_INFO *info, + const CHROMA_REF_INFO *parent_info, + BLOCK_SIZE parent_bsize, + PARTITION_TYPE parent_partition, + int ss_x, int ss_y) { + assert(bsize < BLOCK_SIZES_ALL); + initialize_chr_ref_info(mi_row, mi_col, bsize, info); + if (parent_info == NULL) return; + if (parent_info->is_chroma_ref) { + if (parent_info->offset_started) { + if (is_sub_partition_chroma_ref(parent_partition, index, bsize, + parent_bsize, ss_x, ss_y, 1)) { + info->is_chroma_ref = 1; + } else { + info->is_chroma_ref = 0; + } + info->offset_started = 1; + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + info->bsize_base = parent_info->bsize_base; + } else if (have_nz_chroma_ref_offset(parent_bsize, parent_partition, ss_x, + ss_y)) { + info->offset_started = 1; + info->is_chroma_ref = is_sub_partition_chroma_ref( + parent_partition, index, bsize, parent_bsize, ss_x, ss_y, 0); + set_chroma_ref_offset_size(mi_row, mi_col, parent_partition, bsize, + 
parent_bsize, ss_x, ss_y, info, parent_info); + } + } else { + info->is_chroma_ref = 0; + info->offset_started = 1; + info->mi_row_chroma_base = parent_info->mi_row_chroma_base; + info->mi_col_chroma_base = parent_info->mi_col_chroma_base; + info->bsize_base = parent_info->bsize_base; + } +} + #if CONFIG_MISMATCH_DEBUG static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col, int mi_row, int tx_blk_col, int tx_blk_row, @@ -745,6 +1162,11 @@ MB_MODE_INFO *chroma_above_mbmi; /*! + * SB_INFO for the superblock that the current coding block is located in + */ + SB_INFO *sbi; + + /*! * Appropriate offset based on current 'mi_row' and 'mi_col', inside * 'tx_type_map' in one of 'CommonModeInfoParams', 'PICK_MODE_CONTEXT' or * 'MACROBLOCK' structs. @@ -772,7 +1194,22 @@ * block shares the same tree or not. */ TREE_TYPE tree_type; -#endif + + /*! + * An array for recording whether an mi(4x4) is coded. Reset at sb level. + */ + // TODO(any): Convert to bit field instead. + uint8_t is_mi_coded[2][MAX_MIB_SQUARE]; +#else + /*! + * An array for recording whether an mi(4x4) is coded. Reset at sb level. + */ + uint8_t is_mi_coded[MAX_MIB_SQUARE]; +#endif // CONFIG_SDP + /*! + * Stride of the is_mi_coded array. + */ + int is_mi_coded_stride; /*! * Scale factors for reference frames of the current block. @@ -1061,37 +1498,6 @@ : buf16; } -static INLINE int get_sqr_bsize_idx(BLOCK_SIZE bsize) { - switch (bsize) { - case BLOCK_4X4: return 0; - case BLOCK_8X8: return 1; - case BLOCK_16X16: return 2; - case BLOCK_32X32: return 3; - case BLOCK_64X64: return 4; - case BLOCK_128X128: return 5; - default: return SQR_BLOCK_SIZES; - } -} - -// For a square block size 'bsize', returns the size of the sub-blocks used by -// the given partition type. If the partition produces sub-blocks of different -// sizes, then the function returns the largest sub-block size. -// Implements the Partition_Subsize lookup table in the spec (Section 9.3. -// Conversion tables). 
-// Note: the input block size should be square. -// Otherwise it's considered invalid. -static INLINE BLOCK_SIZE get_partition_subsize(BLOCK_SIZE bsize, - PARTITION_TYPE partition) { - if (partition == PARTITION_INVALID) { - return BLOCK_INVALID; - } else { - const int sqr_bsize_idx = get_sqr_bsize_idx(bsize); - return sqr_bsize_idx >= SQR_BLOCK_SIZES - ? BLOCK_INVALID - : subsize_lookup[partition][sqr_bsize_idx]; - } -} - static TX_TYPE intra_mode_to_tx_type(const MB_MODE_INFO *mbmi, PLANE_TYPE plane_type) { static const TX_TYPE _intra_mode_to_tx_type[INTRA_MODES] = { @@ -1266,6 +1672,74 @@ return ss_size_lookup[bsize][subsampling_x][subsampling_y]; } +#if CONFIG_SDP +static INLINE int av1_get_sdp_idx(TREE_TYPE tree_type) { + switch (tree_type) { + case SHARED_PART: + case LUMA_PART: return 0; + case CHROMA_PART: return 1; break; + default: assert(0 && "Invalid tree type"); return 0; + } +} +#endif // CONFIG_SDP + +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP +static INLINE BLOCK_SIZE get_bsize_base(const MACROBLOCKD *xd, + const MB_MODE_INFO *mbmi, int plane) { + BLOCK_SIZE bsize_base = BLOCK_INVALID; +#if CONFIG_SDP + if (xd->tree_type == SHARED_PART) { + bsize_base = + plane ? mbmi->chroma_ref_info.bsize_base : mbmi->sb_type[PLANE_TYPE_Y]; + } else { + bsize_base = mbmi->sb_type[av1_get_sdp_idx(xd->tree_type)]; + } +#else + bsize_base = plane ? 
mbmi->chroma_ref_info.bsize_base : mbmi->sb_type; + (void)xd; +#endif // CONFIG_SDP + return bsize_base; +} + +static INLINE BLOCK_SIZE get_mb_plane_block_size(const MACROBLOCKD *xd, + const MB_MODE_INFO *mbmi, + int plane, int subsampling_x, + int subsampling_y) { + assert(subsampling_x >= 0 && subsampling_x < 2); + assert(subsampling_y >= 0 && subsampling_y < 2); + const BLOCK_SIZE bsize_base = get_bsize_base(xd, mbmi, plane); + return get_plane_block_size(bsize_base, subsampling_x, subsampling_y); +} +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + +#if CONFIG_SDP +// These are only needed to support lpf multi-thread. +// Because xd is shared among all the threads workers, xd->tree_type does not +// contain the valid tree_type, so we are passing in the tree_type +static INLINE BLOCK_SIZE get_bsize_base_from_tree_type(const MB_MODE_INFO *mbmi, + TREE_TYPE tree_type, + int plane) { + BLOCK_SIZE bsize_base = BLOCK_INVALID; + if (tree_type == SHARED_PART) { + bsize_base = + plane ? 
mbmi->chroma_ref_info.bsize_base : mbmi->sb_type[PLANE_TYPE_Y]; + } else { + bsize_base = mbmi->sb_type[av1_get_sdp_idx(tree_type)]; + } + return bsize_base; +} + +static INLINE BLOCK_SIZE get_mb_plane_block_size_from_tree_type( + const MB_MODE_INFO *mbmi, TREE_TYPE tree_type, int plane, int subsampling_x, + int subsampling_y) { + assert(subsampling_x >= 0 && subsampling_x < 2); + assert(subsampling_y >= 0 && subsampling_y < 2); + const BLOCK_SIZE bsize_base = + get_bsize_base_from_tree_type(mbmi, tree_type, plane); + return get_plane_block_size(bsize_base, subsampling_x, subsampling_y); +} +#endif // CONFIG_SDP + /* * Logic to generate the lookup tables: * @@ -1471,7 +1945,7 @@ if (is_inter_block(mbmi, xd->tree_type)) { #else if (is_inter_block(mbmi)) { -#endif // CONFIG_SDP +#endif // scale back to y plane's coordinate const struct macroblockd_plane *const pd = &xd->plane[plane_type]; blk_row <<= pd->subsampling_y; @@ -1492,7 +1966,7 @@ reduced_tx_set); #else av1_get_ext_tx_set_type(tx_size, is_inter_block(mbmi), reduced_tx_set); -#endif // CONFIG_SDP +#endif if (!av1_ext_tx_used[tx_set_type][tx_type]) tx_type = DCT_DCT; } #if CONFIG_IST @@ -1597,12 +2071,16 @@ if (xd->lossless[mbmi->segment_id]) return TX_4X4; if (plane == 0) return mbmi->tx_size; const MACROBLOCKD_PLANE *pd = &xd->plane[plane]; -#if CONFIG_SDP +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize_base = get_bsize_base(xd, mbmi, plane); + return av1_get_max_uv_txsize(bsize_base, pd->subsampling_x, + pd->subsampling_y); +#elif CONFIG_SDP return av1_get_max_uv_txsize(mbmi->sb_type[PLANE_TYPE_UV], pd->subsampling_x, pd->subsampling_y); #else - return av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y); + return av1_get_max_uv_txsize(mbmi->chroma_ref_info.bsize_base, + pd->subsampling_x, pd->subsampling_y); #endif } @@ -1623,6 +2101,12 @@ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, int aoff, int loff); +void av1_reset_is_mi_coded_map(MACROBLOCKD 
*xd, int stride); +void av1_mark_block_as_coded(MACROBLOCKD *xd, BLOCK_SIZE bsize, + BLOCK_SIZE sb_size); +void av1_mark_block_as_not_coded(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE sb_size); + #define MAX_INTERINTRA_SB_SQUARE 32 * 32 static INLINE int is_interintra_mode(const MB_MODE_INFO *mbmi) { return (mbmi->ref_frame[0] > INTRA_FRAME && @@ -1677,9 +2161,27 @@ return av1_get_adjusted_tx_size(max_txsize); // chroma } -static INLINE int is_motion_variation_allowed_bsize(BLOCK_SIZE bsize) { +static INLINE int is_motion_variation_allowed_bsize(BLOCK_SIZE bsize, + int mi_row, int mi_col) { assert(bsize < BLOCK_SIZES_ALL); - return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8; + + if (AOMMIN(block_size_wide[bsize], block_size_high[bsize]) < 8) { + return 0; + } +#if CONFIG_EXT_RECUR_PARTITIONS + // TODO(urvang): Enable this special case, if we make OBMC work. + // TODO(yuec): Enable this case when the alignment issue is fixed. There + // will be memory leak in global above_pred_buff and left_pred_buff if + // the restriction on mi_row and mi_col is removed. 
+ if ((mi_row & 0x01) || (mi_col & 0x01)) { + return 0; + } +#else + assert(!(mi_row & 0x01) && !(mi_col & 0x01)); + (void)mi_row; + (void)mi_col; +#endif // CONFIG_EXT_RECUR_PARTITIONS + return 1; } static INLINE int is_motion_variation_allowed_compound( @@ -1703,9 +2205,11 @@ if (is_global_mv_block(mbmi, gm_type)) return SIMPLE_TRANSLATION; } #if CONFIG_SDP - if (is_motion_variation_allowed_bsize(mbmi->sb_type[PLANE_TYPE_Y]) && + if (is_motion_variation_allowed_bsize(mbmi->sb_type[PLANE_TYPE_Y], xd->mi_row, + xd->mi_col) && #else - if (is_motion_variation_allowed_bsize(mbmi->sb_type) && + if (is_motion_variation_allowed_bsize(mbmi->sb_type, xd->mi_row, + xd->mi_col) && #endif is_inter_mode(mbmi->mode) && mbmi->ref_frame[1] != INTRA_FRAME && is_motion_variation_allowed_compound(mbmi)) { @@ -1752,6 +2256,7 @@ int *height, int *rows_within_bounds, int *cols_within_bounds) { + if (plane > 0) bsize = xd->mi[0]->chroma_ref_info.bsize_base; const int block_height = block_size_high[bsize]; const int block_width = block_size_wide[bsize]; const int block_rows = (xd->mb_to_bottom_edge >= 0) @@ -1842,6 +2347,18 @@ } return tx_size_2d[tx_size]; } + +#if CONFIG_EXT_RECUR_PARTITIONS +static AOM_INLINE const PARTITION_TREE *get_partition_subtree_const( + const PARTITION_TREE *partition_tree, int idx) { + if (!partition_tree) { + return NULL; + } + return partition_tree->sub_tree[idx]; +} + +#endif // CONFIG_EXT_RECUR_PARTITIONS + /*!\endcond */ #ifdef __cplusplus
diff --git a/av1/common/ccso.c b/av1/common/ccso.c index 5596ceb..5f8e44c 100644 --- a/av1/common/ccso.c +++ b/av1/common/ccso.c
@@ -271,8 +271,7 @@ void ccso_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, uint16_t *ext_rec_y) { const int num_planes = av1_num_planes(cm); - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0, - num_planes); + av1_setup_dst_planes(xd->plane, frame, 0, 0, 0, num_planes, NULL); const uint8_t quant_sz[4] = { 16, 8, 32, 64 }; for (int plane = 1; plane < 3; plane++) {
diff --git a/av1/common/cdef.c b/av1/common/cdef.c index ffb0159..14ec3bb 100644 --- a/av1/common/cdef.c +++ b/av1/common/cdef.c
@@ -142,8 +142,7 @@ int coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0); const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0, - num_planes); + av1_setup_dst_planes(xd->plane, frame, 0, 0, 0, num_planes, NULL); row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2); memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2); prev_row_cdef = row_cdef + 1;
diff --git a/av1/common/cfl.c b/av1/common/cfl.c index 718f6c0..1df05bb 100644 --- a/av1/common/cfl.c +++ b/av1/common/cfl.c
@@ -372,41 +372,21 @@ } } -// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced -// and non-chroma-referenced blocks are stored together in the CfL buffer. -static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int mi_row, - int mi_col, int *row_out, - int *col_out) { - // Increment row index for bottom: 8x4, 16x4 or both bottom 4x4s. - if ((mi_row & 0x01) && cfl->subsampling_y) { - assert(*row_out == 0); - (*row_out)++; - } - - // Increment col index for right: 4x8, 4x16 or both right 4x4s. - if ((mi_col & 0x01) && cfl->subsampling_x) { - assert(*col_out == 0); - (*col_out)++; - } -} - -void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size, - BLOCK_SIZE bsize) { +void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size) { CFL_CTX *const cfl = &xd->cfl; struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y]; uint8_t *dst = &pd->dst.buf[(row * pd->dst.stride + col) << MI_SIZE_LOG2]; + const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); + const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); + const int row_offset = mi_row - xd->mi[0]->chroma_ref_info.mi_row_chroma_base; + const int col_offset = mi_col - xd->mi[0]->chroma_ref_info.mi_col_chroma_base; - if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) { - // Only dimensions of size 4 can have an odd offset. 
- assert(!((col & 1) && tx_size_wide[tx_size] != 4)); - assert(!((row & 1) && tx_size_high[tx_size] != 4)); - sub8x8_adjust_offset(cfl, xd->mi_row, xd->mi_col, &row, &col); - } #if CONFIG_SDP - cfl_store(xd, cfl, dst, pd->dst.stride, row, col, tx_size, - is_cur_buf_hbd(xd)); + cfl_store(xd, cfl, dst, pd->dst.stride, row + row_offset, col + col_offset, + tx_size, is_cur_buf_hbd(xd)); #else - cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size, is_cur_buf_hbd(xd)); + cfl_store(cfl, dst, pd->dst.stride, row + row_offset, col + col_offset, + tx_size, is_cur_buf_hbd(xd)); #endif } @@ -429,21 +409,20 @@ void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) { CFL_CTX *const cfl = &xd->cfl; struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y]; - int row = 0; - int col = 0; - - if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) { - sub8x8_adjust_offset(cfl, xd->mi_row, xd->mi_col, &row, &col); - } const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size); const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size); + const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); + const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); + const int row_offset = mi_row - xd->mi[0]->chroma_ref_info.mi_row_chroma_base; + const int col_offset = mi_col - xd->mi[0]->chroma_ref_info.mi_col_chroma_base; + tx_size = get_tx_size(width, height); assert(tx_size != TX_INVALID); #if CONFIG_SDP - cfl_store(xd, cfl, pd->dst.buf, pd->dst.stride, row, col, tx_size, - is_cur_buf_hbd(xd)); + cfl_store(xd, cfl, pd->dst.buf, pd->dst.stride, row_offset, col_offset, + tx_size, is_cur_buf_hbd(xd)); #else - cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, tx_size, + cfl_store(cfl, pd->dst.buf, pd->dst.stride, row_offset, col_offset, tx_size, is_cur_buf_hbd(xd)); #endif }
diff --git a/av1/common/cfl.h b/av1/common/cfl.h index 2463e2d..f9c11c6 100644 --- a/av1/common/cfl.h +++ b/av1/common/cfl.h
@@ -20,9 +20,7 @@ const MB_MODE_INFO *mbmi = xd->mi[0]; #if CONFIG_SDP if (xd->tree_type == LUMA_PART) return CFL_DISALLOWED; - const BLOCK_SIZE bsize = - mbmi->sb_type[xd->tree_type == SHARED_PART ? PLANE_TYPE_Y - : PLANE_TYPE_UV]; + const BLOCK_SIZE bsize = get_bsize_base(xd, mbmi, AOM_PLANE_U); #else const BLOCK_SIZE bsize = mbmi->sb_type; #endif @@ -82,8 +80,7 @@ void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size); -void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size, - BLOCK_SIZE bsize); +void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size); void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input, CFL_PRED_TYPE pred_plane, int width);
diff --git a/av1/common/common_data.h b/av1/common/common_data.h index ca47b4a..88ce99a 100644 --- a/av1/common/common_data.h +++ b/av1/common/common_data.h
@@ -12,6 +12,9 @@ #ifndef AOM_AV1_COMMON_COMMON_DATA_H_ #define AOM_AV1_COMMON_COMMON_DATA_H_ +#include <assert.h> +#include <stdbool.h> + #include "av1/common/enums.h" #include "aom/aom_integer.h" #include "aom_dsp/aom_dsp_common.h" @@ -65,6 +68,112 @@ 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 6, 6, 8, 8, 10, 10 }; +#if CONFIG_EXT_RECUR_PARTITIONS +static const PARTITION_TYPE + partition_map_from_symbol_block_wgth[PARTITION_TYPES_REC] = { + PARTITION_NONE, + PARTITION_VERT, + PARTITION_VERT_3, + PARTITION_HORZ, + }; + +static const PARTITION_TYPE_REC + symbol_map_from_partition_block_wgth[EXT_PARTITION_TYPES] = { + PARTITION_NONE_REC, PARTITION_SHORT_SIDE_2_REC, + PARTITION_LONG_SIDE_2_REC, PARTITION_INVALID_REC, + PARTITION_LONG_SIDE_3_REC, + }; + +static const PARTITION_TYPE + partition_map_from_symbol_block_hgtw[PARTITION_TYPES_REC] = { + PARTITION_NONE, + PARTITION_HORZ, + PARTITION_HORZ_3, + PARTITION_VERT, + }; + +static const PARTITION_TYPE_REC + symbol_map_from_partition_block_hgtw[EXT_PARTITION_TYPES] = { + PARTITION_NONE_REC, PARTITION_LONG_SIDE_2_REC, + PARTITION_SHORT_SIDE_2_REC, PARTITION_LONG_SIDE_3_REC, + PARTITION_INVALID_REC, + }; + +/* clang-format off */ +// This table covers all square blocks and 1:2/2:1 rectangular blocks +static const BLOCK_SIZE + subsize_lookup[EXT_PARTITION_TYPES + 1][BLOCK_SIZES_ALL] = { + { // PARTITION_NONE + BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, + BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, + BLOCK_64X32, BLOCK_64X64, BLOCK_64X128, BLOCK_128X64, BLOCK_128X128, + BLOCK_4X16, BLOCK_16X4, BLOCK_8X32, BLOCK_32X8, BLOCK_16X64, BLOCK_64X16, + }, { // PARTITION_HORZ + BLOCK_INVALID, BLOCK_4X4, BLOCK_INVALID, BLOCK_8X4, BLOCK_8X8, BLOCK_16X4, + BLOCK_16X8, BLOCK_16X16, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32, BLOCK_64X16, + BLOCK_64X32, BLOCK_64X64, BLOCK_INVALID, BLOCK_128X64, + BLOCK_4X8, BLOCK_INVALID, BLOCK_8X16, BLOCK_INVALID, BLOCK_16X32, + BLOCK_INVALID, + }, { 
// PARTITION_VERT + BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, BLOCK_4X8, BLOCK_4X16, BLOCK_8X8, + BLOCK_8X16, BLOCK_8X32, BLOCK_16X16, BLOCK_16X32, BLOCK_16X64, BLOCK_32X32, + BLOCK_32X64, BLOCK_INVALID, BLOCK_64X64, BLOCK_64X128, + BLOCK_INVALID, BLOCK_8X4, BLOCK_INVALID, BLOCK_16X8, BLOCK_INVALID, + BLOCK_32X16, + }, { // PARTITION_HORZ_3 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, + BLOCK_INVALID, BLOCK_16X4, BLOCK_16X8, + BLOCK_INVALID, + BLOCK_32X8, BLOCK_32X16, + BLOCK_INVALID, + BLOCK_64X16, BLOCK_64X32, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_4X4, BLOCK_INVALID, BLOCK_8X8, BLOCK_INVALID, BLOCK_16X16, + BLOCK_INVALID, + }, { // PARTITION_VERT_3 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_4X8, BLOCK_4X16, + BLOCK_INVALID, + BLOCK_8X16, BLOCK_8X32, + BLOCK_INVALID, + BLOCK_16X32, BLOCK_16X64, BLOCK_INVALID, BLOCK_32X64, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_4X4, BLOCK_INVALID, BLOCK_8X8, + BLOCK_INVALID, BLOCK_16X16, + }, { // PARTITION_SPLIT + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_64X64, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, + }, +}; + +#if CONFIG_SDP +static AOM_INLINE PARTITION_TYPE sdp_chroma_part_from_luma(BLOCK_SIZE bsize, + PARTITION_TYPE luma_part, int ssx, + int ssy) { + const int bh_chr = block_size_high[bsize] >> ssy; + const int bw_chr = block_size_wide[bsize] >> ssx; + + switch (luma_part) { + case PARTITION_NONE: return PARTITION_NONE; + case PARTITION_HORZ: return (bh_chr < 8) ? PARTITION_NONE : PARTITION_HORZ; + case PARTITION_HORZ_3: + if (bh_chr >= 16) + return PARTITION_HORZ_3; + else + return (bh_chr < 8) ? PARTITION_NONE : PARTITION_HORZ; + case PARTITION_VERT: return (bw_chr < 8) ? 
PARTITION_NONE : PARTITION_VERT; + case PARTITION_VERT_3: + if (bw_chr >= 16) + return PARTITION_VERT_3; + else + return (bw_chr < 8) ? PARTITION_NONE : PARTITION_VERT; + default: assert(0); + } + return PARTITION_INVALID; +} +#endif // CONFIG_SDP +#else // CONFIG_EXT_RECUR_PARTITIONS // A compressed version of the Partition_Subsize table in the spec (9.3. // Conversion tables), for square block sizes only. /* clang-format off */ @@ -101,6 +210,7 @@ BLOCK_8X32, BLOCK_16X64, BLOCK_INVALID } }; +#endif // CONFIG_EXT_RECUR_PARTITIONS static const TX_SIZE max_txsize_lookup[BLOCK_SIZES_ALL] = { // 4X4 @@ -468,6 +578,22 @@ { 13, 3 }, }; +static AOM_INLINE bool is_bsize_geq(BLOCK_SIZE bsize1, BLOCK_SIZE bsize2) { + if (bsize1 == BLOCK_INVALID || bsize2 == BLOCK_INVALID) { + return false; + } + return block_size_wide[bsize1] >= block_size_wide[bsize2] && + block_size_high[bsize1] >= block_size_high[bsize2]; +} + +static AOM_INLINE bool is_bsize_gt(BLOCK_SIZE bsize1, BLOCK_SIZE bsize2) { + if (bsize1 == BLOCK_INVALID || bsize2 == BLOCK_INVALID) { + return false; + } + return block_size_wide[bsize1] > block_size_wide[bsize2] && + block_size_high[bsize1] > block_size_high[bsize2]; +} + #if CONFIG_IST // Mapping of intra modes to IST kernel set // Secondary transforms are enabled only intra modes < PAETH_PRED.
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c index 1396f9a..44c40e6 100644 --- a/av1/common/entropymode.c +++ b/av1/common/entropymode.c
@@ -204,7 +204,64 @@ { AOM_CDF14(3144, 5087, 7382, 7504, 7593, 7690, 7801, 8064, 8232, 9248, 9875, 10521, 29048) } } }; -#if CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP +static const aom_cdf_prob + default_partition_cdf[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS][CDF_SIZE( + EXT_PARTITION_TYPES)] = { { + // 8x8 + { AOM_CDF3(22187, 28914) }, + { AOM_CDF3(17354, 25544) }, + { AOM_CDF3(16287, 28824) }, + { AOM_CDF3(15189, 27217) }, + // 16x16 + { AOM_CDF5(14665, 22357, 28960, 30645) }, + { AOM_CDF5(7804, 13703, 27420, 29025) }, + { AOM_CDF5(7508, 23180, 28044, 30882) }, + { AOM_CDF5(5058, 16122, 25275, 28359) }, + // 32x32 + { AOM_CDF5(11795, 19886, 27120, 29401) }, + { AOM_CDF5(5127, 12682, 26374, 28387) }, + { AOM_CDF5(5522, 19614, 27318, 30668) }, + { AOM_CDF5(3450, 12856, 24163, 28493) }, + // 64x64 + { AOM_CDF5(21562, 26118, 30872, 31711) }, + { AOM_CDF5(5489, 14515, 28365, 29969) }, + { AOM_CDF5(5873, 19195, 28209, 31603) }, + { AOM_CDF5(1674, 15579, 28805, 31560) }, + // 128x128 + { AOM_CDF3(25710, 28640) }, + { AOM_CDF3(7561, 14721) }, + { AOM_CDF3(9603, 21021) }, + { AOM_CDF3(1736, 12989) }, + }, + { + // 8x8 + { AOM_CDF3(22187, 28914) }, + { AOM_CDF3(17354, 25544) }, + { AOM_CDF3(16287, 28824) }, + { AOM_CDF3(15189, 27217) }, + // 16x16 + { AOM_CDF5(14665, 22357, 28960, 30645) }, + { AOM_CDF5(7804, 13703, 27420, 29025) }, + { AOM_CDF5(7508, 23180, 28044, 30882) }, + { AOM_CDF5(5058, 16122, 25275, 28359) }, + // 32x32 + { AOM_CDF5(11795, 19886, 27120, 29401) }, + { AOM_CDF5(5127, 12682, 26374, 28387) }, + { AOM_CDF5(5522, 19614, 27318, 30668) }, + { AOM_CDF5(3450, 12856, 24163, 28493) }, + // 64x64 + { AOM_CDF5(21562, 26118, 30872, 31711) }, + { AOM_CDF5(5489, 14515, 28365, 29969) }, + { AOM_CDF5(5873, 19195, 28209, 31603) }, + { AOM_CDF5(1674, 15579, 28805, 31560) }, + // 128x128 + { AOM_CDF3(25710, 28640) }, + { AOM_CDF3(7561, 14721) }, + { AOM_CDF3(9603, 21021) }, + { AOM_CDF3(1736, 12989) }, + } }; +#elif CONFIG_SDP static const 
aom_cdf_prob default_partition_cdf[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS][CDF_SIZE( EXT_PARTITION_TYPES)] = { @@ -277,7 +334,36 @@ { AOM_CDF8(711, 966, 1172, 32448, 32538, 32617, 32664) }, } }; -#else +#elif CONFIG_EXT_RECUR_PARTITIONS +static const aom_cdf_prob + default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(EXT_PARTITION_TYPES)] = { + // 8x8 + { AOM_CDF3(22187, 28914) }, + { AOM_CDF3(17354, 25544) }, + { AOM_CDF3(16287, 28824) }, + { AOM_CDF3(15189, 27217) }, + // 16x16 + { AOM_CDF5(14665, 22357, 28960, 30645) }, + { AOM_CDF5(7804, 13703, 27420, 29025) }, + { AOM_CDF5(7508, 23180, 28044, 30882) }, + { AOM_CDF5(5058, 16122, 25275, 28359) }, + // 32x32 + { AOM_CDF5(11795, 19886, 27120, 29401) }, + { AOM_CDF5(5127, 12682, 26374, 28387) }, + { AOM_CDF5(5522, 19614, 27318, 30668) }, + { AOM_CDF5(3450, 12856, 24163, 28493) }, + // 64x64 + { AOM_CDF5(21562, 26118, 30872, 31711) }, + { AOM_CDF5(5489, 14515, 28365, 29969) }, + { AOM_CDF5(5873, 19195, 28209, 31603) }, + { AOM_CDF5(1674, 15579, 28805, 31560) }, + // 128x128 + { AOM_CDF3(25710, 28640) }, + { AOM_CDF3(7561, 14721) }, + { AOM_CDF3(9603, 21021) }, + { AOM_CDF3(1736, 12989) }, + }; +#else // !CONFIG_EXT_RECUR_PARTITIONS && !CONFIG_SDP static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE( EXT_PARTITION_TYPES)] = { { AOM_CDF4(19132, 25510, 30392) }, @@ -303,6 +389,38 @@ }; #endif +#if CONFIG_EXT_RECUR_PARTITIONS +static const aom_cdf_prob + default_partition_rec_cdf[PARTITION_CONTEXTS_REC] + [CDF_SIZE(PARTITION_TYPES_REC)] = { + // 8x4, 4x8 + { AOM_CDF2(30462) }, + { AOM_CDF2(25506) }, + { AOM_CDF2(27632) }, + { AOM_CDF2(19443) }, + // 16x8, 8x16 + { AOM_CDF4(19986, 29676, 30790) }, + { AOM_CDF4(12672, 24996, 30937) }, + { AOM_CDF4(16895, 30097, 30155) }, + { AOM_CDF4(10676, 22283, 25595) }, + // 32x16, 16x32 + { AOM_CDF4(13648, 24298, 31008) }, + { AOM_CDF4(6941, 18823, 31326) }, + { AOM_CDF4(8081, 19947, 30935) }, + { AOM_CDF4(4728, 17352, 30577) }, + // 64x32, 32x64 + { 
AOM_CDF4(14004, 23614, 30662) }, + { AOM_CDF4(5530, 18449, 30965) }, + { AOM_CDF4(6144, 19185, 31435) }, + { AOM_CDF4(7382, 19434, 30389) }, + // 128x64, 64x128 + { AOM_CDF3(15208, 24398) }, + { AOM_CDF3(6597, 18232) }, + { AOM_CDF3(9068, 21038) }, + { AOM_CDF3(10923, 21845) }, + }; +#endif // CONFIG_EXT_RECUR_PARTITIONS + static const aom_cdf_prob default_intra_ext_tx_cdf [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)] = { { @@ -1399,6 +1517,9 @@ #endif av1_copy(fc->switchable_interp_cdf, default_switchable_interp_cdf); av1_copy(fc->partition_cdf, default_partition_cdf); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_copy(fc->partition_rec_cdf, default_partition_rec_cdf); +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_copy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf); av1_copy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf); av1_copy(fc->skip_mode_cdfs, default_skip_mode_cdfs);
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h index 235542b..1711602 100644 --- a/av1/common/entropymode.h +++ b/av1/common/entropymode.h
@@ -161,7 +161,11 @@ [CDF_SIZE(EXT_PARTITION_TYPES)]; #else aom_cdf_prob partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(EXT_PARTITION_TYPES)]; -#endif +#endif // CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS + aom_cdf_prob partition_rec_cdf[PARTITION_CONTEXTS_REC] + [CDF_SIZE(PARTITION_TYPES_REC)]; +#endif // CONFIG_EXT_RECUR_PARTITIONS aom_cdf_prob switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS] [CDF_SIZE(SWITCHABLE_FILTERS)]; /* kf_y_cdf is discarded after use, so does not require persistent storage.
diff --git a/av1/common/enums.h b/av1/common/enums.h index 697fdee..743a5dc 100644 --- a/av1/common/enums.h +++ b/av1/common/enums.h
@@ -50,6 +50,8 @@ #define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2) #define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2) +#define MAX_MIB_SQUARE (MAX_MIB_SIZE * MAX_MIB_SIZE) + // MI-units per min superblock #define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2) @@ -165,6 +167,10 @@ // 4X4, 8X8, 16X16, 32X32, 64X64, 128X128 #define SQR_BLOCK_SIZES 6 +#if CONFIG_EXT_RECUR_PARTITIONS +#define KEEP_PARTITION_SPLIT 0 +#endif // CONFIG_EXT_RECUR_PARTITIONS + // Partition types. R: Recursive // // NONE HORZ VERT SPLIT @@ -174,6 +180,17 @@ // | | | | | | | | R | R | // +-------+ +-------+ +---+---+ +---+---+ // +#if CONFIG_EXT_RECUR_PARTITIONS +// HORZ_3 VERT_3 +// +--------------+ +---+------+---+ +// | | | | | | +// +--------------+ | | | | +// | | | | | | +// | | | | | | +// +--------------+ | | | | +// | | | | | | +// +--------------+ +---+------+---+ +#else // HORZ_A HORZ_B VERT_A VERT_B // +---+---+ +-------+ +---+---+ +---+---+ // | | | | | | | | | | | @@ -186,6 +203,20 @@ // +-----+ | | | | // +-----+ | | | | // +-----+ +-+-+-+ +#endif // CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_EXT_RECUR_PARTITIONS +enum { + PARTITION_NONE, + PARTITION_HORZ, + PARTITION_VERT, + PARTITION_HORZ_3, // 3 horizontal sub-partitions with ratios 4:1, 2:1 and 4:1 + PARTITION_VERT_3, // 3 vertical sub-partitions with ratios 4:1, 2:1 and 4:1 + EXT_PARTITION_TYPES, + PARTITION_SPLIT = EXT_PARTITION_TYPES, + PARTITION_TYPES = PARTITION_VERT + 1, + PARTITION_INVALID = 255 +} UENUM1BYTE(PARTITION_TYPE); +#else // CONFIG_EXT_RECUR_PARTITIONS enum { PARTITION_NONE, PARTITION_HORZ, @@ -201,12 +232,27 @@ PARTITION_TYPES = PARTITION_SPLIT + 1, PARTITION_INVALID = 255 } UENUM1BYTE(PARTITION_TYPE); +#endif // CONFIG_EXT_RECUR_PARTITIONS typedef char PARTITION_CONTEXT; #define PARTITION_PLOFFSET 4 // number of probability models per block size #define PARTITION_BLOCK_SIZES 5 #define PARTITION_CONTEXTS (PARTITION_BLOCK_SIZES * PARTITION_PLOFFSET) +#if CONFIG_EXT_RECUR_PARTITIONS +enum { + 
PARTITION_NONE_REC, + PARTITION_LONG_SIDE_2_REC, + PARTITION_LONG_SIDE_3_REC, + PARTITION_SHORT_SIDE_2_REC, + PARTITION_TYPES_REC = PARTITION_SHORT_SIDE_2_REC + 1, + PARTITION_INVALID_REC = 255 +} UENUM1BYTE(PARTITION_TYPE_REC); + +#define PARTITION_BLOCK_SIZES_REC 5 // 128x64, 64x32, 32x16, 16x8, 8x4 +#define PARTITION_CONTEXTS_REC (PARTITION_BLOCK_SIZES_REC * PARTITION_PLOFFSET) +#endif // CONFIG_EXT_RECUR_PARTITIONS + // block transform size enum { TX_4X4, // 4x4 transform
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c index bd02cf3..de7eb9c 100644 --- a/av1/common/mvref_common.c +++ b/av1/common/mvref_common.c
@@ -337,10 +337,14 @@ } static AOM_INLINE void scan_row_mbmi( - const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_col, - const MV_REFERENCE_FRAME rf[2], int row_offset, CANDIDATE_MV *ref_mv_stack, - uint16_t *ref_mv_weight, uint8_t *refmv_count, uint8_t *ref_match_count, - uint8_t *newmv_count, int_mv *gm_mv_candidates, int max_row_offset, + const AV1_COMMON *cm, const MACROBLOCKD *xd, +#if CONFIG_EXT_RECUR_PARTITIONS + int mi_row, +#endif // CONFIG_EXT_RECUR_PARTITIONS + int mi_col, const MV_REFERENCE_FRAME rf[2], int row_offset, + CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight, uint8_t *refmv_count, + uint8_t *ref_match_count, uint8_t *newmv_count, int_mv *gm_mv_candidates, + int max_row_offset, #if CONFIG_SMVP_IMPROVEMENT int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv, uint8_t *single_mv_count, CANDIDATE_MV *derived_mv_stack, uint16_t *derived_mv_weight, @@ -363,6 +367,24 @@ const int plane_type = (xd->tree_type == CHROMA_PART); #endif for (int i = 0; i < end_mi;) { +#if CONFIG_EXT_RECUR_PARTITIONS + const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size]; + const int mask_row = mi_row & (sb_mi_size - 1); + const int mask_col = mi_col & (sb_mi_size - 1); + const int ref_mask_row = mask_row + row_offset; + const int ref_mask_col = mask_col + col_offset + i; + if (ref_mask_row >= 0) { + if (ref_mask_col >= sb_mi_size) break; + + const int ref_offset = + ref_mask_row * xd->is_mi_coded_stride + ref_mask_col; +#if CONFIG_SDP + if (!xd->is_mi_coded[0][ref_offset]) break; +#else + if (!xd->is_mi_coded[ref_offset]) break; +#endif // CONFIG_SDP + } +#endif // CONFIG_EXT_RECUR_PARTITIONS const MB_MODE_INFO *const candidate = candidate_mi0[col_offset + i]; #if CONFIG_SDP const int candidate_bsize = candidate->sb_type[plane_type]; @@ -401,6 +423,9 @@ static AOM_INLINE void scan_col_mbmi( const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, +#if CONFIG_EXT_RECUR_PARTITIONS + int mi_col, +#endif // CONFIG_EXT_RECUR_PARTITIONS const MV_REFERENCE_FRAME rf[2], 
int col_offset, CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight, uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count, int_mv *gm_mv_candidates, int max_col_offset, @@ -423,6 +448,23 @@ const int use_step_16 = (xd->height >= 16); for (i = 0; i < end_mi;) { +#if CONFIG_EXT_RECUR_PARTITIONS + const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size]; + const int mask_row = mi_row & (sb_mi_size - 1); + const int mask_col = mi_col & (sb_mi_size - 1); + const int ref_mask_row = mask_row + row_offset + i; + const int ref_mask_col = mask_col + col_offset; + if (ref_mask_col >= 0) { + if (ref_mask_row >= sb_mi_size) break; + const int ref_offset = + ref_mask_row * xd->is_mi_coded_stride + ref_mask_col; +#if CONFIG_SDP + if (!xd->is_mi_coded[0][ref_offset]) break; +#else + if (!xd->is_mi_coded[ref_offset]) break; +#endif // CONFIG_SDP + } +#endif // CONFIG_EXT_RECUR_PARTITIONS const MB_MODE_INFO *const candidate = xd->mi[(row_offset + i) * xd->mi_stride + col_offset]; #if CONFIG_SDP @@ -494,6 +536,44 @@ } // Analyze a single 8x8 block motion information. } +#if CONFIG_EXT_RECUR_PARTITIONS +static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd, + int mi_row, int mi_col, int n4_w) { + const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size]; + const int mask_row = mi_row & (sb_mi_size - 1); + const int mask_col = mi_col & (sb_mi_size - 1); + + if (n4_w > mi_size_wide[BLOCK_64X64]) return 0; + + const int tr_mask_row = mask_row - 1; + const int tr_mask_col = mask_col + n4_w; + int has_tr; + + if (tr_mask_row < 0) { + // The top-right block is in a superblock above the current sb row. If it is + // in the current tile or a previously coded one, it has been coded. + // Otherwise later the tile boundary checker will figure out whether it is + // available. + has_tr = 1; + } else if (tr_mask_col >= sb_mi_size) { + // The top-right block is in the superblock on the right side, therefore it + // is not coded yet. 
+ has_tr = 0; + } else { + // For a general case, we use is_mi_coded array for the current superblock + // to figure out the availability. + const int tr_offset = tr_mask_row * xd->is_mi_coded_stride + tr_mask_col; + +#if CONFIG_SDP + has_tr = xd->is_mi_coded[av1_get_sdp_idx(xd->tree_type)][tr_offset]; +#else + has_tr = xd->is_mi_coded[tr_offset]; +#endif // CONFIG_SDP + } + + return has_tr; +} +#else static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, int mi_col, int bs) { const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size]; @@ -545,6 +625,7 @@ return has_tr; } +#endif static int check_sb_border(const int mi_row, const int mi_col, const int row_offset, const int col_offset) { @@ -719,8 +800,12 @@ uint16_t ref_mv_weight[MAX_REF_MV_STACK_SIZE], int_mv mv_ref_list[MAX_MV_REF_CANDIDATES], int_mv *gm_mv_candidates, int mi_row, int mi_col, int16_t *mode_context) { +#if CONFIG_EXT_RECUR_PARTITIONS + const int has_tr = has_top_right(cm, xd, mi_row, mi_col, xd->width); +#else const int bs = AOMMAX(xd->width, xd->height); const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs); +#endif MV_REFERENCE_FRAME rf[2]; const TileInfo *const tile = &xd->tile; @@ -775,8 +860,12 @@ // Scan the first above row mode info. row_offset = -1; if (abs(max_row_offset) >= 1) - scan_row_mbmi(cm, xd, mi_col, rf, -1, ref_mv_stack, ref_mv_weight, - refmv_count, &row_match_count, &newmv_count, gm_mv_candidates, + scan_row_mbmi(cm, xd, +#if CONFIG_EXT_RECUR_PARTITIONS + mi_row, +#endif // CONFIG_EXT_RECUR_PARTITIONS + mi_col, rf, -1, ref_mv_stack, ref_mv_weight, refmv_count, + &row_match_count, &newmv_count, gm_mv_candidates, max_row_offset, #if CONFIG_SMVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, @@ -786,8 +875,12 @@ // Scan the first left column mode info. 
col_offset = -1; if (abs(max_col_offset) >= 1) - scan_col_mbmi(cm, xd, mi_row, rf, -1, ref_mv_stack, ref_mv_weight, - refmv_count, &col_match_count, &newmv_count, gm_mv_candidates, + scan_col_mbmi(cm, xd, mi_row, +#if CONFIG_EXT_RECUR_PARTITIONS + mi_col, +#endif // CONFIG_EXT_RECUR_PARTITIONS + rf, -1, ref_mv_stack, ref_mv_weight, refmv_count, + &col_match_count, &newmv_count, gm_mv_candidates, max_col_offset, #if CONFIG_SMVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, @@ -875,11 +968,15 @@ const int col_offset = -(idx << 1) + 1 + col_adj; if (abs(col_offset) <= abs(max_col_offset) && abs(col_offset) > processed_cols) { - scan_col_mbmi(cm, xd, mi_row, rf, col_offset, ref_mv_stack, ref_mv_weight, - refmv_count, &col_match_count, &dummy_newmv_count, - gm_mv_candidates, max_col_offset, 0, single_mv, - &single_mv_count, derived_mv_stack, derived_mv_weight, - &derived_mv_count, &processed_cols); + scan_col_mbmi(cm, xd, mi_row, +#if CONFIG_EXT_RECUR_PARTITIONS + mi_col, +#endif // CONFIG_EXT_RECUR_PARTITIONS + rf, col_offset, ref_mv_stack, ref_mv_weight, refmv_count, + &col_match_count, &dummy_newmv_count, gm_mv_candidates, + max_col_offset, 0, single_mv, &single_mv_count, + derived_mv_stack, derived_mv_weight, &derived_mv_count, + &processed_cols); } } #else @@ -889,15 +986,23 @@ if (abs(row_offset) <= abs(max_row_offset) && abs(row_offset) > processed_rows) - scan_row_mbmi(cm, xd, mi_col, rf, row_offset, ref_mv_stack, ref_mv_weight, + scan_row_mbmi(cm, xd, +#if CONFIG_EXT_RECUR_PARTITIONS + mi_row, +#endif // CONFIG_EXT_RECUR_PARTITIONS + mi_col, rf, row_offset, ref_mv_stack, ref_mv_weight, refmv_count, &row_match_count, &dummy_newmv_count, gm_mv_candidates, max_row_offset, &processed_rows); if (abs(col_offset) <= abs(max_col_offset) && abs(col_offset) > processed_cols) - scan_col_mbmi(cm, xd, mi_row, rf, col_offset, ref_mv_stack, ref_mv_weight, - refmv_count, &col_match_count, &dummy_newmv_count, - gm_mv_candidates, max_col_offset, &processed_cols); 
+ scan_col_mbmi(cm, xd, mi_row, +#if CONFIG_EXT_RECUR_PARTITIONS + mi_col, +#endif // CONFIG_EXT_RECUR_PARTITIONS + rf, col_offset, ref_mv_stack, ref_mv_weight, refmv_count, + &col_match_count, &dummy_newmv_count, gm_mv_candidates, + max_col_offset, &processed_cols); } #endif // CONFIG_SMVP_IMPROVEMENT @@ -1815,8 +1920,12 @@ assert(np <= LEAST_SQUARES_SAMPLES_MAX); // Top-right block - if (do_tr && - has_top_right(cm, xd, mi_row, mi_col, AOMMAX(xd->width, xd->height))) { +#if CONFIG_EXT_RECUR_PARTITIONS + if (do_tr && has_top_right(cm, xd, mi_row, mi_col, xd->width)) { +#else + const int bs = AOMMAX(xd->width, xd->height); + if (do_tr && has_top_right(cm, xd, mi_row, mi_col, bs)) { +#endif const POSITION trb_pos = { -1, xd->width }; const TileInfo *const tile = &xd->tile; if (is_inside(tile, mi_col, mi_row, &trb_pos)) {
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c index b03024e..b830750 100644 --- a/av1/common/reconinter.c +++ b/av1/common/reconinter.c
@@ -765,27 +765,30 @@ // 2. At least one dimension is size 4 with subsampling // 3. If sub-sampled, none of the previous blocks around the sub-sample // are intrabc or inter-blocks -static bool is_sub8x8_inter(const MACROBLOCKD *xd, int plane, BLOCK_SIZE bsize, - int is_intrabc, int build_for_obmc) { +static bool is_sub8x8_inter(const MACROBLOCKD *xd, const MB_MODE_INFO *mi, + int plane, int is_intrabc, int build_for_obmc) { if (is_intrabc || build_for_obmc) { return false; } - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int ss_x = pd->subsampling_x; - const int ss_y = pd->subsampling_y; - const int is_sub4_x = (block_size_wide[bsize] == 4) && ss_x; - const int is_sub4_y = (block_size_high[bsize] == 4) && ss_y; - if (!is_sub4_x && !is_sub4_y) { +#if CONFIG_SDP + if (!(plane && + (mi->sb_type[PLANE_TYPE_UV] != mi->chroma_ref_info.bsize_base))) return false; - } +#else + if (!(plane && (mi->sb_type != mi->chroma_ref_info.bsize_base))) return false; +#endif // CONFIG_SDP // For sub8x8 chroma blocks, we may be covering more than one luma block's // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for // the top-left corner of the prediction source - the correct top-left corner // is at (pre_x, pre_y). - const int row_start = is_sub4_y ? -1 : 0; - const int col_start = is_sub4_x ? -1 : 0; + const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); + const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); + const int row_start = + plane ? mi->chroma_ref_info.mi_row_chroma_base - mi_row : 0; + const int col_start = + plane ? 
mi->chroma_ref_info.mi_col_chroma_base - mi_col : 0; for (int row = row_start; row <= 0; ++row) { for (int col = col_start; col <= 0; ++col) { @@ -816,11 +819,9 @@ const bool ss_y = pd->subsampling_y; const int b4_w = block_size_wide[bsize] >> ss_x; const int b4_h = block_size_high[bsize] >> ss_y; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y); - const int b8_w = block_size_wide[plane_bsize]; - const int b8_h = block_size_high[plane_bsize]; - const int is_compound = has_second_ref(mi); - assert(!is_compound); + const BLOCK_SIZE plane_bsize = plane ? mi->chroma_ref_info.bsize_base : bsize; + const int b8_w = block_size_wide[plane_bsize] >> ss_x; + const int b8_h = block_size_high[plane_bsize] >> ss_y; #if CONFIG_SDP assert(!is_intrabc_block(mi, xd->tree_type)); #else @@ -831,8 +832,10 @@ // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for // the top-left corner of the prediction source - the correct top-left corner // is at (pre_x, pre_y). - const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0; - const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0; + const int row_start = + plane ? (mi->chroma_ref_info.mi_row_chroma_base - xd->mi_row) : 0; + const int col_start = + plane ? 
(mi->chroma_ref_info.mi_col_chroma_base - xd->mi_col) : 0; const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x; const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y; @@ -841,6 +844,13 @@ int col = col_start; for (int x = 0; x < b8_w; x += b4_w) { MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col]; +#if CONFIG_EXT_RECUR_PARTITIONS + // TODO(yuec): enabling compound prediction in none sub8x8 mbs in the + // group + bool is_compound = 0; +#else + bool is_compound = has_second_ref(this_mbmi); +#endif // CONFIG_EXT_RECUR_PARTITIONS struct buf_2d *const dst_buf = &pd->dst; uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x; int ref = 0; @@ -858,7 +868,6 @@ }; const MV mv = this_mbmi->mv[ref].as_mv; - InterPredParams inter_pred_params; av1_init_inter_params(&inter_pred_params, b4_w, b4_h, pre_y + y, pre_x + x, pd->subsampling_x, pd->subsampling_y, @@ -881,9 +890,9 @@ &inter_pred_params, xd, mi_x + x, mi_y + y, ref, mc_buf, calc_subpel_params_func); - ++col; + col += mi_size_wide[bsize]; } - ++row; + row += mi_size_high[bsize]; } } @@ -907,19 +916,17 @@ const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]]; is_global[ref] = is_global_mv_block(mi, wm->wmtype); } -#if CONFIG_SDP - const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y]; -#else - const BLOCK_SIZE bsize = mi->sb_type; -#endif - const int ss_x = pd->subsampling_x; - const int ss_y = pd->subsampling_y; - const int row_start = - (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0; - const int col_start = - (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0; - const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x; - const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y; + + int row_start = 0; + int col_start = 0; + if (!build_for_obmc) { + const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); + const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); + row_start = plane ? 
(mi->chroma_ref_info.mi_row_chroma_base - mi_row) : 0; + col_start = plane ? (mi->chroma_ref_info.mi_col_chroma_base - mi_col) : 0; + } + const int pre_x = (mi_x + MI_SIZE * col_start) >> pd->subsampling_x; + const int pre_y = (mi_y + MI_SIZE * row_start) >> pd->subsampling_y; for (int ref = 0; ref < 1 + is_compound; ++ref) { const struct scale_factors *const sf = @@ -983,13 +990,14 @@ int mi_y, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) { #if CONFIG_SDP - if (is_sub8x8_inter(xd, plane, mi->sb_type[PLANE_TYPE_Y], - is_intrabc_block(mi, xd->tree_type), build_for_obmc)) { -#else - if (is_sub8x8_inter(xd, plane, mi->sb_type, is_intrabc_block(mi), + if (is_sub8x8_inter(xd, mi, plane, is_intrabc_block(mi, xd->tree_type), build_for_obmc)) { +#else + if (is_sub8x8_inter(xd, mi, plane, is_intrabc_block(mi), build_for_obmc)) { #endif +#if !CONFIG_EXT_RECUR_PARTITIONS assert(bw < 8 || bh < 8); +#endif // !CONFIG_EXT_RECUR_PARTITIONS build_inter_predictors_sub8x8(cm, xd, plane, mi, mi_x, mi_y, mc_buf, calc_subpel_params_func); } else { @@ -998,41 +1006,37 @@ calc_subpel_params_func); } } -void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize, + +void av1_setup_dst_planes(struct macroblockd_plane *planes, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const int plane_start, const int plane_end) { + const int plane_start, const int plane_end, + const CHROMA_REF_INFO *chr_ref_info) { // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet // the static analysis warnings. 
for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) { struct macroblockd_plane *const pd = &planes[i]; const int is_uv = i > 0; - setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv], + setup_pred_plane(&pd->dst, src->buffers[i], src->crop_widths[is_uv], src->crop_heights[is_uv], src->strides[is_uv], mi_row, - mi_col, NULL, pd->subsampling_x, pd->subsampling_y); + mi_col, NULL, pd->subsampling_x, pd->subsampling_y, + chr_ref_info); } } void av1_setup_pre_planes(MACROBLOCKD *xd, int idx, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct scale_factors *sf, - const int num_planes) { + const struct scale_factors *sf, const int num_planes, + const CHROMA_REF_INFO *chr_ref_info) { if (src != NULL) { // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet // the static analysis warnings. for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; const int is_uv = i > 0; -#if CONFIG_SDP - setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type[PLANE_TYPE_Y], - src->buffers[i], src->crop_widths[is_uv], + setup_pred_plane(&pd->pre[idx], src->buffers[i], src->crop_widths[is_uv], src->crop_heights[is_uv], src->strides[is_uv], mi_row, - mi_col, sf, pd->subsampling_x, pd->subsampling_y); -#else - setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[i], - src->crop_widths[is_uv], src->crop_heights[is_uv], - src->strides[is_uv], mi_row, mi_col, sf, - pd->subsampling_x, pd->subsampling_y); -#endif + mi_col, sf, pd->subsampling_x, pd->subsampling_y, + chr_ref_info); } } } @@ -1093,9 +1097,12 @@ mbmi->overlappable_neighbors[0] = 0; mbmi->overlappable_neighbors[1] = 0; #if CONFIG_SDP - if (!is_motion_variation_allowed_bsize(mbmi->sb_type[PLANE_TYPE_Y])) return; + if (!is_motion_variation_allowed_bsize(mbmi->sb_type[PLANE_TYPE_Y], + xd->mi_row, xd->mi_col)) + return; #else - if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return; + if 
(!is_motion_variation_allowed_bsize(mbmi->sb_type, xd->mi_row, xd->mi_col)) + return; #endif foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr, @@ -1111,8 +1118,6 @@ int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, const struct macroblockd_plane *pd, int dir) { - assert(is_motion_variation_allowed_bsize(bsize)); - const BLOCK_SIZE bsize_plane = get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); switch (bsize_plane) { @@ -1274,21 +1279,15 @@ MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width, MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt, const int num_planes) { -#if CONFIG_SDP - const BLOCK_SIZE a_bsize = - AOMMAX(BLOCK_8X8, above_mbmi->sb_type[PLANE_TYPE_Y]); -#else - const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type); -#endif const int above_mi_col = xd->mi_col + rel_mi_col; av1_modify_neighbor_predictor_for_obmc(above_mbmi); for (int j = 0; j < num_planes; ++j) { struct macroblockd_plane *const pd = &xd->plane[j]; - setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j], + setup_pred_plane(&pd->dst, ctxt->tmp_buf[j], ctxt->tmp_width[j], ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col, - NULL, pd->subsampling_x, pd->subsampling_y); + NULL, pd->subsampling_x, pd->subsampling_y, NULL); } const int num_refs = 1 + has_second_ref(above_mbmi); @@ -1304,7 +1303,7 @@ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, "Reference frame has invalid dimensions"); av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf, - num_planes); + num_planes, NULL); } xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col); @@ -1318,21 +1317,15 @@ MB_MODE_INFO *left_mbmi, struct build_prediction_ctxt *ctxt, const int num_planes) { -#if CONFIG_SDP - const BLOCK_SIZE l_bsize = - AOMMAX(BLOCK_8X8, left_mbmi->sb_type[PLANE_TYPE_Y]); -#else - const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type); -#endif const int left_mi_row = xd->mi_row + rel_mi_row; 
av1_modify_neighbor_predictor_for_obmc(left_mbmi); for (int j = 0; j < num_planes; ++j) { struct macroblockd_plane *const pd = &xd->plane[j]; - setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j], + setup_pred_plane(&pd->dst, ctxt->tmp_buf[j], ctxt->tmp_width[j], ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0, - NULL, pd->subsampling_x, pd->subsampling_y); + NULL, pd->subsampling_x, pd->subsampling_y, NULL); } const int num_refs = 1 + has_second_ref(left_mbmi); @@ -1349,7 +1342,7 @@ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, "Reference frame has invalid dimensions"); av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col, - ref_scale_factors, num_planes); + ref_scale_factors, num_planes, NULL); } xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row)); @@ -1412,15 +1405,27 @@ bd); } +#if CONFIG_EXT_RECUR_PARTITIONS +void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm, + MACROBLOCKD *xd, int plane, + const BUFFER_SET *ctx, + uint8_t *dst, int dst_stride) { +#else void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, const BUFFER_SET *ctx, uint8_t *dst, int dst_stride) { +#endif // CONFIG_EXT_RECUR_PARTITIONS struct macroblockd_plane *const pd = &xd->plane[plane]; const int ssx = xd->plane[plane].subsampling_x; const int ssy = xd->plane[plane].subsampling_y; +#if CONFIG_EXT_RECUR_PARTITIONS + BLOCK_SIZE plane_bsize = + get_mb_plane_block_size(xd, xd->mi[0], plane, ssx, ssy); +#else BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy); +#endif // CONFIG_EXT_RECUR_PARTITIONS PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode]; assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0); assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0); @@ -1446,7 +1451,15 @@ const uint8_t *intra_pred, int intra_stride) { const int ssx = xd->plane[plane].subsampling_x; const int ssy = xd->plane[plane].subsampling_y; - 
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy); +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + BLOCK_SIZE plane_bsize = + get_mb_plane_block_size(xd, xd->mi[0], plane, ssx, ssy); +#if !CONFIG_EXT_RECUR_PARTITIONS + assert(plane_bsize == get_plane_block_size(bsize, ssx, ssy)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS +#else + BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy); +#endif // CONFIG_EXT_RECUR_PARTITIONS if (is_cur_buf_hbd(xd)) { combine_interintra_highbd( @@ -1472,15 +1485,25 @@ assert(bsize < BLOCK_SIZES_ALL); if (is_cur_buf_hbd(xd)) { DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra( + cm, xd, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); +#else av1_build_intra_predictors_for_interintra( cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_combine_interintra(xd, bsize, plane, pred, stride, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); } else { DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra(cm, xd, plane, ctx, + intrapredictor, MAX_SB_SIZE); +#else av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx, intrapredictor, MAX_SB_SIZE); +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor, MAX_SB_SIZE); }
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h index 6e6ab2c..323940f 100644 --- a/av1/common/reconinter.h +++ b/av1/common/reconinter.h
@@ -298,16 +298,21 @@ return (int64_t)y * stride + x; } -static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize, - uint8_t *src, int width, int height, - int stride, int mi_row, int mi_col, +static INLINE void setup_pred_plane(struct buf_2d *dst, uint8_t *src, int width, + int height, int stride, int mi_row, + int mi_col, const struct scale_factors *scale, - int subsampling_x, int subsampling_y) { + int subsampling_x, int subsampling_y, + const CHROMA_REF_INFO *chr_ref_info) { // Offset the buffer pointer - if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1)) - mi_row -= 1; - if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1)) - mi_col -= 1; + /* if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1)) + mi_row -= 1; + if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1)) + mi_col -= 1;*/ + if (chr_ref_info && (subsampling_x || subsampling_y)) { + mi_row = chr_ref_info->mi_row_chroma_base; + mi_col = chr_ref_info->mi_col_chroma_base; + } const int x = (MI_SIZE * mi_col) >> subsampling_x; const int y = (MI_SIZE * mi_row) >> subsampling_y; @@ -318,13 +323,15 @@ dst->stride = stride; } -void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize, +void av1_setup_dst_planes(struct macroblockd_plane *planes, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const int plane_start, const int plane_end); + const int plane_start, const int plane_end, + const CHROMA_REF_INFO *chr_ref_info); void av1_setup_pre_planes(MACROBLOCKD *xd, int idx, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct scale_factors *sf, const int num_planes); + const struct scale_factors *sf, const int num_planes, + const CHROMA_REF_INFO *chr_ref_info); static INLINE void set_default_interp_filters( MB_MODE_INFO *const mbmi, InterpFilter frame_interp_filter) { @@ -393,11 +400,18 @@ const BUFFER_SET *ctx, int plane, BLOCK_SIZE bsize); +#if CONFIG_EXT_RECUR_PARTITIONS +void 
av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm, + MACROBLOCKD *xd, int plane, + const BUFFER_SET *ctx, + uint8_t *dst, int dst_stride); +#else void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, const BUFFER_SET *ctx, uint8_t *dst, int dst_stride); +#endif // CONFIG_EXT_RECUR_PARTITIONS void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, const uint8_t *inter_pred, int inter_stride,
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c index 73197b8..36f4288 100644 --- a/av1/common/reconintra.c +++ b/av1/common/reconintra.c
@@ -65,6 +65,97 @@ NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // PAETH }; +#if CONFIG_EXT_RECUR_PARTITIONS +static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int top_available, int right_available, TX_SIZE txsz, + int row_off, int col_off, int ss_x, int ss_y, + int px_to_right_edge, int *px_top_right, + int is_bsize_altered_for_chroma) { + if (!top_available || !right_available) return 0; + + const int bw_unit = mi_size_wide[bsize]; + const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1); + const int top_right_count_unit = tx_size_wide_unit[txsz]; + const int px_tr_common = AOMMIN(tx_size_wide[txsz], px_to_right_edge); + + if (px_tr_common <= 0) return 0; + + *px_top_right = px_tr_common; + + if (row_off > 0) { // Just need to check if enough pixels on the right. + if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) { + // Special case: For 128x128 blocks, the transform unit whose + // top-right corner is at the center of the block does in fact have + // pixels available at its top-right corner. + if (row_off == mi_size_high[BLOCK_64X64] >> ss_y && + col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) { + return 1; + } + const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x; + const int col_off_64 = col_off % plane_bw_unit_64; + return col_off_64 + top_right_count_unit < plane_bw_unit_64; + } + return col_off + top_right_count_unit < plane_bw_unit; + } else { + // All top-right pixels are in the block above, which is already available. + if (col_off + top_right_count_unit < plane_bw_unit) return 1; + + // Handle the top-right intra tx block of the coding block + const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size]; + const int mi_row_aligned = + is_bsize_altered_for_chroma + ? xd->mi[0]->chroma_ref_info.mi_row_chroma_base + : mi_row; + const int mi_col_aligned = + is_bsize_altered_for_chroma + ? 
xd->mi[0]->chroma_ref_info.mi_col_chroma_base + : mi_col; + const int tr_mask_row = (mi_row_aligned & (sb_mi_size - 1)) - 1; + const int tr_mask_col = + (mi_col_aligned & (sb_mi_size - 1)) + mi_size_wide[bsize]; + + if (tr_mask_row < 0) { + return 1; + } else if (tr_mask_col >= sb_mi_size) { + return 0; + } else { // Handle the general case: the top_right mi is in the same SB + const int tr_offset = tr_mask_row * xd->is_mi_coded_stride + tr_mask_col; + // As long as the first mi is available, we determine tr is available +#if CONFIG_SDP + int has_tr = xd->is_mi_coded[av1_get_sdp_idx(xd->tree_type)][tr_offset]; +#else + int has_tr = xd->is_mi_coded[tr_offset]; +#endif // CONFIG_SDP + + // Calculate px_top_right: how many top-right pixels are available. If it + // is less than tx_size_wide[txsz], px_top_right will be used to + // determine the location of the last available pixel, which will be used + // for padding. + if (has_tr) { + int mi_tr = 0; + for (int i = 0; i < top_right_count_unit << ss_x; ++i) { + if ((tr_mask_col + i) >= sb_mi_size || +#if CONFIG_SDP + !xd->is_mi_coded[av1_get_sdp_idx(xd->tree_type)][tr_offset + i] +#else + !xd->is_mi_coded[tr_offset + i] +#endif // CONFIG_SDP + ) { + break; + } else { + mi_tr++; + } + } + + *px_top_right = AOMMIN((mi_tr << MI_SIZE_LOG2) >> ss_x, px_tr_common); + } + + return has_tr; + } + } +} +#else // Tables to store if the top-right reference pixels are available. The flags // are represented with bits, packed into 8-bit integers. E.g., for the 32x32 // blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster @@ -260,142 +351,22 @@ return (has_tr_table[idx1] >> idx2) & 1; } } +#endif -// Similar to the has_tr_* tables, but store if the bottom-left reference -// pixels are available. 
-static uint8_t has_bl_4x4[128] = { - 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85, - 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0, 84, 85, 85, 85, 16, 17, - 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84, - 85, 85, 85, 0, 0, 0, 0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, - 0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, - 0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, - 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0, -}; -static uint8_t has_bl_4x8[64] = { - 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0, - 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0, - 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0, - 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0, -}; -static uint8_t has_bl_8x4[64] = { - 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1, - 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0, - 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1, - 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0, -}; -static uint8_t has_bl_8x8[32] = { - 84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0, - 84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0, -}; -static uint8_t has_bl_8x16[16] = { - 16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0, -}; -static uint8_t has_bl_16x8[16] = { - 254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0, -}; -static uint8_t has_bl_16x16[8] = { - 84, 16, 84, 0, 84, 16, 84, 0, -}; -static uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 }; -static uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 }; -static uint8_t has_bl_32x32[2] = { 4, 4 }; -static uint8_t has_bl_32x64[1] = { 0 }; -static uint8_t has_bl_64x32[1] = { 34 }; -static uint8_t has_bl_64x64[1] = { 0 }; -static uint8_t has_bl_64x128[1] = { 0 }; -static uint8_t has_bl_128x64[1] = { 0 }; -static uint8_t has_bl_128x128[1] = { 0 }; -static uint8_t 
has_bl_4x16[32] = { - 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, - 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, -}; -static uint8_t has_bl_16x4[32] = { - 254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0, - 254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0, -}; -static uint8_t has_bl_8x32[8] = { - 0, 1, 0, 0, 0, 1, 0, 0, -}; -static uint8_t has_bl_32x8[8] = { - 238, 78, 238, 14, 238, 78, 238, 14, -}; -static uint8_t has_bl_16x64[2] = { 0, 0 }; -static uint8_t has_bl_64x16[2] = { 42, 42 }; - -static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = { - // 4X4 - has_bl_4x4, - // 4X8, 8X4, 8X8 - has_bl_4x8, has_bl_8x4, has_bl_8x8, - // 8X16, 16X8, 16X16 - has_bl_8x16, has_bl_16x8, has_bl_16x16, - // 16X32, 32X16, 32X32 - has_bl_16x32, has_bl_32x16, has_bl_32x32, - // 32X64, 64X32, 64X64 - has_bl_32x64, has_bl_64x32, has_bl_64x64, - // 64x128, 128x64, 128x128 - has_bl_64x128, has_bl_128x64, has_bl_128x128, - // 4x16, 16x4, 8x32 - has_bl_4x16, has_bl_16x4, has_bl_8x32, - // 32x8, 16x64, 64x16 - has_bl_32x8, has_bl_16x64, has_bl_64x16 -}; - -static uint8_t has_bl_vert_8x8[32] = { - 254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0, - 254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0, -}; -static uint8_t has_bl_vert_16x16[8] = { - 254, 16, 254, 0, 254, 16, 254, 0, -}; -static uint8_t has_bl_vert_32x32[2] = { 14, 14 }; -static uint8_t has_bl_vert_64x64[1] = { 2 }; - -// The _vert_* tables are like the ordinary tables above, but describe the -// order we visit square blocks when doing a PARTITION_VERT_A or -// PARTITION_VERT_B. This is the same order as normal except for on the last -// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block -// as a pair of squares, which means that these tables work correctly for both -// mixed vertical partition types. -// -// There are tables for each of the square sizes. 
Vertical rectangles (like -// BLOCK_16X32) use their respective "non-vert" table -static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = { - // 4X4 - NULL, - // 4X8, 8X4, 8X8 - has_bl_4x8, NULL, has_bl_vert_8x8, - // 8X16, 16X8, 16X16 - has_bl_8x16, NULL, has_bl_vert_16x16, - // 16X32, 32X16, 32X32 - has_bl_16x32, NULL, has_bl_vert_32x32, - // 32X64, 64X32, 64X64 - has_bl_32x64, NULL, has_bl_vert_64x64, - // 64x128, 128x64, 128x128 - has_bl_64x128, NULL, has_bl_128x128 -}; - -static const uint8_t *get_has_bl_table(PARTITION_TYPE partition, - BLOCK_SIZE bsize) { - const uint8_t *ret = NULL; - // If this is a mixed vertical partition, look up bsize in orders_vert. - if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) { - assert(bsize < BLOCK_SIZES); - ret = has_bl_vert_tables[bsize]; - } else { - ret = has_bl_tables[bsize]; - } - assert(ret); - return ret; -} - -static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row, - int mi_col, int bottom_available, int left_available, - PARTITION_TYPE partition, TX_SIZE txsz, int row_off, - int col_off, int ss_x, int ss_y) { +static int has_bottom_left(const AV1_COMMON *cm, const MACROBLOCKD *xd, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int bottom_available, int left_available, + TX_SIZE txsz, int row_off, int col_off, int ss_x, + int ss_y, int px_to_bottom_edge, int *px_bottom_left, + int is_bsize_altered_for_chroma) { if (!bottom_available || !left_available) return 0; + const int px_bl_common = AOMMIN(tx_size_high[txsz], px_to_bottom_edge); + + if (px_bl_common <= 0) return 0; + + *px_bottom_left = px_bl_common; + // Special case for 128x* blocks, when col_off is half the block width. // This is needed because 128x* superblocks are divided into 64x* blocks in // raster order @@ -425,37 +396,70 @@ // All bottom-left pixels are in the left block, which is already available. 
if (row_off + bottom_left_count_unit < plane_bh_unit) return 1; - const int bw_in_mi_log2 = mi_size_wide_log2[bsize]; - const int bh_in_mi_log2 = mi_size_high_log2[bsize]; + // The general case: neither the leftmost column nor the bottom row. The + // bottom-left mi is in the same SB const int sb_mi_size = mi_size_high[cm->seq_params.sb_size]; - const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2; - const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2; + const int mi_row_aligned = + is_bsize_altered_for_chroma + ? xd->mi[0]->chroma_ref_info.mi_row_chroma_base + : mi_row; + const int mi_col_aligned = + is_bsize_altered_for_chroma + ? xd->mi[0]->chroma_ref_info.mi_col_chroma_base + : mi_col; + const int bl_mask_row = + (mi_row_aligned & (sb_mi_size - 1)) + mi_size_high[bsize]; + const int bl_mask_col = (mi_col_aligned & (sb_mi_size - 1)) - 1; - // Leftmost column of superblock: so bottom-left pixels maybe in the left - // and/or bottom-left superblocks. But only the left superblock is - // available, so check if all required pixels fall in that superblock. 
- if (blk_col_in_sb == 0) { - const int blk_start_row_off = - blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >> + if (bl_mask_col < 0) { + const int plane_sb_height = + block_size_high[cm->seq_params.sb_size] >> ss_y; + const int plane_bottom_row = + (((mi_row_aligned & (sb_mi_size - 1)) << MI_SIZE_LOG2) + + block_size_high[bsize]) >> ss_y; - const int row_off_in_sb = blk_start_row_off + row_off; - const int sb_height_unit = sb_mi_size >> ss_y; - return row_off_in_sb + bottom_left_count_unit < sb_height_unit; + *px_bottom_left = + AOMMIN(plane_sb_height - plane_bottom_row, px_bl_common); + + return *px_bottom_left > 0; + } else if (bl_mask_row >= sb_mi_size) { + return 0; + } else { + const int bl_offset = bl_mask_row * xd->is_mi_coded_stride + bl_mask_col; + // As long as there is one bottom-left mi available, we determine bl is + // available +#if CONFIG_SDP + int has_bl = xd->is_mi_coded[av1_get_sdp_idx(xd->tree_type)][bl_offset]; +#else + int has_bl = xd->is_mi_coded[bl_offset]; +#endif // CONFIG_SDP + + // Calculate px_bottom_left: how many bottom-left pixels are available. If + // it is less than tx_size_high[txsz], px_bottom_left will be used to + // determine the location of the last available pixel, which will be used + // for padding. + if (has_bl) { + int mi_bl = 0; + for (int i = 0; i < bottom_left_count_unit << ss_y; ++i) { + if ((bl_mask_row + i) >= sb_mi_size || +#if CONFIG_SDP + !xd->is_mi_coded[av1_get_sdp_idx(xd->tree_type)] + [bl_offset + i * xd->is_mi_coded_stride] +#else + !xd->is_mi_coded[bl_offset + i * xd->is_mi_coded_stride] +#endif // CONFIG_SDP + ) { + break; + } else { + mi_bl++; + } + } + + *px_bottom_left = AOMMIN((mi_bl << MI_SIZE_LOG2) >> ss_y, px_bl_common); + } + + return has_bl; } - - // Bottom row of superblock (and not the leftmost column): so bottom-left - // pixels fall in the bottom superblock, which is not available yet. 
- if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0; - - // General case (neither leftmost column nor bottom row): check if the - // bottom-left block is coded before the current block. - const int this_blk_index = - ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) + - blk_col_in_sb + 0; - const int idx1 = this_blk_index / 8; - const int idx2 = this_blk_index % 8; - const uint8_t *has_bl_table = get_has_bl_table(partition, bsize); - return (has_bl_table[idx1] >> idx2) & 1; } } @@ -1838,56 +1842,8 @@ #endif } -static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x, - int subsampling_y) { - assert(subsampling_x >= 0 && subsampling_x < 2); - assert(subsampling_y >= 0 && subsampling_y < 2); - BLOCK_SIZE bs = bsize; - switch (bsize) { - case BLOCK_4X4: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_8X8; - else if (subsampling_x == 1) - bs = BLOCK_8X4; - else if (subsampling_y == 1) - bs = BLOCK_4X8; - break; - case BLOCK_4X8: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_8X8; - else if (subsampling_x == 1) - bs = BLOCK_8X8; - else if (subsampling_y == 1) - bs = BLOCK_4X8; - break; - case BLOCK_8X4: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_8X8; - else if (subsampling_x == 1) - bs = BLOCK_8X4; - else if (subsampling_y == 1) - bs = BLOCK_8X8; - break; - case BLOCK_4X16: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_8X16; - else if (subsampling_x == 1) - bs = BLOCK_8X16; - else if (subsampling_y == 1) - bs = BLOCK_4X16; - break; - case BLOCK_16X4: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_16X8; - else if (subsampling_x == 1) - bs = BLOCK_16X4; - else if (subsampling_y == 1) - bs = BLOCK_16X8; - break; - default: break; - } - return bs; -} +#define ARITHMETIC_LEFT_SHIFT(x, shift) \ + (((x) >= 0) ? 
((x) << (shift)) : (-((-(x)) << (shift)))) void av1_predict_intra_block( const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx, @@ -1940,35 +1896,51 @@ col_off || (ss_x ? xd->chroma_left_available : xd->left_available); const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); +#if CONFIG_SDP + BLOCK_SIZE bsize = mbmi->sb_type[plane > 0]; +#else // CONFIG_SDP + BLOCK_SIZE bsize = mbmi->sb_type; +#endif // CONFIG_SDP + const int mi_wide = mi_size_wide[bsize]; + const int mi_high = mi_size_high[bsize]; // Distance between the right edge of this prediction block to - // the frame right edge - const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx; + // the tile right edge + const int xr = + ARITHMETIC_LEFT_SHIFT(xd->tile.mi_col_end - mi_col - mi_wide, 2 - ss_x) + + wpx - x - txwpx; // Distance between the bottom edge of this prediction block to - // the frame bottom edge - const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx; + // the tile bottom edge + const int yd = + ARITHMETIC_LEFT_SHIFT(xd->tile.mi_row_end - mi_row - mi_high, 2 - ss_y) + + hpx - y - txhpx; const int right_available = mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end; const int bottom_available = (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end); - const PARTITION_TYPE partition = mbmi->partition; -#if CONFIG_SDP - BLOCK_SIZE bsize = mbmi->sb_type[plane > 0]; -#else - BLOCK_SIZE bsize = mbmi->sb_type; -#endif + const BLOCK_SIZE init_bsize = bsize; // force 4x4 chroma component block size. 
if (ss_x || ss_y) { - bsize = scale_chroma_bsize(bsize, ss_x, ss_y); + bsize = mbmi->chroma_ref_info.bsize_base; } +#if CONFIG_EXT_RECUR_PARTITIONS + int px_top_right = 0; + const int have_top_right = has_top_right( + cm, xd, bsize, mi_row, mi_col, have_top, right_available, tx_size, + row_off, col_off, ss_x, ss_y, xr, &px_top_right, bsize != init_bsize); +#else + const PARTITION_TYPE partition = mbmi->partition; const int have_top_right = has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available, partition, tx_size, row_off, col_off, ss_x, ss_y); - const int have_bottom_left = - has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left, - partition, tx_size, row_off, col_off, ss_x, ss_y); +#endif + + int px_bottom_left = 0; + const int have_bottom_left = has_bottom_left( + cm, xd, bsize, mi_row, mi_col, bottom_available, have_left, tx_size, + row_off, col_off, ss_x, ss_y, yd, &px_bottom_left, bsize != init_bsize); const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter; @@ -1982,9 +1954,13 @@ xd, ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode, tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0, +#if CONFIG_EXT_RECUR_PARTITIONS + have_top_right ? px_top_right : 0, +#else have_top_right ? AOMMIN(txwpx, xr) : 0, +#endif have_left ? AOMMIN(txhpx, yd + txhpx) : 0, - have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane + have_bottom_left ? px_bottom_left : 0, plane #if CONFIG_MRLS , is_sb_boundary @@ -2001,9 +1977,13 @@ xd, ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode, tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0, +#if CONFIG_EXT_RECUR_PARTITIONS + have_top_right ? px_top_right : 0, +#else have_top_right ? AOMMIN(txwpx, xr) : 0, +#endif have_left ? AOMMIN(txhpx, yd + txhpx) : 0, - have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane + have_bottom_left ? 
px_bottom_left : 0, plane #if CONFIG_MRLS , is_sb_boundary @@ -2015,6 +1995,8 @@ ); } +#undef ARITHMETIC_LEFT_SHIFT + void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, int blk_col, int blk_row, TX_SIZE tx_size) { @@ -2062,13 +2044,18 @@ if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) { #if CONFIG_DEBUG assert(is_cfl_allowed(xd)); +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE plane_bsize = get_mb_plane_block_size( + xd, mbmi, plane, pd->subsampling_x, pd->subsampling_y); +#else const BLOCK_SIZE plane_bsize = get_plane_block_size( #if CONFIG_SDP mbmi->sb_type[xd->tree_type == CHROMA_PART], pd->subsampling_x, pd->subsampling_y); #else - mbmi->sb_type, pd->subsampling_x, pd->subsampling_y); -#endif + mbmi->chroma_ref_info.bsize_base, pd->subsampling_x, pd->subsampling_y); +#endif // CONFIG_SDP +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS (void)plane_bsize; assert(plane_bsize < BLOCK_SIZES_ALL); if (!xd->lossless[mbmi->segment_id]) { @@ -2102,8 +2089,7 @@ if (xd->tree_type == CHROMA_PART) { const int luma_tx_size = av1_get_max_uv_txsize(mbmi->sb_type[PLANE_TYPE_UV], 0, 0); - cfl_store_tx(xd, blk_row, blk_col, luma_tx_size, - mbmi->sb_type[PLANE_TYPE_UV]); + cfl_store_tx(xd, blk_row, blk_col, luma_tx_size); } #endif cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h index a0e6e9a..0ccac73 100644 --- a/av1/common/reconintra.h +++ b/av1/common/reconintra.h
@@ -68,6 +68,7 @@ return mode >= V_PRED && mode <= D67_PRED; } +// TODO(any): Verify the correct behavior when we have BLOCK_4X16 static INLINE int av1_use_angle_delta(BLOCK_SIZE bsize) { return bsize >= BLOCK_8X8; }
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c index f3c8795..88615c4 100644 --- a/av1/common/thread_common.c +++ b/av1/common/thread_common.c
@@ -287,8 +287,8 @@ mi_col += MAX_MIB_SIZE) { c = mi_col >> MAX_MIB_SIZE_LOG2; - av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer, - mi_row, mi_col, plane, plane + 1); + av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col, plane, + plane + 1, NULL); av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row, mi_col); @@ -307,8 +307,8 @@ // completed sync_read(lf_sync, r + 1, c, plane); - av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer, - mi_row, mi_col, plane, plane + 1); + av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col, plane, + plane + 1, NULL); av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row, mi_col); } @@ -354,8 +354,8 @@ mi_col += MI_SIZE_64X64) { c = mi_col >> MIN_MIB_SIZE_LOG2; - av1_setup_dst_planes(planes, BLOCK_64X64, frame_buffer, mi_row, - mi_col, plane, plane + 1); + av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col, plane, + plane + 1, NULL); av1_filter_block_plane_bitmask_vert(cm, &planes[plane], plane, mi_row, mi_col); @@ -374,8 +374,8 @@ // completed sync_read(lf_sync, r + 1, c, plane); - av1_setup_dst_planes(planes, BLOCK_64X64, frame_buffer, mi_row, - mi_col, plane, plane + 1); + av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col, plane, + plane + 1, NULL); av1_filter_block_plane_bitmask_horz(cm, &planes[plane], plane, mi_row, mi_col); } @@ -510,8 +510,7 @@ // TODO(chengchen): can we remove this? struct macroblockd_plane *pd = xd->plane; - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame, 0, 0, plane, - plane + 1); + av1_setup_dst_planes(pd, frame, 0, 0, plane, plane + 1, NULL); av1_build_bitmask_vert_info(cm, &pd[plane], plane); av1_build_bitmask_horz_info(cm, &pd[plane], plane);
diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c index a3a8857..e7cbb48 100644 --- a/av1/common/x86/convolve_2d_sse2.c +++ b/av1/common/x86/convolve_2d_sse2.c
@@ -280,7 +280,7 @@ const __m128i res_8 = _mm_packus_epi16(round_result_lo, round_result_hi); - _mm_store_si128((__m128i *)(&dst0[j]), res_8); + _mm_storeu_si128((__m128i *)(&dst0[j]), res_8); } else { _mm_store_si128((__m128i *)(&dst[j]), res_unsigned_lo); _mm_store_si128((__m128i *)(&dst[j + 8]), res_unsigned_hi);
diff --git a/av1/common/x86/highbd_convolve_2d_sse4.c b/av1/common/x86/highbd_convolve_2d_sse4.c index 185bdd6..122bf73 100644 --- a/av1/common/x86/highbd_convolve_2d_sse4.c +++ b/av1/common/x86/highbd_convolve_2d_sse4.c
@@ -90,7 +90,7 @@ _mm_packus_epi32(round_result_lo, round_result_hi); const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); } else { const __m128i res_unsigned_16b = _mm_adds_epu16(res, offset_const_16b); @@ -402,7 +402,7 @@ _mm_packus_epi32(round_result_lo, round_result_hi); const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); } else { const __m128i res_16b = _mm_packus_epi32(res_unsigned_lo, res_unsigned_hi);
diff --git a/av1/common/x86/highbd_inv_txfm_sse4.c b/av1/common/x86/highbd_inv_txfm_sse4.c index 7c32b49..876b9e6 100644 --- a/av1/common/x86/highbd_inv_txfm_sse4.c +++ b/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -1303,14 +1303,14 @@ round_shift_8x8(in, shift); - v0 = _mm_load_si128((__m128i const *)(output + 0 * stride)); - v1 = _mm_load_si128((__m128i const *)(output + 1 * stride)); - v2 = _mm_load_si128((__m128i const *)(output + 2 * stride)); - v3 = _mm_load_si128((__m128i const *)(output + 3 * stride)); - v4 = _mm_load_si128((__m128i const *)(output + 4 * stride)); - v5 = _mm_load_si128((__m128i const *)(output + 5 * stride)); - v6 = _mm_load_si128((__m128i const *)(output + 6 * stride)); - v7 = _mm_load_si128((__m128i const *)(output + 7 * stride)); + v0 = _mm_loadu_si128((__m128i const *)(output + 0 * stride)); + v1 = _mm_loadu_si128((__m128i const *)(output + 1 * stride)); + v2 = _mm_loadu_si128((__m128i const *)(output + 2 * stride)); + v3 = _mm_loadu_si128((__m128i const *)(output + 3 * stride)); + v4 = _mm_loadu_si128((__m128i const *)(output + 4 * stride)); + v5 = _mm_loadu_si128((__m128i const *)(output + 5 * stride)); + v6 = _mm_loadu_si128((__m128i const *)(output + 6 * stride)); + v7 = _mm_loadu_si128((__m128i const *)(output + 7 * stride)); if (flipud) { u0 = get_recon_8x8(v0, in[14], in[15], fliplr, bd); @@ -1332,14 +1332,14 @@ u7 = get_recon_8x8(v7, in[14], in[15], fliplr, bd); } - _mm_store_si128((__m128i *)(output + 0 * stride), u0); - _mm_store_si128((__m128i *)(output + 1 * stride), u1); - _mm_store_si128((__m128i *)(output + 2 * stride), u2); - _mm_store_si128((__m128i *)(output + 3 * stride), u3); - _mm_store_si128((__m128i *)(output + 4 * stride), u4); - _mm_store_si128((__m128i *)(output + 5 * stride), u5); - _mm_store_si128((__m128i *)(output + 6 * stride), u6); - _mm_store_si128((__m128i *)(output + 7 * stride), u7); + _mm_storeu_si128((__m128i *)(output + 0 * stride), u0); + _mm_storeu_si128((__m128i *)(output + 1 * stride), u1); + _mm_storeu_si128((__m128i *)(output + 2 * stride), u2); + _mm_storeu_si128((__m128i *)(output + 3 * stride), u3); + _mm_storeu_si128((__m128i *)(output + 4 * stride), u4); + _mm_storeu_si128((__m128i *)(output + 5 
* stride), u5); + _mm_storeu_si128((__m128i *)(output + 6 * stride), u6); + _mm_storeu_si128((__m128i *)(output + 7 * stride), u7); } void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output,
diff --git a/av1/common/x86/highbd_jnt_convolve_avx2.c b/av1/common/x86/highbd_jnt_convolve_avx2.c index fa27571..0494c0a 100644 --- a/av1/common/x86/highbd_jnt_convolve_avx2.c +++ b/av1/common/x86/highbd_jnt_convolve_avx2.c
@@ -93,7 +93,8 @@ _mm256_packus_epi32(round_result_lo, round_result_hi); const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd); - _mm256_store_si256((__m256i *)(&dst0[i * dst_stride0 + j]), res_clip); + _mm256_storeu_si256((__m256i *)(&dst0[i * dst_stride0 + j]), + res_clip); } else { const __m256i res_unsigned_16b = _mm256_adds_epu16(res, offset_const_16b); @@ -200,8 +201,8 @@ const __m128i res_0 = _mm256_castsi256_si128(res_clip); const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); + _mm_storeu_si128( (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); } else { const __m256i res_unsigned_16b = @@ -209,9 +210,9 @@ const __m128i res_0 = _mm256_castsi256_si128(res_unsigned_16b); const __m128i res_1 = _mm256_extracti128_si256(res_unsigned_16b, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), + res_1); } } } @@ -425,8 +426,8 @@ const __m128i res_0 = _mm256_castsi256_si128(res_clip); const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); + _mm_storeu_si128( (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); } else { __m256i res_16b = @@ -434,9 +435,9 @@ const __m128i res_0 = _mm256_castsi256_si128(res_16b); const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); + 
_mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), + res_1); } } @@ -598,18 +599,18 @@ const __m128i res_0 = _mm256_castsi256_si128(res_clip); const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), - res_1); + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); + _mm_storeu_si128( + (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); } else { __m256i res_16b = _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi); const __m128i res_0 = _mm256_castsi256_si128(res_16b); const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), + res_1); } } } @@ -809,8 +810,8 @@ const __m128i res_0 = _mm256_castsi256_si128(res_clip); const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); + _mm_storeu_si128( (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); } else { __m256i res_16b = @@ -818,9 +819,9 @@ const __m128i res_0 = _mm256_castsi256_si128(res_16b); const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), + res_1); } } s[0] = s[1];
diff --git a/av1/common/x86/highbd_jnt_convolve_sse4.c b/av1/common/x86/highbd_jnt_convolve_sse4.c index 00ff22f..7c0f90f 100644 --- a/av1/common/x86/highbd_jnt_convolve_sse4.c +++ b/av1/common/x86/highbd_jnt_convolve_sse4.c
@@ -217,9 +217,9 @@ const __m128i res_clip_1 = _mm_min_epi16(res_16b_1, clip_pixel_to_bd); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), - res_clip_0); - _mm_store_si128( + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), + res_clip_0); + _mm_storeu_si128( (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_clip_1); } else { @@ -227,9 +227,9 @@ _mm_packus_epi32(res_unsigned_lo_0, res_unsigned_hi_0); __m128i res_16bit1 = _mm_packus_epi32(res_unsigned_lo_1, res_unsigned_hi_1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16bit0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_16bit1); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j]), res_16bit0); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), + res_16bit1); } } s[0] = s[1]; @@ -366,10 +366,10 @@ const __m128i res_16b = _mm_packus_epi32(round_result_lo, round_result_hi); const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); } else { __m128i res_16b = _mm_packus_epi32(res_unsigned_lo, res_unsigned_hi); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16b); + _mm_storeu_si128((__m128i *)(&dst[i * dst_stride + j]), res_16b); } } }
diff --git a/av1/common/x86/jnt_convolve_avx2.c b/av1/common/x86/jnt_convolve_avx2.c index c42ebc1..586b3e4 100644 --- a/av1/common/x86/jnt_convolve_avx2.c +++ b/av1/common/x86/jnt_convolve_avx2.c
@@ -369,8 +369,8 @@ const __m128i res_0 = _mm256_castsi256_si128(res_8); const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); + _mm_storeu_si128( (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1); } else { @@ -548,8 +548,8 @@ const __m128i res_0 = _mm256_castsi256_si128(res_8); const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); + _mm_storeu_si128( (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1); } else { @@ -840,8 +840,8 @@ const __m256i res_8 = _mm256_packus_epi16(round_result, round_result); const __m256i res_0 = _mm256_permute4x64_epi64(res_8, 0xD8); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), - _mm256_castsi256_si128(res_0)); + _mm_storeu_si128((__m128i *)(&dst0[i * dst_stride0 + j]), + _mm256_castsi256_si128(res_0)); } else { _mm256_store_si256((__m256i *)(&dst[i * dst_stride + j]), res_unsigned);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index 5e25082..b43e0a9 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c
@@ -12,6 +12,7 @@ #include <assert.h> #include <stddef.h> +#include "av1/common/blockd.h" #include "config/aom_config.h" #include "config/aom_dsp_rtcd.h" #include "config/aom_scale_rtcd.h" @@ -253,11 +254,7 @@ #else if (plane == AOM_PLANE_Y && store_cfl_required(cm, xd)) { #endif -#if CONFIG_SDP - cfl_store_tx(xd, row, col, tx_size, mbmi->sb_type[AOM_PLANE_Y]); -#else - cfl_store_tx(xd, row, col, tx_size, mbmi->sb_type); -#endif + cfl_store_tx(xd, row, col, tx_size); } } @@ -307,7 +304,14 @@ DecoderCodingBlock *const dcb = &td->dcb; MACROBLOCKD *const xd = &dcb->xd; const struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + const BLOCK_SIZE bsize_base = get_bsize_base(xd, mbmi, plane); + const TX_SIZE plane_tx_size = + plane ? av1_get_max_uv_txsize(bsize_base, pd->subsampling_x, + pd->subsampling_y) + : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, + blk_col)]; +#elif CONFIG_SDP if (xd->tree_type == SHARED_PART) assert(mbmi->sb_type[PLANE_TYPE_Y] == mbmi->sb_type[PLANE_TYPE_UV]); const TX_SIZE plane_tx_size = @@ -317,8 +321,8 @@ blk_col)]; #else const TX_SIZE plane_tx_size = - plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y) + plane ? 
av1_get_max_uv_txsize(mbmi->chroma_ref_info.bsize_base, + pd->subsampling_x, pd->subsampling_y) : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, blk_col)]; #endif @@ -393,7 +397,8 @@ static AOM_INLINE void set_offsets(AV1_COMMON *const cm, MACROBLOCKD *const xd, BLOCK_SIZE bsize, int mi_row, int mi_col, - int bw, int bh, int x_mis, int y_mis) { + int bw, int bh, int x_mis, int y_mis, + PARTITION_TREE *parent, int index) { const int num_planes = av1_num_planes(cm); const CommonModeInfoParams *const mi_params = &cm->mi_params; const TileInfo *const tile = &xd->tile; @@ -423,25 +428,31 @@ } #endif - set_plane_n4(xd, bw, bh, num_planes); - set_entropy_context(xd, mi_row, mi_col, num_planes); + CHROMA_REF_INFO *chr_ref_info = &xd->mi[0]->chroma_ref_info; + set_chroma_ref_info(mi_row, mi_col, index, bsize, chr_ref_info, + parent ? &parent->chroma_ref_info : NULL, + parent ? parent->bsize : BLOCK_INVALID, + parent ? parent->partition : PARTITION_NONE, + xd->plane[1].subsampling_x, xd->plane[1].subsampling_y); + set_plane_n4(xd, bw, bh, num_planes, chr_ref_info); + set_entropy_context(xd, mi_row, mi_col, num_planes, chr_ref_info); // Distance of Mb to the various image edges. 
These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, mi_params->mi_rows, - mi_params->mi_cols); + mi_params->mi_cols, chr_ref_info); - av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0, - num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, mi_row, mi_col, 0, + num_planes, chr_ref_info); } static AOM_INLINE void decode_mbmi_block(AV1Decoder *const pbi, DecoderCodingBlock *dcb, int mi_row, int mi_col, aom_reader *r, PARTITION_TYPE partition, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize, + PARTITION_TREE *parent, int index) { AV1_COMMON *const cm = &pbi->common; - const SequenceHeader *const seq_params = &cm->seq_params; const int bw = mi_size_wide[bsize]; const int bh = mi_size_high[bsize]; const int x_mis = AOMMIN(bw, cm->mi_params.mi_cols - mi_col); @@ -451,18 +462,27 @@ #if CONFIG_ACCOUNTING aom_accounting_set_context(&pbi->accounting, mi_col, mi_row); #endif - set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis); + set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, parent, + index); xd->mi[0]->partition = partition; av1_read_mode_info(pbi, dcb, r, x_mis, y_mis); - if (bsize >= BLOCK_8X8 && - (seq_params->subsampling_x || seq_params->subsampling_y)) { - const BLOCK_SIZE uv_subsize = - ss_size_lookup[bsize][seq_params->subsampling_x] - [seq_params->subsampling_y]; - if (uv_subsize == BLOCK_INVALID) + +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + if (xd->tree_type != LUMA_PART) { +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const struct macroblockd_plane *const pd_u = &xd->plane[1]; + const BLOCK_SIZE chroma_bsize_base = xd->mi[0]->chroma_ref_info.bsize_base; + assert(chroma_bsize_base < BLOCK_SIZES_ALL); + if (get_plane_block_size(chroma_bsize_base, pd_u->subsampling_x, + pd_u->subsampling_y) == BLOCK_INVALID) { aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, - "Invalid block 
size."); + "Block size %dx%d invalid with this subsampling mode", + block_size_wide[chroma_bsize_base], + block_size_high[chroma_bsize_base]); + } +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS } +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS } typedef struct PadBlock { @@ -903,13 +923,8 @@ dst_stride2); const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; -#if CONFIG_SDP - av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type[PLANE_TYPE_Y], - &cm->cur_frame->buf, mi_row, mi_col, 0, num_planes); -#else - av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type, &cm->cur_frame->buf, - mi_row, mi_col, 0, num_planes); -#endif + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, mi_row, mi_col, 0, + num_planes, &xd->mi[0]->chroma_ref_info); av1_build_obmc_inter_prediction(cm, xd, dst_buf1, dst_stride1, dst_buf2, dst_stride2); } @@ -955,7 +970,8 @@ xd->block_ref_scale_factors[ref] = ref_scale_factors; av1_setup_pre_planes(xd, ref, &ref_buf->buf, mi_row, mi_col, - ref_scale_factors, num_planes); + ref_scale_factors, num_planes, + &mbmi->chroma_ref_info); } } @@ -969,9 +985,7 @@ int pixel_c, pixel_r; mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, pd->subsampling_x, pd->subsampling_y); - if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, - pd->subsampling_y)) - continue; + if (plane && !xd->is_chroma_ref) continue; mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, cm->current_frame.order_hint, plane, pixel_c, pixel_r, pd->width, pd->height, @@ -1094,8 +1108,16 @@ const struct macroblockd_plane *const pd = &xd->plane[plane]; const int ss_x = pd->subsampling_x; const int ss_y = pd->subsampling_y; +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = + get_mb_plane_block_size(xd, mbmi, plane, ss_x, ss_y); +#if !CONFIG_EXT_RECUR_PARTITIONS + assert(plane_bsize == get_plane_block_size(bsize, ss_x, ss_y)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS +#else const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, 
ss_x, ss_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane); const int bh_var_tx = tx_size_high_unit[max_tx_size]; @@ -1127,6 +1149,7 @@ } av1_visit_palette(pbi, xd, r, set_color_index_map_offset); + av1_mark_block_as_coded(xd, bsize, cm->seq_params.sb_size); } static AOM_INLINE void set_inter_tx_size(MB_MODE_INFO *mbmi, int stride_log2, @@ -1411,10 +1434,12 @@ ThreadData *const td, int mi_row, int mi_col, aom_reader *r, PARTITION_TYPE partition, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize, + PARTITION_TREE *parent, int index) { DecoderCodingBlock *const dcb = &td->dcb; MACROBLOCKD *const xd = &dcb->xd; - decode_mbmi_block(pbi, dcb, mi_row, mi_col, r, partition, bsize); + decode_mbmi_block(pbi, dcb, mi_row, mi_col, r, partition, bsize, parent, + index); av1_visit_palette(pbi, xd, r, av1_decode_palette_tokens); @@ -1533,11 +1558,12 @@ } } #if CONFIG_SDP + assert(bsize == mbmi->sb_type[av1_get_sdp_idx(xd->tree_type)]); if (mbmi->skip_txfm[xd->tree_type == CHROMA_PART]) - av1_reset_entropy_context(xd, bsize, num_planes); #else - if (mbmi->skip_txfm) av1_reset_entropy_context(xd, bsize, num_planes); -#endif + if (mbmi->skip_txfm) +#endif // CONFIG_SDP + av1_reset_entropy_context(xd, bsize, num_planes); #if CONFIG_SDP decode_token_recon_block(pbi, td, r, partition, bsize); #else @@ -1587,10 +1613,9 @@ #endif } -static AOM_INLINE void set_offsets_for_pred_and_recon(AV1Decoder *const pbi, - ThreadData *const td, - int mi_row, int mi_col, - BLOCK_SIZE bsize) { +static AOM_INLINE void set_offsets_for_pred_and_recon( + AV1Decoder *const pbi, ThreadData *const td, int mi_row, int mi_col, + BLOCK_SIZE bsize, PARTITION_TREE *parent, int index) { AV1_COMMON *const cm = &pbi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params; DecoderCodingBlock *const dcb = &td->dcb; @@ -1607,23 +1632,29 @@ &mi_params->tx_type_map[mi_row * mi_params->mi_stride + mi_col]; xd->tx_type_map_stride = 
mi_params->mi_stride; - set_plane_n4(xd, bw, bh, num_planes); + CHROMA_REF_INFO *chr_ref_info = &xd->mi[0]->chroma_ref_info; + set_chroma_ref_info(mi_row, mi_col, index, bsize, chr_ref_info, + parent ? &parent->chroma_ref_info : NULL, + parent ? parent->bsize : BLOCK_INVALID, + parent ? parent->partition : PARTITION_NONE, + xd->plane[1].subsampling_x, xd->plane[1].subsampling_y); + set_plane_n4(xd, bw, bh, num_planes, chr_ref_info); // Distance of Mb to the various image edges. These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, mi_params->mi_rows, - mi_params->mi_cols); + mi_params->mi_cols, chr_ref_info); - av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0, - num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, mi_row, mi_col, 0, + num_planes, chr_ref_info); } static AOM_INLINE void decode_block(AV1Decoder *const pbi, ThreadData *const td, int mi_row, int mi_col, aom_reader *r, - PARTITION_TYPE partition, - BLOCK_SIZE bsize) { + PARTITION_TYPE partition, BLOCK_SIZE bsize, + PARTITION_TREE *parent, int index) { (void)partition; - set_offsets_for_pred_and_recon(pbi, td, mi_row, mi_col, bsize); + set_offsets_for_pred_and_recon(pbi, td, mi_row, mi_col, bsize, parent, index); #if CONFIG_SDP decode_token_recon_block(pbi, td, r, partition, bsize); #else @@ -1635,15 +1666,65 @@ static PARTITION_TYPE read_partition(const AV1_COMMON *const cm, MACROBLOCKD *xd, int mi_row, int mi_col, aom_reader *r, int has_rows, int has_cols, +#if CONFIG_EXT_RECUR_PARTITIONS + PARTITION_TREE *ptree_luma, +#endif // CONFIG_EXT_RECUR_PARTITIONS BLOCK_SIZE bsize) { #else static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, aom_reader *r, int has_rows, int has_cols, BLOCK_SIZE bsize) { #endif +#if CONFIG_EXT_RECUR_PARTITIONS + if (!is_partition_point(bsize)) return PARTITION_NONE; +#endif // CONFIG_EXT_RECUR_PARTITIONS + const int 
ctx = partition_plane_context(xd, mi_row, mi_col, bsize); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; +#if CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_SDP + const int plane = xd->tree_type == CHROMA_PART; + if (plane == 1 && bsize == BLOCK_8X8) { + return PARTITION_NONE; + } + const int min_bsize_1d = + AOMMIN(block_size_high[bsize], block_size_wide[bsize]); + if (plane && min_bsize_1d >= SHARED_PART_SIZE) { + const int ssx = cm->seq_params.subsampling_x; + const int ssy = cm->seq_params.subsampling_y; + if (ptree_luma) + return sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ssx, ssy); + } +#endif // CONFIG_SDP + + if (is_square_block(bsize)) { + if (!has_rows && has_cols) return PARTITION_HORZ; + if (has_rows && !has_cols) return PARTITION_VERT; + + assert(ctx >= 0); + if (has_rows && has_cols) { +#if CONFIG_SDP + aom_cdf_prob *partition_cdf = ec_ctx->partition_cdf[plane][ctx]; +#else + aom_cdf_prob *partition_cdf = ec_ctx->partition_cdf[ctx]; +#endif // CONFIG_SDP + + return (PARTITION_TYPE)aom_read_symbol( + r, partition_cdf, partition_cdf_length(bsize), ACCT_STR); + } else { // !has_rows && !has_cols + aom_cdf_prob cdf[2] = { 16384, AOM_ICDF(CDF_PROB_TOP) }; + return aom_read_cdf(r, cdf, 2, ACCT_STR) ? 
PARTITION_VERT + : PARTITION_HORZ; + } + } else { + aom_cdf_prob *partition_rec_cdf = ec_ctx->partition_rec_cdf[ctx]; + const PARTITION_TYPE_REC symbol = (PARTITION_TYPE_REC)aom_read_symbol( + r, partition_rec_cdf, partition_rec_cdf_length(bsize), ACCT_STR); + + return get_partition_from_symbol_rec_block(bsize, symbol); + } +#else // !CONFIG_EXT_RECUR_PARTITIONS if (!has_rows && !has_cols) return PARTITION_SPLIT; #if CONFIG_SDP @@ -1653,12 +1734,12 @@ } int parent_block_width = block_size_wide[bsize]; const CommonModeInfoParams *const mi_params = &cm->mi_params; - if (xd->tree_type == CHROMA_PART && parent_block_width >= SHARED_PART_SIZE) { + if (plane && parent_block_width >= SHARED_PART_SIZE) { int luma_split_flag = get_luma_split_flag(bsize, mi_params, mi_row, mi_col); // if luma blocks uses smaller blocks, then chroma will also split if (luma_split_flag > 3) return PARTITION_SPLIT; } -#endif +#endif // CONFIG_SDP assert(ctx >= 0); #if CONFIG_SDP @@ -1683,26 +1764,31 @@ assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP)); return aom_read_cdf(r, cdf, 2, ACCT_STR) ? 
PARTITION_SPLIT : PARTITION_VERT; } +#endif // CONFIG_EXT_RECUR_PARTITIONS } // TODO(slavarnway): eliminate bsize and subsize in future commits static AOM_INLINE void decode_partition(AV1Decoder *const pbi, ThreadData *const td, int mi_row, int mi_col, aom_reader *reader, - BLOCK_SIZE bsize, + BLOCK_SIZE bsize, PARTITION_TREE *ptree, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + PARTITION_TREE *ptree_luma, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS int parse_decode_flag) { assert(bsize < BLOCK_SIZES_ALL); AV1_COMMON *const cm = &pbi->common; DecoderCodingBlock *const dcb = &td->dcb; MACROBLOCKD *const xd = &dcb->xd; - const int bw = mi_size_wide[bsize]; - const int hbs = bw >> 1; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + const int hbs_w = mi_size_wide[bsize] / 2; + const int hbs_h = mi_size_high[bsize] / 2; + const int qbs_w = mi_size_wide[bsize] / 4; + const int qbs_h = mi_size_high[bsize] / 4; PARTITION_TYPE partition; - BLOCK_SIZE subsize; - const int quarter_step = bw / 4; - BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); - const int has_rows = (mi_row + hbs) < cm->mi_params.mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_params.mi_cols; + const int has_rows = (mi_row + hbs_h) < cm->mi_params.mi_rows; + const int has_cols = (mi_col + hbs_w) < cm->mi_params.mi_cols; if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols) return; @@ -1737,24 +1823,63 @@ } } #if CONFIG_SDP - partition = (bsize < BLOCK_8X8) - ? PARTITION_NONE - : read_partition(cm, xd, mi_row, mi_col, reader, has_rows, - has_cols, bsize); -#else - partition = (bsize < BLOCK_8X8) ? PARTITION_NONE - : read_partition(xd, mi_row, mi_col, reader, - has_rows, has_cols, bsize); -#endif - } else { -#if CONFIG_SDP partition = - get_partition(cm, xd->tree_type == CHROMA_PART, mi_row, mi_col, bsize); + !is_partition_point(bsize) + ? 
PARTITION_NONE + : read_partition(cm, xd, mi_row, mi_col, reader, has_rows, has_cols, +#if CONFIG_EXT_RECUR_PARTITIONS + ptree_luma, +#endif // CONFIG_EXT_RECUR_PARTITIONS + bsize); #else - partition = get_partition(cm, mi_row, mi_col, bsize); + partition = !is_partition_point(bsize) + ? PARTITION_NONE + : read_partition(xd, mi_row, mi_col, reader, has_rows, + has_cols, bsize); #endif + + ptree->partition = partition; + ptree->bsize = bsize; + ptree->mi_row = mi_row; + ptree->mi_col = mi_col; + ptree->is_settled = 1; + PARTITION_TREE *parent = ptree->parent; + set_chroma_ref_info( + mi_row, mi_col, ptree->index, bsize, &ptree->chroma_ref_info, + parent ? &parent->chroma_ref_info : NULL, + parent ? parent->bsize : BLOCK_INVALID, + parent ? parent->partition : PARTITION_NONE, ss_x, ss_y); + + switch (partition) { + case PARTITION_SPLIT: + ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + ptree->sub_tree[2] = av1_alloc_ptree_node(ptree, 2); + ptree->sub_tree[3] = av1_alloc_ptree_node(ptree, 3); + break; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ: + case PARTITION_VERT: + ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + break; + case PARTITION_HORZ_3: + case PARTITION_VERT_3: + ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + ptree->sub_tree[2] = av1_alloc_ptree_node(ptree, 2); + break; +#endif // CONFIG_EXT_RECUR_PARTITIONS + default: break; + } + } else { + partition = ptree->partition; } - subsize = get_partition_subsize(bsize, partition); +#if CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + const int track_ptree_luma = + ptree_luma ? 
(partition == ptree_luma->partition) : 0; +#endif // CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); if (subsize == BLOCK_INVALID) { aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, "Partition is invalid for block size %dx%d", @@ -1772,63 +1897,115 @@ #define DEC_BLOCK_STX_ARG #define DEC_BLOCK_EPT_ARG partition, -#define DEC_BLOCK(db_r, db_c, db_subsize) \ - block_visit[parse_decode_flag](pbi, td, DEC_BLOCK_STX_ARG(db_r), (db_c), \ - reader, DEC_BLOCK_EPT_ARG(db_subsize)) -#define DEC_PARTITION(db_r, db_c, db_subsize) \ +#define DEC_BLOCK(db_r, db_c, db_subsize, index) \ + block_visit[parse_decode_flag](pbi, td, DEC_BLOCK_STX_ARG(db_r), (db_c), \ + reader, DEC_BLOCK_EPT_ARG(db_subsize), ptree, \ + index) +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS +#define DEC_PARTITION(db_r, db_c, db_subsize, index) \ + decode_partition(pbi, td, DEC_BLOCK_STX_ARG(db_r), (db_c), reader, \ + (db_subsize), ptree->sub_tree[(index)], \ + track_ptree_luma ? 
ptree_luma->sub_tree[index] : NULL, \ + parse_decode_flag) +#else // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS +#define DEC_PARTITION(db_r, db_c, db_subsize, index) \ decode_partition(pbi, td, DEC_BLOCK_STX_ARG(db_r), (db_c), reader, \ - (db_subsize), parse_decode_flag) + (db_subsize), ptree->sub_tree[(index)], parse_decode_flag) +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + +#if !CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); +#endif // !CONFIG_EXT_RECUR_PARTITIONS switch (partition) { - case PARTITION_NONE: DEC_BLOCK(mi_row, mi_col, subsize); break; + case PARTITION_NONE: DEC_BLOCK(mi_row, mi_col, subsize, 0); break; case PARTITION_HORZ: - DEC_BLOCK(mi_row, mi_col, subsize); - if (has_rows) DEC_BLOCK(mi_row + hbs, mi_col, subsize); +#if CONFIG_EXT_RECUR_PARTITIONS + DEC_PARTITION(mi_row, mi_col, subsize, 0); + if ((mi_row + hbs_h) < cm->mi_params.mi_rows) + DEC_PARTITION(mi_row + hbs_h, mi_col, subsize, 1); +#else + DEC_BLOCK(mi_row, mi_col, subsize, 0); + if (has_rows) DEC_BLOCK(mi_row + hbs_h, mi_col, subsize, 1); +#endif // CONFIG_EXT_RECUR_PARTITIONS break; case PARTITION_VERT: - DEC_BLOCK(mi_row, mi_col, subsize); - if (has_cols) DEC_BLOCK(mi_row, mi_col + hbs, subsize); +#if CONFIG_EXT_RECUR_PARTITIONS + DEC_PARTITION(mi_row, mi_col, subsize, 0); + if ((mi_col + hbs_w) < cm->mi_params.mi_cols) + DEC_PARTITION(mi_row, mi_col + hbs_w, subsize, 1); +#else + DEC_BLOCK(mi_row, mi_col, subsize, 0); + if (has_cols) DEC_BLOCK(mi_row, mi_col + hbs_w, subsize, 1); +#endif // CONFIG_EXT_RECUR_PARTITIONS break; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_3: { + const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_HORZ); + int this_mi_row = mi_row; + DEC_PARTITION(this_mi_row, mi_col, subsize, 0); + this_mi_row += qbs_h; + if (this_mi_row >= cm->mi_params.mi_rows) break; + DEC_PARTITION(this_mi_row, mi_col, bsize3, 1); + this_mi_row += 2 * qbs_h; + if (this_mi_row >= 
cm->mi_params.mi_rows) break; + DEC_PARTITION(this_mi_row, mi_col, subsize, 2); + break; + } + case PARTITION_VERT_3: { + const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_VERT); + int this_mi_col = mi_col; + DEC_PARTITION(mi_row, this_mi_col, subsize, 0); + this_mi_col += qbs_w; + if (this_mi_col >= cm->mi_params.mi_cols) break; + DEC_PARTITION(mi_row, this_mi_col, bsize3, 1); + this_mi_col += 2 * qbs_w; + if (this_mi_col >= cm->mi_params.mi_cols) break; + DEC_PARTITION(mi_row, this_mi_col, subsize, 2); + break; + } +#else case PARTITION_SPLIT: - DEC_PARTITION(mi_row, mi_col, subsize); - DEC_PARTITION(mi_row, mi_col + hbs, subsize); - DEC_PARTITION(mi_row + hbs, mi_col, subsize); - DEC_PARTITION(mi_row + hbs, mi_col + hbs, subsize); + DEC_PARTITION(mi_row, mi_col, subsize, 0); + DEC_PARTITION(mi_row, mi_col + hbs_w, subsize, 1); + DEC_PARTITION(mi_row + hbs_h, mi_col, subsize, 2); + DEC_PARTITION(mi_row + hbs_h, mi_col + hbs_w, subsize, 3); break; case PARTITION_HORZ_A: - DEC_BLOCK(mi_row, mi_col, bsize2); - DEC_BLOCK(mi_row, mi_col + hbs, bsize2); - DEC_BLOCK(mi_row + hbs, mi_col, subsize); + DEC_BLOCK(mi_row, mi_col, bsize2, 0); + DEC_BLOCK(mi_row, mi_col + hbs_w, bsize2, 1); + DEC_BLOCK(mi_row + hbs_h, mi_col, subsize, 2); break; case PARTITION_HORZ_B: - DEC_BLOCK(mi_row, mi_col, subsize); - DEC_BLOCK(mi_row + hbs, mi_col, bsize2); - DEC_BLOCK(mi_row + hbs, mi_col + hbs, bsize2); + DEC_BLOCK(mi_row, mi_col, subsize, 0); + DEC_BLOCK(mi_row + hbs_h, mi_col, bsize2, 1); + DEC_BLOCK(mi_row + hbs_h, mi_col + hbs_w, bsize2, 2); break; case PARTITION_VERT_A: - DEC_BLOCK(mi_row, mi_col, bsize2); - DEC_BLOCK(mi_row + hbs, mi_col, bsize2); - DEC_BLOCK(mi_row, mi_col + hbs, subsize); + DEC_BLOCK(mi_row, mi_col, bsize2, 0); + DEC_BLOCK(mi_row + hbs_h, mi_col, bsize2, 1); + DEC_BLOCK(mi_row, mi_col + hbs_w, subsize, 2); break; case PARTITION_VERT_B: - DEC_BLOCK(mi_row, mi_col, subsize); - DEC_BLOCK(mi_row, mi_col + hbs, bsize2); - DEC_BLOCK(mi_row + hbs, 
mi_col + hbs, bsize2); + DEC_BLOCK(mi_row, mi_col, subsize, 0); + DEC_BLOCK(mi_row, mi_col + hbs_w, bsize2, 1); + DEC_BLOCK(mi_row + hbs_h, mi_col + hbs_w, bsize2, 2); break; case PARTITION_HORZ_4: for (int i = 0; i < 4; ++i) { - int this_mi_row = mi_row + i * quarter_step; + int this_mi_row = mi_row + i * qbs_h; if (i > 0 && this_mi_row >= cm->mi_params.mi_rows) break; - DEC_BLOCK(this_mi_row, mi_col, subsize); + DEC_BLOCK(this_mi_row, mi_col, subsize, i); } break; case PARTITION_VERT_4: for (int i = 0; i < 4; ++i) { - int this_mi_col = mi_col + i * quarter_step; + int this_mi_col = mi_col + i * qbs_w; if (i > 0 && this_mi_col >= cm->mi_params.mi_cols) break; - DEC_BLOCK(mi_row, this_mi_col, subsize); + DEC_BLOCK(mi_row, this_mi_col, subsize, i); } break; +#endif // CONFIG_EXT_RECUR_PARTITIONS default: assert(0 && "Invalid partition type"); } @@ -1875,10 +2052,26 @@ ? 2 : 1; xd->tree_type = (total_loop_num == 1 ? SHARED_PART : LUMA_PART); - decode_partition(pbi, td, mi_row, mi_col, reader, bsize, parse_decode_flag); + if (parse_decode_flag & 1) { + av1_reset_ptree_in_sbi(xd->sbi, xd->tree_type); + } + decode_partition(pbi, td, mi_row, mi_col, reader, bsize, + td->dcb.xd.sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)], +#if CONFIG_EXT_RECUR_PARTITIONS + NULL, +#endif // CONFIG_EXT_RECUR_PARTITIONS + parse_decode_flag); if (total_loop_num == 2) { xd->tree_type = CHROMA_PART; - decode_partition(pbi, td, mi_row, mi_col, reader, bsize, parse_decode_flag); + if (parse_decode_flag & 1) { + av1_reset_ptree_in_sbi(xd->sbi, xd->tree_type); + } + decode_partition(pbi, td, mi_row, mi_col, reader, bsize, + td->dcb.xd.sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)], +#if CONFIG_EXT_RECUR_PARTITIONS + td->dcb.xd.sbi->ptree_root[0], +#endif // CONFIG_EXT_RECUR_PARTITIONS + parse_decode_flag); xd->tree_type = SHARED_PART; } } @@ -3099,6 +3292,8 @@ for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; mi_col += cm->seq_params.mib_size, sb_col_in_tile++) { + 
av1_reset_is_mi_coded_map(&td->dcb.xd, cm->seq_params.mib_size); + td->dcb.xd.sbi = av1_get_sb_info(cm, mi_row, mi_col); set_cb_buffer(pbi, &td->dcb, pbi->cb_buffer_base, num_planes, mi_row, mi_col); @@ -3110,7 +3305,7 @@ cm->seq_params.sb_size, 0x2); #else decode_partition(pbi, td, mi_row, mi_col, td->bit_reader, - cm->seq_params.sb_size, 0x2); + cm->seq_params.sb_size, td->dcb.xd.sbi->ptree_root, 0x2); #endif sync_write(&tile_data->dec_row_mt_sync, sb_row_in_tile, sb_col_in_tile, @@ -3186,15 +3381,19 @@ for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; mi_col += cm->seq_params.mib_size) { + av1_reset_is_mi_coded_map(xd, cm->seq_params.mib_size); + av1_set_sb_info(cm, xd, mi_row, mi_col); set_cb_buffer(pbi, dcb, &td->cb_buffer_base, num_planes, 0, 0); #if CONFIG_SDP decode_partition_sb(pbi, td, mi_row, mi_col, td->bit_reader, cm->seq_params.sb_size, 0x3); #else + av1_reset_ptree_in_sbi(xd->sbi); // Bit-stream parsing and decoding of the superblock decode_partition(pbi, td, mi_row, mi_col, td->bit_reader, - cm->seq_params.sb_size, 0x3); + cm->seq_params.sb_size, xd->sbi->ptree_root, 0x3); #endif + if (aom_reader_has_overflowed(td->bit_reader)) { aom_merge_corrupted_flag(&dcb->corrupted, 1); return; @@ -3626,6 +3825,8 @@ for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; mi_col += cm->seq_params.mib_size) { + av1_reset_is_mi_coded_map(xd, cm->seq_params.mib_size); + av1_set_sb_info(cm, xd, mi_row, mi_col); set_cb_buffer(pbi, dcb, pbi->cb_buffer_base, num_planes, mi_row, mi_col); // Bit-stream parsing of the superblock @@ -3633,8 +3834,9 @@ decode_partition_sb(pbi, td, mi_row, mi_col, td->bit_reader, cm->seq_params.sb_size, 0x1); #else + av1_reset_ptree_in_sbi(xd->sbi); decode_partition(pbi, td, mi_row, mi_col, td->bit_reader, - cm->seq_params.sb_size, 0x1); + cm->seq_params.sb_size, xd->sbi->ptree_root, 0x1); #endif if (aom_reader_has_overflowed(td->bit_reader)) { @@ -5805,8 +6007,8 @@ (cm->ccso_info.ccso_enable[0] || 
cm->ccso_info.ccso_enable[1]); uint16_t *ext_rec_y; if (use_ccso) { - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, - &cm->cur_frame->buf, 0, 0, 0, num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, 0, 0, 0, num_planes, + NULL); const int ccso_stride_ext = xd->plane[0].dst.width + (CCSO_PADDING_SIZE << 1); ext_rec_y =
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c index 9509e18..3f11d5d 100644 --- a/av1/decoder/decodemv.c +++ b/av1/decoder/decodemv.c
@@ -38,10 +38,11 @@ static void read_cdef(AV1_COMMON *cm, aom_reader *r, MACROBLOCKD *const xd) { #if CONFIG_SDP - const int skip_txfm = xd->mi[0]->skip_txfm[xd->tree_type == CHROMA_PART]; + assert(xd->tree_type != CHROMA_PART); + const int skip_txfm = xd->mi[0]->skip_txfm[0]; #else const int skip_txfm = xd->mi[0]->skip_txfm; -#endif +#endif // CONFIG_SDP if (cm->features.coded_lossless) return; if (cm->features.allow_intrabc) { assert(cm->cdef_info.cdef_bits == 0); @@ -68,6 +69,27 @@ const int index = (cm->seq_params.sb_size == BLOCK_128X128) ? cdef_unit_col_in_sb + 2 * cdef_unit_row_in_sb : 0; +#if CONFIG_EXT_RECUR_PARTITIONS + int second_index = index; + const int current_grid_idx = + get_mi_grid_idx(&cm->mi_params, xd->mi_row, xd->mi_col); + const MB_MODE_INFO *const current_mbmi = + cm->mi_params.mi_grid_base[current_grid_idx]; +#if CONFIG_SDP + const BLOCK_SIZE current_bsize = current_mbmi->sb_type[0]; +#else + const BLOCK_SIZE current_bsize = current_mbmi->sb_type; +#endif // CONFIG_SDP + const int mi_row_end = xd->mi_row + mi_size_high[current_bsize] - 1; + const int mi_col_end = xd->mi_col + mi_size_wide[current_bsize] - 1; + if (cm->seq_params.sb_size == BLOCK_128X128 && + block_size_wide[current_bsize] != 128 && + block_size_high[current_bsize] != 128) { + const int second_cdef_unit_row_in_sb = ((mi_row_end & index_mask) != 0); + const int second_cdef_unit_col_in_sb = ((mi_col_end & index_mask) != 0); + second_index = second_cdef_unit_col_in_sb + 2 * second_cdef_unit_row_in_sb; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS // Read CDEF strength from the first non-skip coding block in this CDEF unit. 
if (!xd->cdef_transmitted[index] && !skip_txfm) { @@ -83,6 +105,42 @@ aom_read_literal(r, cm->cdef_info.cdef_bits, ACCT_STR); xd->cdef_transmitted[index] = true; } +#if CONFIG_EXT_RECUR_PARTITIONS + if (!xd->cdef_transmitted[second_index] && !skip_txfm) { + // CDEF strength for this CDEF unit needs to be read into the MB_MODE_INFO + // of the 1st block in this CDEF unit. + const int first_block_mask = ~(cdef_size - 1); + CommonModeInfoParams *const mi_params = &cm->mi_params; + const int grid_idx = + get_mi_grid_idx(mi_params, mi_row_end & first_block_mask, + mi_col_end & first_block_mask); +#if CONFIG_SDP + assert(IMPLIES(!mi_params->mi_grid_base[grid_idx], + xd->tree_type == LUMA_PART)); + if (!mi_params->mi_grid_base[grid_idx]) { + const int mi_alloc_idx = + get_alloc_mi_idx(mi_params, mi_row_end & first_block_mask, + mi_col_end & first_block_mask); + mi_params->mi_grid_base[grid_idx] = &mi_params->mi_alloc[mi_alloc_idx]; + } +#endif + MB_MODE_INFO *const mbmi = mi_params->mi_grid_base[grid_idx]; + mbmi->cdef_strength = + aom_read_literal(r, cm->cdef_info.cdef_bits, ACCT_STR); + xd->cdef_transmitted[second_index] = true; +#if CONFIG_SDP + for (int x = 0; x < mi_size_wide[current_bsize]; x++) { + for (int y = 0; y < mi_size_high[current_bsize]; y++) { + const int mi_x = xd->mi_col + x; + const int mi_y = xd->mi_row + y; + const int idx = get_alloc_mi_idx(mi_params, mi_y, mi_x); + if (mi_y < mi_params->mi_rows && mi_x < mi_params->mi_cols) + mi_params->mi_alloc[idx].cdef_strength = mbmi->cdef_strength; + } + } +#endif // CONFIG_SDP + } +#endif // CONFIG_EXT_RECUR_PARTITIONS } #if CONFIG_CCSO @@ -1919,12 +1977,14 @@ mbmi->motion_mode = SIMPLE_TRANSLATION; #if CONFIG_SDP - if (is_motion_variation_allowed_bsize(mbmi->sb_type[PLANE_TYPE_Y]) && + if (is_motion_variation_allowed_bsize(mbmi->sb_type[PLANE_TYPE_Y], xd->mi_row, + xd->mi_col) && !mbmi->skip_mode && !has_second_ref(mbmi)) { #else - if (is_motion_variation_allowed_bsize(mbmi->sb_type) && !mbmi->skip_mode && - 
!has_second_ref(mbmi)) { -#endif + if (is_motion_variation_allowed_bsize(mbmi->sb_type, xd->mi_row, + xd->mi_col) && + !mbmi->skip_mode && !has_second_ref(mbmi)) { +#endif // CONFIG_SDP mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref); } av1_count_overlappable_neighbors(cm, xd);
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h index 45355ad..7a1f9a4 100644 --- a/av1/decoder/decoder.h +++ b/av1/decoder/decoder.h
@@ -417,7 +417,8 @@ typedef void (*block_visitor_fn_t)(AV1Decoder *const pbi, ThreadData *const td, int mi_row, int mi_col, aom_reader *r, - PARTITION_TYPE partition, BLOCK_SIZE bsize); + PARTITION_TYPE partition, BLOCK_SIZE bsize, + PARTITION_TREE *parent, int index); /*!\endcond */
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c index 2f5c152..f95bc3e 100644 --- a/av1/decoder/decodetxb.c +++ b/av1/decoder/decodetxb.c
@@ -380,14 +380,20 @@ MACROBLOCKD *const xd = &dcb->xd; MB_MODE_INFO *const mbmi = xd->mi[0]; struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_SDP - const BLOCK_SIZE bsize = mbmi->sb_type[plane > 0]; +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = get_mb_plane_block_size( + xd, mbmi, plane, pd->subsampling_x, pd->subsampling_y); +#if !CONFIG_EXT_RECUR_PARTITIONS + assert(plane_bsize == get_plane_block_size(mbmi->sb_type[plane > 0], + pd->subsampling_x, + pd->subsampling_y)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS #else const BLOCK_SIZE bsize = mbmi->sb_type; -#endif assert(bsize < BLOCK_SIZES_ALL); const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP TXB_CTX txb_ctx; get_txb_ctx(plane_bsize, tx_size, plane, pd->above_entropy_context + col,
diff --git a/av1/encoder/aq_complexity.c b/av1/encoder/aq_complexity.c index 4a45fc7..824b45f 100644 --- a/av1/encoder/aq_complexity.c +++ b/av1/encoder/aq_complexity.c
@@ -161,7 +161,8 @@ aom_clear_system_state(); low_var_thresh = DEFAULT_LV_THRESH; - av1_setup_src_planes(mb, cpi->source, mi_row, mi_col, num_planes, bs); + av1_setup_src_planes(mb, cpi->source, mi_row, mi_col, num_planes, + &mb->e_mbd.mi[0]->chroma_ref_info); logvar = av1_log_block_var(cpi, mb, bs); segment = AQ_C_SEGMENTS - 1; // Just in case no break out below.
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c index d830a00..fc6d529 100644 --- a/av1/encoder/bitstream.c +++ b/av1/encoder/bitstream.c
@@ -21,6 +21,7 @@ #include "aom_ports/bitops.h" #include "aom_ports/mem_ops.h" #include "aom_ports/system_state.h" +#include "av1/common/blockd.h" #if CONFIG_BITSTREAM_DEBUG #include "aom_util/debug_util.h" #endif // CONFIG_BITSTREAM_DEBUG @@ -515,7 +516,14 @@ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; const struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_SDP +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize_base = get_bsize_base(xd, mbmi, plane); + const TX_SIZE plane_tx_size = + plane ? av1_get_max_uv_txsize(bsize_base, pd->subsampling_x, + pd->subsampling_y) + : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, + blk_col)]; +#elif CONFIG_SDP const TX_SIZE plane_tx_size = plane ? av1_get_max_uv_txsize(mbmi->sb_type[plane > 0], pd->subsampling_x, pd->subsampling_y) @@ -523,8 +531,8 @@ blk_col)]; #else const TX_SIZE plane_tx_size = - plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y) + plane ? av1_get_max_uv_txsize(mbmi->chroma_ref_info.bsize_base, + pd->subsampling_x, pd->subsampling_y) : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, blk_col)]; #endif @@ -1210,6 +1218,28 @@ const int index = (cm->seq_params.sb_size == BLOCK_128X128) ? 
cdef_unit_col_in_sb + 2 * cdef_unit_row_in_sb : 0; +#if CONFIG_EXT_RECUR_PARTITIONS + int second_index = index; + const int current_grid_idx = + get_mi_grid_idx(&cm->mi_params, xd->mi_row, xd->mi_col); + const MB_MODE_INFO *const current_mbmi = + cm->mi_params.mi_grid_base[current_grid_idx]; +#if CONFIG_SDP + assert(xd->tree_type != CHROMA_PART); + const BLOCK_SIZE current_bsize = current_mbmi->sb_type[0]; +#else + const BLOCK_SIZE current_bsize = current_mbmi->sb_type; +#endif // CONFIG_SDP + const int mi_row_end = xd->mi_row + mi_size_high[current_bsize] - 1; + const int mi_col_end = xd->mi_col + mi_size_wide[current_bsize] - 1; + if (cm->seq_params.sb_size == BLOCK_128X128 && + block_size_wide[current_bsize] != 128 && + block_size_high[current_bsize] != 128) { + const int second_cdef_unit_row_in_sb = ((mi_row_end & index_mask) != 0); + const int second_cdef_unit_col_in_sb = ((mi_col_end & index_mask) != 0); + second_index = second_cdef_unit_col_in_sb + 2 * second_cdef_unit_row_in_sb; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS // Write CDEF strength to the first non-skip coding block in this CDEF unit. if (!xd->cdef_transmitted[index] && !skip) { @@ -1224,6 +1254,20 @@ aom_write_literal(w, mbmi->cdef_strength, cm->cdef_info.cdef_bits); xd->cdef_transmitted[index] = true; } +#if CONFIG_EXT_RECUR_PARTITIONS + if (!xd->cdef_transmitted[second_index] && !skip) { + // CDEF strength for this CDEF unit needs to be stored in the MB_MODE_INFO + // of the 1st block in this CDEF unit. 
+ const int first_block_mask = ~(cdef_size - 1); + const CommonModeInfoParams *const mi_params = &cm->mi_params; + const int grid_idx = + get_mi_grid_idx(mi_params, mi_row_end & first_block_mask, + mi_col_end & first_block_mask); + const MB_MODE_INFO *const mbmi = mi_params->mi_grid_base[grid_idx]; + aom_write_literal(w, mbmi->cdef_strength, cm->cdef_info.cdef_bits); + xd->cdef_transmitted[second_index] = true; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS } #if CONFIG_CCSO @@ -1903,16 +1947,21 @@ const int plane) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_SDP - const BLOCK_SIZE bsize = mbmi->sb_type[PLANE_TYPE_Y]; -#else - const BLOCK_SIZE bsize = mbmi->sb_type; -#endif - assert(bsize < BLOCK_SIZES_ALL); const int ss_x = pd->subsampling_x; const int ss_y = pd->subsampling_y; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y); +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = + get_mb_plane_block_size(xd, mbmi, plane, ss_x, ss_y); +#if !CONFIG_EXT_RECUR_PARTITIONS + assert(plane_bsize == + get_plane_block_size(mbmi->sb_type[PLANE_TYPE_Y], ss_x, ss_y)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS assert(plane_bsize < BLOCK_SIZES_ALL); +#else + const BLOCK_SIZE bsize = mbmi->sb_type; + assert(bsize < BLOCK_SIZES_ALL); + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane); const int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; @@ -1943,7 +1992,9 @@ MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = xd->mi[0]; -#if CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + const BLOCK_SIZE bsize = get_bsize_base(xd, mbmi, AOM_PLANE_Y); +#elif CONFIG_SDP const BLOCK_SIZE bsize = mbmi->sb_type[xd->tree_type == CHROMA_PART]; #else const 
BLOCK_SIZE bsize = mbmi->sb_type; @@ -2042,7 +2093,7 @@ const int bh = mi_size_high[bsize]; const int bw = mi_size_wide[bsize]; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, mi_params->mi_rows, - mi_params->mi_cols); + mi_params->mi_cols, &mbmi->chroma_ref_info); xd->above_txfm_context = cm->above_contexts.txfm[tile->tile_row] + mi_col; xd->left_txfm_context = @@ -2138,33 +2189,81 @@ #endif write_tokens_b(cpi, w, tok, tok_end); } + + av1_mark_block_as_coded(xd, bsize, cm->seq_params.sb_size); } static AOM_INLINE void write_partition(const AV1_COMMON *const cm, - const MACROBLOCKD *const xd, int hbs, - int mi_row, int mi_col, PARTITION_TYPE p, - BLOCK_SIZE bsize, aom_writer *w) { - const int is_partition_point = bsize >= BLOCK_8X8; - - if (!is_partition_point) return; + const MACROBLOCKD *const xd, int mi_row, + int mi_col, PARTITION_TYPE p, + BLOCK_SIZE bsize, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + PARTITION_TREE *ptree_luma, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + aom_writer *w) { + if (!is_partition_point(bsize)) return; #if CONFIG_SDP const int plane = xd->tree_type == CHROMA_PART; if (bsize == BLOCK_8X8 && plane > 0) return; -#endif + const int parent_block_width = block_size_wide[bsize]; +#if CONFIG_EXT_RECUR_PARTITIONS + const int min_bsize_1d = AOMMIN(block_size_high[bsize], parent_block_width); + if (xd->tree_type == CHROMA_PART && min_bsize_1d >= SHARED_PART_SIZE) { + const int ssx = cm->seq_params.subsampling_x; + const int ssy = cm->seq_params.subsampling_y; + (void)ssx; + (void)ssy; + if (ptree_luma) { + assert(p == + sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ssx, ssy)); + return; + } + } + (void)ptree_luma; +#endif // CONFIG_EXT_RECUR_PARTITIONS +#endif // CONFIG_SDP - const int has_rows = (mi_row + hbs) < cm->mi_params.mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_params.mi_cols; const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; +#if 
CONFIG_EXT_RECUR_PARTITIONS + if (is_square_block(bsize)) { +#endif // CONFIG_EXT_RECUR_PARTITIONS + const int hbs_w = mi_size_wide[bsize] / 2; + const int hbs_h = mi_size_high[bsize] / 2; + const int has_rows = (mi_row + hbs_h) < cm->mi_params.mi_rows; + const int has_cols = (mi_col + hbs_w) < cm->mi_params.mi_cols; +#if CONFIG_EXT_RECUR_PARTITIONS + if (has_rows && has_cols) { +#if CONFIG_SDP + aom_cdf_prob *partition_cdf = ec_ctx->partition_cdf[plane][ctx]; +#else + aom_cdf_prob *partition_cdf = ec_ctx->partition_cdf[ctx]; +#endif // CONFIG_SDP + aom_write_symbol(w, p, partition_cdf, partition_cdf_length(bsize)); + } else if (!has_rows && has_cols) { + assert(p == PARTITION_HORZ); + } else if (has_rows && !has_cols) { + assert(p == PARTITION_VERT); + } else { + assert(p == PARTITION_HORZ || p == PARTITION_VERT); + aom_cdf_prob cdf[2] = { 16384, AOM_ICDF(CDF_PROB_TOP) }; + aom_write_cdf(w, p == PARTITION_VERT, cdf, 2); + } + } else { // 1:2 or 2:1 rectangular blocks + const PARTITION_TYPE_REC symbol = + get_symbol_from_partition_rec_block(bsize, p); + aom_write_symbol(w, symbol, ec_ctx->partition_rec_cdf[ctx], + partition_rec_cdf_length(bsize)); + } +#else // CONFIG_EXT_RECUR_PARTITIONS if (!has_rows && !has_cols) { assert(p == PARTITION_SPLIT); return; } #if CONFIG_SDP - int parent_block_width = block_size_wide[bsize]; const CommonModeInfoParams *const mi_params = &cm->mi_params; if (xd->tree_type == CHROMA_PART && parent_block_width >= SHARED_PART_SIZE) { int luma_split_flag = get_luma_split_flag(bsize, mi_params, mi_row, mi_col); @@ -2206,25 +2305,27 @@ #endif aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2); } +#endif // CONFIG_EXT_RECUR_PARTITIONS } static AOM_INLINE void write_modes_sb( AV1_COMP *const cpi, const TileInfo *const tile, aom_writer *const w, - const TokenExtra **tok, const TokenExtra *const tok_end, int mi_row, - int mi_col, BLOCK_SIZE bsize) { + const TokenExtra **tok, const TokenExtra *const tok_end, + PARTITION_TREE *ptree, +#if CONFIG_SDP && 
CONFIG_EXT_RECUR_PARTITIONS + PARTITION_TREE *ptree_luma, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + int mi_row, int mi_col, BLOCK_SIZE bsize) { const AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; assert(bsize < BLOCK_SIZES_ALL); - const int hbs = mi_size_wide[bsize] / 2; - const int quarter_step = mi_size_wide[bsize] / 4; - int i; -#if CONFIG_SDP - const PARTITION_TYPE partition = - get_partition(cm, xd->tree_type == CHROMA_PART, mi_row, mi_col, bsize); -#else - const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); -#endif + const int hbs_w = mi_size_wide[bsize] / 2; + const int hbs_h = mi_size_high[bsize] / 2; + const int qbs_w = mi_size_wide[bsize] / 4; + const int qbs_h = mi_size_high[bsize] / 4; + assert(ptree); + const PARTITION_TYPE partition = ptree->partition; const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; @@ -2253,65 +2354,145 @@ } } - write_partition(cm, xd, hbs, mi_row, mi_col, partition, bsize, w); +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + write_partition(cm, xd, mi_row, mi_col, partition, bsize, ptree_luma, w); + const int track_ptree_luma = + ptree_luma ? (partition == ptree_luma->partition) : 0; +#else + write_partition(cm, xd, mi_row, mi_col, partition, bsize, w); +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS switch (partition) { case PARTITION_NONE: write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); break; case PARTITION_HORZ: +#if CONFIG_EXT_RECUR_PARTITIONS + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, +#endif // CONFIG_SDP + mi_row, mi_col, subsize); + if (mi_row + hbs_h < mi_params->mi_rows) { + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? 
ptree_luma->sub_tree[1] : NULL, +#endif // CONFIG_SDP + mi_row + hbs_h, mi_col, subsize); + } +#else // CONFIG_EXT_RECUR_PARTITIONS write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - if (mi_row + hbs < mi_params->mi_rows) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col); + if (mi_row + hbs_h < mi_params->mi_rows) + write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs_h, mi_col); +#endif // CONFIG_EXT_RECUR_PARTITIONS break; case PARTITION_VERT: +#if CONFIG_EXT_RECUR_PARTITIONS + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, +#endif // CONFIG_SDP + mi_row, mi_col, subsize); + if (mi_col + hbs_w < mi_params->mi_cols) { + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, +#endif // CONFIG_SDP + mi_row, mi_col + hbs_w, subsize); + } +#else // CONFIG_EXT_RECUR_PARTITIONS write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - if (mi_col + hbs < mi_params->mi_cols) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs); + if (mi_col + hbs_w < mi_params->mi_cols) + write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs_w); +#endif break; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_3: + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, +#endif // CONFIG_SDP + mi_row, mi_col, subsize); + if (mi_row + qbs_h >= mi_params->mi_rows) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, +#endif // CONFIG_SDP + mi_row + qbs_h, mi_col, + get_partition_subsize(bsize, PARTITION_HORZ)); + if (mi_row + 3 * qbs_h >= mi_params->mi_rows) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], +#if CONFIG_SDP + track_ptree_luma ? 
ptree_luma->sub_tree[2] : NULL, +#endif // CONFIG_SDP + mi_row + 3 * qbs_h, mi_col, subsize); + break; + case PARTITION_VERT_3: + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, +#endif // CONFIG_SDP + mi_row, mi_col, subsize); + if (mi_col + qbs_w >= mi_params->mi_cols) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, +#endif // CONFIG_SDP + mi_row, mi_col + qbs_w, + get_partition_subsize(bsize, PARTITION_VERT)); + if (mi_col + 3 * qbs_w >= mi_params->mi_cols) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, +#endif // CONFIG_SDP + mi_row, mi_col + 3 * qbs_w, subsize); + break; +#else case PARTITION_SPLIT: - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs, - subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], mi_row, + mi_col, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], mi_row, + mi_col + hbs_w, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], + mi_row + hbs_h, mi_col, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3], + mi_row + hbs_h, mi_col + hbs_w, subsize); break; case PARTITION_HORZ_A: write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs_w); + write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs_h, mi_col); break; case PARTITION_HORZ_B: 
write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs); + write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs_h, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs_h, mi_col + hbs_w); break; case PARTITION_VERT_A: write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs); + write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs_h, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs_w); break; case PARTITION_VERT_B: write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs); + write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs_w); + write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs_h, mi_col + hbs_w); break; case PARTITION_HORZ_4: - for (i = 0; i < 4; ++i) { - int this_mi_row = mi_row + i * quarter_step; + for (int i = 0; i < 4; ++i) { + int this_mi_row = mi_row + i * qbs_h; if (i > 0 && this_mi_row >= mi_params->mi_rows) break; - write_modes_b(cpi, tile, w, tok, tok_end, this_mi_row, mi_col); } break; case PARTITION_VERT_4: - for (i = 0; i < 4; ++i) { - int this_mi_col = mi_col + i * quarter_step; + for (int i = 0; i < 4; ++i) { + int this_mi_col = mi_col + i * qbs_w; if (i > 0 && this_mi_col >= mi_params->mi_cols) break; - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, this_mi_col); } break; - default: assert(0); +#endif // CONFIG_EXT_RECUR_PARTITIONS + default: assert(0); break; } // update partition context @@ -2353,6 +2534,8 @@ for (int mi_col = mi_col_start; mi_col < mi_col_end; mi_col += cm->seq_params.mib_size) { + av1_reset_is_mi_coded_map(xd, cm->seq_params.mib_size); + xd->sbi = 
av1_get_sb_info(cm, mi_row, mi_col); cpi->td.mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col); #if CONFIG_SDP const int total_loop_num = @@ -2361,17 +2544,26 @@ ? 2 : 1; xd->tree_type = (total_loop_num == 1 ? SHARED_PART : LUMA_PART); -#endif - write_modes_sb(cpi, tile, w, &tok, tok_end, mi_row, mi_col, - cm->seq_params.sb_size); -#if CONFIG_SDP + write_modes_sb(cpi, tile, w, &tok, tok_end, + xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)], +#if CONFIG_EXT_RECUR_PARTITIONS + NULL, +#endif // CONFIG_EXT_RECUR_PARTITIONS + mi_row, mi_col, cm->seq_params.sb_size); if (total_loop_num == 2) { xd->tree_type = CHROMA_PART; - write_modes_sb(cpi, tile, w, &tok, tok_end, mi_row, mi_col, - cm->seq_params.sb_size); + write_modes_sb(cpi, tile, w, &tok, tok_end, + xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)], +#if CONFIG_EXT_RECUR_PARTITIONS + xd->sbi->ptree_root[0], +#endif // CONFIG_EXT_RECUR_PARTITIONS + mi_row, mi_col, cm->seq_params.sb_size); xd->tree_type = SHARED_PART; } -#endif +#else + write_modes_sb(cpi, tile, w, &tok, tok_end, xd->sbi->ptree_root, mi_row, + mi_col, cm->seq_params.sb_size); +#endif // CONFIG_SDP } assert(tok == tok_end); }
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index 4bf2617..c18d28d 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -244,11 +244,7 @@ //! \copydoc MB_MODE_INFO_EXT::mode_context int16_t mode_context; //! Offset of current coding block's coeff buffer relative to the sb. -#if CONFIG_SDP int cb_offset[MAX_MB_PLANE]; -#else - int cb_offset; -#endif } MB_MODE_INFO_EXT_FRAME; /*! \brief Txfm search results for a partition @@ -412,6 +408,101 @@ uint8_t *tmp_best_mask_buf; } CompoundTypeRdBuffers; +/*!\cond */ +/*! \brief MV cost types + */ +enum { + MV_COST_ENTROPY, // Use the entropy rate of the mv as the cost + MV_COST_L1_LOWRES, // Use the l1 norm of the mv as the cost (<480p) + MV_COST_L1_MIDRES, // Use the l1 norm of the mv as the cost (>=480p) + MV_COST_L1_HDRES, // Use the l1 norm of the mv as the cost (>=720p) + MV_COST_NONE // Use 0 as as cost irrespective of the current mv +} UENUM1BYTE(MV_COST_TYPE); +/*!\endcond */ + +#if CONFIG_EXT_RECUR_PARTITIONS +/*! \brief max length of start Mv list + */ +#define kSMSMaxStartMVs 1 +/*! \brief Contains data for simple motion + */ +typedef struct SimpleMotionData { + MV mv_ref; /*!< mv reference */ + MV fullmv; /*!< mv full */ + MV submv; /*!< mv subpel */ + unsigned int sse; /*!< sse */ + unsigned int var; /*!< variance */ + int64_t dist; /*!< distortion */ + int rate; /*!< rate */ + int64_t rdcost; /*!< rdcost */ + int valid; /*!< whether valid */ + BLOCK_SIZE bsize; /*!< blocksize */ + int mi_row; /*!< row position in mi units */ + int mi_col; /*!< col position in mi units */ + MV_COST_TYPE mv_cost_type; /*!< mv cost type */ + int sadpb; /*!< sad per bit */ + int errorperbit; /*!< error per bit */ + MV start_mv_list[kSMSMaxStartMVs]; /*!< start mv list */ + int num_start_mvs; /*!< number of start mvs */ + int has_prev_partition; /*!< has previous partition */ + PARTITION_TYPE prev_partition; /*!< previous partition */ + struct PICK_MODE_CONTEXT *mode_cache[1]; /*!< mode cache */ +} SimpleMotionData; + +/*!\cond */ +#define BLOCK_128_COUNT 1 +#define BLOCK_64_COUNT 3 +#define BLOCK_32_COUNT 7 +#define BLOCK_16_COUNT 15 +#define 
BLOCK_8_COUNT 31 +#define BLOCK_4_COUNT 32 + +#define MAKE_SM_DATA_BUF(width, height) \ + SimpleMotionData \ + b_##width##x##height[BLOCK_##width##_COUNT * BLOCK_##height##_COUNT] +/*!\endcond */ + +/*! \brief Simple motion data buffers + */ +typedef struct SimpleMotionDataBufs { + /*!\cond */ + // Square blocks + MAKE_SM_DATA_BUF(128, 128); + MAKE_SM_DATA_BUF(64, 64); + MAKE_SM_DATA_BUF(32, 32); + MAKE_SM_DATA_BUF(16, 16); + MAKE_SM_DATA_BUF(8, 8); + MAKE_SM_DATA_BUF(4, 4); + + // 1:2 blocks + MAKE_SM_DATA_BUF(64, 128); + MAKE_SM_DATA_BUF(32, 64); + MAKE_SM_DATA_BUF(16, 32); + MAKE_SM_DATA_BUF(8, 16); + MAKE_SM_DATA_BUF(4, 8); + + // 2:1 blocks + MAKE_SM_DATA_BUF(128, 64); + MAKE_SM_DATA_BUF(64, 32); + MAKE_SM_DATA_BUF(32, 16); + MAKE_SM_DATA_BUF(16, 8); + MAKE_SM_DATA_BUF(8, 4); + + // 1:4 blocks + MAKE_SM_DATA_BUF(16, 64); + MAKE_SM_DATA_BUF(8, 32); + MAKE_SM_DATA_BUF(4, 16); + + // 4:1 blocks + MAKE_SM_DATA_BUF(64, 16); + MAKE_SM_DATA_BUF(32, 8); + MAKE_SM_DATA_BUF(16, 4); + /*!\endcond */ +} SimpleMotionDataBufs; + +#undef MAKE_SM_DATA_BUF +#endif // CONFIG_EXT_RECUR_PARTITIONS + /*! \brief Holds some parameters related to partitioning schemes in AV1. */ // TODO(chiyotsai@google.com): Consolidate this with SIMPLE_MOTION_DATA_TREE @@ -606,7 +697,11 @@ [EXT_PARTITION_TYPES]; #else int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES]; -#endif +#endif // CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS + /*! Cost for coding the partition for rectangular blocks. */ + int partition_rec_cost[PARTITION_CONTEXTS_REC][PARTITION_TYPES_REC]; +#endif // CONFIG_EXT_RECUR_PARTITIONS /**@}*/ /***************************************************************************** @@ -937,11 +1032,7 @@ */ CB_COEFF_BUFFER *cb_coef_buff; //! Offset of current coding block's coeff buffer relative to the sb. -#if CONFIG_SDP int cb_offset[MAX_MB_PLANE]; -#else - uint16_t cb_offset; -#endif //! Modified source and masks used for fast OBMC search. 
OBMCBuffer obmc_buffer; @@ -1206,6 +1297,14 @@ unsigned int source_variance; //! SSE of the current predictor. unsigned int pred_sse[REF_FRAMES]; +#if CONFIG_EXT_RECUR_PARTITIONS + /*! Simple motion search buffers. */ + SimpleMotionDataBufs *sms_bufs; + /*! \brief Determines what encoding decision should be reused. */ + int reuse_inter_mode_cache_type; + /*! \brief The mode to reuse during \ref av1_rd_pick_inter_mode_sb. */ + MB_MODE_INFO *inter_mode_cache; +#endif // CONFIG_EXT_RECUR_PARTITIONS /**@}*/ } MACROBLOCK; #undef SINGLE_REF_MODES @@ -1321,6 +1420,11 @@ return (txb_skip[blk_idx] >> plane) & 1; } +#if CONFIG_EXT_RECUR_PARTITIONS +static INLINE int should_reuse_mode(const MACROBLOCK *x, int mode_flag) { + return x->reuse_inter_mode_cache_type & mode_flag; +} +#endif // CONFIG_EXT_RECUR_PARTITIONS /*!\endcond */ #ifdef __cplusplus
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c index f33094d..0162b55 100644 --- a/av1/encoder/compound_type.c +++ b/av1/encoder/compound_type.c
@@ -457,8 +457,12 @@ const int bw = block_size_wide[bsize]; mbmi->interintra_mode = interintra_mode; int rmode = interintra_mode_cost[interintra_mode]; +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra(cm, xd, 0, orig_dst, intrapred, bw); +#else av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, intrapred, bw); +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); model_rd_sb_fn[MODELRD_TYPE_INTERINTRA](cpi, bsize, x, xd, 0, 0, &rate, &dist, &skip_txfm_sb, &skip_sse_sb, NULL, @@ -520,8 +524,13 @@ uint8_t *intrapred = get_buf_by_bd(xd, intrapred_); for (INTERINTRA_MODE mode = 0; mode < INTERINTRA_MODES; ++mode) { mbmi->interintra_mode = mode; +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra(cm, xd, 0, orig_dst, intrapred, + bw); +#else av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, intrapred, bw); +#endif // CONFIG_EXT_RECUR_PARTITIONS int64_t rd = pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_); const int rate_overhead = interintra_mode_cost[mode] + @@ -575,8 +584,13 @@ *best_interintra_mode != INTERINTRA_MODES; if (interintra_mode_reuse || *best_interintra_mode != INTERINTRA_MODES - 1) { mbmi->interintra_mode = *best_interintra_mode; +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra(cm, xd, 0, orig_dst, intrapred, + bw); +#else av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, intrapred, bw); +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); } @@ -634,15 +648,25 @@ mbmi->interintra_mode = best_mode; mbmi->interintra_wedge_index = best_wedge_index; if (best_mode != INTERINTRA_MODES - 1) { +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra(cm, xd, 0, orig_dst, intrapred, + bw); +#else av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, intrapred, bw); +#endif // 
CONFIG_EXT_RECUR_PARTITIONS } } else if (!try_smooth_interintra) { if (*best_interintra_mode == INTERINTRA_MODES) { mbmi->interintra_mode = INTERINTRA_MODES - 1; *best_interintra_mode = INTERINTRA_MODES - 1; +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra(cm, xd, 0, orig_dst, intrapred, + bw); +#else av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, intrapred, bw); +#endif // CONFIG_EXT_RECUR_PARTITIONS // Pick wedge mask based on INTERINTRA_MODES - 1 *best_rd = pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_); // Find the best interintra mode for the chosen wedge mask @@ -657,14 +681,24 @@ // Recompute prediction if required if (*best_interintra_mode != INTERINTRA_MODES - 1) { +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra(cm, xd, 0, orig_dst, + intrapred, bw); +#else av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, intrapred, bw); +#endif // CONFIG_EXT_RECUR_PARTITIONS } } else { // Pick wedge mask for the best interintra mode (reused) mbmi->interintra_mode = *best_interintra_mode; +#if CONFIG_EXT_RECUR_PARTITIONS + av1_build_intra_predictors_for_interintra(cm, xd, 0, orig_dst, intrapred, + bw); +#else av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, intrapred, bw); +#endif // CONFIG_EXT_RECUR_PARTITIONS *best_rd = pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_); } } else {
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c index 6d07ef2..028fd3d 100644 --- a/av1/encoder/context_tree.c +++ b/av1/encoder/context_tree.c
@@ -11,6 +11,7 @@ #include "av1/encoder/context_tree.h" #include "av1/encoder/encoder.h" +#include "av1/encoder/rd.h" static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 1] = { BLOCK_4X4, BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, BLOCK_128X128, @@ -38,6 +39,12 @@ dst_ctx->rd_stats = src_ctx->rd_stats; dst_ctx->rd_mode_is_ready = src_ctx->rd_mode_is_ready; +#if CONFIG_EXT_RECUR_PARTITIONS + for (int i = 0; i < 2; ++i) { + memcpy(dst_ctx->color_index_map[i], src_ctx->color_index_map[i], + sizeof(src_ctx->color_index_map[i][0]) * src_ctx->num_4x4_blk * 16); + } +#endif // CONFIG_EXT_RECUR_PARTITIONS } void av1_setup_shared_coeff_buffer(AV1_COMMON *cm, @@ -64,13 +71,23 @@ } } -PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, BLOCK_SIZE bsize, +PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize, PC_TREE *parent, + PARTITION_TYPE parent_partition, int index, + int subsampling_x, int subsampling_y, PC_TREE_SHARED_BUFFERS *shared_bufs) { PICK_MODE_CONTEXT *ctx = NULL; struct aom_internal_error_info error; AOM_CHECK_MEM_ERROR(&error, ctx, aom_calloc(1, sizeof(*ctx))); ctx->rd_mode_is_ready = 0; + ctx->parent = parent; + ctx->index = index; + set_chroma_ref_info(mi_row, mi_col, index, bsize, &ctx->chroma_ref_info, + parent ? &parent->chroma_ref_info : NULL, + parent ? 
parent->block_size : BLOCK_INVALID, + parent_partition, subsampling_x, subsampling_y); + ctx->mic.chroma_ref_info = ctx->chroma_ref_info; const int num_planes = av1_num_planes(cm); const int num_pix = block_size_wide[bsize] * block_size_high[bsize]; @@ -100,7 +117,7 @@ aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i]))); } } - + av1_invalid_rd_stats(&ctx->rd_stats); return ctx; } @@ -128,30 +145,52 @@ aom_free(ctx); } -PC_TREE *av1_alloc_pc_tree_node(BLOCK_SIZE bsize) { +PC_TREE *av1_alloc_pc_tree_node(int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *parent, + PARTITION_TYPE parent_partition, int index, + int is_last, int subsampling_x, + int subsampling_y) { PC_TREE *pc_tree = NULL; struct aom_internal_error_info error; AOM_CHECK_MEM_ERROR(&error, pc_tree, aom_calloc(1, sizeof(*pc_tree))); + pc_tree->mi_row = mi_row; + pc_tree->mi_col = mi_col; + pc_tree->parent = parent; + pc_tree->index = index; pc_tree->partitioning = PARTITION_NONE; pc_tree->block_size = bsize; - pc_tree->index = 0; + pc_tree->is_last_subblock = is_last; + av1_invalid_rd_stats(&pc_tree->rd_cost); + set_chroma_ref_info(mi_row, mi_col, index, bsize, &pc_tree->chroma_ref_info, + parent ? &parent->chroma_ref_info : NULL, + parent ? 
parent->block_size : BLOCK_INVALID, + parent_partition, subsampling_x, subsampling_y); pc_tree->none = NULL; for (int i = 0; i < 2; ++i) { pc_tree->horizontal[i] = NULL; pc_tree->vertical[i] = NULL; } +#if CONFIG_EXT_RECUR_PARTITIONS + for (int i = 0; i < 3; ++i) { + pc_tree->horizontal3[i] = NULL; + pc_tree->vertical3[i] = NULL; + } +#else for (int i = 0; i < 3; ++i) { pc_tree->horizontala[i] = NULL; pc_tree->horizontalb[i] = NULL; pc_tree->verticala[i] = NULL; pc_tree->verticalb[i] = NULL; } +#endif // CONFIG_EXT_RECUR_PARTITIONS for (int i = 0; i < 4; ++i) { +#if !CONFIG_EXT_RECUR_PARTITIONS pc_tree->horizontal4[i] = NULL; pc_tree->vertical4[i] = NULL; +#endif // !CONFIG_EXT_RECUR_PARTITIONS pc_tree->split[i] = NULL; } @@ -174,11 +213,38 @@ FREE_PMC_NODE(pc_tree->none); for (int i = 0; i < 2; ++i) { +#if CONFIG_EXT_RECUR_PARTITIONS + if ((!keep_best || (partition != PARTITION_HORZ)) && + pc_tree->horizontal[i] != NULL) { + av1_free_pc_tree_recursive(pc_tree->horizontal[i], num_planes, 0, 0); + pc_tree->horizontal[i] = NULL; + } + if ((!keep_best || (partition != PARTITION_VERT)) && + pc_tree->vertical[i] != NULL) { + av1_free_pc_tree_recursive(pc_tree->vertical[i], num_planes, 0, 0); + pc_tree->vertical[i] = NULL; + } +#else if (!keep_best || (partition != PARTITION_HORZ)) FREE_PMC_NODE(pc_tree->horizontal[i]); if (!keep_best || (partition != PARTITION_VERT)) FREE_PMC_NODE(pc_tree->vertical[i]); +#endif // CONFIG_EXT_RECUR_PARTITIONS } +#if CONFIG_EXT_RECUR_PARTITIONS + for (int i = 0; i < 3; ++i) { + if ((!keep_best || (partition != PARTITION_HORZ_3)) && + pc_tree->horizontal3[i] != NULL) { + av1_free_pc_tree_recursive(pc_tree->horizontal3[i], num_planes, 0, 0); + pc_tree->horizontal3[i] = NULL; + } + if ((!keep_best || (partition != PARTITION_VERT_3)) && + pc_tree->vertical3[i] != NULL) { + av1_free_pc_tree_recursive(pc_tree->vertical3[i], num_planes, 0, 0); + pc_tree->vertical3[i] = NULL; + } + } +#else for (int i = 0; i < 3; ++i) { if (!keep_best || 
(partition != PARTITION_HORZ_A)) FREE_PMC_NODE(pc_tree->horizontala[i]); @@ -195,6 +261,7 @@ if (!keep_best || (partition != PARTITION_VERT_4)) FREE_PMC_NODE(pc_tree->vertical4[i]); } +#endif // CONFIG_EXT_RECUR_PARTITIONS if (!keep_best || (partition != PARTITION_SPLIT)) { for (int i = 0; i < 4; ++i) { @@ -208,6 +275,144 @@ if (!keep_best && !keep_none) aom_free(pc_tree); } +#if CONFIG_EXT_RECUR_PARTITIONS +void av1_copy_pc_tree_recursive(const AV1_COMMON *cm, PC_TREE *dst, + PC_TREE *src, int ss_x, int ss_y, + PC_TREE_SHARED_BUFFERS *shared_bufs, + int num_planes) { + // Copy the best partition type. For basic information like bsize and index, + // we assume they have been set properly when initializing the dst PC_TREE + dst->partitioning = src->partitioning; + dst->rd_cost = src->rd_cost; + const BLOCK_SIZE bsize = dst->block_size; + const BLOCK_SIZE subsize = get_partition_subsize(bsize, src->partitioning); + const int mi_row = src->mi_row; + const int mi_col = src->mi_col; + + switch (src->partitioning) { + // PARTITION_NONE + case PARTITION_NONE: + if (dst->none) av1_free_pmc(dst->none, num_planes); + dst->none = NULL; + if (src->none) { + dst->none = av1_alloc_pmc(cm, mi_row, mi_col, bsize, dst, + PARTITION_NONE, 0, ss_x, ss_y, shared_bufs); + av1_copy_tree_context(dst->none, src->none); + } + break; + // PARTITION_SPLIT + case PARTITION_SPLIT: + if (is_partition_valid(bsize, PARTITION_SPLIT)) { + for (int i = 0; i < 4; ++i) { + if (dst->split[i]) { + av1_free_pc_tree_recursive(dst->split[i], num_planes, 0, 0); + dst->split[i] = NULL; + } + if (src->split[i]) { + const int x_idx = (i & 1) * (mi_size_wide[bsize] >> 1); + const int y_idx = (i >> 1) * (mi_size_high[bsize] >> 1); + dst->split[i] = av1_alloc_pc_tree_node( + mi_row + y_idx, mi_col + x_idx, subsize, dst, PARTITION_SPLIT, + i, i == 3, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->split[i], src->split[i], ss_x, + ss_y, shared_bufs, num_planes); + } + } + } + break; + // PARTITION_HORZ + case 
PARTITION_HORZ: + if (is_partition_valid(bsize, PARTITION_HORZ)) { + for (int i = 0; i < 2; ++i) { + if (dst->horizontal[i]) { + av1_free_pc_tree_recursive(dst->horizontal[i], num_planes, 0, 0); + dst->horizontal[i] = NULL; + } + if (src->horizontal[i]) { + const int this_mi_row = mi_row + i * (mi_size_high[bsize] >> 1); + dst->horizontal[i] = + av1_alloc_pc_tree_node(this_mi_row, mi_col, subsize, dst, + PARTITION_HORZ, i, i == 1, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->horizontal[i], + src->horizontal[i], ss_x, ss_y, + shared_bufs, num_planes); + } + } + } + break; + // PARTITION_VERT + case PARTITION_VERT: + if (is_partition_valid(bsize, PARTITION_VERT)) { + for (int i = 0; i < 2; ++i) { + if (dst->vertical[i]) { + av1_free_pc_tree_recursive(dst->vertical[i], num_planes, 0, 0); + dst->vertical[i] = NULL; + } + if (src->vertical[i]) { + const int this_mi_col = mi_col + i * (mi_size_wide[bsize] >> 1); + dst->vertical[i] = + av1_alloc_pc_tree_node(mi_row, this_mi_col, subsize, dst, + PARTITION_VERT, i, i == 1, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->vertical[i], src->vertical[i], + ss_x, ss_y, shared_bufs, num_planes); + } + } + } + break; + // PARTITION_HORZ_3 + case PARTITION_HORZ_3: + if (is_partition_valid(bsize, PARTITION_HORZ_3)) { + const int mi_rows[3] = { mi_row, mi_row + (mi_size_high[bsize] >> 2), + mi_row + (mi_size_high[bsize] >> 2) * 3 }; + const BLOCK_SIZE subsizes[3] = { + subsize, get_partition_subsize(bsize, PARTITION_HORZ), subsize + }; + + for (int i = 0; i < 3; ++i) { + if (dst->horizontal3[i]) { + av1_free_pc_tree_recursive(dst->horizontal3[i], num_planes, 0, 0); + dst->horizontal3[i] = NULL; + } + if (src->horizontal3[i]) { + dst->horizontal3[i] = + av1_alloc_pc_tree_node(mi_rows[i], mi_col, subsizes[i], dst, + PARTITION_HORZ_3, i, i == 2, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->horizontal3[i], + src->horizontal3[i], ss_x, ss_y, + shared_bufs, num_planes); + } + } + } + break; + // PARTITION_VERT_3 + case 
PARTITION_VERT_3: + if (is_partition_valid(bsize, PARTITION_VERT_3)) { + const int mi_cols[3] = { mi_col, mi_col + (mi_size_wide[bsize] >> 2), + mi_col + (mi_size_wide[bsize] >> 2) * 3 }; + const BLOCK_SIZE subsizes[3] = { + subsize, get_partition_subsize(bsize, PARTITION_VERT), subsize + }; + + for (int i = 0; i < 3; ++i) { + if (dst->vertical3[i]) { + av1_free_pc_tree_recursive(dst->vertical3[i], num_planes, 0, 0); + dst->vertical3[i] = NULL; + } + if (src->vertical3[i]) { + dst->vertical3[i] = + av1_alloc_pc_tree_node(mi_row, mi_cols[i], subsizes[i], dst, + PARTITION_VERT_3, i, i == 2, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->vertical3[i], src->vertical3[i], + ss_x, ss_y, shared_bufs, num_planes); + } + } + } + break; + default: assert(0 && "Not a valid partition."); break; + } +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + static AOM_INLINE int get_pc_tree_nodes(const int is_sb_size_128, int stat_generation_stage) { const int tree_nodes_inc = is_sb_size_128 ? 1024 : 0; @@ -272,3 +477,68 @@ td->sms_tree = NULL; } } + +#if CONFIG_EXT_RECUR_PARTITIONS +void av1_setup_sms_bufs(AV1_COMMON *cm, ThreadData *td) { + CHECK_MEM_ERROR(cm, td->sms_bufs, aom_malloc(sizeof(*td->sms_bufs))); +} + +void av1_free_sms_bufs(ThreadData *td) { + if (td->sms_bufs != NULL) { + aom_free(td->sms_bufs); + td->sms_bufs = NULL; + } +} + +PC_TREE *counterpart_from_different_partition(PC_TREE *pc_tree, + PC_TREE *target); + +static PC_TREE *look_for_counterpart_helper(PC_TREE *cur, PC_TREE *target) { + if (cur == NULL || cur == target) return NULL; + + BLOCK_SIZE current_bsize = cur->block_size; + BLOCK_SIZE target_bsize = target->block_size; + if (current_bsize == target_bsize) { + return cur; + } else { + if (mi_size_wide[current_bsize] >= mi_size_wide[target_bsize] && + mi_size_high[current_bsize] >= mi_size_high[target_bsize]) { + return counterpart_from_different_partition(cur, target); + } else { + return NULL; + } + } +} + +PC_TREE *counterpart_from_different_partition(PC_TREE 
*pc_tree, + PC_TREE *target) { + if (pc_tree == NULL || pc_tree == target) return NULL; + + PC_TREE *result; + result = look_for_counterpart_helper(pc_tree->split[0], target); + if (result) return result; + result = look_for_counterpart_helper(pc_tree->horizontal[0], target); + if (result) return result; + result = look_for_counterpart_helper(pc_tree->vertical[0], target); + if (result) return result; + result = look_for_counterpart_helper(pc_tree->horizontal3[0], target); + if (result) return result; + result = look_for_counterpart_helper(pc_tree->vertical3[0], target); + if (result) return result; + + return NULL; +} + +PC_TREE *av1_look_for_counterpart_block(PC_TREE *pc_tree) { + if (!pc_tree) return 0; + + // Find the highest possible common parent node + PC_TREE *current = pc_tree; + while (current->index == 0 && current->parent) { + current = current->parent; + } + + // Search from the highest common ancester + return counterpart_from_different_partition(current, pc_tree); +} +#endif // CONFIG_EXT_RECUR_PARTITIONS
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h index f243233..f03414b 100644 --- a/av1/encoder/context_tree.h +++ b/av1/encoder/context_tree.h
@@ -60,12 +60,21 @@ int rd_mode_is_ready; // Flag to indicate whether rd pick mode decision has // been made. + CHROMA_REF_INFO chroma_ref_info; + struct PC_TREE *parent; + int index; } PICK_MODE_CONTEXT; typedef struct PC_TREE { PARTITION_TYPE partitioning; BLOCK_SIZE block_size; PICK_MODE_CONTEXT *none; +#if CONFIG_EXT_RECUR_PARTITIONS + struct PC_TREE *horizontal[2]; + struct PC_TREE *vertical[2]; + struct PC_TREE *horizontal3[3]; + struct PC_TREE *vertical3[3]; +#else PICK_MODE_CONTEXT *horizontal[2]; PICK_MODE_CONTEXT *vertical[2]; PICK_MODE_CONTEXT *horizontala[3]; @@ -74,8 +83,15 @@ PICK_MODE_CONTEXT *verticalb[3]; PICK_MODE_CONTEXT *horizontal4[4]; PICK_MODE_CONTEXT *vertical4[4]; +#endif // CONFIG_EXT_RECUR_PARTITIONS struct PC_TREE *split[4]; + struct PC_TREE *parent; + int mi_row; + int mi_col; int index; + int is_last_subblock; + CHROMA_REF_INFO chroma_ref_info; + RD_STATS rd_cost; } PC_TREE; typedef struct SIMPLE_MOTION_DATA_TREE { @@ -91,15 +107,32 @@ int sms_rect_valid; } SIMPLE_MOTION_DATA_TREE; +#if CONFIG_EXT_RECUR_PARTITIONS +PC_TREE *av1_look_for_counterpart_block(PC_TREE *pc_tree); +#endif // CONFIG_EXT_RECUR_PARTITIONS + void av1_setup_shared_coeff_buffer(AV1_COMMON *cm, PC_TREE_SHARED_BUFFERS *shared_bufs); void av1_free_shared_coeff_buffer(PC_TREE_SHARED_BUFFERS *shared_bufs); -PC_TREE *av1_alloc_pc_tree_node(BLOCK_SIZE bsize); +PC_TREE *av1_alloc_pc_tree_node(int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *parent, + PARTITION_TYPE parent_partition, int index, + int is_last, int subsampling_x, + int subsampling_y); void av1_free_pc_tree_recursive(PC_TREE *tree, int num_planes, int keep_best, int keep_none); +#if CONFIG_EXT_RECUR_PARTITIONS +void av1_copy_pc_tree_recursive(const AV1_COMMON *cm, PC_TREE *dst, + PC_TREE *src, int ss_x, int ss_y, + PC_TREE_SHARED_BUFFERS *shared_bufs, + int num_planes); +#endif // CONFIG_EXT_RECUR_PARTITIONS -PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, BLOCK_SIZE bsize, +PICK_MODE_CONTEXT 
*av1_alloc_pmc(const AV1_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize, PC_TREE *parent, + PARTITION_TYPE parent_partition, int index, + int subsampling_x, int subsampling_y, PC_TREE_SHARED_BUFFERS *shared_bufs); void av1_free_pmc(PICK_MODE_CONTEXT *ctx, int num_planes); void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx, @@ -107,6 +140,10 @@ void av1_setup_sms_tree(struct AV1_COMP *const cpi, struct ThreadData *td); void av1_free_sms_tree(struct ThreadData *td); +#if CONFIG_EXT_RECUR_PARTITIONS +void av1_setup_sms_bufs(struct AV1Common *cm, struct ThreadData *td); +void av1_free_sms_bufs(struct ThreadData *td); +#endif // CONFIG_EXT_RECUR_PARTITIONS #ifdef __cplusplus } // extern "C"
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c index 8adec1e..3c05fb6 100644 --- a/av1/encoder/encode_strategy.c +++ b/av1/encoder/encode_strategy.c
@@ -881,7 +881,7 @@ MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; - av1_setup_src_planes(x, src, 0, 0, num_planes, cm->seq_params.sb_size); + av1_setup_src_planes(x, src, 0, 0, num_planes, NULL); av1_setup_block_planes(xd, cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, num_planes);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index 0592a0b..86b7fcc 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -198,7 +198,7 @@ void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const int num_planes, - BLOCK_SIZE bsize) { + const CHROMA_REF_INFO *chr_ref_info) { // Set current frame pointer. x->e_mbd.cur_buf = src; @@ -206,10 +206,10 @@ // the static analysis warnings. for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) { const int is_uv = i > 0; - setup_pred_plane( - &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv], - src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL, - x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y); + setup_pred_plane(&x->plane[i].src, src->buffers[i], src->crop_widths[is_uv], + src->crop_heights[is_uv], src->strides[is_uv], mi_row, + mi_col, NULL, x->e_mbd.plane[i].subsampling_x, + x->e_mbd.plane[i].subsampling_y, chr_ref_info); } } @@ -240,7 +240,7 @@ const BLOCK_SIZE sb_size = cm->seq_params.sb_size; // Delta-q modulation based on variance - av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size); + av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, NULL); int current_qindex = cm->quant_params.base_qindex; if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) { @@ -287,7 +287,7 @@ assert(current_qindex > 0); x->delta_qindex = current_qindex - cm->quant_params.base_qindex; - av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); + av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size, NULL); xd->mi[0]->current_qindex = current_qindex; av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id); @@ -483,6 +483,16 @@ reset_hash_records(&x->txfm_search_info, cpi->sf.tx_sf.use_inter_txb_hash); av1_zero(x->picked_ref_frames_mask); av1_invalid_rd_stats(rd_cost); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_init_sms_data_bufs(x->sms_bufs); +#endif // CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_SDP + if (x->e_mbd.tree_type == CHROMA_PART) { + assert(is_bsize_square(x->sb_enc.min_partition_size)); + 
x->sb_enc.min_partition_size = + AOMMAX(x->sb_enc.min_partition_size, BLOCK_8X8); + } +#endif // CONFIG_SDP } /*!\brief Encode a superblock (RD-search-based) @@ -507,6 +517,11 @@ int64_t dummy_dist; RD_STATS dummy_rdc; SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root; + const int ss_x = cm->seq_params.subsampling_x; + const int ss_y = cm->seq_params.subsampling_y; + (void)tile_info; + (void)num_planes; + (void)mi; #if CONFIG_SDP const int total_loop_num = @@ -514,58 +529,110 @@ cm->seq_params.enable_sdp) ? 2 : 1; +#endif // CONFIG_SDP +#if CONFIG_SDP || CONFIG_EXT_RECUR_PARTITIONS MACROBLOCKD *const xd = &x->e_mbd; -#endif +#endif // CONFIG_SDP || CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_EXT_RECUR_PARTITIONS + x->sms_bufs = td->sms_bufs; + x->reuse_inter_mode_cache_type = cpi->sf.inter_sf.reuse_erp_mode_flag; +#endif // CONFIG_EXT_RECUR_PARTITIONS init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col, 1); // Encode the superblock if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) { // partition search by adjusting a fixed-size partition - av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); + av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size, NULL); const BLOCK_SIZE bsize = seg_skip ? sb_size : sf->part_sf.fixed_partition_size; av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); #if CONFIG_SDP for (int loop_idx = 0; loop_idx < total_loop_num; loop_idx++) { + const BLOCK_SIZE min_partition_size = x->sb_enc.min_partition_size; xd->tree_type = (total_loop_num == 1 ? SHARED_PART : (loop_idx == 0 ? 
LUMA_PART : CHROMA_PART)); init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col, 1); #endif - PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size); +#if CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_SDP + av1_reset_ptree_in_sbi(xd->sbi, xd->tree_type); + av1_build_partition_tree_fixed_partitioning( + cm, mi_row, mi_col, bsize, + xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)]); +#else + av1_reset_ptree_in_sbi(xd->sbi); + av1_build_partition_tree_fixed_partitioning(cm, mi_row, mi_col, bsize, + xd->sbi->ptree_root); +#endif // CONFIG_SDP +#endif // CONFIG_EXT_RECUR_PARTITIONS + PC_TREE *const pc_root = av1_alloc_pc_tree_node( + mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y); av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, - &dummy_rate, &dummy_dist, 1, pc_root); + &dummy_rate, &dummy_dist, 1, +#if CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)], +#elif CONFIG_EXT_RECUR_PARTITIONS + xd->sbi->ptree_root, +#else // !CONFIG_EXT_RECUR_PARTITIONS + NULL, +#endif // CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + pc_root); av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0); #if CONFIG_SDP + x->sb_enc.min_partition_size = min_partition_size; } xd->tree_type = SHARED_PART; #endif } else if (cpi->partition_search_skippable_frame) { // partition search by adjusting a fixed-size partition for which the size // is determined by the source variance - av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); + av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size, NULL); const BLOCK_SIZE bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); #if CONFIG_SDP for (int loop_idx = 0; loop_idx < total_loop_num; loop_idx++) { + const BLOCK_SIZE min_partition_size = x->sb_enc.min_partition_size; xd->tree_type = (total_loop_num == 1 ? SHARED_PART : (loop_idx == 0 ? 
LUMA_PART : CHROMA_PART)); init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col, 1); #endif - PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size); + PC_TREE *const pc_root = av1_alloc_pc_tree_node( + mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y); +#if CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_SDP + av1_reset_ptree_in_sbi(xd->sbi, xd->tree_type); + av1_build_partition_tree_fixed_partitioning( + cm, mi_row, mi_col, bsize, + xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)]); +#else + av1_reset_ptree_in_sbi(xd->sbi); + av1_build_partition_tree_fixed_partitioning(cm, mi_row, mi_col, bsize, + xd->sbi->ptree_root); +#endif // CONFIG_SDP +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, - &dummy_rate, &dummy_dist, 1, pc_root); + &dummy_rate, &dummy_dist, 1, +#if CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)], +#elif CONFIG_EXT_RECUR_PARTITIONS + xd->sbi->ptree_root, +#else // !CONFIG_EXT_RECUR_PARTITIONS + NULL, +#endif // CONFIG_EXT_RECUR_PARTITIONS && CONFIG_SDP + pc_root); av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0); #if CONFIG_SDP + x->sb_enc.min_partition_size = min_partition_size; } xd->tree_type = SHARED_PART; -#endif +#endif // CONFIG_SDP } else { // The most exhaustive recursive partition search SuperBlockEnc *sb_enc = &x->sb_enc; @@ -592,17 +659,24 @@ if (num_passes == 1) { #if CONFIG_SDP for (int loop_idx = 0; loop_idx < total_loop_num; loop_idx++) { + const BLOCK_SIZE min_partition_size = sb_enc->min_partition_size; xd->tree_type = (total_loop_num == 1 ? SHARED_PART : (loop_idx == 0 ? 
LUMA_PART : CHROMA_PART)); init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col, 1); #endif - PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size); - av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, - &dummy_rdc, dummy_rdc, pc_root, sms_root, NULL, - SB_SINGLE_PASS, NULL); + PC_TREE *const pc_root = av1_alloc_pc_tree_node( + mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y); + av1_rd_pick_partition( + cpi, td, tile_data, tp, mi_row, mi_col, sb_size, &dummy_rdc, + dummy_rdc, pc_root, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + xd->tree_type == CHROMA_PART ? xd->sbi->ptree_root[0] : NULL, NULL, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + sms_root, NULL, SB_SINGLE_PASS, NULL); #if CONFIG_SDP + sb_enc->min_partition_size = min_partition_size; } xd->tree_type = SHARED_PART; #endif @@ -612,17 +686,24 @@ av1_backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col); #if CONFIG_SDP for (int loop_idx = 0; loop_idx < total_loop_num; loop_idx++) { + const BLOCK_SIZE min_partition_size = sb_enc->min_partition_size; xd->tree_type = (total_loop_num == 1 ? SHARED_PART : (loop_idx == 0 ? LUMA_PART : CHROMA_PART)); init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col, 1); #endif - PC_TREE *const pc_root_p0 = av1_alloc_pc_tree_node(sb_size); - av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, - &dummy_rdc, dummy_rdc, pc_root_p0, sms_root, NULL, - SB_DRY_PASS, NULL); + PC_TREE *const pc_root_p0 = av1_alloc_pc_tree_node( + mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y); + av1_rd_pick_partition( + cpi, td, tile_data, tp, mi_row, mi_col, sb_size, &dummy_rdc, + dummy_rdc, pc_root_p0, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + xd->tree_type == CHROMA_PART ? 
xd->sbi->ptree_root[0] : NULL, NULL, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + sms_root, NULL, SB_DRY_PASS, NULL); #if CONFIG_SDP + sb_enc->min_partition_size = min_partition_size; } xd->tree_type = SHARED_PART; #endif @@ -636,17 +717,25 @@ av1_restore_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col); #if CONFIG_SDP for (int loop_idx = 0; loop_idx < total_loop_num; loop_idx++) { + const BLOCK_SIZE min_partition_size = sb_enc->min_partition_size; xd->tree_type = (total_loop_num == 1 ? SHARED_PART : (loop_idx == 0 ? LUMA_PART : CHROMA_PART)); init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col, 1); #endif - PC_TREE *const pc_root_p1 = av1_alloc_pc_tree_node(sb_size); - av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, - &dummy_rdc, dummy_rdc, pc_root_p1, sms_root, NULL, - SB_WET_PASS, NULL); + + PC_TREE *const pc_root_p1 = av1_alloc_pc_tree_node( + mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y); + av1_rd_pick_partition( + cpi, td, tile_data, tp, mi_row, mi_col, sb_size, &dummy_rdc, + dummy_rdc, pc_root_p1, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + xd->tree_type == CHROMA_PART ? xd->sbi->ptree_root[0] : NULL, NULL, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + sms_root, NULL, SB_WET_PASS, NULL); #if CONFIG_SDP + sb_enc->min_partition_size = min_partition_size; } xd->tree_type = SHARED_PART; #endif @@ -713,7 +802,9 @@ for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0; mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) { (*(enc_row_mt->sync_read_ptr))(row_mt_sync, sb_row, sb_col_in_tile); + av1_reset_is_mi_coded_map(xd, cm->seq_params.mib_size); + av1_set_sb_info(cm, xd, mi_row, mi_col); if (tile_data->allow_update_cdf && row_mt_enabled && (tile_info->mi_row_start != mi_row)) { if ((tile_info->mi_col_start == mi_col)) { @@ -778,8 +869,7 @@ MACROBLOCKD *const xd = &x->e_mbd; // Copy data over into macro block data structures. 
- av1_setup_src_planes(x, cpi->source, 0, 0, num_planes, - cm->seq_params.sb_size); + av1_setup_src_planes(x, cpi->source, 0, 0, num_planes, NULL); av1_setup_block_planes(xd, cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, num_planes);
diff --git a/av1/encoder/encodeframe.h b/av1/encoder/encodeframe.h index 36b38d5..f6f97b4 100644 --- a/av1/encoder/encodeframe.h +++ b/av1/encoder/encodeframe.h
@@ -33,7 +33,8 @@ void av1_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, - int mi_col, const int num_planes, BLOCK_SIZE bsize); + int mi_col, const int num_planes, + const CHROMA_REF_INFO *chr_ref_info); void av1_encode_frame(struct AV1_COMP *cpi); @@ -43,6 +44,9 @@ int tile_col); void av1_encode_sb_row(struct AV1_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col, int mi_row); +void av1_enc_set_offsets(const struct AV1_COMP *cpi, const TileInfo *const tile, + struct macroblock *const x, int mi_row, int mi_col, + BLOCK_SIZE bsize, CHROMA_REF_INFO *chr_ref_info); #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c index eddfe72..6ef3968 100644 --- a/av1/encoder/encodeframe_utils.c +++ b/av1/encoder/encodeframe_utils.c
@@ -682,9 +682,9 @@ } } -void av1_restore_context(MACROBLOCK *x, const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, - int mi_row, int mi_col, BLOCK_SIZE bsize, - const int num_planes) { +void av1_restore_context(const AV1_COMMON *cm, MACROBLOCK *x, + const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row, + int mi_col, BLOCK_SIZE bsize, const int num_planes) { MACROBLOCKD *xd = &x->e_mbd; int p; const int num_4x4_blocks_wide = mi_size_wide[bsize]; @@ -727,6 +727,9 @@ sizeof(*xd->above_txfm_context) * mi_width); memcpy(xd->left_txfm_context, ctx->tl, sizeof(*xd->left_txfm_context) * mi_height); + + av1_mark_block_as_not_coded(xd, mi_row, mi_col, bsize, + cm->seq_params.sb_size); } void av1_save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx, @@ -1148,13 +1151,16 @@ void av1_update_picked_ref_frames_mask(MACROBLOCK *const x, int ref_type, BLOCK_SIZE bsize, int mib_size, int mi_row, int mi_col) { +#if !CONFIG_EXT_RECUR_PARTITIONS assert(mi_size_wide[bsize] == mi_size_high[bsize]); +#endif // !CONFIG_EXT_RECUR_PARTITIONS const int sb_size_mask = mib_size - 1; const int mi_row_in_sb = mi_row & sb_size_mask; const int mi_col_in_sb = mi_col & sb_size_mask; - const int mi_size = mi_size_wide[bsize]; - for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size; ++i) { - for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size; ++j) { + const int mi_size_h = mi_size_high[bsize]; + const int mi_size_w = mi_size_wide[bsize]; + for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size_h; ++i) { + for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size_w; ++j) { x->picked_ref_frames_mask[i * 32 + j] |= 1 << ref_type; } } @@ -1342,7 +1348,13 @@ CDF_SIZE(10)); } } -#endif +#endif // CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS + for (int i = 0; i < PARTITION_CONTEXTS_REC; ++i) { + AVERAGE_CDF(ctx_left->partition_rec_cdf[i], ctx_tr->partition_rec_cdf[i], + 4); + } +#endif // CONFIG_EXT_RECUR_PARTITIONS AVERAGE_CDF(ctx_left->switchable_interp_cdf, ctx_tr->switchable_interp_cdf, 
SWITCHABLE_FILTERS); AVERAGE_CDF(ctx_left->kf_y_cdf, ctx_tr->kf_y_cdf, INTRA_MODES); @@ -1477,7 +1489,7 @@ const int num_planes = av1_num_planes(cm); const BLOCK_SIZE sb_size = cm->seq_params.sb_size; - av1_restore_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, + av1_restore_context(cm, x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, num_planes); cpi->td.rd_counts = sb_fp_stats->rd_count;
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h index 5fde760..b39fe48 100644 --- a/av1/encoder/encodeframe_utils.h +++ b/av1/encoder/encodeframe_utils.h
@@ -64,9 +64,13 @@ // This structure contains block size related // variables for use in rd_pick_partition(). -typedef struct { +typedef struct PartitionBlkParams { // Half of block width to determine block edge. int mi_step; +#if CONFIG_EXT_RECUR_PARTITIONS + int mi_step_h; + int mi_step_w; +#endif // CONFIG_EXT_RECUR_PARTITIONS // Block row and column indices. int mi_row; @@ -79,11 +83,18 @@ // Block width of current partition block. int width; +#if CONFIG_EXT_RECUR_PARTITIONS + // Minimum partition size allowed. + BLOCK_SIZE min_partition_size; +#else // Block width of minimum partition size allowed. int min_partition_size_1d; +#endif // CONFIG_EXT_RECUR_PARTITIONS +#if !CONFIG_EXT_RECUR_PARTITIONS // Flag to indicate if partition is 8x8 or higher size. int bsize_at_least_8x8; +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Indicates edge blocks in frame. int has_rows; @@ -100,7 +111,7 @@ } PartitionBlkParams; // Structure holding state variables for partition search. -typedef struct { +typedef struct PartitionSearchState { // Intra partitioning related info. PartitionSearchInfo *intra_part_info; @@ -118,6 +129,9 @@ // Array holding partition type cost. int tmp_partition_cost[PARTITION_TYPES]; +#if CONFIG_EXT_RECUR_PARTITIONS + int partition_cost_table[EXT_PARTITION_TYPES]; +#endif // Pointer to partition cost buffer int *partition_cost; @@ -130,6 +144,10 @@ // rect_part_rd[1][i] is the RD cost of ith partition index of PARTITION_VERT. int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT]; +#if CONFIG_EXT_RECUR_PARTITIONS + // New Simple Motion Result for PARTITION_NONE + SMSPartitionStats none_data; +#endif // CONFIG_EXT_RECUR_PARTITIONS // Flags indicating if the corresponding partition was winner or not. // Used to bypass similar blocks during AB partition evaluation. 
int is_split_ctx_is_ready[2]; @@ -140,8 +158,11 @@ int partition_none_allowed; int partition_rect_allowed[NUM_RECT_PARTS]; int do_rectangular_split; +#if !CONFIG_EXT_RECUR_PARTITIONS int do_square_split; +#endif // !CONFIG_EXT_RECUR_PARTITIONS int prune_rect_part[NUM_RECT_PARTS]; + int is_block_splittable; // Chroma subsampling in x and y directions. int ss_x; @@ -294,9 +315,9 @@ const MB_MODE_INFO *above_mi, const MB_MODE_INFO *left_mi, const int intraonly); -void av1_restore_context(MACROBLOCK *x, const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, - int mi_row, int mi_col, BLOCK_SIZE bsize, - const int num_planes); +void av1_restore_context(const AV1_COMMON *cm, MACROBLOCK *x, + const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row, + int mi_col, BLOCK_SIZE bsize, const int num_planes); void av1_save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -338,6 +359,12 @@ const TileInfo *const tile_info, const int mi_row, const int mi_col); +#ifndef NDEBUG +static AOM_INLINE int is_bsize_square(BLOCK_SIZE bsize) { + return block_size_wide[bsize] == block_size_high[bsize]; +} +#endif // NDEBUG + #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index 4d67ab4..deae150 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c
@@ -563,15 +563,24 @@ const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; -#if CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize_base = get_bsize_base(xd, mbmi, plane); + const TX_SIZE plane_tx_size = + plane ? av1_get_max_uv_txsize(bsize_base, pd->subsampling_x, + pd->subsampling_y) + : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, + blk_col)]; +#elif CONFIG_SDP const TX_SIZE plane_tx_size = plane ? av1_get_max_uv_txsize(mbmi->sb_type[xd->tree_type == CHROMA_PART], pd->subsampling_x, pd->subsampling_y) : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, blk_col)]; #else + const BLOCK_SIZE bsize_base = + plane ? mbmi->chroma_ref_info.bsize_base : mbmi->sb_type; const TX_SIZE plane_tx_size = - plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, + plane ? av1_get_max_uv_txsize(bsize_base, pd->subsampling_x, pd->subsampling_y) : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, blk_col)]; @@ -728,9 +737,8 @@ encode_block_pass1, &args); } -void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, +void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, RUN_TYPE dry_run) { - assert(bsize < BLOCK_SIZES_ALL); MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi = xd->mi[0]; #if CONFIG_SDP @@ -765,8 +773,24 @@ const int subsampling_x = pd->subsampling_x; const int subsampling_y = pd->subsampling_y; if (plane && !xd->is_chroma_ref) break; + +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, subsampling_x, subsampling_y); + get_mb_plane_block_size(xd, mbmi, plane, subsampling_x, subsampling_y); +#if !CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize_base = + plane ? 
mbmi->chroma_ref_info.bsize_base + : mbmi->sb_type[xd->tree_type == CHROMA_PART]; + assert(plane_bsize == + get_plane_block_size(bsize_base, subsampling_x, subsampling_y)); + (void)bsize_base; +#endif // !CONFIG_EXT_RECUR_PARTITIONS +#else + const BLOCK_SIZE bsize_base = + plane ? mbmi->chroma_ref_info.bsize_base : mbmi->sb_type; + const BLOCK_SIZE plane_bsize = + get_plane_block_size(bsize_base, subsampling_x, subsampling_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP assert(plane_bsize < BLOCK_SIZES_ALL); const int mi_width = mi_size_wide[plane_bsize]; const int mi_height = mi_size_high[plane_bsize]; @@ -954,14 +978,13 @@ #else if (plane == AOM_PLANE_Y && xd->cfl.store_y) { #endif - cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize); + cfl_store_tx(xd, blk_row, blk_col, tx_size); } } void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int plane, RUN_TYPE dry_run, TRELLIS_OPT_TYPE enable_optimize_b) { - assert(bsize < BLOCK_SIZES_ALL); const MACROBLOCKD *const xd = &x->e_mbd; if (plane && !xd->is_chroma_ref) return; @@ -971,21 +994,25 @@ ENTROPY_CONTEXT ta[MAX_MIB_SIZE] = { 0 }; ENTROPY_CONTEXT tl[MAX_MIB_SIZE] = { 0 }; #if CONFIG_SDP - struct encode_b_args arg = { - cpi, - x, - NULL, - &(xd->mi[0]->skip_txfm[xd->tree_type == CHROMA_PART]), + int8_t *skip_txfm = &(xd->mi[0]->skip_txfm[xd->tree_type == CHROMA_PART]); #else - struct encode_b_args arg = { - cpi, x, NULL, &(xd->mi[0]->skip_txfm), -#endif - ta, - tl, - dry_run, - enable_optimize_b - }; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y); + int8_t *skip_txfm = &(xd->mi[0]->skip_txfm); +#endif // CONFIG_SDP + + struct encode_b_args arg = { cpi, x, NULL, skip_txfm, + ta, tl, dry_run, enable_optimize_b }; +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = + get_mb_plane_block_size(xd, xd->mi[0], plane, ss_x, ss_y); +#if !CONFIG_EXT_RECUR_PARTITIONS + assert(plane_bsize == get_plane_block_size(bsize, 
ss_x, ss_y)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS + (void)bsize; +#else + const BLOCK_SIZE bsize_base = + plane ? xd->mi[0]->chroma_ref_info.bsize_base : bsize; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize_base, ss_x, ss_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP if (enable_optimize_b) { av1_get_entropy_contexts(plane_bsize, pd, ta, tl); }
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h index 91b47cc..88274b7 100644 --- a/av1/encoder/encodemb.h +++ b/av1/encoder/encodemb.h
@@ -63,8 +63,7 @@ TRELLIS_OPT_TYPE enable_optimize_b; }; -void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - RUN_TYPE dry_run); +void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, RUN_TYPE dry_run); void av1_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE plane_bsize, int plane,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 43ea020..24f4cde 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -1447,6 +1447,9 @@ thread_data->td->firstpass_ctx = NULL; av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf); av1_free_sms_tree(thread_data->td); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_free_sms_bufs(thread_data->td); +#endif // CONFIG_EXT_RECUR_PARTITIONS aom_free(thread_data->td); } } @@ -1965,6 +1968,9 @@ av1_free_context_buffers(cm); av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf); av1_free_sms_tree(&cpi->td); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_free_sms_bufs(&cpi->td); +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_free_pmc(cpi->td.firstpass_ctx, av1_num_planes(cm)); cpi->td.firstpass_ctx = NULL; alloc_compressor_data(cpi); @@ -2068,8 +2074,8 @@ const int use_ccso = !cm->features.coded_lossless && !cm->tiles.large_scale && cm->seq_params.enable_ccso; const int num_planes = av1_num_planes(cm); - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, &cm->cur_frame->buf, - 0, 0, 0, num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, 0, 0, 0, num_planes, + NULL); const int ccso_stride = xd->plane[0].dst.width; const int ccso_stride_ext = xd->plane[0].dst.width + (CCSO_PADDING_SIZE << 1); for (int pli = 0; pli < 2; pli++) { @@ -2135,8 +2141,8 @@ #if CONFIG_CCSO if (use_ccso) { - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, &cm->cur_frame->buf, - 0, 0, 0, num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, 0, 0, 0, num_planes, + NULL); // Reading original and reconstructed chroma samples as input for (int pli = 1; pli < 3; pli++) { const int pic_height = xd->plane[pli].dst.height;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index 71a1158..3cc21be 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -1157,6 +1157,9 @@ [PALETTE_COLOR_INDEX_CONTEXTS] [PALETTE_COLORS]; unsigned int partition[PARTITION_CONTEXTS][EXT_PARTITION_TYPES]; +#if CONFIG_EXT_RECUR_PARTITIONS + unsigned int partition_rec[PARTITION_CONTEXTS_REC][PARTITION_TYPES_REC]; +#endif // CONFIG_EXT_RECUR_PARTITIONS unsigned int txb_skip[TOKEN_CDF_Q_CTXS][TX_SIZES][TXB_SKIP_CONTEXTS][2]; unsigned int eob_extra[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] [EOB_COEF_CONTEXTS][2]; @@ -1456,6 +1459,9 @@ PC_TREE_SHARED_BUFFERS shared_coeff_buf; SIMPLE_MOTION_DATA_TREE *sms_tree; SIMPLE_MOTION_DATA_TREE *sms_root; +#if CONFIG_EXT_RECUR_PARTITIONS + struct SimpleMotionDataBufs *sms_bufs; +#endif // CONFIG_EXT_RECUR_PARTITIONS InterModesInfo *inter_modes_info; uint32_t *hash_value_buffer[2][2]; OBMCBuffer obmc_buffer;
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h index b4291d2..e458443 100644 --- a/av1/encoder/encoder_alloc.h +++ b/av1/encoder/encoder_alloc.h
@@ -74,8 +74,14 @@ av1_setup_shared_coeff_buffer(&cpi->common, &cpi->td.shared_coeff_buf); av1_setup_sms_tree(cpi, &cpi->td); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_setup_sms_bufs(&cpi->common, &cpi->td); +#endif // CONFIG_EXT_RECUR_PARTITIONS + cpi->td.firstpass_ctx = - av1_alloc_pmc(cm, BLOCK_16X16, &cpi->td.shared_coeff_buf); + av1_alloc_pmc(cm, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0, + cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, + &cpi->td.shared_coeff_buf); } static AOM_INLINE void realloc_segmentation_maps(AV1_COMP *cpi) { @@ -295,6 +301,9 @@ av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf); av1_free_sms_tree(&cpi->td); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_free_sms_bufs(&cpi->td); +#endif // CONFIG_EXT_RECUR_PARTITIONS aom_free(cpi->td.mb.palette_buffer); release_compound_type_rd_buffers(&cpi->td.mb.comp_rd_buffer);
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c index 3bffabd..e506d2d 100644 --- a/av1/encoder/encodetxb.c +++ b/av1/encoder/encodetxb.c
@@ -329,14 +329,8 @@ int block, TX_SIZE tx_size) { MACROBLOCKD *xd = &x->e_mbd; const CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff; -#if CONFIG_SDP const int txb_offset = - x->mbmi_ext_frame->cb_offset[plane > 0 && xd->tree_type == CHROMA_PART] / - (TX_SIZE_W_MIN * TX_SIZE_H_MIN); -#else - const int txb_offset = - x->mbmi_ext_frame->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); -#endif + x->mbmi_ext_frame->cb_offset[plane] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset; const uint16_t eob = eob_txb[block]; const uint8_t *entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset; @@ -430,14 +424,8 @@ const int height = get_txb_high(tx_size); uint8_t levels_buf[TX_PAD_2D]; uint8_t *const levels = set_levels(levels_buf, width); -#if CONFIG_SDP const tran_low_t *tcoeff_txb = - cb_coef_buff->tcoeff[plane] + - x->mbmi_ext_frame->cb_offset[plane > 0 && xd->tree_type == CHROMA_PART]; -#else - const tran_low_t *tcoeff_txb = - cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset; -#endif + cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset[plane]; const tran_low_t *tcoeff = tcoeff_txb + BLOCK_OFFSET(block); av1_txb_init_levels(tcoeff, width, height, levels); const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); @@ -1594,15 +1582,8 @@ } CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff; -#if CONFIG_SDP const int txb_offset = - x->mbmi_ext_frame - ->cb_offset[(plane > 0 && xd->tree_type == CHROMA_PART) ? 
1 : 0] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); -#else - const int txb_offset = - x->mbmi_ext_frame->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); -#endif + x->mbmi_ext_frame->cb_offset[plane] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset; uint8_t *const entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset; entropy_ctx[block] = txb_ctx.txb_skip_ctx; @@ -1615,14 +1596,8 @@ } const int segment_id = mbmi->segment_id; const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size); -#if CONFIG_SDP tran_low_t *tcoeff_txb = - cb_coef_buff->tcoeff[plane] + - x->mbmi_ext_frame->cb_offset[plane > 0 && xd->tree_type == CHROMA_PART]; -#else - tran_low_t *tcoeff_txb = - cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset; -#endif + cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset[plane]; tcoeff = tcoeff_txb + block_offset; memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob); @@ -1667,6 +1642,7 @@ if (allow_update_cdf) { if (c == eob - 1) { assert(coeff_ctx < 4); + assert(level > 0); update_cdf( ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx], AOMMIN(level, 3) - 1, 3); @@ -1677,6 +1653,7 @@ } if (c == eob - 1) { assert(coeff_ctx < 4); + assert(level > 0); #if CONFIG_ENTROPY_STATS ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type] [coeff_ctx][AOMMIN(level, 3) - 1]; @@ -1743,7 +1720,10 @@ if (mbmi->skip_txfm[xd->tree_type == CHROMA_PART]) { #else if (mbmi->skip_txfm) { -#endif +#endif // CONFIG_SDP +#if CONFIG_SDP + assert(bsize == mbmi->sb_type[av1_get_sdp_idx(xd->tree_type)]); +#endif // CONFIG_SDP av1_reset_entropy_context(xd, bsize, num_planes); return; } @@ -1758,7 +1738,15 @@ const struct macroblockd_plane *const pd = &xd->plane[plane]; const int ss_x = pd->subsampling_x; const int ss_y = pd->subsampling_y; +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = + get_mb_plane_block_size(xd, mbmi, plane, ss_x, ss_y); +#if 
!CONFIG_EXT_RECUR_PARTITIONS + assert(plane_bsize == get_plane_block_size(bsize, ss_x, ss_y)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS +#else const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP av1_foreach_transformed_block_in_plane( xd, plane_bsize, plane, av1_update_and_record_txb_context, &arg); }
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c index 0f754b7..0e48e7e 100644 --- a/av1/encoder/ethread.c +++ b/av1/encoder/ethread.c
@@ -550,6 +550,9 @@ if (i > 0) { // Set up sms_tree. av1_setup_sms_tree(cpi, thread_data->td); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_setup_sms_bufs(cm, thread_data->td); +#endif // CONFIG_EXT_RECUR_PARTITIONS alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm); @@ -666,8 +669,10 @@ if (i > 0) { // Set up firstpass PICK_MODE_CONTEXT. - thread_data->td->firstpass_ctx = - av1_alloc_pmc(cm, BLOCK_16X16, &thread_data->td->shared_coeff_buf); + thread_data->td->firstpass_ctx = av1_alloc_pmc( + cm, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0, + cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, + &thread_data->td->shared_coeff_buf); // Create threads if (!winterface->reset(worker))
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c index 62e520b..f91c0c4 100644 --- a/av1/encoder/firstpass.c +++ b/av1/encoder/firstpass.c
@@ -365,8 +365,8 @@ xd->mi[0]->ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, tile, mb_row * mb_scale, mi_size_high[bsize], mb_col * mb_scale, mi_size_wide[bsize], mi_params->mi_rows, - mi_params->mi_cols); - set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], num_planes); + mi_params->mi_cols, NULL); + set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], num_planes, NULL); xd->mi[0]->segment_id = 0; xd->lossless[xd->mi[0]->segment_id] = (qindex == 0); xd->mi[0]->mode = DC_PRED; @@ -952,6 +952,8 @@ AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt; AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync; + xd->tile = *tile; + const YV12_BUFFER_CONFIG *const last_frame = get_ref_frame_yv12_buf(cm, LAST_FRAME); const YV12_BUFFER_CONFIG *golden_frame = @@ -1015,7 +1017,7 @@ cpi->oxcf.border_in_pixels); av1_setup_src_planes(x, cpi->source, mb_row << FP_MIB_SIZE_LOG2, - tile->mi_col_start, num_planes, fp_block_size); + tile->mi_col_start, num_planes, NULL); // Fix - zero the 16x16 block first. This ensures correct this_intra_error for // block sizes smaller than 16x16. @@ -1133,12 +1135,11 @@ av1_setup_block_planes(xd, seq_params->subsampling_x, seq_params->subsampling_y, num_planes); - av1_setup_src_planes(x, cpi->source, 0, 0, num_planes, fp_block_size); - av1_setup_dst_planes(xd->plane, seq_params->sb_size, this_frame, 0, 0, 0, - num_planes); + av1_setup_src_planes(x, cpi->source, 0, 0, num_planes, NULL); + av1_setup_dst_planes(xd->plane, this_frame, 0, 0, 0, num_planes, NULL); if (!frame_is_intra_only(cm)) { - av1_setup_pre_planes(xd, 0, last_frame, 0, 0, NULL, num_planes); + av1_setup_pre_planes(xd, 0, last_frame, 0, 0, NULL, num_planes, NULL); } set_mi_offsets(mi_params, xd, 0, 0);
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c index b40def7..9a48c09 100644 --- a/av1/encoder/intra_mode_search.c +++ b/av1/encoder/intra_mode_search.c
@@ -238,7 +238,7 @@ int *best_angle_delta, int64_t *best_rd) { MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; #if CONFIG_SDP - assert(!is_inter_block(mbmi, x->e_mbd.tree_type)); + assert(!is_inter_block(mbmi, cpi->td.mb.e_mbd.tree_type)); #else assert(!is_inter_block(mbmi)); #endif @@ -246,8 +246,7 @@ int64_t this_rd; RD_STATS tokenonly_rd_stats; - if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in)) - return INT64_MAX; + if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, best_rd_in)) return INT64_MAX; this_rate = tokenonly_rd_stats.rate + intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead); this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); @@ -338,19 +337,16 @@ MB_MODE_INFO *const mbmi = xd->mi[0]; const MACROBLOCKD_PLANE *pd = &xd->plane[AOM_PLANE_U]; const ModeCosts *mode_costs = &x->mode_costs; +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP #if CONFIG_SDP assert(xd->tree_type != LUMA_PART); - const BLOCK_SIZE plane_bsize = get_plane_block_size( - mbmi->sb_type[PLANE_TYPE_UV], pd->subsampling_x, pd->subsampling_y); -#else +#endif // CONFIG_SDP + const BLOCK_SIZE plane_bsize = get_mb_plane_block_size( + xd, mbmi, PLANE_TYPE_UV, pd->subsampling_x, pd->subsampling_y); +#else // !CONFIG_SDP && ! CONFIG_EXT_RECUR_PARTITIONS const BLOCK_SIZE plane_bsize = -#if CONFIG_SDP - get_plane_block_size(mbmi->sb_type[xd->tree_type == CHROMA_PART], - pd->subsampling_x, pd->subsampling_y); -#else get_plane_block_size(mbmi->sb_type, pd->subsampling_x, pd->subsampling_y); -#endif -#endif +#endif // CONFIG_SDP assert(is_cfl_allowed(xd) && cpi->oxcf.intra_mode_cfg.enable_cfl_intra); assert(plane_bsize < BLOCK_SIZES_ALL); @@ -499,11 +495,6 @@ if (xd->tree_type == SHARED_PART) { #endif if (xd->cfl.store_y) { - // Restore reconstructed luma values. - // TODO(chiyotsai@google.com): right now we are re-computing the txfm in - // this function everytime we search through uv modes. 
There is some - // potential speed up here if we cache the result to avoid redundant - // computation. #if CONFIG_SDP av1_encode_intra_block_plane(cpi, x, mbmi->sb_type[PLANE_TYPE_Y], AOM_PLANE_Y, DRY_RUN_NORMAL, @@ -512,7 +503,7 @@ av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y, DRY_RUN_NORMAL, cpi->optimize_seg_arr[mbmi->segment_id]); -#endif +#endif // CONFIG_SDP xd->cfl.store_y = 0; } #if CONFIG_SDP @@ -562,7 +553,7 @@ continue; } else { // Predict directly if we don't need to search for angle delta. - if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) { + if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, best_rd)) { continue; } } @@ -918,6 +909,17 @@ best_rd_so_far = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist); try_filter_intra = (best_rd_so_far / 2) <= best_rd; } +#if CONFIG_EXT_RECUR_PARTITIONS + const MB_MODE_INFO *cached_mode = x->inter_mode_cache; + const FILTER_INTRA_MODE_INFO *cached_fi_mode = + cached_mode ? &cached_mode->filter_intra_mode_info : NULL; + if (should_reuse_mode(x, REUSE_INTRA_MODE_IN_INTERFRAME_FLAG) && + !frame_is_intra_only(cm) && cached_fi_mode && + !cached_fi_mode->use_filter_intra) { + // assert(cached_mode->mode == DC_PRED); + try_filter_intra = 0; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS if (try_filter_intra) { handle_filter_intra_mode(cpi, x, bsize, ctx, rd_stats_y, mode_cost,
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c index 8601b55..ad0f4d7 100644 --- a/av1/encoder/mcomp.c +++ b/av1/encoder/mcomp.c
@@ -1912,7 +1912,7 @@ // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL, - MAX_MB_PLANE); + MAX_MB_PLANE, NULL); } if (xd->bd != 8) {
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h index 2519cc8..7d892f1 100644 --- a/av1/encoder/mcomp.h +++ b/av1/encoder/mcomp.h
@@ -58,18 +58,6 @@ struct AV1_COMP; struct SPEED_FEATURES; -// ============================================================================= -// Cost functions -// ============================================================================= - -enum { - MV_COST_ENTROPY, // Use the entropy rate of the mv as the cost - MV_COST_L1_LOWRES, // Use the l1 norm of the mv as the cost (<480p) - MV_COST_L1_MIDRES, // Use the l1 norm of the mv as the cost (>=480p) - MV_COST_L1_HDRES, // Use the l1 norm of the mv as the cost (>=720p) - MV_COST_NONE // Use 0 as as cost irrespective of the current mv -} UENUM1BYTE(MV_COST_TYPE); - typedef struct { // The reference mv used to compute the mv cost const MV *ref_mv;
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c index ca271ad..e33b577 100644 --- a/av1/encoder/motion_search_facade.c +++ b/av1/encoder/motion_search_facade.c
@@ -72,7 +72,7 @@ backup_yv12[i] = xd->plane[i].pre[ref_idx]; } av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, - num_planes); + num_planes, &mbmi->chroma_ref_info); } // Work out the size of the first step in the mv step search. @@ -385,7 +385,7 @@ for (i = 0; i < num_planes; i++) backup_yv12[ref][i] = xd->plane[i].pre[ref]; av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, - NULL, num_planes); + NULL, num_planes, &mbmi->chroma_ref_info); } } @@ -532,7 +532,7 @@ const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, - num_planes); + num_planes, &mbmi->chroma_ref_info); } int bestsme = INT_MAX; @@ -764,12 +764,12 @@ int_mv best_mv; av1_setup_pre_planes(xd, ref_idx, yv12, mi_row, mi_col, - get_ref_scale_factors(cm, ref), num_planes); + get_ref_scale_factors(cm, ref), num_planes, NULL); set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); if (scaled_ref_frame) { backup_yv12 = xd->plane[AOM_PLANE_Y].pre[ref_idx]; av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, - num_planes); + num_planes, NULL); } // Allow more mesh searches for screen content type on the ARF. 
@@ -843,3 +843,120 @@ return best_mv; } + +#if CONFIG_EXT_RECUR_PARTITIONS +void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile, + MACROBLOCK *const x, int mi_row, int mi_col, + BLOCK_SIZE bsize, const CHROMA_REF_INFO *chr_ref_info); +int_mv av1_simple_motion_search_ext(AV1_COMP *const cpi, + const TileInfo *const tile, MACROBLOCK *x, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int ref, FULLPEL_MV start_mv, + int num_planes, int use_subpixel, + SimpleMotionData *sms_data) { + assert(num_planes == 1 && + "Currently simple_motion_search only supports luma plane"); + assert(!frame_is_intra_only(&cpi->common) && + "Simple motion search only enabled for non-key frames"); + AV1_COMMON *const cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + + // TODO(debargha,chiyotsai): Can we use set_offsets_for_motion_search() + av1_set_offsets(cpi, tile, x, mi_row, mi_col, bsize, NULL); + // set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize); + + MB_MODE_INFO *mbmi = xd->mi[0]; +#if CONFIG_SDP + mbmi->sb_type[0] = mbmi->sb_type[1] = bsize; +#else + mbmi->sb_type = bsize; +#endif // CONFIG_SDP + mbmi->ref_frame[0] = ref; + mbmi->ref_frame[1] = NONE_FRAME; + mbmi->motion_mode = SIMPLE_TRANSLATION; +#if CONFIG_REMOVE_DUAL_FILTER + mbmi->interp_fltr = EIGHTTAP_REGULAR; +#else + mbmi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR); +#endif // CONFIG_REMOVE_DUAL_FILTER + + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref); + const YV12_BUFFER_CONFIG *scaled_ref_frame = + av1_get_scaled_ref_frame(cpi, ref); + struct buf_2d backup_yv12; + // ref_mv is used to calculate the cost of the motion vector + const MV ref_mv = kZeroMv; + const int step_param = + AOMMIN(cpi->mv_search_params.mv_step_param + + cpi->sf.part_sf.simple_motion_search_reduce_search_steps, + MAX_MVSEARCH_STEPS - 2); + const search_site_config *src_search_sites = + cpi->mv_search_params.search_site_cfg[SS_CFG_SRC]; + int cost_list[5]; + const int ref_idx = 0; 
+ int var; + int_mv best_mv; + + av1_setup_pre_planes(xd, ref_idx, yv12, mi_row, mi_col, + get_ref_scale_factors(cm, ref), num_planes, NULL); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + if (scaled_ref_frame) { + backup_yv12 = xd->plane[AOM_PLANE_Y].pre[ref_idx]; + av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, + num_planes, NULL); + } + + // Allow more mesh searches for screen content type on the ARF. + const int fine_search_interval = use_fine_search_interval(cpi); + sms_data->sadpb = x->mv_costs.sadperbit; + sms_data->errorperbit = x->mv_costs.errorperbit; + FULLPEL_MOTION_SEARCH_PARAMS full_ms_params; + av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv, + src_search_sites, fine_search_interval); + + var = av1_full_pixel_search(start_mv, &full_ms_params, step_param, + cond_cost_list(cpi, cost_list), + &best_mv.as_fullmv, NULL); + + sms_data->fullmv = best_mv.as_mv; + const int use_subpel_search = + var < INT_MAX && !cpi->common.features.cur_frame_force_integer_mv && + use_subpixel; + if (scaled_ref_frame) { + xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12; + } + if (use_subpel_search) { + int not_used = 0; + + SUBPEL_MOTION_SEARCH_PARAMS ms_params; + av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv, + cost_list); + // TODO(yunqing): integrate this into av1_make_default_subpel_ms_params(). 
+ ms_params.forced_stop = cpi->sf.mv_sf.simple_motion_subpel_force_stop; + + MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv); + + cpi->mv_search_params.find_fractional_mv_step( + xd, cm, &ms_params, subpel_start_mv, &best_mv.as_mv, ¬_used, + &x->pred_sse[ref], NULL); + } else { + // Manually convert from units of pixel to 1/8-pixels if we are not doing + // subpel search + convert_fullmv_to_mv(&best_mv); + } + sms_data->submv = best_mv.as_mv; + mbmi->mv[0] = best_mv; + + // Get a copy of the prediction output + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, + AOM_PLANE_Y, AOM_PLANE_Y); + + aom_clear_system_state(); + + if (scaled_ref_frame) { + xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12; + } + + return best_mv; +} +#endif // CONFIG_EXT_RECUR_PARTITIONS
diff --git a/av1/encoder/motion_search_facade.h b/av1/encoder/motion_search_facade.h index e631c4e..c671211 100644 --- a/av1/encoder/motion_search_facade.h +++ b/av1/encoder/motion_search_facade.h
@@ -70,7 +70,14 @@ int mi_row, int mi_col, BLOCK_SIZE bsize, const FULLPEL_MV start_mv, int use_subpixel, unsigned int *sse, unsigned int *var); - +#if CONFIG_EXT_RECUR_PARTITIONS +int_mv av1_simple_motion_search_ext(AV1_COMP *const cpi, + const TileInfo *const tile, MACROBLOCK *x, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int ref, FULLPEL_MV start_mv, + int num_planes, int use_subpixel, + SimpleMotionData *sms_data); +#endif // CONFIG_EXT_RECUR_PARTITIONS #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/mv_prec.c b/av1/encoder/mv_prec.c index a923533..50d7c40 100644 --- a/av1/encoder/mv_prec.c +++ b/av1/encoder/mv_prec.c
@@ -276,74 +276,126 @@ } // Split block +#if CONFIG_EXT_RECUR_PARTITIONS +static AOM_INLINE void collect_mv_stats_sb(MV_STATS *mv_stats, + const AV1_COMP *cpi, int mi_row, + int mi_col, BLOCK_SIZE bsize, + PARTITION_TREE *ptree) { +#else static AOM_INLINE void collect_mv_stats_sb(MV_STATS *mv_stats, const AV1_COMP *cpi, int mi_row, int mi_col, BLOCK_SIZE bsize) { +#endif // EXT_RECUR_PARTITIONS assert(bsize < BLOCK_SIZES_ALL); const AV1_COMMON *cm = &cpi->common; if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols) return; -#if CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS + const PARTITION_TYPE partition = ptree->partition; +#elif CONFIG_SDP const PARTITION_TYPE partition = get_partition(cm, SHARED_PART, mi_row, mi_col, bsize); #else - const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); +const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); #endif + const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); - const int hbs = mi_size_wide[bsize] / 2; - const int qbs = mi_size_wide[bsize] / 4; + const int hbs_w = mi_size_wide[bsize] / 2; + const int hbs_h = mi_size_high[bsize] / 2; + const int qbs_w = mi_size_wide[bsize] / 4; + const int qbs_h = mi_size_high[bsize] / 4; switch (partition) { case PARTITION_NONE: collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); break; case PARTITION_HORZ: +#if CONFIG_EXT_RECUR_PARTITIONS + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, + ptree->sub_tree[0]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + hbs_h, mi_col, subsize, + ptree->sub_tree[1]); +#else collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); - collect_mv_stats_b(mv_stats, cpi, mi_row + hbs, mi_col); + collect_mv_stats_b(mv_stats, cpi, mi_row + hbs_h, mi_col); +#endif // CONFIG_EXT_RECUR_PARTITIONS break; case PARTITION_VERT: +#if CONFIG_EXT_RECUR_PARTITIONS + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, + ptree->sub_tree[0]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, 
mi_col + hbs_w, subsize, + ptree->sub_tree[1]); +#else collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); - collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col + hbs); + collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col + hbs_w); +#endif // CONFIG_EXT_RECUR_PARTITIONS break; +#if !CONFIG_EXT_RECUR_PARTITIONS case PARTITION_SPLIT: collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize); - collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + hbs, subsize); - collect_mv_stats_sb(mv_stats, cpi, mi_row + hbs, mi_col, subsize); - collect_mv_stats_sb(mv_stats, cpi, mi_row + hbs, mi_col + hbs, subsize); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + hbs_w, subsize); + collect_mv_stats_sb(mv_stats, cpi, mi_row + hbs_h, mi_col, subsize); + collect_mv_stats_sb(mv_stats, cpi, mi_row + hbs_h, mi_col + hbs_w, + subsize); break; +#endif // !CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_3: { + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, + ptree->sub_tree[0]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + qbs_h, mi_col, + get_partition_subsize(bsize, PARTITION_HORZ), + ptree->sub_tree[1]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + 3 * qbs_h, mi_col, subsize, + ptree->sub_tree[2]); + break; + } + case PARTITION_VERT_3: { + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, + ptree->sub_tree[0]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + qbs_w, + get_partition_subsize(bsize, PARTITION_VERT), + ptree->sub_tree[1]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 3 * qbs_w, subsize, + ptree->sub_tree[2]); + break; + } +#else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_HORZ_A: collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); - collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col + hbs); - collect_mv_stats_b(mv_stats, cpi, mi_row + hbs, mi_col); + collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col + hbs_w); + collect_mv_stats_b(mv_stats, cpi, mi_row + hbs_h, mi_col); break; case PARTITION_HORZ_B: 
collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); - collect_mv_stats_b(mv_stats, cpi, mi_row + hbs, mi_col); - collect_mv_stats_b(mv_stats, cpi, mi_row + hbs, mi_col + hbs); + collect_mv_stats_b(mv_stats, cpi, mi_row + hbs_h, mi_col); + collect_mv_stats_b(mv_stats, cpi, mi_row + hbs_h, mi_col + hbs_w); break; case PARTITION_VERT_A: collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); - collect_mv_stats_b(mv_stats, cpi, mi_row + hbs, mi_col); - collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col + hbs); + collect_mv_stats_b(mv_stats, cpi, mi_row + hbs_h, mi_col); + collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col + hbs_w); break; case PARTITION_VERT_B: collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); - collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col + hbs); - collect_mv_stats_b(mv_stats, cpi, mi_row + hbs, mi_col + hbs); + collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col + hbs_w); + collect_mv_stats_b(mv_stats, cpi, mi_row + hbs_h, mi_col + hbs_w); break; case PARTITION_HORZ_4: for (int i = 0; i < 4; ++i) { - const int this_mi_row = mi_row + i * qbs; + const int this_mi_row = mi_row + i * qbs_h; collect_mv_stats_b(mv_stats, cpi, this_mi_row, mi_col); } break; case PARTITION_VERT_4: for (int i = 0; i < 4; ++i) { - const int this_mi_col = mi_col + i * qbs; + const int this_mi_col = mi_col + i * qbs_w; collect_mv_stats_b(mv_stats, cpi, mi_row, this_mi_col); } break; +#endif // CONFIG_EXT_RECUR_PARTITIONS default: assert(0); } } @@ -360,7 +412,18 @@ BLOCK_SIZE sb_size = cm->seq_params.sb_size; for (int mi_row = mi_row_start; mi_row < mi_row_end; mi_row += sb_size_mi) { for (int mi_col = mi_col_start; mi_col < mi_col_end; mi_col += sb_size_mi) { +#if CONFIG_EXT_RECUR_PARTITIONS + const SB_INFO *sb_info = av1_get_sb_info(cm, mi_row, mi_col); +#if CONFIG_SDP + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, sb_size, + sb_info->ptree_root[0]); +#else + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, sb_size, + sb_info->ptree_root); +#endif // CONFIG_SDP +#else 
collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, sb_size); +#endif // CONFIG_EXT_RECUR_PARTITIONS } } }
diff --git a/av1/encoder/palette.c b/av1/encoder/palette.c index fb483d0..e6af1dc 100644 --- a/av1/encoder/palette.c +++ b/av1/encoder/palette.c
@@ -792,7 +792,7 @@ } } - av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); + av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, *best_rd); if (tokenonly_rd_stats.rate == INT_MAX) continue; this_rate = tokenonly_rd_stats.rate + intra_mode_info_cost_uv(cpi, x, mbmi, bsize, dc_mode_cost);
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c index 947dca1..7ca6495 100644 --- a/av1/encoder/partition_search.c +++ b/av1/encoder/partition_search.c
@@ -12,11 +12,13 @@ #include "aom_ports/system_state.h" #include "av1/common/blockd.h" +#include "av1/common/common_data.h" #include "av1/common/enums.h" #include "av1/common/reconintra.h" #include "av1/encoder/aq_complexity.h" #include "av1/encoder/aq_variance.h" +#include "av1/encoder/block.h" #include "av1/encoder/context_tree.h" #include "av1/encoder/encoder.h" #include "av1/encoder/encodeframe.h" @@ -24,6 +26,7 @@ #include "av1/encoder/encodemv.h" #include "av1/encoder/motion_search_facade.h" #include "av1/encoder/partition_search.h" +#include "av1/encoder/partition_strategy.h" #include "av1/encoder/reconinter_enc.h" #include "av1/encoder/tokenize.h" @@ -383,7 +386,7 @@ for (int plane = plane_start; plane < plane_end; ++plane) { #else for (int plane = 0; plane < num_planes; ++plane) { -#endif +#endif // CONFIG_SDP av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run, cpi->optimize_seg_arr[mbmi->segment_id]); } @@ -435,7 +438,8 @@ assert(IMPLIES(!is_intrabc_block(mbmi), cfg)); #endif av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, - xd->block_ref_scale_factors[ref], num_planes); + xd->block_ref_scale_factors[ref], num_planes, + &mbmi->chroma_ref_info); } int start_plane = 0; av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, @@ -452,9 +456,7 @@ int pixel_c, pixel_r; mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, pd->subsampling_x, pd->subsampling_y); - if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, - pd->subsampling_y)) - continue; + if (plane && !xd->is_chroma_ref) continue; mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, cm->current_frame.order_hint, plane, pixel_c, pixel_r, pd->width, pd->height, @@ -465,7 +467,7 @@ (void)num_planes; #endif - av1_encode_sb(cpi, x, bsize, dry_run); + av1_encode_sb(cpi, x, dry_run); av1_tokenize_sb_vartx(cpi, td, dry_run, bsize, rate, tile_data->allow_update_cdf); } @@ -616,11 +618,13 @@ } } #endif + + av1_mark_block_as_coded(xd, bsize, cm->seq_params.sb_size); } 
-static void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, - int mi_row, int mi_col, BLOCK_SIZE bsize, - AQ_MODE aq_mode, MB_MODE_INFO *mbmi) { +void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, + int mi_row, int mi_col, BLOCK_SIZE bsize, + AQ_MODE aq_mode, MB_MODE_INFO *mbmi) { x->rdmult = cpi->rd.RDMULT; #if CONFIG_SDP MACROBLOCKD *const xd = &x->e_mbd; @@ -667,7 +671,8 @@ void av1_set_offsets_without_segment_id(const AV1_COMP *const cpi, const TileInfo *const tile, MACROBLOCK *const x, int mi_row, - int mi_col, BLOCK_SIZE bsize) { + int mi_col, BLOCK_SIZE bsize, + const CHROMA_REF_INFO *chr_ref_info) { const AV1_COMMON *const cm = &cpi->common; const int num_planes = av1_num_planes(cm); MACROBLOCKD *const xd = &x->e_mbd; @@ -678,29 +683,32 @@ set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd, mi_row, mi_col); - set_entropy_context(xd, mi_row, mi_col, num_planes); + set_entropy_context(xd, mi_row, mi_col, num_planes, chr_ref_info); xd->above_txfm_context = cm->above_contexts.txfm[tile->tile_row] + mi_col; xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); // Set up destination pointers. - av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0, - num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, mi_row, mi_col, 0, + num_planes, chr_ref_info); // Set up limit values for MV components. // Mv beyond the range do not produce new/different prediction block. av1_set_mv_limits(&cm->mi_params, &x->mv_limits, mi_row, mi_col, mi_height, mi_width, cpi->oxcf.border_in_pixels); - set_plane_n4(xd, mi_width, mi_height, num_planes); + set_plane_n4(xd, mi_width, mi_height, num_planes, chr_ref_info); // Set up distance of MB to edge of frame in 1/8th pel units. 
+#if !CONFIG_EXT_RECUR_PARTITIONS assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); +#endif // !CONFIG_EXT_RECUR_PARTITIONS set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, - cm->mi_params.mi_rows, cm->mi_params.mi_cols); + cm->mi_params.mi_rows, cm->mi_params.mi_cols, chr_ref_info); // Set up source buffers. - av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize); + av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, + chr_ref_info); // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs() xd->tile = *tile; @@ -708,13 +716,14 @@ void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile, MACROBLOCK *const x, int mi_row, int mi_col, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize, const CHROMA_REF_INFO *chr_ref_info) { const AV1_COMMON *const cm = &cpi->common; const struct segmentation *const seg = &cm->seg; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi; - av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize); + av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize, + chr_ref_info); // Setup segment ID. 
mbmi = xd->mi[0]; @@ -780,9 +789,14 @@ const int num_planes = av1_num_planes(cm); MACROBLOCKD *const xd = &x->e_mbd; int plane_type = (xd->tree_type == CHROMA_PART); + assert(bsize < BLOCK_SIZES_ALL); + assert(IMPLIES(xd->tree_type == CHROMA_PART, + AOMMIN(block_size_wide[bsize], block_size_high[bsize]) > 4)); #endif + assert(is_bsize_geq(bsize, cpi->common.mi_params.mi_alloc_bsize)); - av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize); + av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize, + &ctx->chroma_ref_info); if (ctx->rd_mode_is_ready) { #if CONFIG_SDP @@ -824,6 +838,7 @@ mbmi->sb_type = bsize; #endif mbmi->partition = partition; + mbmi->chroma_ref_info = ctx->chroma_ref_info; #if CONFIG_RD_DEBUG mbmi->mi_row = mi_row; @@ -1440,13 +1455,15 @@ static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data, ThreadData *td, TokenExtra **tp, int mi_row, int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize, - PARTITION_TYPE partition, PICK_MODE_CONTEXT *const ctx, - int *rate) { + PARTITION_TYPE partition, + const PICK_MODE_CONTEXT *const ctx, int *rate) { + const AV1_COMMON *const cm = &cpi->common; TileInfo *const tile = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *xd = &x->e_mbd; - av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize); + av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize, + &ctx->chroma_ref_info); const int origin_mult = x->rdmult; setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); MB_MODE_INFO *mbmi = xd->mi[0]; @@ -1454,36 +1471,47 @@ av1_update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run); #if CONFIG_SDP - int plane_type = (xd->tree_type == CHROMA_PART); -#endif + const int num_planes = av1_num_planes(cm); + const int plane_start = (xd->tree_type == CHROMA_PART); + const int plane_end = (xd->tree_type == LUMA_PART) ? 
1 : num_planes; +#endif // CONFIG_SDP if (!dry_run) { #if CONFIG_SDP - x->mbmi_ext_frame->cb_offset[plane_type] = x->cb_offset[plane_type]; - assert(x->cb_offset[plane_type] < - (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size])); + for (int plane = plane_start; plane < plane_end; plane++) { + x->mbmi_ext_frame->cb_offset[plane] = x->cb_offset[plane]; + assert(x->cb_offset[plane] < + (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size])); + } #else - x->mbmi_ext_frame->cb_offset = x->cb_offset; - assert(x->cb_offset < + memcpy(x->mbmi_ext_frame->cb_offset, x->cb_offset, sizeof(x->cb_offset)); + assert(x->cb_offset[0] < (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size])); -#endif +#endif // CONFIG_SDP } encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate); if (!dry_run) { - const AV1_COMMON *const cm = &cpi->common; #if CONFIG_SDP - x->cb_offset[plane_type] += block_size_wide[bsize] * block_size_high[bsize]; + for (int plane = plane_start; plane < plane_end; ++plane) { #else - x->cb_offset += block_size_wide[bsize] * block_size_high[bsize]; -#endif + for (int plane = 0; plane < MAX_MB_PLANE; ++plane) { +#endif // CONFIG_SDP + if (plane == 0) { + x->cb_offset[plane] += block_size_wide[bsize] * block_size_high[bsize]; + } else if (xd->is_chroma_ref) { + const BLOCK_SIZE bsize_base = mbmi->chroma_ref_info.bsize_base; + x->cb_offset[plane] += + block_size_wide[bsize_base] * block_size_high[bsize_base]; + } + } #if CONFIG_SDP if (bsize == cpi->common.seq_params.sb_size && mbmi->skip_txfm[xd->tree_type == CHROMA_PART] == 1 && #else if (bsize == cpi->common.seq_params.sb_size && mbmi->skip_txfm == 1 && -#endif +#endif // CONFIG_SDP cm->delta_q_info.delta_lf_present_flag) { const int frame_lf_count = av1_num_planes(cm) > 1 ? 
FRAME_LF_COUNT : FRAME_LF_COUNT - 2; @@ -1605,6 +1633,44 @@ x->rdmult = origin_mult; } +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS +/*!\brief Reconstructs a partition (may contain multiple coding blocks) + * + * \ingroup partition_search + * Reconstructs a sub-partition of the superblock by applying the chosen modes + * and partition trees stored in pc_tree. + * + * \param[in] cpi Top-level encoder structure + * \param[in] td Pointer to thread data + * \param[in] tile_data Pointer to struct holding adaptive + * data/contexts/models for the tile during encoding + * \param[in] tp Pointer to the starting token + * \param[in] mi_row Row coordinate of the block in a step size of + * MI_SIZE + * \param[in] mi_col Column coordinate of the block in a step size of + * MI_SIZE + * \param[in] dry_run A code indicating whether it is part of the final + * pass for reconstructing the superblock + * \param[in] bsize Current block size + * \param[in] pc_tree Pointer to the PC_TREE node storing the picked + * partitions and mode info for the current block + * \param[in] ptree Pointer to the PARTITION_TREE node holding the + * partition info for the current node and all of its + * descendants. + * \param[in] ptree_luma Pointer to the luma partition tree so that the + * encoder to estimate the + * partition type for chroma. + * \param[in] rate Pointer to the total rate for the current block + * + * \return Nothing is returned. Instead, reconstructions (w/o in-loop filters) + * will be updated in the pixel buffers in td->mb.e_mbd. 
+ */ +static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, int mi_row, + int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize, + const PC_TREE *pc_tree, PARTITION_TREE *ptree, + PARTITION_TREE *ptree_luma, int *rate) { +#else /*!\brief Reconstructs a partition (may contain multiple coding blocks) * * \ingroup partition_search @@ -1624,6 +1690,9 @@ * \param[in] bsize Current block size * \param[in] pc_tree Pointer to the PC_TREE node storing the picked * partitions and mode info for the current block + * \param[in] ptree Pointer to the PARTITION_TREE node holding the + * partition info for the current node and all of its + * descendants. * \param[in] rate Pointer to the total rate for the current block * * \return Nothing is returned. Instead, reconstructions (w/o in-loop filters) @@ -1632,133 +1701,325 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, TokenExtra **tp, int mi_row, int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize, - PC_TREE *pc_tree, int *rate) { + const PC_TREE *pc_tree, PARTITION_TREE *ptree, + int *rate) { +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS assert(bsize < BLOCK_SIZES_ALL); const AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params; + + if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; assert(bsize < BLOCK_SIZES_ALL); - const int hbs = mi_size_wide[bsize] / 2; - const int is_partition_root = bsize >= BLOCK_8X8; + const int hbs_w = mi_size_wide[bsize] / 2; + const int hbs_h = mi_size_high[bsize] / 2; + const int qbs_w = mi_size_wide[bsize] / 4; + const int qbs_h = mi_size_high[bsize] / 4; + const int is_partition_root = is_partition_point(bsize); const int ctx = is_partition_root ? 
partition_plane_context(xd, mi_row, mi_col, bsize) : -1; const PARTITION_TYPE partition = pc_tree->partitioning; const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); - int quarter_step = mi_size_wide[bsize] / 4; - int i; - BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); +#if !CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); +#endif // !CONFIG_EXT_RECUR_PARTITIONS - if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; if (subsize == BLOCK_INVALID) return; +#if CONFIG_EXT_RECUR_PARTITIONS + assert(partition != PARTITION_SPLIT); +#endif // CONFIG_EXT_RECUR_PARTITIONS if (!dry_run && ctx >= 0) { - const int has_rows = (mi_row + hbs) < mi_params->mi_rows; - const int has_cols = (mi_col + hbs) < mi_params->mi_cols; + const int has_rows = (mi_row + hbs_h) < mi_params->mi_rows; + const int has_cols = (mi_col + hbs_w) < mi_params->mi_cols; #if CONFIG_SDP const int plane_index = xd->tree_type == CHROMA_PART; -#endif - - if (has_rows && has_cols) { +#endif // CONFIG_SDP +#if CONFIG_EXT_RECUR_PARTITIONS + if (is_square_block(bsize)) { +#endif // CONFIG_EXT_RECUR_PARTITIONS + if (has_rows && has_cols) { #if CONFIG_ENTROPY_STATS - td->counts->partition[ctx][partition]++; + td->counts->partition[ctx][partition]++; #endif - - if (tile_data->allow_update_cdf) { - FRAME_CONTEXT *fc = xd->tile_ctx; + if (tile_data->allow_update_cdf) { + FRAME_CONTEXT *fc = xd->tile_ctx; #if CONFIG_SDP - int luma_split_flag = 0; - int parent_block_width = block_size_wide[bsize]; - if (xd->tree_type == CHROMA_PART && - parent_block_width >= SHARED_PART_SIZE) { - luma_split_flag = - get_luma_split_flag(bsize, mi_params, mi_row, mi_col); - } - if (luma_split_flag <= 3) { - update_cdf(fc->partition_cdf[plane_index][ctx], partition, - partition_cdf_length(bsize)); - } else { - // if luma blocks uses smaller blocks, then chroma will also split - assert(partition == PARTITION_SPLIT); - } + int 
parent_block_width = block_size_wide[bsize]; +#if CONFIG_EXT_RECUR_PARTITIONS + const int min_bsize_1d = + AOMMIN(block_size_high[bsize], parent_block_width); + if (xd->tree_type == CHROMA_PART && ptree_luma && + min_bsize_1d >= SHARED_PART_SIZE) { + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + PARTITION_TYPE derived_partition_mode = sdp_chroma_part_from_luma( + bsize, ptree_luma->partition, ss_x, ss_y); + if (partition != derived_partition_mode) + assert(0 && "Chroma partition does not match the derived mode."); + } else { + update_cdf(fc->partition_cdf[plane_index][ctx], partition, + partition_cdf_length(bsize)); + } +#else // CONFIG_EXT_RECUR_PARTITIONS + int luma_split_flag = 0; + if (xd->tree_type == CHROMA_PART && + parent_block_width >= SHARED_PART_SIZE) { + luma_split_flag = + get_luma_split_flag(bsize, mi_params, mi_row, mi_col); + } + if (luma_split_flag <= 3) { + update_cdf(fc->partition_cdf[plane_index][ctx], partition, + partition_cdf_length(bsize)); + } else { + // if luma blocks uses smaller blocks, then chroma will also split + assert(partition == PARTITION_SPLIT); + } +#endif // CONFIG_EXT_RECUR_PARTITIONS #else update_cdf(fc->partition_cdf[ctx], partition, partition_cdf_length(bsize)); -#endif +#endif // CONFIG_SDP + } } +#if CONFIG_EXT_RECUR_PARTITIONS + } else { +#if CONFIG_SDP + int parent_block_width = block_size_wide[bsize]; + const int min_bsize_1d = + AOMMIN(block_size_high[bsize], parent_block_width); + if (xd->tree_type == CHROMA_PART && ptree_luma && + min_bsize_1d >= SHARED_PART_SIZE) { + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + PARTITION_TYPE derived_partition_mode = + sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ss_x, ss_y); + assert(partition == derived_partition_mode); + (void)derived_partition_mode; + } else { +#endif + const PARTITION_TYPE_REC p_rec = + get_symbol_from_partition_rec_block(bsize, partition); +#if 
CONFIG_ENTROPY_STATS + td->counts->partition_rec[ctx][p_rec]++; +#endif // CONFIG_ENTROPY_STATS + + if (tile_data->allow_update_cdf) { + FRAME_CONTEXT *fc = xd->tile_ctx; + update_cdf(fc->partition_rec_cdf[ctx], p_rec, + partition_rec_cdf_length(bsize)); + } +#if CONFIG_SDP + } +#endif } +#endif // CONFIG_EXT_RECUR_PARTITIONS } + PARTITION_TREE *sub_tree[4] = { NULL, NULL, NULL, NULL }; + if (!dry_run) { + assert(ptree); + + ptree->partition = partition; + ptree->bsize = bsize; + ptree->mi_row = mi_row; + ptree->mi_col = mi_col; + PARTITION_TREE *parent = ptree->parent; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + set_chroma_ref_info( + mi_row, mi_col, ptree->index, bsize, &ptree->chroma_ref_info, + parent ? &parent->chroma_ref_info : NULL, + parent ? parent->bsize : BLOCK_INVALID, + parent ? parent->partition : PARTITION_NONE, ss_x, ss_y); + + switch (partition) { + case PARTITION_SPLIT: + ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + ptree->sub_tree[2] = av1_alloc_ptree_node(ptree, 2); + ptree->sub_tree[3] = av1_alloc_ptree_node(ptree, 3); + break; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ: + case PARTITION_VERT: + ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + break; + case PARTITION_HORZ_3: + case PARTITION_VERT_3: + ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + ptree->sub_tree[2] = av1_alloc_ptree_node(ptree, 2); + break; +#endif // CONFIG_EXT_RECUR_PARTITIONS + default: break; + } + for (int i = 0; i < 4; ++i) sub_tree[i] = ptree->sub_tree[i]; + } + +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const int min_bsize_1d = + AOMMIN(block_size_high[bsize], block_size_wide[bsize]); + const int track_ptree_luma = xd->tree_type && ptree_luma && + ptree_luma->partition == partition && + min_bsize_1d >= 
SHARED_PART_SIZE; +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS switch (partition) { case PARTITION_NONE: encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, partition, pc_tree->none, rate); break; case PARTITION_VERT: +#if CONFIG_EXT_RECUR_PARTITIONS + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, + pc_tree->vertical[0], sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, +#endif // CONFIG_SDP + rate); + if (mi_col + hbs_w < cm->mi_params.mi_cols) { + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs_w, dry_run, + subsize, pc_tree->vertical[1], sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, +#endif // CONFIG_SDP + rate); + } +#else // CONFIG_EXT_RECUR_PARTITIONS encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, partition, pc_tree->vertical[0], rate); - if (mi_col + hbs < mi_params->mi_cols) { - encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize, - partition, pc_tree->vertical[1], rate); + if (mi_col + hbs_w < mi_params->mi_cols) { + encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs_w, dry_run, + subsize, partition, pc_tree->vertical[1], rate); } +#endif // CONFIG_EXT_RECUR_PARTITIONS break; case PARTITION_HORZ: +#if CONFIG_EXT_RECUR_PARTITIONS + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, + pc_tree->horizontal[0], sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, +#endif // CONFIG_SDP + rate); + if (mi_row + hbs_h < cm->mi_params.mi_rows) { + encode_sb(cpi, td, tile_data, tp, mi_row + hbs_h, mi_col, dry_run, + subsize, pc_tree->horizontal[1], sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? 
ptree_luma->sub_tree[1] : NULL, +#endif // CONFIG_SDP + rate); + } +#else // CONFIG_EXT_RECUR_PARTITIONS encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, partition, pc_tree->horizontal[0], rate); - if (mi_row + hbs < mi_params->mi_rows) { - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize, - partition, pc_tree->horizontal[1], rate); + if (mi_row + hbs_h < mi_params->mi_rows) { + encode_b(cpi, tile_data, td, tp, mi_row + hbs_h, mi_col, dry_run, + subsize, partition, pc_tree->horizontal[1], rate); } +#endif // CONFIG_EXT_RECUR_PARTITIONS break; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_3: { + const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_HORZ); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, + pc_tree->horizontal3[0], sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, +#endif // CONFIG_SDP + rate); + if (mi_row + qbs_h >= cm->mi_params.mi_rows) break; + encode_sb(cpi, td, tile_data, tp, mi_row + qbs_h, mi_col, dry_run, bsize3, + pc_tree->horizontal3[1], sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, +#endif // CONFIG_SDP + rate); + if (mi_row + 3 * qbs_h >= cm->mi_params.mi_rows) break; + encode_sb(cpi, td, tile_data, tp, mi_row + 3 * qbs_h, mi_col, dry_run, + subsize, pc_tree->horizontal3[2], sub_tree[2], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, +#endif // CONFIG_SDP + rate); + break; + } + case PARTITION_VERT_3: { + const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_VERT); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, + pc_tree->vertical3[0], sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? 
ptree_luma->sub_tree[0] : NULL, +#endif // CONFIG_SDP + rate); + if (mi_col + qbs_w >= cm->mi_params.mi_cols) break; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + qbs_w, dry_run, bsize3, + pc_tree->vertical3[1], sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, +#endif // CONFIG_SDP + rate); + if (mi_col + 3 * qbs_w >= cm->mi_params.mi_cols) break; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 3 * qbs_w, dry_run, + subsize, pc_tree->vertical3[2], sub_tree[2], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, +#endif // CONFIG_SDP + rate); + break; + } +#else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_SPLIT: encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, - pc_tree->split[0], rate); - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize, - pc_tree->split[1], rate); - encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize, - pc_tree->split[2], rate); - encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run, - subsize, pc_tree->split[3], rate); + pc_tree->split[0], sub_tree[0], rate); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs_w, dry_run, + subsize, pc_tree->split[1], sub_tree[1], rate); + encode_sb(cpi, td, tile_data, tp, mi_row + hbs_h, mi_col, dry_run, + subsize, pc_tree->split[2], sub_tree[2], rate); + encode_sb(cpi, td, tile_data, tp, mi_row + hbs_h, mi_col + hbs_w, dry_run, + subsize, pc_tree->split[3], sub_tree[3], rate); break; - case PARTITION_HORZ_A: encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2, partition, pc_tree->horizontala[0], rate); - encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2, + encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs_w, dry_run, bsize2, partition, pc_tree->horizontala[1], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize, + encode_b(cpi, tile_data, td, tp, mi_row + hbs_h, mi_col, dry_run, subsize, partition, 
pc_tree->horizontala[2], rate); break; case PARTITION_HORZ_B: encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, partition, pc_tree->horizontalb[0], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2, + encode_b(cpi, tile_data, td, tp, mi_row + hbs_h, mi_col, dry_run, bsize2, partition, pc_tree->horizontalb[1], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run, + encode_b(cpi, tile_data, td, tp, mi_row + hbs_h, mi_col + hbs_w, dry_run, bsize2, partition, pc_tree->horizontalb[2], rate); break; case PARTITION_VERT_A: encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2, partition, pc_tree->verticala[0], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2, + encode_b(cpi, tile_data, td, tp, mi_row + hbs_h, mi_col, dry_run, bsize2, partition, pc_tree->verticala[1], rate); - encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize, + encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs_w, dry_run, subsize, partition, pc_tree->verticala[2], rate); break; case PARTITION_VERT_B: encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, partition, pc_tree->verticalb[0], rate); - encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2, + encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs_w, dry_run, bsize2, partition, pc_tree->verticalb[1], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run, + encode_b(cpi, tile_data, td, tp, mi_row + hbs_h, mi_col + hbs_w, dry_run, bsize2, partition, pc_tree->verticalb[2], rate); break; case PARTITION_HORZ_4: - for (i = 0; i < SUB_PARTITIONS_PART4; ++i) { - int this_mi_row = mi_row + i * quarter_step; + for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { + int this_mi_row = mi_row + i * qbs_h; if (i > 0 && this_mi_row >= mi_params->mi_rows) break; encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize, @@ -1766,19 +2027,87 @@ } break; case 
PARTITION_VERT_4: - for (i = 0; i < SUB_PARTITIONS_PART4; ++i) { - int this_mi_col = mi_col + i * quarter_step; + for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { + int this_mi_col = mi_col + i * qbs_w; if (i > 0 && this_mi_col >= mi_params->mi_cols) break; encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize, partition, pc_tree->vertical4[i], rate); } break; +#endif // CONFIG_EXT_RECUR_PARTITIONS default: assert(0 && "Invalid partition type."); break; } + if (ptree) ptree->is_settled = 1; update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition); } +#if CONFIG_EXT_RECUR_PARTITIONS +static void build_one_split_tree(AV1_COMMON *const cm, int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE final_bsize, + PARTITION_TREE *ptree) { + assert(block_size_high[bsize] == block_size_wide[bsize]); + if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols) + return; + if (bsize == BLOCK_4X4 || bsize == final_bsize) { + ptree->partition = PARTITION_NONE; + return; + } + + const int hbs_w = mi_size_wide[bsize] >> 1; + const int hbs_h = mi_size_high[bsize] >> 1; + const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize]; + + ptree->partition = PARTITION_HORZ; + ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + + ptree->sub_tree[0]->partition = PARTITION_VERT; + ptree->sub_tree[0]->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[0]->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + + ptree->sub_tree[1]->partition = PARTITION_VERT; + ptree->sub_tree[1]->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); + ptree->sub_tree[1]->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); + + build_one_split_tree(cm, mi_row, mi_col, subsize, final_bsize, + ptree->sub_tree[0]->sub_tree[0]); + build_one_split_tree(cm, mi_row, mi_col + hbs_w, subsize, final_bsize, + ptree->sub_tree[0]->sub_tree[1]); + build_one_split_tree(cm, mi_row + hbs_h, mi_col, subsize, final_bsize, + 
ptree->sub_tree[1]->sub_tree[0]); + build_one_split_tree(cm, mi_row + hbs_h, mi_col + hbs_w, subsize, final_bsize, + ptree->sub_tree[1]->sub_tree[1]); +} + +void av1_build_partition_tree_fixed_partitioning(AV1_COMMON *const cm, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + PARTITION_TREE *ptree) { + const BLOCK_SIZE sb_size = cm->seq_params.sb_size; + + build_one_split_tree(cm, mi_row, mi_col, sb_size, bsize, ptree); +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + +static PARTITION_TYPE get_preset_partition(const AV1_COMMON *cm, +#if CONFIG_SDP + int plane_type, +#endif // CONFIG_SDP + int mi_row, int mi_col, + BLOCK_SIZE bsize, + PARTITION_TREE *ptree) { + if (ptree) return ptree->partition; + if (bsize >= BLOCK_8X8) { +#if CONFIG_SDP + return get_partition(cm, plane_type, mi_row, mi_col, bsize); +#else + return get_partition(cm, mi_row, mi_col, bsize); +#endif // CONFIG_SDP + } + return PARTITION_NONE; +} + /*!\brief AV1 block partition search (partition estimation and partial search). * * \ingroup partition_search @@ -1795,7 +2124,8 @@ blocks starting from the first pixel of the current block * \param[in] tp Pointer to the starting token -* \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE +* \param[in] mi_row Row coordinate of the block in a step size of +MI_SIZE * \param[in] mi_col Column coordinate of the block in a step size of MI_SIZE * \param[in] bsize Current block size @@ -1805,6 +2135,8 @@ * \param[in] do_recon Whether the reconstruction function needs to be run, either for finalizing a superblock or providing reference for future sub-partitions +* \param[in] ptree Pointer to the PARTITION_TREE node holding the +pre-calculated partition tree (if any) for the current block * \param[in] pc_tree Pointer to the PC_TREE node holding the picked partitions and mode info for the current block * @@ -1815,42 +2147,50 @@ void av1_rd_use_partition(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, MB_MODE_INFO **mib, TokenExtra **tp, int 
mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, - int64_t *dist, int do_recon, PC_TREE *pc_tree) { + int64_t *dist, int do_recon, PARTITION_TREE *ptree, + PC_TREE *pc_tree) { AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params; const int num_planes = av1_num_planes(cm); TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; const ModeCosts *mode_costs = &x->mode_costs; const int bs = mi_size_wide[bsize]; const int hbs = bs / 2; +#if CONFIG_EXT_RECUR_PARTITIONS + const int hbh = mi_size_high[bsize] / 2; + const int hbw = mi_size_wide[bsize] / 2; +#endif // CONFIG_EXT_RECUR_PARTITIONS const int pl = (bsize >= BLOCK_8X8) ? partition_plane_context(xd, mi_row, mi_col, bsize) : 0; - const PARTITION_TYPE partition = #if CONFIG_SDP - (bsize >= BLOCK_8X8) ? get_partition(cm, xd->tree_type == CHROMA_PART, - mi_row, mi_col, bsize) + const int plane_type = (xd->tree_type == CHROMA_PART); + const PARTITION_TYPE partition = + get_preset_partition(cm, plane_type, mi_row, mi_col, bsize, ptree); #else - (bsize >= BLOCK_8X8) ? 
get_partition(cm, mi_row, mi_col, bsize) -#endif - : PARTITION_NONE; + const PARTITION_TYPE partition = + get_preset_partition(cm, mi_row, mi_col, bsize, ptree); +#endif // CONFIG_SDP const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; RD_STATS last_part_rdc, invalid_rdc; -#if CONFIG_SDP - int plane_type = (xd->tree_type == CHROMA_PART); -#endif if (pc_tree->none == NULL) { - pc_tree->none = av1_alloc_pmc(cm, bsize, &td->shared_coeff_buf); + pc_tree->none = + av1_alloc_pmc(cm, mi_row, mi_col, bsize, pc_tree, PARTITION_NONE, 0, + ss_x, ss_y, &td->shared_coeff_buf); } PICK_MODE_CONTEXT *ctx_none = pc_tree->none; if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; +#if !CONFIG_EXT_RECUR_PARTITIONS assert(mi_size_wide[bsize] == mi_size_high[bsize]); +#endif // !CONFIG_EXT_RECUR_PARTITIONS av1_invalid_rd_stats(&last_part_rdc); av1_invalid_rd_stats(&invalid_rdc); @@ -1864,7 +2204,8 @@ av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); if (bsize == BLOCK_16X16 && cpi->vaq_refresh) { - av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); + av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize, + &pc_tree->chroma_ref_info); x->mb_energy = av1_log_block_var(cpi, x, bsize); } @@ -1872,34 +2213,61 @@ const int orig_rdmult = x->rdmult; setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); +#if !CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE split_subsize = + get_partition_subsize(bsize, PARTITION_SPLIT); for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { - pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); - pc_tree->split[i]->index = i; + int x_idx = (i & 1) * hbs; + int y_idx = (i >> 1) * hbs; + pc_tree->split[i] = + av1_alloc_pc_tree_node(mi_row + y_idx, mi_col + x_idx, split_subsize, + pc_tree, PARTITION_SPLIT, i, i == 3, ss_x, ss_y); } +#endif // !CONFIG_EXT_RECUR_PARTITIONS switch (partition) { case PARTITION_NONE: pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, 
&last_part_rdc, PARTITION_NONE, bsize, ctx_none, invalid_rdc); break; case PARTITION_HORZ: +#if CONFIG_EXT_RECUR_PARTITIONS + pc_tree->horizontal[0] = av1_alloc_pc_tree_node( + mi_row, mi_col, subsize, pc_tree, PARTITION_HORZ, 0, 0, ss_x, ss_y); + pc_tree->horizontal[1] = + av1_alloc_pc_tree_node(mi_row + hbh, mi_col, subsize, pc_tree, + PARTITION_HORZ, 1, 1, ss_x, ss_y); + av1_rd_use_partition(cpi, td, tile_data, mib, tp, mi_row, mi_col, subsize, + &last_part_rdc.rate, &last_part_rdc.dist, 1, + ptree ? ptree->sub_tree[0] : NULL, + pc_tree->horizontal[0]); +#else // CONFIG_EXT_RECUR_PARTITIONS for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { pc_tree->horizontal[i] = - av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf); + av1_alloc_pmc(cm, mi_row + hbs * i, mi_col, subsize, pc_tree, + PARTITION_HORZ, i, ss_x, ss_y, &td->shared_coeff_buf); } pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, PARTITION_HORZ, subsize, pc_tree->horizontal[0], invalid_rdc); +#endif // CONFIG_EXT_RECUR_PARTITIONS if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + hbs < mi_params->mi_rows) { RD_STATS tmp_rdc; - const PICK_MODE_CONTEXT *const ctx_h = pc_tree->horizontal[0]; av1_init_rd_stats(&tmp_rdc); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_rd_use_partition( + cpi, td, tile_data, mib + hbh * mi_params->mi_stride, tp, + mi_row + hbh, mi_col, subsize, &tmp_rdc.rate, &tmp_rdc.dist, 0, + ptree ? 
ptree->sub_tree[1] : NULL, pc_tree->horizontal[1]); +#else // CONFIG_EXT_RECUR_PARTITIONS + const PICK_MODE_CONTEXT *const ctx_h = pc_tree->horizontal[0]; av1_update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1); encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL); pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc, PARTITION_HORZ, subsize, pc_tree->horizontal[1], invalid_rdc); +#endif // CONFIG_EXT_RECUR_PARTITIONS if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { av1_invalid_rd_stats(&last_part_rdc); break; @@ -1910,23 +2278,43 @@ } break; case PARTITION_VERT: +#if CONFIG_EXT_RECUR_PARTITIONS + pc_tree->vertical[0] = av1_alloc_pc_tree_node( + mi_row, mi_col, subsize, pc_tree, PARTITION_VERT, 0, 0, ss_x, ss_y); + pc_tree->vertical[1] = + av1_alloc_pc_tree_node(mi_row, mi_col + hbw, subsize, pc_tree, + PARTITION_VERT, 1, 1, ss_x, ss_y); + av1_rd_use_partition(cpi, td, tile_data, mib, tp, mi_row, mi_col, subsize, + &last_part_rdc.rate, &last_part_rdc.dist, 1, + ptree ? ptree->sub_tree[0] : NULL, + pc_tree->vertical[0]); +#else // CONFIG_EXT_RECUR_PARTITIONS for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { pc_tree->vertical[i] = - av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf); + av1_alloc_pmc(cm, mi_row, mi_col + hbs * i, subsize, pc_tree, + PARTITION_VERT, i, ss_x, ss_y, &td->shared_coeff_buf); } pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, PARTITION_VERT, subsize, pc_tree->vertical[0], invalid_rdc); +#endif // CONFIG_EXT_RECUR_PARTITIONS if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + hbs < mi_params->mi_cols) { RD_STATS tmp_rdc; - const PICK_MODE_CONTEXT *const ctx_v = pc_tree->vertical[0]; av1_init_rd_stats(&tmp_rdc); +#if CONFIG_EXT_RECUR_PARTITIONS + av1_rd_use_partition( + cpi, td, tile_data, mib + hbw, tp, mi_row, mi_col + hbw, subsize, + &tmp_rdc.rate, &tmp_rdc.dist, 0, ptree ? 
ptree->sub_tree[1] : NULL, + pc_tree->vertical[1]); +#else // CONFIG_EXT_RECUR_PARTITIONS + const PICK_MODE_CONTEXT *const ctx_v = pc_tree->vertical[0]; av1_update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1); encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL); pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, PARTITION_VERT, subsize, pc_tree->vertical[bsize > BLOCK_8X8], invalid_rdc); +#endif // CONFIG_EXT_RECUR_PARTITIONS if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { av1_invalid_rd_stats(&last_part_rdc); break; @@ -1936,6 +2324,10 @@ last_part_rdc.rdcost += tmp_rdc.rdcost; } break; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_3: + case PARTITION_VERT_3: +#else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_SPLIT: last_part_rdc.rate = 0; last_part_rdc.dist = 0; @@ -1950,11 +2342,12 @@ continue; av1_init_rd_stats(&tmp_rdc); - av1_rd_use_partition( - cpi, td, tile_data, - mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp, - mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, - &tmp_rdc.dist, i != (SUB_PARTITIONS_SPLIT - 1), pc_tree->split[i]); + av1_rd_use_partition(cpi, td, tile_data, + mib + jj * hbs * mi_params->mi_stride + ii * hbs, + tp, mi_row + y_idx, mi_col + x_idx, subsize, + &tmp_rdc.rate, &tmp_rdc.dist, + i != (SUB_PARTITIONS_SPLIT - 1), NULL, + pc_tree->split[i]); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { av1_invalid_rd_stats(&last_part_rdc); break; @@ -1969,6 +2362,7 @@ case PARTITION_HORZ_B: case PARTITION_HORZ_4: case PARTITION_VERT_4: +#endif // CONFIG_EXT_RECUR_PARTITIONS assert(0 && "Cannot handle extended partition types"); default: assert(0); break; } @@ -1992,7 +2386,7 @@ #endif if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; - av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); + av1_restore_context(cm, x, &x_ctx, mi_row, mi_col, bsize, num_planes); // We must have chosen a partitioning and encoding or we'll fail later on. 
// No other opportunities for success. @@ -2006,15 +2400,31 @@ // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS, // bsize, pc_tree, &rate_coeffs); #if CONFIG_SDP - x->cb_offset[plane_type] = 0; -#else - x->cb_offset = 0; -#endif + const int plane_start = (xd->tree_type == CHROMA_PART); + const int plane_end = (xd->tree_type == LUMA_PART) ? 1 : num_planes; + for (int plane = plane_start; plane < plane_end; plane++) { + x->cb_offset[plane] = 0; + } + av1_reset_ptree_in_sbi(xd->sbi, xd->tree_type); encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, - pc_tree, NULL); + pc_tree, xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)], +#if CONFIG_EXT_RECUR_PARTITIONS + NULL, +#endif // CONFIG_EXT_RECUR_PARTITIONS + NULL); +#else + memset(x->cb_offset, 0, sizeof(x->cb_offset)); + av1_reset_ptree_in_sbi(xd->sbi); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, + pc_tree, xd->sbi->ptree_root, NULL); +#endif // CONFIG_SDP } else { encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, - pc_tree, NULL); + pc_tree, NULL, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + NULL, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + NULL); } } @@ -2023,6 +2433,7 @@ x->rdmult = orig_rdmult; } +#if !CONFIG_EXT_RECUR_PARTITIONS // Try searching for an encoding for the given subblock. Returns zero if the // rdcost is already too high (to tell the caller not to bother searching for // encodings of further subblocks). 
@@ -2106,29 +2517,71 @@ pc_tree->partitioning = partition; return true; } +#endif // !CONFIG_EXT_RECUR_PARTITIONS + +#if CONFIG_EXT_RECUR_PARTITIONS +static INLINE int check_is_chroma_size_valid(PARTITION_TYPE partition, + BLOCK_SIZE bsize, int mi_row, + int mi_col, int ss_x, int ss_y, + const PC_TREE *pc_tree) { + const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); + int is_valid = 0; + if (subsize < BLOCK_SIZES_ALL) { + CHROMA_REF_INFO tmp_chr_ref_info = { + 1, 0, mi_row, mi_col, subsize, subsize + }; + set_chroma_ref_info(mi_row, mi_col, 0, subsize, &tmp_chr_ref_info, + &pc_tree->chroma_ref_info, bsize, partition, ss_x, + ss_y); + is_valid = get_plane_block_size(tmp_chr_ref_info.bsize_base, ss_x, ss_y) != + BLOCK_INVALID; + } + return is_valid; +} +#endif // CONFIG_EXT_RECUR_PARTITIONS // Initialize state variables of partition search used in // av1_rd_pick_partition(). static void init_partition_search_state_params( MACROBLOCK *x, AV1_COMP *const cpi, PartitionSearchState *part_search_state, +#if CONFIG_EXT_RECUR_PARTITIONS + PC_TREE *pc_tree, +#endif // CONFIG_EXT_RECUR_PARTITIONS int mi_row, int mi_col, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; const AV1_COMMON *const cm = &cpi->common; PartitionBlkParams *blk_params = &part_search_state->part_blk_params; const CommonModeInfoParams *const mi_params = &cpi->common.mi_params; + assert(bsize < BLOCK_SIZES_ALL); + // Initialization of block size related parameters. 
blk_params->mi_step = mi_size_wide[bsize] / 2; +#if CONFIG_EXT_RECUR_PARTITIONS + blk_params->mi_step_h = mi_size_high[bsize] / 2; + blk_params->mi_step_w = mi_size_wide[bsize] / 2; +#endif // CONFIG_EXT_RECUR_PARTITIONS blk_params->mi_row = mi_row; blk_params->mi_col = mi_col; +#if CONFIG_EXT_RECUR_PARTITIONS + blk_params->mi_row_edge = mi_row + blk_params->mi_step_h; + blk_params->mi_col_edge = mi_col + blk_params->mi_step_w; +#else // CONFIG_EXT_RECUR_PARTITIONS blk_params->mi_row_edge = mi_row + blk_params->mi_step; blk_params->mi_col_edge = mi_col + blk_params->mi_step; +#endif // CONFIG_EXT_RECUR_PARTITIONS blk_params->width = block_size_wide[bsize]; +#if CONFIG_EXT_RECUR_PARTITIONS + blk_params->min_partition_size = x->sb_enc.min_partition_size; +#else blk_params->min_partition_size_1d = block_size_wide[x->sb_enc.min_partition_size]; +#endif // CONFIG_EXT_RECUR_PARTITIONS blk_params->subsize = get_partition_subsize(bsize, PARTITION_SPLIT); blk_params->split_bsize2 = blk_params->subsize; +#if !CONFIG_EXT_RECUR_PARTITIONS blk_params->bsize_at_least_8x8 = (bsize >= BLOCK_8X8); +#endif // !CONFIG_EXT_RECUR_PARTITIONS blk_params->bsize = bsize; // Check if the partition corresponds to edge block. @@ -2145,17 +2598,42 @@ // Set partition plane context index. part_search_state->pl_ctx_idx = +#if CONFIG_EXT_RECUR_PARTITIONS + is_partition_point(bsize) +#else blk_params->bsize_at_least_8x8 +#endif // CONFIG_EXT_RECUR_PARTITIONS ? 
partition_plane_context(xd, mi_row, mi_col, bsize) : 0; // Partition cost buffer update ModeCosts *mode_costs = &x->mode_costs; +#if CONFIG_EXT_RECUR_PARTITIONS + const int pl = part_search_state->pl_ctx_idx; + if (is_square_block(bsize)) { #if CONFIG_SDP + part_search_state->partition_cost = + mode_costs->partition_cost[xd->tree_type == CHROMA_PART][pl]; +#else + part_search_state->partition_cost = mode_costs->partition_cost[pl]; +#endif // CONFIG_SDP + } else { + for (PARTITION_TYPE p = PARTITION_NONE; p < EXT_PARTITION_TYPES; ++p) { + PARTITION_TYPE_REC p_rec = get_symbol_from_partition_rec_block(bsize, p); + + if (p_rec != PARTITION_INVALID_REC) + part_search_state->partition_cost_table[p] = + mode_costs->partition_rec_cost[pl][p_rec]; + else + part_search_state->partition_cost_table[p] = INT_MAX; + } + part_search_state->partition_cost = part_search_state->partition_cost_table; + } +#elif CONFIG_SDP part_search_state->partition_cost = mode_costs->partition_cost[xd->tree_type == CHROMA_PART] [part_search_state->pl_ctx_idx]; -#else +#else // !CONFIG_EXT_RECUR_PARTITIONS && !CONFIG_SDP part_search_state->partition_cost = mode_costs->partition_cost[part_search_state->pl_ctx_idx]; #endif @@ -2185,55 +2663,144 @@ // Initialize partition search flags to defaults. 
part_search_state->terminate_partition_search = 0; + + av1_zero(part_search_state->prune_rect_part); + #if CONFIG_SDP +#if !CONFIG_EXT_RECUR_PARTITIONS part_search_state->do_square_split = blk_params->bsize_at_least_8x8 && (xd->tree_type != CHROMA_PART || bsize > BLOCK_8X8); part_search_state->do_rectangular_split = cpi->oxcf.part_cfg.enable_rect_partitions && (xd->tree_type != CHROMA_PART || bsize > BLOCK_8X8); +#else + part_search_state->do_rectangular_split = + cpi->oxcf.part_cfg.enable_rect_partitions && + (xd->tree_type != CHROMA_PART || is_bsize_gt(bsize, BLOCK_8X8)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS - av1_zero(part_search_state->prune_rect_part); + const BLOCK_SIZE horz_subsize = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE vert_subsize = get_partition_subsize(bsize, PARTITION_VERT); +#if CONFIG_EXT_RECUR_PARTITIONS + // TODO(chiyotsai,yuec@google.com): Fix the rect_allowed condition when both + // SDP and ERP are on. + const int is_horz_size_valid = + is_partition_valid(bsize, PARTITION_HORZ) && + IMPLIES(xd->tree_type == SHARED_PART, + check_is_chroma_size_valid(PARTITION_HORZ, bsize, mi_row, mi_col, + part_search_state->ss_x, + part_search_state->ss_y, pc_tree)); + + const int is_vert_size_valid = + is_partition_valid(bsize, PARTITION_VERT) && + IMPLIES(xd->tree_type == SHARED_PART, + check_is_chroma_size_valid(PARTITION_VERT, bsize, mi_row, mi_col, + part_search_state->ss_x, + part_search_state->ss_y, pc_tree)); +#else + const int is_horz_size_valid = + horz_subsize != BLOCK_INVALID && + get_plane_block_size(horz_subsize, part_search_state->ss_x, + part_search_state->ss_y) != BLOCK_INVALID; + const int is_vert_size_valid = + vert_subsize != BLOCK_INVALID && + get_plane_block_size(vert_subsize, part_search_state->ss_x, + part_search_state->ss_y) != BLOCK_INVALID; +#endif // CONFIG_EXT_RECUR_PARTITIONS +#if !CONFIG_EXT_RECUR_PARTITIONS + const bool no_sub_16_chroma_part = + xd->tree_type != CHROMA_PART || + 
(block_size_wide[bsize] > 8 && block_size_high[bsize] > 8); +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Initialize allowed partition types for the partition block. + part_search_state->is_block_splittable = is_partition_point(bsize); +#if CONFIG_EXT_RECUR_PARTITIONS + part_search_state->partition_none_allowed = + (xd->tree_type == CHROMA_PART && bsize == BLOCK_8X8) || + (blk_params->has_rows && blk_params->has_cols && + is_bsize_geq(blk_params->bsize, blk_params->min_partition_size)); +#else part_search_state->partition_none_allowed = blk_params->has_rows && blk_params->has_cols; +#endif // CONFIG_EXT_RECUR_PARTITIONS part_search_state->partition_rect_allowed[HORZ] = - blk_params->has_cols && blk_params->bsize_at_least_8x8 && - cpi->oxcf.part_cfg.enable_rect_partitions && - (xd->tree_type != CHROMA_PART || bsize > BLOCK_8X8) && - get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ), - part_search_state->ss_x, - part_search_state->ss_y) != BLOCK_INVALID; - part_search_state->partition_rect_allowed[VERT] = - blk_params->has_rows && blk_params->bsize_at_least_8x8 && - (xd->tree_type != CHROMA_PART || bsize > BLOCK_8X8) && - cpi->oxcf.part_cfg.enable_rect_partitions && - get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT), - part_search_state->ss_x, - part_search_state->ss_y) != BLOCK_INVALID; +#if CONFIG_EXT_RECUR_PARTITIONS + (blk_params->has_cols || !blk_params->has_rows) && #else + blk_params->has_cols && +#endif // CONFIG_EXT_RECUR_PARTITIONS +#if !CONFIG_EXT_RECUR_PARTITIONS + blk_params->bsize_at_least_8x8 && no_sub_16_chroma_part && +#endif // !CONFIG_EXT_RECUR_PARTITIONS + cpi->oxcf.part_cfg.enable_rect_partitions && +#if CONFIG_EXT_RECUR_PARTITIONS + is_bsize_geq(horz_subsize, blk_params->min_partition_size) && +#endif // CONFIG_EXT_RECUR_PARTITIONS + is_horz_size_valid; + part_search_state->partition_rect_allowed[VERT] = +#if CONFIG_EXT_RECUR_PARTITIONS + (blk_params->has_rows || !blk_params->has_cols) && +#else + 
blk_params->has_rows && +#endif // CONFIG_EXT_RECUR_PARTITIONS +#if !CONFIG_EXT_RECUR_PARTITIONS + blk_params->bsize_at_least_8x8 && no_sub_16_chroma_part && +#endif // !CONFIG_EXT_RECUR_PARTITIONS + cpi->oxcf.part_cfg.enable_rect_partitions && +#if CONFIG_EXT_RECUR_PARTITIONS + is_bsize_geq(vert_subsize, blk_params->min_partition_size) && +#endif // CONFIG_EXT_RECUR_PARTITIONS + is_vert_size_valid; +#else // !CONFIG_SDP +#if !CONFIG_EXT_RECUR_PARTITIONS part_search_state->do_square_split = blk_params->bsize_at_least_8x8; +#endif // !CONFIG_EXT_RECUR_PARTITIONS part_search_state->do_rectangular_split = cpi->oxcf.part_cfg.enable_rect_partitions; - av1_zero(part_search_state->prune_rect_part); // Initialize allowed partition types for the partition block. +#if CONFIG_EXT_RECUR_PARTITIONS + int is_chroma_size_valid_horz = check_is_chroma_size_valid( + PARTITION_HORZ, bsize, mi_row, mi_col, part_search_state->ss_x, + part_search_state->ss_y, pc_tree); + + int is_chroma_size_valid_vert = check_is_chroma_size_valid( + PARTITION_VERT, bsize, mi_row, mi_col, part_search_state->ss_x, + part_search_state->ss_y, pc_tree); +#endif // CONFIG_EXT_RECUR_PARTITIONS + + part_search_state->is_block_splittable = is_partition_point(bsize); part_search_state->partition_none_allowed = blk_params->has_rows && blk_params->has_cols; part_search_state->partition_rect_allowed[HORZ] = - blk_params->has_cols && blk_params->bsize_at_least_8x8 && - cpi->oxcf.part_cfg.enable_rect_partitions && +#if CONFIG_EXT_RECUR_PARTITIONS + (blk_params->has_cols || !blk_params->has_rows) && + is_partition_valid(bsize, PARTITION_HORZ) && is_chroma_size_valid_horz && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ), + blk_params->min_partition_size) && +#else // CONFIG_EXT_RECUR_PARTITIONS + blk_params->has_cols && is_partition_valid(bsize, PARTITION_HORZ) && get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ), part_search_state->ss_x, - part_search_state->ss_y) != BLOCK_INVALID; + 
part_search_state->ss_y) != BLOCK_INVALID && +#endif // CONFIG_EXT_RECUR_PARTITIONS + cpi->oxcf.part_cfg.enable_rect_partitions; part_search_state->partition_rect_allowed[VERT] = - blk_params->has_rows && blk_params->bsize_at_least_8x8 && - cpi->oxcf.part_cfg.enable_rect_partitions && +#if CONFIG_EXT_RECUR_PARTITIONS + (blk_params->has_rows || !blk_params->has_cols) && + is_partition_valid(bsize, PARTITION_VERT) && is_chroma_size_valid_vert && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT), + blk_params->min_partition_size) && +#else // CONFIG_EXT_RECUR_PARTITIONS + blk_params->has_rows && is_partition_valid(bsize, PARTITION_VERT) && get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT), part_search_state->ss_x, - part_search_state->ss_y) != BLOCK_INVALID; -#endif + part_search_state->ss_y) != BLOCK_INVALID && +#endif // CONFIG_EXT_RECUR_PARTITIONS + cpi->oxcf.part_cfg.enable_rect_partitions; +#endif // CONFIG_SDP // Reset the flag indicating whether a partition leading to a rdcost lower // than the bound best_rdc has been found. 
@@ -2247,8 +2814,29 @@ PartitionSearchState *part_search_state) { #else AV1_COMMON const *cm, PartitionSearchState *part_search_state) { -#endif +#endif // CONFIG_SDP PartitionBlkParams blk_params = part_search_state->part_blk_params; +#if CONFIG_EXT_RECUR_PARTITIONS + const int has_rows = blk_params.has_rows; + const int has_cols = blk_params.has_cols; + (void)cm; + if (!(has_rows && has_cols)) { + if (!has_rows && !has_cols) { + // At the bottom right, horz or vert + aom_cdf_prob binary_cdf[2] = { 16384, AOM_ICDF(CDF_PROB_TOP) }; + static const int binary_inv_map[2] = { PARTITION_HORZ, PARTITION_VERT }; + av1_cost_tokens_from_cdf(part_search_state->tmp_partition_cost, + binary_cdf, binary_inv_map); + } else { + for (int i = 0; i < PARTITION_TYPES; ++i) + part_search_state->tmp_partition_cost[i] = 0; + } + part_search_state->partition_cost = part_search_state->tmp_partition_cost; + } +#if CONFIG_SDP + (void)xd; +#endif // CONFIG_SDP +#else // CONFIG_EXT_RECUR_PARTITIONS assert(blk_params.bsize_at_least_8x8 && part_search_state->pl_ctx_idx >= 0); #if CONFIG_SDP const int plane = xd->tree_type == CHROMA_PART; @@ -2281,36 +2869,108 @@ } // Override the partition cost buffer. part_search_state->partition_cost = part_search_state->tmp_partition_cost; +#endif // CONFIG_EXT_RECUR_PARTITIONS } // Reset the partition search state flags when // must_find_valid_partition is equal to 1. 
static AOM_INLINE void reset_part_limitations( - AV1_COMP *const cpi, PartitionSearchState *part_search_state) { + AV1_COMP *const cpi, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + TREE_TYPE tree_type, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_EXT_RECUR_PARTITIONS + PC_TREE *pc_tree, +#endif // CONFIG_EXT_RECUR_PARTITIONS + PartitionSearchState *part_search_state) { PartitionBlkParams blk_params = part_search_state->part_blk_params; - const int is_rect_part_allowed = - blk_params.bsize_at_least_8x8 && - cpi->oxcf.part_cfg.enable_rect_partitions && - (blk_params.width > blk_params.min_partition_size_1d); +#if !CONFIG_EXT_RECUR_PARTITIONS part_search_state->do_square_split = blk_params.bsize_at_least_8x8 && (blk_params.width > blk_params.min_partition_size_1d); +#endif // !CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + part_search_state->partition_none_allowed = + (tree_type == CHROMA_PART && blk_params.bsize == BLOCK_8X8) || + (blk_params.has_rows && blk_params.has_cols && + is_bsize_geq(blk_params.bsize, blk_params.min_partition_size)); +#else part_search_state->partition_none_allowed = blk_params.has_rows && blk_params.has_cols && +#if CONFIG_EXT_RECUR_PARTITIONS + is_bsize_geq(blk_params.bsize, blk_params.min_partition_size); +#else (blk_params.width >= blk_params.min_partition_size_1d); +#endif // CONFIG_EXT_RECUR_PARTITIONS +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + + // Initialize allowed partition types for the partition block. 
+#if CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE horz_subsize = + get_partition_subsize(blk_params.bsize, PARTITION_HORZ); + const BLOCK_SIZE vert_subsize = + get_partition_subsize(blk_params.bsize, PARTITION_VERT); + const int is_chroma_size_valid_horz = check_is_chroma_size_valid( + PARTITION_HORZ, blk_params.bsize, blk_params.mi_row, blk_params.mi_col, + part_search_state->ss_x, part_search_state->ss_y, pc_tree); + const int is_chroma_size_valid_vert = check_is_chroma_size_valid( + PARTITION_VERT, blk_params.bsize, blk_params.mi_row, blk_params.mi_col, + part_search_state->ss_x, part_search_state->ss_y, pc_tree); +#endif // CONFIG_EXT_RECUR_PARTITIONS part_search_state->partition_rect_allowed[HORZ] = - blk_params.has_cols && is_rect_part_allowed && +#if CONFIG_EXT_RECUR_PARTITIONS + (blk_params.has_cols || !blk_params.has_rows) && + is_partition_valid(blk_params.bsize, PARTITION_HORZ) && + is_chroma_size_valid_horz && + is_bsize_geq(horz_subsize, blk_params.min_partition_size) && +#else // CONFIG_EXT_RECUR_PARTITIONS + blk_params.has_cols && + is_partition_valid(blk_params.bsize, PARTITION_HORZ) && get_plane_block_size( get_partition_subsize(blk_params.bsize, PARTITION_HORZ), - part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID; + part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID && + (blk_params.width > blk_params.min_partition_size_1d) && +#endif // CONFIG_EXT_RECUR_PARTITIONS + cpi->oxcf.part_cfg.enable_rect_partitions; part_search_state->partition_rect_allowed[VERT] = - blk_params.has_rows && is_rect_part_allowed && +#if CONFIG_EXT_RECUR_PARTITIONS + (blk_params.has_rows || !blk_params.has_cols) && + is_partition_valid(blk_params.bsize, PARTITION_VERT) && + is_chroma_size_valid_vert && + is_bsize_geq(vert_subsize, blk_params.min_partition_size) && +#else // CONFIG_EXT_RECUR_PARTITIONS + blk_params.has_rows && + is_partition_valid(blk_params.bsize, PARTITION_VERT) && get_plane_block_size( 
get_partition_subsize(blk_params.bsize, PARTITION_VERT), - part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID; + part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID && + (blk_params.width > blk_params.min_partition_size_1d) && +#endif // CONFIG_EXT_RECUR_PARTITIONS + cpi->oxcf.part_cfg.enable_rect_partitions; part_search_state->terminate_partition_search = 0; +#if CONFIG_EXT_RECUR_PARTITIONS + if (!is_square_block(blk_params.bsize)) { + if (!part_search_state->partition_rect_allowed[HORZ] && + !part_search_state->partition_rect_allowed[VERT] && + !part_search_state->partition_none_allowed) { + if (block_size_wide[blk_params.bsize] > + block_size_high[blk_params.bsize]) { + if (is_bsize_geq(vert_subsize, blk_params.min_partition_size)) { + part_search_state->partition_rect_allowed[VERT] = 1; + } + } else { + if (is_bsize_geq(horz_subsize, blk_params.min_partition_size)) { + part_search_state->partition_rect_allowed[HORZ] = 1; + } + } + } + } +#endif // CONFIG_EXT_RECUR_PARTITIONS } +static const int rect_partition_type[NUM_RECT_PARTS] = { PARTITION_HORZ, + PARTITION_VERT }; +#if !CONFIG_EXT_RECUR_PARTITIONS // Rectangular partitions evaluation at sub-block level. 
static void rd_pick_rect_partition(AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x, @@ -2343,9 +3003,86 @@ part_search_state->rect_part_rd[rect_part][idx] = part_search_state->this_rdc.rdcost; } +#else +static void rd_pick_rect_partition( + AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, + TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree, + PartitionSearchState *part_search_state, RD_STATS *best_rdc, + RECT_PART_TYPE rect_type, + const int mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][2], + BLOCK_SIZE bsize, const int is_not_edge_block[NUM_RECT_PARTS], + SB_MULTI_PASS_MODE multi_pass_mode +#if CONFIG_SDP + , + const PARTITION_TREE *ptree_luma, const PARTITION_TREE *template_tree +#endif // CONFIG_SDP +) { + const PARTITION_TYPE partition_type = rect_partition_type[rect_type]; + RD_STATS *sum_rdc = &part_search_state->sum_rdc; + + sum_rdc->rate = part_search_state->partition_cost[partition_type]; + sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, 0); + + RD_STATS this_rdc; + RD_STATS best_remain_rdcost; + PC_TREE **sub_tree = + (rect_type == HORZ) ? pc_tree->horizontal : pc_tree->vertical; +#if CONFIG_SDP + const int track_ptree_luma = + ptree_luma && ptree_luma->partition == partition_type; +#endif // CONFIG_SDP + av1_rd_stats_subtraction(x->rdmult, best_rdc, sum_rdc, &best_remain_rdcost); + bool partition_found = av1_rd_pick_partition( + cpi, td, tile_data, tp, mi_pos_rect[rect_type][0][0], + mi_pos_rect[rect_type][0][1], bsize, &this_rdc, best_remain_rdcost, + sub_tree[0], +#if CONFIG_SDP + track_ptree_luma ? 
ptree_luma->sub_tree[0] : NULL, + get_partition_subtree_const(template_tree, 0), +#endif // CONFIG_SDP + NULL, NULL, multi_pass_mode, NULL); + av1_rd_cost_update(x->rdmult, &this_rdc); + if (!partition_found) { + av1_invalid_rd_stats(sum_rdc); + return; + } else { + sum_rdc->rate += this_rdc.rate; + sum_rdc->dist += this_rdc.dist; + av1_rd_cost_update(x->rdmult, sum_rdc); + } + part_search_state->rect_part_rd[rect_type][0] = this_rdc.rdcost; + + if (sum_rdc->rdcost < best_rdc->rdcost && is_not_edge_block[rect_type]) { + av1_rd_stats_subtraction(x->rdmult, best_rdc, sum_rdc, &best_remain_rdcost); + partition_found = av1_rd_pick_partition( + cpi, td, tile_data, tp, mi_pos_rect[rect_type][1][0], + mi_pos_rect[rect_type][1][1], bsize, &this_rdc, best_remain_rdcost, + sub_tree[1], +#if CONFIG_SDP + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, + get_partition_subtree_const(template_tree, 1), +#endif // CONFIG_SDP + NULL, NULL, multi_pass_mode, NULL); + av1_rd_cost_update(x->rdmult, &this_rdc); + part_search_state->rect_part_rd[rect_type][1] = this_rdc.rdcost; + + if (!partition_found) { + av1_invalid_rd_stats(sum_rdc); + return; + } else { + sum_rdc->rate += this_rdc.rate; + sum_rdc->dist += this_rdc.dist; + av1_rd_cost_update(x->rdmult, sum_rdc); + } + } +} +#endif typedef int (*active_edge_info)(const AV1_COMP *cpi, int mi_col, int mi_step); +#define IS_FORCED_PARTITION_TYPE(cur_partition) \ + (forced_partition == PARTITION_INVALID || forced_partition == (cur_partition)) + // Checks if HORZ / VERT partition search is allowed. 
static AOM_INLINE int is_rect_part_allowed( const AV1_COMP *cpi, PartitionSearchState *part_search_state, @@ -2355,23 +3092,75 @@ (!part_search_state->terminate_partition_search && part_search_state->partition_rect_allowed[rect_part] && !part_search_state->prune_rect_part[rect_part] && +#if CONFIG_EXT_RECUR_PARTITIONS + is_partition_valid(blk_params.bsize, rect_partition_type[rect_part]) && +#endif // CONFIG_EXT_RECUR_PARTITIONS (part_search_state->do_rectangular_split || active_edge[rect_part](cpi, mi_pos, blk_params.mi_step))); return is_part_allowed; } +#if CONFIG_EXT_RECUR_PARTITIONS +static INLINE int is_bsize_pruning_cand(BLOCK_SIZE bsize) { + if (bsize == BLOCK_INVALID) { + return 0; + } + + const int avg_bsize = (block_size_wide[bsize] + block_size_high[bsize]) / 2; + return avg_bsize <= 32; +} + +static AOM_INLINE PARTITION_TYPE get_forced_partition_type( + const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, int mi_col, + BLOCK_SIZE bsize +#if CONFIG_SDP + , + const PARTITION_TREE *template_tree +#endif // CONFIG_SDP +) { +#if CONFIG_SDP + if (template_tree) { + return template_tree->partition; + } +#endif // CONFIG_SDP + + if (should_reuse_mode(x, REUSE_PARTITION_MODE_FLAG)) { + return av1_get_prev_partition(x, mi_row, mi_col, bsize, + cm->seq_params.sb_size); + } + return PARTITION_INVALID; +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + // Rectangular partition types search function. 
static void rectangular_partition_search( AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, PartitionSearchState *part_search_state, RD_STATS *best_rdc, +#if CONFIG_EXT_RECUR_PARTITIONS + SB_MULTI_PASS_MODE multi_pass_mode, +#if CONFIG_SDP + const PARTITION_TREE *ptree_luma, const PARTITION_TREE *template_tree, +#endif // CONFIG_SDP +#endif // CONFIG_EXT_RECUR_PARTITIONS RD_RECT_PART_WIN_INFO *rect_part_win_info) { const AV1_COMMON *const cm = &cpi->common; PartitionBlkParams blk_params = part_search_state->part_blk_params; +#if CONFIG_EXT_RECUR_PARTITIONS + MACROBLOCKD *const xd = &x->e_mbd; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; +#if CONFIG_SDP + const PARTITION_TYPE forced_partition = + get_forced_partition_type(cm, x, blk_params.mi_row, blk_params.mi_col, + blk_params.bsize, template_tree); +#else + const PARTITION_TYPE forced_partition = get_forced_partition_type( + cm, x, blk_params.mi_row, blk_params.mi_col, blk_params.bsize); +#endif // CONFIG_SDP +#endif // CONFIG_EXT_RECUR_PARTITIONS RD_STATS *sum_rdc = &part_search_state->sum_rdc; - const int rect_partition_type[NUM_RECT_PARTS] = { PARTITION_HORZ, - PARTITION_VERT }; // mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][0]: mi_row postion of // HORZ and VERT partition types. @@ -2392,12 +3181,14 @@ // Indicates edge blocks for HORZ and VERT partition types. const int is_not_edge_block[NUM_RECT_PARTS] = { blk_params.has_rows, blk_params.has_cols }; +#if !CONFIG_EXT_RECUR_PARTITIONS // Initialize pc tree context for HORZ and VERT partition types. PICK_MODE_CONTEXT **cur_ctx[NUM_RECT_PARTS][SUB_PARTITIONS_RECT] = { { &pc_tree->horizontal[0], &pc_tree->horizontal[1] }, { &pc_tree->vertical[0], &pc_tree->vertical[1] } }; +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Loop over rectangular partition types. 
for (RECT_PART_TYPE i = HORZ; i < NUM_RECT_PARTS; i++) { @@ -2410,16 +3201,67 @@ continue; // Sub-partition idx. - int sub_part_idx = 0; - PARTITION_TYPE partition_type = rect_partition_type[i]; + const PARTITION_TYPE partition_type = rect_partition_type[i]; blk_params.subsize = get_partition_subsize(blk_params.bsize, partition_type); + const int part_hv_rate = part_search_state->partition_cost[partition_type]; + if (part_hv_rate == INT_MAX || + RDCOST(x->rdmult, part_hv_rate, 0) >= best_rdc->rdcost) { + continue; + } +#if !CONFIG_EXT_RECUR_PARTITIONS assert(blk_params.subsize <= BLOCK_LARGEST); +#endif // !CONFIG_EXT_RECUR_PARTITIONS av1_init_rd_stats(sum_rdc); +#if CONFIG_EXT_RECUR_PARTITIONS + if (!IS_FORCED_PARTITION_TYPE(partition_type)) { + continue; + } + + if (cpi->sf.part_sf.enable_fast_erp && !frame_is_intra_only(cm) && + !x->must_find_valid_partition && + is_bsize_pruning_cand(blk_params.bsize)) { + if (av1_prune_part_hv_with_sms(cpi, tile_data, x, part_search_state, + best_rdc, &blk_params, i, part_hv_rate)) { + continue; + } + } + + PC_TREE **sub_tree = (i == HORZ) ? 
pc_tree->horizontal : pc_tree->vertical; + + const int num_planes = av1_num_planes(cm); + for (int idx = 0; idx < SUB_PARTITIONS_RECT; idx++) { + if (sub_tree[idx]) { + av1_free_pc_tree_recursive(sub_tree[idx], num_planes, 0, 0); + sub_tree[idx] = NULL; + } + } + sub_tree[0] = av1_alloc_pc_tree_node( + mi_pos_rect[i][0][0], mi_pos_rect[i][0][1], blk_params.subsize, pc_tree, + partition_type, 0, 0, ss_x, ss_y); + sub_tree[1] = av1_alloc_pc_tree_node( + mi_pos_rect[i][1][0], mi_pos_rect[i][1][1], blk_params.subsize, pc_tree, + partition_type, 1, 1, ss_x, ss_y); + +#if CONFIG_SDP + rd_pick_rect_partition(cpi, td, tile_data, tp, x, pc_tree, + part_search_state, best_rdc, i, mi_pos_rect, + blk_params.subsize, is_not_edge_block, + multi_pass_mode, ptree_luma, template_tree); +#else + rd_pick_rect_partition( + cpi, td, tile_data, tp, x, pc_tree, part_search_state, best_rdc, i, + mi_pos_rect, blk_params.subsize, is_not_edge_block, multi_pass_mode); +#endif // CONFIG_SDP +#else + int sub_part_idx = 0; for (int j = 0; j < SUB_PARTITIONS_RECT; j++) { + assert(cur_ctx[i][j] != NULL); if (cur_ctx[i][j][0] == NULL) { - cur_ctx[i][j][0] = - av1_alloc_pmc(cm, blk_params.subsize, &td->shared_coeff_buf); + cur_ctx[i][j][0] = av1_alloc_pmc( + cm, mi_pos_rect[i][j][0], mi_pos_rect[i][j][1], blk_params.subsize, + pc_tree, partition_type, j, part_search_state->ss_x, + part_search_state->ss_y, &td->shared_coeff_buf); } } sum_rdc->rate = part_search_state->partition_cost[partition_type]; @@ -2462,6 +3304,7 @@ best_rdc, 1, mi_pos_rect[i][sub_part_idx][0], mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type); } +#endif // CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_COLLECT_PARTITION_STATS if (partition_timer_on) { aom_usec_timer_mark(&partition_timer); @@ -2483,11 +3326,12 @@ if (rect_part_win_info != NULL) rect_part_win_info->rect_part_win[i] = false; } - av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col, + av1_restore_context(cm, x, x_ctx, blk_params.mi_row, 
blk_params.mi_col, blk_params.bsize, av1_num_planes(cm)); } } +#if !CONFIG_EXT_RECUR_PARTITIONS // AB partition type evaluation. static void rd_pick_ab_part( AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, @@ -2530,7 +3374,7 @@ partition_timer_on = 0; } #endif - av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); } // Check if AB partitions search is allowed. @@ -2647,9 +3491,13 @@ blk_params.subsize = get_partition_subsize(bsize, part_type); for (int i = 0; i < SUB_PARTITIONS_AB; i++) { + assert(cur_part_ctxs[ab_part_type] != NULL); // Set AB partition context. - cur_part_ctxs[ab_part_type][i] = - av1_alloc_pmc(cm, ab_subsize[ab_part_type][i], &td->shared_coeff_buf); + cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc( + cm, ab_mi_pos[ab_part_type][i][0], ab_mi_pos[ab_part_type][i][1], + ab_subsize[ab_part_type][i], pc_tree, part_type, i, + part_search_state->ss_x, part_search_state->ss_y, + &td->shared_coeff_buf); // Set mode as not ready. cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0; } @@ -2691,7 +3539,7 @@ MACROBLOCK *x, const AV1_COMMON *const cm, ThreadData *td, PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4], PartitionSearchState *part_search_state, PARTITION_TYPE partition_type, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize, int mi_pos[SUB_PARTITIONS_PART4][2], PC_TREE *pc_tree) { // Initialize sum_rdc RD cost structure. 
av1_init_rd_stats(&part_search_state->sum_rdc); const int subsize = get_partition_subsize(bsize, partition_type); @@ -2699,8 +3547,12 @@ part_search_state->partition_cost[partition_type]; part_search_state->sum_rdc.rdcost = RDCOST(x->rdmult, part_search_state->sum_rdc.rate, 0); - for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i) - cur_part_ctx[i] = av1_alloc_pmc(cm, subsize, &td->shared_coeff_buf); + for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i) { + cur_part_ctx[i] = + av1_alloc_pmc(cm, mi_pos[i][0], mi_pos[i][1], subsize, pc_tree, + partition_type, i, part_search_state->ss_x, + part_search_state->ss_y, &td->shared_coeff_buf); + } } // Partition search of HORZ4 / VERT4 partition types. @@ -2719,11 +3571,11 @@ int mi_pos[SUB_PARTITIONS_PART4][2]; blk_params.subsize = get_partition_subsize(blk_params.bsize, partition_type); - // Set partition context and RD cost. - set_4_part_ctx_and_rdcost(x, cm, td, cur_part_ctx, part_search_state, - partition_type, blk_params.bsize); // Set mi positions for sub-block sizes. set_mi_pos_partition4(inc_step, mi_pos, blk_params.mi_row, blk_params.mi_col); + // Set partition context and RD cost. + set_4_part_ctx_and_rdcost(x, cm, td, cur_part_ctx, part_search_state, + partition_type, blk_params.bsize, mi_pos, pc_tree); #if CONFIG_COLLECT_PARTITION_STATS if (best_rdc.rdcost - part_search_state->sum_rdc.rdcost >= 0) { partition_attempts[partition_type] += 1; @@ -2761,7 +3613,7 @@ partition_timer_on = 0; } #endif - av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col, + av1_restore_context(cm, x, x_ctx, blk_params.mi_row, blk_params.mi_col, blk_params.bsize, av1_num_planes(cm)); } @@ -2822,7 +3674,8 @@ part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID; } - // Pruning: pruning out 4-way partitions based on the current best partition. + // Pruning: pruning out 4-way partitions based on the current best + // partition. 
if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 2) { part4_search_allowed[HORZ4] &= (pc_tree->partitioning == PARTITION_HORZ || pc_tree->partitioning == PARTITION_HORZ_A || @@ -2848,18 +3701,34 @@ pb_source_variance, mi_row, mi_col); } - // Pruning: pruning out 4-way partitions based on the number of horz/vert wins - // in the current block and sub-blocks in PARTITION_SPLIT. + // Pruning: pruning out 4-way partitions based on the number of horz/vert + // wins in the current block and sub-blocks in PARTITION_SPLIT. prune_4_partition_using_split_info(cpi, x, part_search_state, part4_search_allowed); } +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Set PARTITION_NONE allowed flag. static AOM_INLINE void set_part_none_allowed_flag( - AV1_COMP *const cpi, PartitionSearchState *part_search_state) { + const AV1_COMP *const cpi, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + TREE_TYPE tree_type, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + PartitionSearchState *part_search_state) { PartitionBlkParams blk_params = part_search_state->part_blk_params; +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + if (tree_type == CHROMA_PART && blk_params.bsize == BLOCK_8X8) { + part_search_state->partition_none_allowed = 1; + return; + } +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_EXT_RECUR_PARTITIONS + if (is_bsize_geq(blk_params.min_partition_size, blk_params.bsize) && + blk_params.has_rows && blk_params.has_cols) +#else if ((blk_params.width <= blk_params.min_partition_size_1d) && blk_params.has_rows && blk_params.has_cols) +#endif // CONFIG_EXT_RECUR_PARTITIONS part_search_state->partition_none_allowed = 1; assert(part_search_state->terminate_partition_search == 0); @@ -2887,11 +3756,14 @@ RD_STATS partition_rdcost; // Set PARTITION_NONE context. 
if (pc_tree->none == NULL) - pc_tree->none = av1_alloc_pmc(cm, blk_params.bsize, &td->shared_coeff_buf); + pc_tree->none = av1_alloc_pmc( + cm, blk_params.mi_row, blk_params.mi_col, blk_params.bsize, pc_tree, + PARTITION_NONE, 0, part_search_state->ss_x, part_search_state->ss_y, + &td->shared_coeff_buf); // Set PARTITION_NONE type cost. if (part_search_state->partition_none_allowed) { - if (blk_params.bsize_at_least_8x8) { + if (part_search_state->is_block_splittable) { *pt_cost = part_search_state->partition_cost[PARTITION_NONE] < INT_MAX ? part_search_state->partition_cost[PARTITION_NONE] : 0; @@ -2916,22 +3788,37 @@ const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; PartitionBlkParams blk_params = part_search_state->part_blk_params; +#if !CONFIG_EXT_RECUR_PARTITIONS const CommonModeInfoParams *const mi_params = &cm->mi_params; +#endif // !CONFIG_EXT_RECUR_PARTITIONS RD_STATS *this_rdc = &part_search_state->this_rdc; const BLOCK_SIZE bsize = blk_params.bsize; assert(bsize < BLOCK_SIZES_ALL); +#if CONFIG_EXT_RECUR_PARTITIONS + (void)sms_tree; +#endif // !CONFIG_EXT_RECUR_PARTITIONS + if (!frame_is_intra_only(cm) && +#if CONFIG_EXT_RECUR_PARTITIONS + part_search_state->do_rectangular_split && +#else (part_search_state->do_square_split || part_search_state->do_rectangular_split) && +#endif !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) { const int use_ml_based_breakout = bsize <= cpi->sf.part_sf.use_square_partition_only_threshold && +#if CONFIG_EXT_RECUR_PARTITIONS + is_square_block(bsize) && +#endif // CONFIG_EXT_RECUR_PARTITIONS bsize > BLOCK_4X4 && xd->bd == 8; if (use_ml_based_breakout) { if (av1_ml_predict_breakout(cpi, bsize, x, this_rdc, *pb_source_variance)) { +#if !CONFIG_EXT_RECUR_PARTITIONS part_search_state->do_square_split = 0; +#endif part_search_state->do_rectangular_split = 0; } } @@ -2951,11 +3838,14 @@ // disable the early termination at that speed. 
if (best_rdc->dist < dist_breakout_thr && best_rdc->rate < rate_breakout_thr) { +#if !CONFIG_EXT_RECUR_PARTITIONS part_search_state->do_square_split = 0; +#endif part_search_state->do_rectangular_split = 0; } } +#if !CONFIG_EXT_RECUR_PARTITIONS // Early termination: using simple_motion_search features and the // rate, distortion, and rdcost of PARTITION_NONE, a DNN will make a // decision on early terminating at PARTITION_NONE. @@ -2971,8 +3861,10 @@ cpi, x, sms_tree, blk_params.mi_row, blk_params.mi_col, bsize, this_rdc, &part_search_state->terminate_partition_search); } +#endif // !CONFIG_EXT_RECUR_PARTITIONS } +#if !CONFIG_EXT_RECUR_PARTITIONS // Decide early termination and rectangular partition pruning // based on PARTITION_NONE and PARTITION_SPLIT costs. static void prune_partitions_after_split( @@ -2986,6 +3878,13 @@ const BLOCK_SIZE bsize = blk_params.bsize; assert(bsize < BLOCK_SIZES_ALL); +#if CONFIG_EXT_RECUR_PARTITIONS + (void)sms_tree; + (void)part_none_rd; + (void)part_split_rd; +#endif // !CONFIG_EXT_RECUR_PARTITIONS + +#if !CONFIG_EXT_RECUR_PARTITIONS // Early termination: using the rd costs of PARTITION_NONE and subblocks // from PARTITION_SPLIT to determine an early breakout. if (cpi->sf.part_sf.ml_early_term_after_part_split_level && @@ -2999,6 +3898,7 @@ part_search_state->split_rd, mi_row, mi_col, &part_search_state->terminate_partition_search); } +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Use the rd costs of PARTITION_NONE and subblocks from PARTITION_SPLIT // to prune out rectangular partitions in some directions. 
@@ -3010,13 +3910,14 @@ part_search_state->prune_rect_part[VERT]) && !part_search_state->terminate_partition_search) { av1_setup_src_planes(x, cpi->source, mi_row, mi_col, av1_num_planes(cm), - bsize); + NULL); av1_ml_prune_rect_partition( cpi, x, bsize, best_rdc->rdcost, part_search_state->none_rd, part_search_state->split_rd, &part_search_state->prune_rect_part[HORZ], &part_search_state->prune_rect_part[VERT]); } } +#endif // PARTITION_NONE search. static void none_partition_search( @@ -3033,8 +3934,15 @@ const BLOCK_SIZE bsize = blk_params.bsize; assert(bsize < BLOCK_SIZES_ALL); +#if CONFIG_EXT_RECUR_PARTITIONS + (void)part_none_rd; +#endif // CONFIG_EXT_RECUR_PARTITIONS // Set PARTITION_NONE allowed flag. - set_part_none_allowed_flag(cpi, part_search_state); + set_part_none_allowed_flag(cpi, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + x->e_mbd.tree_type, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + part_search_state); if (!part_search_state->partition_none_allowed) return; int pt_cost = 0; @@ -3052,9 +3960,21 @@ partition_timer_on = 1; } #endif +#if CONFIG_EXT_RECUR_PARTITIONS + SimpleMotionData *sms_data = av1_get_sms_data_entry( + x->sms_bufs, mi_row, mi_col, bsize, cm->seq_params.sb_size); + av1_set_best_mode_cache(x, sms_data->mode_cache); +#endif // CONFIG_EXT_RECUR_PARTITIONS + // PARTITION_NONE evaluation and cost update. pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, PARTITION_NONE, bsize, pc_tree->none, best_remain_rdcost); +#if CONFIG_EXT_RECUR_PARTITIONS + x->inter_mode_cache = NULL; + if (this_rdc->rate != INT_MAX) { + av1_add_mode_search_context_to_cache(sms_data, pc_tree->none); + } +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_rd_cost_update(x->rdmult, this_rdc); #if CONFIG_COLLECT_PARTITION_STATS @@ -3078,17 +3998,23 @@ } // Calculate the total cost and update the best partition. 
- if (blk_params.bsize_at_least_8x8) { + if (part_search_state->is_block_splittable) { this_rdc->rate += pt_cost; this_rdc->rdcost = RDCOST(x->rdmult, this_rdc->rate, this_rdc->dist); } +#if !CONFIG_EXT_RECUR_PARTITIONS *part_none_rd = this_rdc->rdcost; +#endif if (this_rdc->rdcost < best_rdc->rdcost) { *best_rdc = *this_rdc; part_search_state->found_best_partition = true; +#if !CONFIG_EXT_RECUR_PARTITIONS if (blk_params.bsize_at_least_8x8) { pc_tree->partitioning = PARTITION_NONE; } +#else + pc_tree->partitioning = PARTITION_NONE; +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Disable split and rectangular partition search // based on PARTITION_NONE cost. @@ -3097,9 +4023,10 @@ pb_source_variance); } } - av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); } +#if !CONFIG_EXT_RECUR_PARTITIONS // PARTITION_SPLIT search. static void split_partition_search( AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, @@ -3122,12 +4049,6 @@ !part_search_state->do_square_split) return; - for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { - if (pc_tree->split[i] == NULL) - pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); - pc_tree->split[i]->index = i; - } - // Initialization of this partition RD stats. 
av1_init_rd_stats(&sum_rdc); sum_rdc.rate = part_search_state->partition_cost[PARTITION_SPLIT]; @@ -3151,7 +4072,11 @@ mi_col + x_idx >= mi_params->mi_cols) continue; - pc_tree->split[idx]->index = idx; + if (pc_tree->split[idx] == NULL) { + pc_tree->split[idx] = av1_alloc_pc_tree_node( + mi_row + y_idx, mi_col + x_idx, subsize, pc_tree, PARTITION_SPLIT, + idx, idx == 3, part_search_state->ss_x, part_search_state->ss_y); + } int64_t *p_split_rd = &part_search_state->split_rd[idx]; RD_STATS best_remain_rdcost; av1_rd_stats_subtraction(x->rdmult, best_rdc, &sum_rdc, @@ -3224,9 +4149,453 @@ !(partition_none_valid && partition_none_better); } } - av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); +} +#endif // !CONFIG_EXT_RECUR_PARTITIONS + +#if CONFIG_EXT_RECUR_PARTITIONS +/*!\cond */ +typedef struct { + SIMPLE_MOTION_DATA_TREE *sms_tree; + PC_TREE *pc_tree; +#if CONFIG_SDP + const PARTITION_TREE *ptree_luma; + const PARTITION_TREE *template_tree; +#endif // CONFIG_SDP + PICK_MODE_CONTEXT *ctx; + int mi_row; + int mi_col; + BLOCK_SIZE bsize; + PARTITION_TYPE partition; + int is_last_subblock; + int is_splittable; +} SUBBLOCK_RDO_DATA; +/*!\endcond */ + +// Try searching for an encoding for the given subblock. 
Returns zero if the +// rdcost is already too high (to tell the caller not to bother searching for +// encodings of further subblocks) +static int rd_try_subblock_new(AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, + SUBBLOCK_RDO_DATA *rdo_data, + RD_STATS best_rdcost, RD_STATS *sum_rdc, + SB_MULTI_PASS_MODE multi_pass_mode) { + MACROBLOCK *const x = &td->mb; + const int orig_mult = x->rdmult; + const int mi_row = rdo_data->mi_row; + const int mi_col = rdo_data->mi_col; + const BLOCK_SIZE bsize = rdo_data->bsize; + + setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); + + av1_rd_cost_update(x->rdmult, &best_rdcost); + + RD_STATS rdcost_remaining; + av1_rd_stats_subtraction(x->rdmult, &best_rdcost, sum_rdc, &rdcost_remaining); + RD_STATS this_rdc; + + if (rdo_data->is_splittable) { + if (!av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, + &this_rdc, rdcost_remaining, rdo_data->pc_tree, +#if CONFIG_SDP + rdo_data->ptree_luma, rdo_data->template_tree, +#endif // CONFIG_SDP + rdo_data->sms_tree, NULL, multi_pass_mode, + NULL)) { + av1_invalid_rd_stats(sum_rdc); + return 0; + } + } else { + const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size; + SimpleMotionData *sms_data = + av1_get_sms_data_entry(x->sms_bufs, mi_row, mi_col, bsize, sb_size); + av1_set_best_mode_cache(x, sms_data->mode_cache); + + pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, + rdo_data->partition, bsize, rdo_data->ctx, rdcost_remaining); + + x->inter_mode_cache = NULL; + if (this_rdc.rate != INT_MAX) { + av1_add_mode_search_context_to_cache(sms_data, rdo_data->ctx); + } + } + + if (this_rdc.rate == INT_MAX) { + sum_rdc->rdcost = INT64_MAX; + } else { + sum_rdc->rate += this_rdc.rate; + sum_rdc->dist += this_rdc.dist; + av1_rd_cost_update(x->rdmult, sum_rdc); + } + + if (sum_rdc->rdcost >= best_rdcost.rdcost) { + x->rdmult = orig_mult; + return 0; + } + + if (!rdo_data->is_last_subblock && !rdo_data->is_splittable) { + 
av1_update_state(cpi, td, rdo_data->ctx, mi_row, mi_col, bsize, 1); + encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, bsize, NULL); + } + + x->rdmult = orig_mult; + return 1; } +static INLINE void search_partition_horz_3( + PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc, + PC_TREE *pc_tree, +#if CONFIG_SDP + const PARTITION_TREE *ptree_luma, const PARTITION_TREE *template_tree, +#endif // CONFIG_SDP + RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, SB_MULTI_PASS_MODE multi_pass_mode) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + const int num_planes = av1_num_planes(cm); +#if CONFIG_EXT_RECUR_PARTITIONS + MACROBLOCKD *const xd = &x->e_mbd; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; +#endif // CONFIG_EXT_RECUR_PARTITIONS + + const PartitionBlkParams *blk_params = &search_state->part_blk_params; + const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; + const BLOCK_SIZE bsize = blk_params->bsize; + + const BLOCK_SIZE sml_subsize = get_partition_subsize(bsize, PARTITION_HORZ_3); + const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_HORZ); + + if (search_state->terminate_partition_search || !blk_params->has_rows || + !is_partition_valid(bsize, PARTITION_HORZ_3) || + !(search_state->do_rectangular_split || + av1_active_h_edge(cpi, mi_row, blk_params->mi_step_h))) { + return; + } + + const int part_h3_rate = search_state->partition_cost[PARTITION_HORZ_3]; + if (part_h3_rate == INT_MAX || + RDCOST(x->rdmult, part_h3_rate, 0) >= best_rdc->rdcost) { + return; + } + RD_STATS sum_rdc; + av1_init_rd_stats(&sum_rdc); + const int quarter_step = mi_size_high[bsize] / 4; + + sum_rdc.rate = search_state->partition_cost[PARTITION_HORZ_3]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); + + const int step_multipliers[3] = { 0, 1, 2 }; + const BLOCK_SIZE subblock_sizes[3] = { 
sml_subsize, big_subsize, + sml_subsize }; + + for (int idx = 0; idx < 3; idx++) { + if (pc_tree->horizontal3[idx]) { + av1_free_pc_tree_recursive(pc_tree->horizontal3[idx], num_planes, 0, 0); + pc_tree->horizontal3[idx] = NULL; + } + } + pc_tree->horizontal3[0] = + av1_alloc_pc_tree_node(mi_row, mi_col, subblock_sizes[0], pc_tree, + PARTITION_HORZ_3, 0, 0, ss_x, ss_y); + pc_tree->horizontal3[1] = + av1_alloc_pc_tree_node(mi_row + quarter_step, mi_col, subblock_sizes[1], + pc_tree, PARTITION_HORZ_3, 1, 0, ss_x, ss_y); + pc_tree->horizontal3[2] = av1_alloc_pc_tree_node( + mi_row + quarter_step * 3, mi_col, subblock_sizes[2], pc_tree, + PARTITION_HORZ_3, 2, 1, ss_x, ss_y); + + if (cpi->sf.part_sf.enable_fast_erp && !frame_is_intra_only(cm) && + !x->must_find_valid_partition && is_bsize_pruning_cand(bsize)) { + const SimpleMotionData *up = av1_get_sms_data( + cpi, &tile_data->tile_info, x, mi_row, mi_col, subblock_sizes[0]); + const SimpleMotionData *middle = + av1_get_sms_data(cpi, &tile_data->tile_info, x, mi_row + quarter_step, + mi_col, subblock_sizes[1]); + const SimpleMotionData *down = + av1_get_sms_data(cpi, &tile_data->tile_info, x, + mi_row + 3 * quarter_step, mi_col, subblock_sizes[2]); + + SMSPartitionStats part_data; + part_data.sms_data[0] = up; + part_data.sms_data[1] = middle; + part_data.sms_data[2] = down; + part_data.num_sub_parts = 3; + part_data.part_rate = part_h3_rate; + + if (best_rdc->rdcost < INT64_MAX && + (blk_params->mi_row + 2 * (blk_params->mi_step_h) <= + cm->mi_params.mi_rows) && + (blk_params->mi_col + 2 * (blk_params->mi_step_w) <= + cm->mi_params.mi_cols) && + av1_prune_new_part(&search_state->none_data, &part_data, x->rdmult, + blk_params->bsize, &cpi->sf)) { + const BLOCK_SIZE midsize = subblock_sizes[1]; + const BLOCK_SIZE subsubsize = + get_partition_subsize(midsize, PARTITION_VERT); + if (subsubsize == BLOCK_INVALID) { + return; + } + + // Do one more check to deal with recursion + SMSPartitionStats subpart_data; + const 
SimpleMotionData *midleft = + av1_get_sms_data(cpi, &tile_data->tile_info, x, mi_row + quarter_step, + mi_col + 2 * quarter_step, subsubsize); + const SimpleMotionData *midright = + av1_get_sms_data(cpi, &tile_data->tile_info, x, mi_row + quarter_step, + mi_col + 2 * quarter_step, subsubsize); + subpart_data.sms_data[0] = up; + subpart_data.sms_data[1] = midleft; + subpart_data.sms_data[2] = midright; + subpart_data.sms_data[3] = down; + subpart_data.num_sub_parts = 4; + subpart_data.part_rate = 0; + if (av1_prune_new_part(&search_state->none_data, &subpart_data, x->rdmult, + bsize, &cpi->sf)) { + return; + } + } + } + + int this_mi_row = mi_row; + for (int i = 0; i < 3; ++i) { + this_mi_row += quarter_step * step_multipliers[i]; + + if (i > 0 && this_mi_row >= cm->mi_params.mi_rows) break; + + SUBBLOCK_RDO_DATA rdo_data = { + NULL, + pc_tree->horizontal3[i], +#if CONFIG_SDP + get_partition_subtree_const(ptree_luma, i), + get_partition_subtree_const(template_tree, i), +#endif // CONFIG_SDP + NULL, + this_mi_row, + mi_col, + subblock_sizes[i], + PARTITION_HORZ_3, + i == 2, + 1 + }; + if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc, + &sum_rdc, multi_pass_mode)) { + av1_invalid_rd_stats(&sum_rdc); + break; + } + } + + av1_rd_cost_update(x->rdmult, &sum_rdc); + if (sum_rdc.rdcost < best_rdc->rdcost) { + *best_rdc = sum_rdc; + search_state->found_best_partition = true; + pc_tree->partitioning = PARTITION_HORZ_3; + } + + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes); +} + +static INLINE void search_partition_vert_3( + PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc, + PC_TREE *pc_tree, +#if CONFIG_SDP + const PARTITION_TREE *ptree_luma, const PARTITION_TREE *template_tree, +#endif // CONFIG_SDP + RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, SB_MULTI_PASS_MODE multi_pass_mode) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = 
&td->mb; + const int num_planes = av1_num_planes(cm); +#if CONFIG_EXT_RECUR_PARTITIONS + MACROBLOCKD *const xd = &x->e_mbd; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; +#endif // CONFIG_EXT_RECUR_PARTITIONS + + const PartitionBlkParams *blk_params = &search_state->part_blk_params; + const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; + const BLOCK_SIZE bsize = blk_params->bsize; + + const BLOCK_SIZE sml_subsize = get_partition_subsize(bsize, PARTITION_VERT_3); + const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_VERT); + + if (search_state->terminate_partition_search || !blk_params->has_cols || + !is_partition_valid(bsize, PARTITION_VERT_3) || + !(search_state->do_rectangular_split || + av1_active_v_edge(cpi, mi_row, blk_params->mi_step_h))) { + return; + } + + const int part_v3_rate = search_state->partition_cost[PARTITION_VERT_3]; + if (part_v3_rate == INT_MAX || + RDCOST(x->rdmult, part_v3_rate, 0) >= best_rdc->rdcost) { + return; + } + + RD_STATS sum_rdc; + av1_init_rd_stats(&sum_rdc); + const int quarter_step = mi_size_wide[bsize] / 4; + + sum_rdc.rate = search_state->partition_cost[PARTITION_VERT_3]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); + + const int step_multipliers[3] = { 0, 1, 2 }; + const BLOCK_SIZE subblock_sizes[3] = { sml_subsize, big_subsize, + sml_subsize }; + + for (int idx = 0; idx < 3; idx++) { + if (pc_tree->vertical3[idx]) { + av1_free_pc_tree_recursive(pc_tree->vertical3[idx], num_planes, 0, 0); + pc_tree->vertical3[idx] = NULL; + } + } + pc_tree->vertical3[0] = + av1_alloc_pc_tree_node(mi_row, mi_col, subblock_sizes[0], pc_tree, + PARTITION_VERT_3, 0, 0, ss_x, ss_y); + pc_tree->vertical3[1] = + av1_alloc_pc_tree_node(mi_row, mi_col + quarter_step, subblock_sizes[1], + pc_tree, PARTITION_VERT_3, 1, 0, ss_x, ss_y); + pc_tree->vertical3[2] = av1_alloc_pc_tree_node( + mi_row, mi_col + quarter_step * 3, subblock_sizes[2], pc_tree, + 
PARTITION_VERT_3, 2, 1, ss_x, ss_y); + + if (cpi->sf.part_sf.enable_fast_erp && !frame_is_intra_only(cm) && + !x->must_find_valid_partition && is_bsize_pruning_cand(bsize)) { + const SimpleMotionData *left = av1_get_sms_data( + cpi, &tile_data->tile_info, x, mi_row, mi_col, subblock_sizes[0]); + const SimpleMotionData *middle = + av1_get_sms_data(cpi, &tile_data->tile_info, x, mi_row, + mi_col + quarter_step, subblock_sizes[1]); + const SimpleMotionData *right = + av1_get_sms_data(cpi, &tile_data->tile_info, x, mi_row, + mi_col + 3 * quarter_step, subblock_sizes[2]); + + SMSPartitionStats part_data; + part_data.sms_data[0] = left; + part_data.sms_data[1] = middle; + part_data.sms_data[2] = right; + part_data.num_sub_parts = 3; + part_data.part_rate = part_v3_rate; + + if (best_rdc->rdcost < INT64_MAX && + (blk_params->mi_row + 2 * blk_params->mi_step_h <= + cm->mi_params.mi_rows) && + (blk_params->mi_col + 2 * blk_params->mi_step_w <= + cm->mi_params.mi_cols) && + av1_prune_new_part(&search_state->none_data, &part_data, x->rdmult, + blk_params->bsize, &cpi->sf)) { + const BLOCK_SIZE midsize = subblock_sizes[1]; + const BLOCK_SIZE subsubsize = + get_partition_subsize(midsize, PARTITION_HORZ); + if (subsubsize == BLOCK_INVALID) { + return; + } + + // Do one more check to deal with recursion + SMSPartitionStats subpart_data; + const SimpleMotionData *leftmid = + av1_get_sms_data(cpi, &tile_data->tile_info, x, mi_row, + mi_col + quarter_step, subsubsize); + const SimpleMotionData *rightmid = av1_get_sms_data( + cpi, &tile_data->tile_info, x, mi_row + 2 * quarter_step, + mi_col + quarter_step, subsubsize); + subpart_data.sms_data[0] = left; + subpart_data.sms_data[1] = leftmid; + subpart_data.sms_data[2] = rightmid; + subpart_data.sms_data[3] = right; + subpart_data.num_sub_parts = 4; + subpart_data.part_rate = 0; + if (av1_prune_new_part(&search_state->none_data, &subpart_data, x->rdmult, + bsize, &cpi->sf)) { + return; + } + } + } + + int this_mi_col = mi_col; + for 
(int i = 0; i < 3; ++i) { + this_mi_col += quarter_step * step_multipliers[i]; + + if (i > 0 && this_mi_col >= cm->mi_params.mi_cols) break; + + SUBBLOCK_RDO_DATA rdo_data = { + NULL, + pc_tree->vertical3[i], +#if CONFIG_SDP + get_partition_subtree_const(ptree_luma, i), + get_partition_subtree_const(template_tree, i), +#endif // CONFIG_SDP + NULL, + mi_row, + this_mi_col, + subblock_sizes[i], + PARTITION_VERT_3, + i == 2, + 1 + }; + if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc, + &sum_rdc, multi_pass_mode)) { + av1_invalid_rd_stats(&sum_rdc); + break; + } + } + + av1_rd_cost_update(x->rdmult, &sum_rdc); + if (sum_rdc.rdcost < best_rdc->rdcost) { + *best_rdc = sum_rdc; + search_state->found_best_partition = true; + pc_tree->partitioning = PARTITION_VERT_3; + } + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes); +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS +/*!\brief AV1 block partition search (full search). +* +* \ingroup partition_search +* \callgraph +* Searches for the best partition pattern for a block based on the +* rate-distortion cost, and returns a bool value to indicate whether a valid +* partition pattern is found. The partition can recursively go down to the +* smallest block size. 
+* +* \param[in] cpi Top-level encoder structure +* \param[in] td Pointer to thread data +* \param[in] tile_data Pointer to struct holding adaptive +data/contexts/models for the tile during +encoding +* \param[in] tp Pointer to the starting token +* \param[in] mi_row Row coordinate of the block in a step size +of MI_SIZE +* \param[in] mi_col Column coordinate of the block in a step +size of MI_SIZE +* \param[in] bsize Current block size +* \param[in] rd_cost Pointer to the final rd cost of the block +* \param[in] best_rdc Upper bound of rd cost of a valid partition +* \param[in] pc_tree Pointer to the PC_TREE node storing the +picked partitions and mode info for the +current block +* \param[in] ptree_luma Pointer to the luma partition tree so that the +* encoder to estimate the partition type for chroma. +* \param[in] template_tree A partial tree that contains the partition +* structure to be used as a template. +* \param[in] sms_tree Pointer to struct holding simple motion +search data for the current block +* \param[in] none_rd Pointer to the rd cost in the case of not +splitting the current block +* \param[in] multi_pass_mode SB_SINGLE_PASS/SB_DRY_PASS/SB_WET_PASS +* \param[in] rect_part_win_info Pointer to struct storing whether horz/vert +partition outperforms previously tested +partitions +* +* \return A bool value is returned indicating if a valid partition is found. +* The pc_tree struct is modified to store the picked partition and modes. +* The rd_cost struct is also updated with the RD stats corresponding to the +* best partition found. +*/ +#else /*!\brief AV1 block partition search (full search). * * \ingroup partition_search @@ -3266,10 +4635,15 @@ * The rd_cost struct is also updated with the RD stats corresponding to the * best partition found. 
*/ +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS bool av1_rd_pick_partition(AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, TokenExtra **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_STATS *rd_cost, RD_STATS best_rdc, PC_TREE *pc_tree, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const PARTITION_TREE *ptree_luma, + const PARTITION_TREE *template_tree, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS SIMPLE_MOTION_DATA_TREE *sms_tree, int64_t *none_rd, SB_MULTI_PASS_MODE multi_pass_mode, RD_RECT_PART_WIN_INFO *rect_part_win_info) { @@ -3282,15 +4656,57 @@ const TokenExtra *const tp_orig = *tp; PartitionSearchState part_search_state; // Initialization of state variables used in partition search. - init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col, - bsize); + init_partition_search_state_params(x, cpi, &part_search_state, +#if CONFIG_EXT_RECUR_PARTITIONS + pc_tree, +#endif // CONFIG_EXT_RECUR_PARTITIONS + mi_row, mi_col, bsize); PartitionBlkParams blk_params = part_search_state.part_blk_params; - - sms_tree->partitioning = PARTITION_NONE; +#if CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_SDP + const PARTITION_TYPE forced_partition = + get_forced_partition_type(cm, x, mi_row, mi_col, bsize, template_tree); +#else + const PARTITION_TYPE forced_partition = + get_forced_partition_type(cm, x, mi_row, mi_col, bsize); +#endif // CONFIG_SDP + if (sms_tree != NULL) +#endif // CONFIG_EXT_RECUR_PARTITIONS + sms_tree->partitioning = PARTITION_NONE; if (best_rdc.rdcost < 0) { av1_invalid_rd_stats(rd_cost); return part_search_state.found_best_partition; } +#if CONFIG_EXT_RECUR_PARTITIONS + // Check whether there is a counterpart pc_tree node with the same size + // and the same neighboring context at the same location but from a + // different partition path. If yes directly copy the RDO decision made for + // the counterpart. 
+ PC_TREE *counterpart_block = av1_look_for_counterpart_block(pc_tree); + if (counterpart_block) { + if (counterpart_block->rd_cost.rate != INT_MAX) { + av1_copy_pc_tree_recursive(cm, pc_tree, counterpart_block, + part_search_state.ss_x, part_search_state.ss_y, + &td->shared_coeff_buf, num_planes); + *rd_cost = pc_tree->rd_cost; + assert(bsize != cm->seq_params.sb_size); + if (bsize == cm->seq_params.sb_size) exit(0); + + if (!pc_tree->is_last_subblock) { + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, + pc_tree, NULL, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + NULL, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + NULL); + } + return true; + } else { + av1_invalid_rd_stats(rd_cost); + return false; + } + } +#endif // CONFIG_EXT_RECUR_PARTITIONS if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0; // Override skipping rectangular partition operations for edge blocks. @@ -3334,10 +4750,14 @@ sizeof(x->txfm_search_info.blk_skip)); #endif // NDEBUG + assert(bsize < BLOCK_SIZES_ALL); +#if !CONFIG_EXT_RECUR_PARTITIONS assert(mi_size_wide[bsize] == mi_size_high[bsize]); +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Set buffers and offsets. - av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); + av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize, + &pc_tree->chroma_ref_info); // Save rdmult before it might be changed, so it can be restored later. 
const int orig_rdmult = x->rdmult; @@ -3358,6 +4778,7 @@ int *partition_horz_allowed = &part_search_state.partition_rect_allowed[HORZ]; int *partition_vert_allowed = &part_search_state.partition_rect_allowed[VERT]; +#if !CONFIG_EXT_RECUR_PARTITIONS int *prune_horz = &part_search_state.prune_rect_part[HORZ]; int *prune_vert = &part_search_state.prune_rect_part[VERT]; // Pruning: before searching any partition type, using source and simple @@ -3367,17 +4788,45 @@ &part_search_state.partition_none_allowed, partition_horz_allowed, partition_vert_allowed, &part_search_state.do_rectangular_split, &part_search_state.do_square_split, prune_horz, prune_vert); +#endif // !CONFIG_EXT_RECUR_PARTITIONS - // Pruning: eliminating partition types leading to coding block sizes outside - // the min and max bsize limitations set from the encoder. + // Pruning: eliminating partition types leading to coding block sizes + // outside the min and max bsize limitations set from the encoder. av1_prune_partitions_by_max_min_bsize( &x->sb_enc, bsize, blk_params.has_rows && blk_params.has_cols, &part_search_state.partition_none_allowed, partition_horz_allowed, +#if CONFIG_EXT_RECUR_PARTITIONS + partition_vert_allowed, NULL); +#else partition_vert_allowed, &part_search_state.do_square_split); +#endif #if CONFIG_SDP int luma_split_flag = 0; - int parent_block_width = block_size_wide[bsize]; + const int parent_block_width = block_size_wide[bsize]; +#if CONFIG_EXT_RECUR_PARTITIONS + const int min_bsize_1d = AOMMIN(block_size_high[bsize], parent_block_width); + int horz_3_allowed_sdp = 1; + int vert_3_allowed_sdp = 1; + if (xd->tree_type == CHROMA_PART && min_bsize_1d >= SHARED_PART_SIZE && + ptree_luma) { + PARTITION_TYPE derived_partition_mode = sdp_chroma_part_from_luma( + bsize, ptree_luma->partition, part_search_state.ss_x, + part_search_state.ss_y); + + if (derived_partition_mode != PARTITION_NONE) + part_search_state.partition_none_allowed = BLOCK_INVALID; + if (derived_partition_mode != 
PARTITION_HORZ) + part_search_state.partition_rect_allowed[HORZ] = 0; + if (derived_partition_mode != PARTITION_VERT) + part_search_state.partition_rect_allowed[VERT] = 0; + if (derived_partition_mode != PARTITION_HORZ_3) horz_3_allowed_sdp = 0; + if (derived_partition_mode != PARTITION_VERT_3) vert_3_allowed_sdp = 0; + + // TODO(yuec): Need to make sure there is at least one valid partition + // mode + } +#else // CONFIG_EXT_RECUR_PARTITIONS const CommonModeInfoParams *const mi_params = &cm->mi_params; if (xd->tree_type == CHROMA_PART && parent_block_width >= SHARED_PART_SIZE) { luma_split_flag = get_luma_split_flag(bsize, mi_params, mi_row, mi_col); @@ -3388,25 +4837,50 @@ part_search_state.partition_rect_allowed[HORZ] = 0; part_search_state.partition_rect_allowed[VERT] = 0; } +#endif // CONFIG_EXT_RECUR_PARTITIONS #endif // Partition search BEGIN_PARTITION_SEARCH: - // If a valid partition is required, usually when the first round cannot find - // a valid one under the cost limit after pruning, reset the limitations on - // partition types. + // If a valid partition is required, usually when the first round cannot + // find a valid one under the cost limit after pruning, reset the + // limitations on partition types. if (x->must_find_valid_partition) - reset_part_limitations(cpi, &part_search_state); + reset_part_limitations(cpi, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + xd->tree_type, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_EXT_RECUR_PARTITIONS + pc_tree, +#endif // CONFIG_EXT_RECUR_PARTITIONS + &part_search_state); // Partition block source pixel variance. unsigned int pb_source_variance = UINT_MAX; // PARTITION_NONE search stage. 
+#if CONFIG_EXT_RECUR_PARTITIONS + if (cpi->sf.part_sf.enable_fast_erp && !frame_is_intra_only(cm)) { + const SimpleMotionData *whole = + av1_get_sms_data(cpi, tile_info, x, mi_row, mi_col, bsize); + part_search_state.none_data.sms_data[0] = whole; + part_search_state.none_data.num_sub_parts = 1; + part_search_state.none_data.part_rate = + part_search_state.partition_cost[PARTITION_NONE]; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS int64_t part_none_rd = INT64_MAX; - none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx, - &part_search_state, &best_rdc, &pb_source_variance, - none_rd, &part_none_rd); +#if CONFIG_EXT_RECUR_PARTITIONS + if (IS_FORCED_PARTITION_TYPE(PARTITION_NONE)) { +#endif // CONFIG_EXT_RECUR_PARTITIONS + none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx, + &part_search_state, &best_rdc, &pb_source_variance, + none_rd, &part_none_rd); +#if CONFIG_EXT_RECUR_PARTITIONS + } +#endif // CONFIG_EXT_RECUR_PARTITIONS +#if !CONFIG_EXT_RECUR_PARTITIONS // PARTITION_SPLIT search stage. int64_t part_split_rd = INT64_MAX; split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx, @@ -3424,14 +4898,21 @@ // Prune partitions based on PARTITION_NONE and PARTITION_SPLIT. prune_partitions_after_split(cpi, x, sms_tree, &part_search_state, &best_rdc, part_none_rd, part_split_rd); +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Rectangular partitions search stage. 
rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx, &part_search_state, &best_rdc, +#if CONFIG_EXT_RECUR_PARTITIONS + multi_pass_mode, +#if CONFIG_SDP + ptree_luma, template_tree, +#endif // CONFIG_SDP +#endif // CONFIG_EXT_RECUR_PARTITIONS rect_part_win_info); if (pb_source_variance == UINT_MAX) { - av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize); + av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, NULL); if (is_cur_buf_hbd(xd)) { pb_source_variance = av1_high_get_sby_perpixel_variance( cpi, &x->plane[0].src, bsize, xd->bd); @@ -3444,6 +4925,7 @@ assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions, !part_search_state.do_rectangular_split)); +#if !CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_SDP const int ext_partition_allowed = part_search_state.do_rectangular_split && @@ -3454,7 +4936,7 @@ part_search_state.do_rectangular_split && bsize > cpi->sf.part_sf.ext_partition_eval_thresh && blk_params.has_rows && blk_params.has_cols; -#endif +#endif // CONFIG_SDP // AB partitions search stage. ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, @@ -3464,18 +4946,13 @@ // 4-way partitions search stage. int part4_search_allowed[NUM_PART4_TYPES] = { 1, 1 }; - // Disable 4-way partition search flags for width less than twice the minimum - // width. -#if CONFIG_SDP + // Disable 4-way partition search flags for width less than twice the + // minimum width. 
#if CONFIG_SDP if (blk_params.width < (blk_params.min_partition_size_1d << 2) || (xd->tree_type == CHROMA_PART && bsize <= BLOCK_16X16) || (luma_split_flag > 3)) { #else - if (blk_params.width < (blk_params.min_partition_size_1d << 2) || - (xd->tree_type == CHROMA_PART && bsize <= BLOCK_16X16)) { -#endif -#else if (blk_params.width < (blk_params.min_partition_size_1d << 2)) { #endif part4_search_allowed[HORZ4] = 0; @@ -3516,6 +4993,69 @@ pc_tree->vertical4, &part_search_state, &best_rdc, inc_step, PARTITION_VERT_4); } +#endif // !CONFIG_EXT_RECUR_PARTITIONS + +#if CONFIG_EXT_RECUR_PARTITIONS + const int ext_partition_allowed = + (blk_params.has_rows && blk_params.has_cols) || !is_square_block(bsize); + const int partition_3_allowed = + ext_partition_allowed && bsize != BLOCK_128X128; + const int is_wide_block = block_size_wide[bsize] > block_size_high[bsize]; + const int is_tall_block = block_size_wide[bsize] < block_size_high[bsize]; + const int horz_3_allowed = + partition_3_allowed && (is_square_block(bsize) || is_tall_block) && +#if CONFIG_SDP + horz_3_allowed_sdp && +#endif // CONFIG_SDP + check_is_chroma_size_valid(PARTITION_HORZ_3, bsize, mi_row, mi_col, + part_search_state.ss_x, part_search_state.ss_y, + pc_tree) && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_3), + blk_params.min_partition_size) && + IMPLIES(cpi->sf.part_sf.prune_part_3_with_part_none, + frame_is_intra_only(cm) || forced_partition == PARTITION_HORZ_3 || + pc_tree->partitioning != PARTITION_NONE); + + const int vert_3_allowed = + partition_3_allowed && (is_square_block(bsize) || is_wide_block) && +#if CONFIG_SDP + vert_3_allowed_sdp && +#endif // CONFIG_SDP + check_is_chroma_size_valid(PARTITION_VERT_3, bsize, mi_row, mi_col, + part_search_state.ss_x, part_search_state.ss_y, + pc_tree) && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_3), + blk_params.min_partition_size) && + IMPLIES(cpi->sf.part_sf.prune_part_3_with_part_none, + frame_is_intra_only(cm) || 
forced_partition == PARTITION_VERT_3 || + pc_tree->partitioning != PARTITION_NONE); + + // PARTITION_HORZ_3 + if (IS_FORCED_PARTITION_TYPE(PARTITION_HORZ_3) && horz_3_allowed) { + search_partition_horz_3( + &part_search_state, cpi, td, tile_data, tp, &best_rdc, pc_tree, +#if CONFIG_SDP + (ptree_luma && ptree_luma->partition == PARTITION_HORZ_3) ? ptree_luma + : NULL, + + template_tree, +#endif // CONFIG_SDP + &x_ctx, multi_pass_mode); + } + + // PARTITION_VERT_3 + if (IS_FORCED_PARTITION_TYPE(PARTITION_VERT_3) && vert_3_allowed) { + search_partition_vert_3( + &part_search_state, cpi, td, tile_data, tp, &best_rdc, pc_tree, +#if CONFIG_SDP + (ptree_luma && ptree_luma->partition == PARTITION_VERT_3) ? ptree_luma + : NULL, + + template_tree, +#endif // CONFIG_SDP + &x_ctx, multi_pass_mode); + } +#endif // CONFIG_EXT_RECUR_PARTITIONS if (bsize == cm->seq_params.sb_size && !part_search_state.found_best_partition) { @@ -3530,10 +5070,22 @@ // Store the final rd cost *rd_cost = best_rdc; + pc_tree->rd_cost = best_rdc; + if (!part_search_state.found_best_partition) { + av1_invalid_rd_stats(&pc_tree->rd_cost); + } else { +#if CONFIG_EXT_RECUR_PARTITIONS + av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, bsize, + cm->seq_params.sb_size, pc_tree->partitioning); +#endif // CONFIG_EXT_RECUR_PARTITIONS + } // Also record the best partition in simple motion data tree because it is // necessary for the related speed features. - sms_tree->partitioning = pc_tree->partitioning; +#if CONFIG_EXT_RECUR_PARTITIONS + if (sms_tree) +#endif // CONFIG_EXT_RECUR_PARTITIONS + sms_tree->partitioning = pc_tree->partitioning; #if CONFIG_SDP if (luma_split_flag > 3) { @@ -3566,8 +5118,9 @@ #endif #if CONFIG_COLLECT_PARTITION_STATS == 2 - // If CONFIG_COLLECTION_PARTITION_STATS is 2, then we print out the stats for - // the whole clip. So we need to pass the information upstream to the encoder. + // If CONFIG_COLLECTION_PARTITION_STATS is 2, then we print out the stats + // for the whole clip. 
So we need to pass the information upstream to the + // encoder. const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize); int *agg_attempts = part_stats->partition_attempts[bsize_idx]; int *agg_decisions = part_stats->partition_decisions[bsize_idx]; @@ -3585,31 +5138,57 @@ // If a valid partition is found and reconstruction is required for future // sub-blocks in the same group. if (part_search_state.found_best_partition && pc_tree->index != 3) { +#if CONFIG_EXT_RECUR_PARTITIONS + assert(pc_tree->partitioning != PARTITION_SPLIT); +#endif // CONFIG_EXT_RECUR_PARTITIONS if (bsize == cm->seq_params.sb_size) { // Encode the superblock. const int emit_output = multi_pass_mode != SB_DRY_PASS; const RUN_TYPE run_type = emit_output ? OUTPUT_ENABLED : DRY_RUN_NORMAL; #if CONFIG_SDP - x->cb_offset[xd->tree_type == CHROMA_PART] = 0; -#else - x->cb_offset = 0; -#endif + const int plane_start = (xd->tree_type == CHROMA_PART); + const int plane_end = (xd->tree_type == LUMA_PART) ? 1 : num_planes; + for (int plane = plane_start; plane < plane_end; plane++) { + x->cb_offset[plane] = 0; + } + av1_reset_ptree_in_sbi(xd->sbi, xd->tree_type); encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize, - pc_tree, NULL); - // Dealloc the whole PC_TREE after a superblock is done. + pc_tree, xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)], +#if CONFIG_EXT_RECUR_PARTITIONS + xd->tree_type == CHROMA_PART ? xd->sbi->ptree_root[0] : NULL, +#endif // CONFIG_EXT_RECUR_PARTITIONS + NULL); +#else + memset(x->cb_offset, 0, sizeof(x->cb_offset)); + av1_reset_ptree_in_sbi(xd->sbi); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize, + pc_tree, xd->sbi->ptree_root, NULL); +#endif // CONFIG_SDP + // Dealloc the whole PC_TREE after a superblock is done. av1_free_pc_tree_recursive(pc_tree, num_planes, 0, 0); pc_tree_dealloc = 1; } else { // Encode the smaller blocks in DRY_RUN mode. 
encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, - pc_tree, NULL); + pc_tree, NULL, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + NULL, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + NULL); } } + int keep_tree = 0; +#if CONFIG_EXT_RECUR_PARTITIONS + keep_tree = should_reuse_mode(x, REUSE_INTER_MODE_IN_INTERFRAME_FLAG | + REUSE_INTRA_MODE_IN_INTERFRAME_FLAG); +#endif // CONFIG_EXT_RECUR_PARTITIONS + // If the tree still exists (non-superblock), dealloc most nodes, only keep // nodes for the best partition and PARTITION_NONE. - if (pc_tree_dealloc == 0) + if (!pc_tree_dealloc && !keep_tree) { av1_free_pc_tree_recursive(pc_tree, num_planes, 1, 1); + } if (bsize == cm->seq_params.sb_size) { assert(best_rdc.rate < INT_MAX);
diff --git a/av1/encoder/partition_search.h b/av1/encoder/partition_search.h index 9d1aa2d..8f5c342 100644 --- a/av1/encoder/partition_search.h +++ b/av1/encoder/partition_search.h
@@ -15,25 +15,41 @@ #include "av1/encoder/block.h" #include "av1/encoder/encoder.h" #include "av1/encoder/encodeframe.h" +#include "av1/encoder/encodeframe_utils.h" #include "av1/encoder/tokenize.h" void av1_set_offsets_without_segment_id(const AV1_COMP *const cpi, const TileInfo *const tile, MACROBLOCK *const x, int mi_row, - int mi_col, BLOCK_SIZE bsize); + int mi_col, BLOCK_SIZE bsize, + const CHROMA_REF_INFO *chr_ref_info); void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile, MACROBLOCK *const x, int mi_row, int mi_col, - BLOCK_SIZE bsize); + BLOCK_SIZE bsize, const CHROMA_REF_INFO *chr_ref_info); void av1_rd_use_partition(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, MB_MODE_INFO **mib, TokenExtra **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, - int64_t *dist, int do_recon, PC_TREE *pc_tree); + int64_t *dist, int do_recon, PARTITION_TREE *ptree, + PC_TREE *pc_tree); bool av1_rd_pick_partition(AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, TokenExtra **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_STATS *rd_cost, RD_STATS best_rdc, PC_TREE *pc_tree, +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const PARTITION_TREE *ptree_luma, + const PARTITION_TREE *template_tree, +#endif // CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS SIMPLE_MOTION_DATA_TREE *sms_tree, int64_t *none_rd, SB_MULTI_PASS_MODE multi_pass_mode, RD_RECT_PART_WIN_INFO *rect_part_win_info); +#if CONFIG_EXT_RECUR_PARTITIONS +void av1_build_partition_tree_fixed_partitioning(AV1_COMMON *const cm, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + PARTITION_TREE *ptree); +#endif // CONFIG_EXT_RECUR_PARTITIONS +void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, + int mi_row, int mi_col, BLOCK_SIZE bsize, + AQ_MODE aq_mode, MB_MODE_INFO *mbmi); #endif // AOM_AV1_ENCODER_PARTITION_SEARCH_H_
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c index 7499e87..bd36b38 100644 --- a/av1/encoder/partition_strategy.c +++ b/av1/encoder/partition_strategy.c
@@ -24,8 +24,12 @@ #include "av1/encoder/encoder.h" #include "av1/encoder/motion_search_facade.h" -#include "av1/encoder/partition_strategy.h" +#include "av1/encoder/partition_search.h" #include "av1/encoder/rdopt.h" +#if CONFIG_EXT_RECUR_PARTITIONS +#include "av1/common/idct.h" +#include "av1/encoder/hybrid_fwd_txfm.h" +#endif // CONFIG_EXT_RECUR_PARTITIONS static AOM_INLINE void simple_motion_search_prune_part_features( AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, @@ -834,9 +838,11 @@ get_min_bsize(sms_tree->split[i], min_bw, min_bh); } } else { +#if !CONFIG_EXT_RECUR_PARTITIONS if (part_type == PARTITION_HORZ_A || part_type == PARTITION_HORZ_B || part_type == PARTITION_VERT_A || part_type == PARTITION_VERT_B) part_type = PARTITION_SPLIT; +#endif // !CONFIG_EXT_RECUR_PARTITIONS const BLOCK_SIZE subsize = get_partition_subsize(bsize, part_type); if (subsize != BLOCK_INVALID) { *min_bw = AOMMIN(*min_bw, mi_size_wide_log2[subsize]); @@ -1192,10 +1198,15 @@ unsigned int horz_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; unsigned int vert_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; { +#if CONFIG_EXT_RECUR_PARTITIONS + BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_3); + BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_3); +#else // CONFIG_EXT_RECUR_PARTITIONS BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4); BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4); +#endif // CONFIG_EXT_RECUR_PARTITIONS av1_setup_src_planes(x, cpi->source, mi_row, mi_col, - av1_num_planes(&cpi->common), bsize); + av1_num_planes(&cpi->common), NULL); const int src_stride = x->plane[0].src.stride; uint8_t *src = x->plane[0].src.buf; const MACROBLOCKD *const xd = &x->e_mbd; @@ -1410,12 +1421,6 @@ } } -#ifndef NDEBUG -static AOM_INLINE int is_bsize_square(BLOCK_SIZE bsize) { - return block_size_wide[bsize] == block_size_high[bsize]; -} -#endif // NDEBUG - void 
av1_prune_partitions_by_max_min_bsize( SuperBlockEnc *sb_enc, BLOCK_SIZE bsize, int is_not_edge_block, int *partition_none_allowed, int *partition_horz_allowed, @@ -1423,19 +1428,38 @@ assert(is_bsize_square(sb_enc->max_partition_size)); assert(is_bsize_square(sb_enc->min_partition_size)); assert(sb_enc->min_partition_size <= sb_enc->max_partition_size); +#if !CONFIG_EXT_RECUR_PARTITIONS assert(is_bsize_square(bsize)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS const int max_partition_size_1d = block_size_wide[sb_enc->max_partition_size]; const int min_partition_size_1d = block_size_wide[sb_enc->min_partition_size]; const int bsize_1d = block_size_wide[bsize]; assert(min_partition_size_1d <= max_partition_size_1d); const int is_le_min_sq_part = bsize_1d <= min_partition_size_1d; +#if CONFIG_EXT_RECUR_PARTITIONS + const int block_height = block_size_high[bsize]; + const int block_width = block_size_wide[bsize]; + const int is_gt_max_sq_part = (block_height > max_partition_size_1d) || + (block_width > max_partition_size_1d); +#else // CONFIG_EXT_RECUR_PARTITIONS const int is_gt_max_sq_part = bsize_1d > max_partition_size_1d; +#endif // CONFIG_EXT_RECUR_PARTITIONS + +#if CONFIG_EXT_RECUR_PARTITIONS + (void)do_square_split; + (void)is_not_edge_block; +#endif if (is_gt_max_sq_part) { // If current block size is larger than max, only allow split. *partition_none_allowed = 0; +#if CONFIG_EXT_RECUR_PARTITIONS + *partition_horz_allowed = 1; + *partition_vert_allowed = 1; +#else // CONFIG_EXT_RECUR_PARTITIONS *partition_horz_allowed = 0; *partition_vert_allowed = 0; *do_square_split = 1; +#endif // CONFIG_EXT_RECUR_PARTITIONS } else if (is_le_min_sq_part) { // If current block size is less or equal to min, only allow none if valid // block large enough; only allow split otherwise. @@ -1443,8 +1467,12 @@ *partition_vert_allowed = 0; // only disable square split when current block is not at the picture // boundary. 
otherwise, inherit the square split flag from previous logic +#if CONFIG_EXT_RECUR_PARTITIONS + *partition_none_allowed = 1; +#else // CONFIG_EXT_RECUR_PARTITIONS if (is_not_edge_block) *do_square_split = 0; *partition_none_allowed = !(*do_square_split); +#endif // CONFIG_EXT_RECUR_PARTITIONS } } @@ -1626,3 +1654,378 @@ pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 1, 3); } } + +#if CONFIG_EXT_RECUR_PARTITIONS +// Gets the number of sms data in a single dimension +static INLINE int get_sms_count_from_length(int mi_length) { + switch (mi_length) { + case 32: return BLOCK_128_COUNT; + case 16: return BLOCK_64_COUNT; + case 8: return BLOCK_32_COUNT; + case 4: return BLOCK_16_COUNT; + case 2: return BLOCK_8_COUNT; + case 1: return BLOCK_4_COUNT; + default: assert(0 && "Invalid mi_width"); return -1; + } +} + +// Gets the linear index corresponds to the current block. +static INLINE int get_sms_arr_1d_idx(int mi_bsize, int mi_in_sb) { + int idx = -1; + if (mi_bsize == 1) { + idx = mi_in_sb; + } else { + assert(mi_in_sb % (mi_bsize / 2) == 0); + idx = mi_in_sb / (mi_bsize / 2); + } + assert(idx >= 0 && idx < get_sms_count_from_length(mi_bsize)); + + return idx; +} + +#define MAKE_SMS_ARR_SWITCH_CASE(width, height) \ + case BLOCK_##width##X##height: { \ + return sms_bufs->b_##width##x##height; \ + } + +// Returns the buffer in SimpleMotionDataBufs that correspond to bsize. 
+static INLINE SimpleMotionData *get_sms_arr(SimpleMotionDataBufs *sms_bufs, + BLOCK_SIZE bsize) { + switch (bsize) { + // Square blocks + MAKE_SMS_ARR_SWITCH_CASE(128, 128); + MAKE_SMS_ARR_SWITCH_CASE(64, 64); + MAKE_SMS_ARR_SWITCH_CASE(32, 32); + MAKE_SMS_ARR_SWITCH_CASE(16, 16); + MAKE_SMS_ARR_SWITCH_CASE(8, 8); + MAKE_SMS_ARR_SWITCH_CASE(4, 4); + + // 1:2 blocks + MAKE_SMS_ARR_SWITCH_CASE(64, 128); + MAKE_SMS_ARR_SWITCH_CASE(32, 64); + MAKE_SMS_ARR_SWITCH_CASE(16, 32); + MAKE_SMS_ARR_SWITCH_CASE(8, 16); + MAKE_SMS_ARR_SWITCH_CASE(4, 8); + + // 2:1 blocks + MAKE_SMS_ARR_SWITCH_CASE(128, 64); + MAKE_SMS_ARR_SWITCH_CASE(64, 32); + MAKE_SMS_ARR_SWITCH_CASE(32, 16); + MAKE_SMS_ARR_SWITCH_CASE(16, 8); + MAKE_SMS_ARR_SWITCH_CASE(8, 4); + + // 1:4 blocks + MAKE_SMS_ARR_SWITCH_CASE(16, 64); + MAKE_SMS_ARR_SWITCH_CASE(8, 32); + MAKE_SMS_ARR_SWITCH_CASE(4, 16); + + // 4:1 blocks + MAKE_SMS_ARR_SWITCH_CASE(64, 16); + MAKE_SMS_ARR_SWITCH_CASE(32, 8); + MAKE_SMS_ARR_SWITCH_CASE(16, 4); + + default: assert(0 && "Invalid bsize"); return NULL; + } +} +#undef MAKE_SMS_ARR_SWITCH_CASE + +// Retrieves the SimpleMotionData from SimpleMotionDataBufs +SimpleMotionData *av1_get_sms_data_entry(SimpleMotionDataBufs *sms_bufs, + int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE sb_size) { + assert(mi_size_high[sb_size] == mi_size_wide[sb_size]); + const int mi_in_sb = mi_size_high[sb_size]; + const int mi_row_in_sb = mi_row % mi_in_sb; + const int mi_col_in_sb = mi_col % mi_in_sb; + const int mi_high = mi_size_high[bsize]; + const int mi_wide = mi_size_wide[bsize]; + const int idx_row_in_sb = get_sms_arr_1d_idx(mi_high, mi_row_in_sb); + const int idx_col_in_sb = get_sms_arr_1d_idx(mi_wide, mi_col_in_sb); + const int arr_stride = get_sms_count_from_length(mi_wide); + SimpleMotionData *sms_arr = get_sms_arr(sms_bufs, bsize); + return &sms_arr[idx_row_in_sb * arr_stride + idx_col_in_sb]; +} + +void av1_cache_best_partition(SimpleMotionDataBufs *sms_bufs, int mi_row, + int mi_col, 
BLOCK_SIZE bsize, BLOCK_SIZE sb_size, + PARTITION_TYPE partition) { + SimpleMotionData *cur_block = + av1_get_sms_data_entry(sms_bufs, mi_row, mi_col, bsize, sb_size); + cur_block->has_prev_partition = 1; + cur_block->prev_partition = partition; +} + +// Performs a simple motion search and store the result in sms_data. +static void compute_sms_data(AV1_COMP *const cpi, const TileInfo *const tile, + MACROBLOCK *x, SimpleMotionData *sms_data, + int mi_row, int mi_col, BLOCK_SIZE bsize) { + const AV1_COMMON *const cm = &cpi->common; + const int ref_frame = + cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; + if (mi_col >= cm->mi_params.mi_cols || mi_row >= cm->mi_params.mi_rows) { + // If the whole block is outside of the image, set the var and sse to 0. + sms_data->sse = 0; + sms_data->var = 0; + sms_data->dist = 0; + sms_data->rate = 0; + sms_data->rdcost = 0; + sms_data->valid = 1; + return; + } + av1_set_offsets(cpi, tile, x, mi_row, mi_col, bsize, NULL); + // We need to update the rd-mult here to in case we are doing simple motion + // search on a subblock of the current coding block. 
+ const int orig_rdmult = x->rdmult; + const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode; + MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; + setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi); + // Set error per bit for current rdmult + av1_set_error_per_bit(&x->mv_costs, x->rdmult); + if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) { + const MACROBLOCKD *xd = &x->e_mbd; + const uint8_t *src_buf = x->plane[0].src.buf; + const uint8_t *dst_buf = xd->plane[0].dst.buf; + const int src_stride = x->plane[0].src.stride; + const int dst_stride = xd->plane[0].dst.stride; + if (sms_data->num_start_mvs == 0) { + sms_data->start_mv_list[sms_data->num_start_mvs++] = kZeroMv; + } + sms_data->rdcost = INT64_MAX; + SimpleMotionData best_data = *sms_data; + for (int idx = 0; idx < sms_data->num_start_mvs; idx++) { + const MV start_mv = sms_data->start_mv_list[idx]; + const FULLPEL_MV start_mv_full = get_fullmv_from_mv(&start_mv); + av1_simple_motion_search_ext(cpi, tile, x, mi_row, mi_col, bsize, + ref_frame, start_mv_full, 1, 1, sms_data); + sms_data->var = cpi->fn_ptr[bsize].vf(src_buf, src_stride, dst_buf, + dst_stride, &sms_data->sse); + sms_data->dist = 16 * sms_data->sse; + sms_data->rate = 0; + sms_data->rdcost = RDCOST(x->rdmult, sms_data->rate, sms_data->dist); + if (sms_data->rdcost <= best_data.rdcost) { + best_data = *sms_data; + } + } + *sms_data = best_data; + } + sms_data->valid = 1; + sms_data->bsize = bsize; + sms_data->mi_row = mi_row; + sms_data->mi_col = mi_col; + x->rdmult = orig_rdmult; + return; +} + +#if CONFIG_DEBUG +static INLINE void print_sms(const SimpleMotionData *sms_data, char *prefix) { + BLOCK_SIZE bsize = sms_data->bsize; + MV fullmv = sms_data->fullmv; + MV submv = sms_data->submv; + printf("%s:: bsize: (%d, %d), mi_row: %d, mi_col: %d, rd: %ld\n", prefix, + block_size_wide[bsize], block_size_high[bsize], sms_data->mi_row, + sms_data->mi_col, sms_data->rdcost); + printf("%s:: fullmv: (%d, %d), submv: (%d, %d),\n", prefix, 
fullmv.row, + fullmv.col, submv.row, submv.col); + printf("%s:: mv_cost_type: %d, sadpb: %d, errpb: %d\n", prefix, + sms_data->mv_cost_type, sms_data->sadpb, sms_data->errorperbit); +} +#endif + +static INLINE void add_start_mv_to_block(SimpleMotionData *block, MV start_mv) { + if (block->num_start_mvs == kSMSMaxStartMVs) { + return; + } + for (int idx = 0; idx < block->num_start_mvs; idx++) { + const int_mv *cur_mv = (int_mv *)&block->start_mv_list[idx]; + if (((int_mv *)&start_mv)->as_int == cur_mv->as_int) { + return; + } + } + block->start_mv_list[block->num_start_mvs++] = start_mv; +} + +static INLINE void add_start_mv_to_partition( + SimpleMotionDataBufs *sms_bufs, int mi_row, int mi_col, BLOCK_SIZE bsize, + BLOCK_SIZE sb_size, PARTITION_TYPE partition, MV start_mv) { + assert(bsize < BLOCK_SIZES_ALL); + const int quarter_step_h = block_size_high[bsize] / 4; + const int quarter_step_w = block_size_wide[bsize] / 4; + static const int subblock_count[EXT_PARTITION_TYPES] = { + 1, // PARTITION_NONE + 2, // PARTITION_HORZ + 2, // PARTITION_VERT + 3, // PARTITION_HORZ_3 + 3, // PARTITION_VERT_3 + }; + // PARTITION x NUM_SUBBLOCKS x (ROW and COL) + static const int step_multiplier[EXT_PARTITION_TYPES][3][2] = { + { { 0, 0 }, { 0, 0 }, { 0, 0 } }, // PARTITION_NONE + { { 0, 0 }, { 2, 0 }, { 0, 0 } }, // PARTITION_HORZ + { { 0, 0 }, { 0, 2 }, { 0, 0 } }, // PARTITION_VERT + { { 0, 0 }, { 1, 0 }, { 3, 0 } }, // PARTITION_HORZ_3 + { { 0, 0 }, { 0, 1 }, { 0, 3 } }, // PARTITION_VERT_3 + }; + for (int idx = 0; idx < subblock_count[partition]; idx++) { + BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); + if (subsize == BLOCK_INVALID) { + return; + } else if (partition == PARTITION_HORZ_3 && idx == 1) { + subsize = get_partition_subsize(bsize, PARTITION_HORZ); + } else if (partition == PARTITION_VERT_3 && idx == 1) { + subsize = get_partition_subsize(bsize, PARTITION_VERT); + } + const int sub_row = + mi_row + step_multiplier[partition][idx][0] * quarter_step_h 
/ 4; + const int sub_col = + mi_col + step_multiplier[partition][idx][1] * quarter_step_w / 4; + SimpleMotionData *subblock = + av1_get_sms_data_entry(sms_bufs, sub_row, sub_col, subsize, sb_size); + add_start_mv_to_block(subblock, start_mv); + } +} + +// Computes and stores the simple motion search data for the block at mi_row, +// mi_col with block size bsize. +SimpleMotionData *av1_get_sms_data(AV1_COMP *const cpi, + const TileInfo *const tile, MACROBLOCK *x, + int mi_row, int mi_col, BLOCK_SIZE bsize) { + const AV1_COMMON *const cm = &cpi->common; + const BLOCK_SIZE sb_size = cm->seq_params.sb_size; + SimpleMotionDataBufs *sms_bufs = x->sms_bufs; + SimpleMotionData *cur_block = + av1_get_sms_data_entry(sms_bufs, mi_row, mi_col, bsize, sb_size); + const int valid = cur_block->valid; + if (!valid) { + compute_sms_data(cpi, tile, x, cur_block, mi_row, mi_col, bsize); + for (PARTITION_TYPE partition = PARTITION_NONE; + partition < EXT_PARTITION_TYPES; partition++) { + add_start_mv_to_partition(sms_bufs, mi_row, mi_col, bsize, sb_size, + partition, cur_block->fullmv); + } + } + return cur_block; +} + +PARTITION_TYPE av1_get_prev_partition(MACROBLOCK *x, int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE sb_size) { + SimpleMotionDataBufs *sms_bufs = x->sms_bufs; + const SimpleMotionData *cur_block = + av1_get_sms_data_entry(sms_bufs, mi_row, mi_col, bsize, sb_size); + if (cur_block->has_prev_partition) { + return cur_block->prev_partition; + } else { + return PARTITION_INVALID; + } +} + +static INLINE void gather_part_rd_stats(RD_STATS *rd_stats, + const SMSPartitionStats *stat, + int rdmult) { + av1_init_rd_stats(rd_stats); + if (stat->part_rate < INT_MAX) { + // rd_stats->rate += part_rate; + } else { + rd_stats->rate = INT_MAX; + rd_stats->rdcost = INT64_MAX; + return; + } + for (int idx = 0; idx < stat->num_sub_parts; idx++) { + rd_stats->rate += stat->sms_data[idx]->rate; + rd_stats->dist += stat->sms_data[idx]->dist; + } + rd_stats->rdcost = RDCOST(rdmult, 
rd_stats->rate, rd_stats->dist); +} + +/*! \brief Checks if the average linear dimension of bsize is greater than or + * equal to dim. */ +static INLINE int is_avg_dim_greater_than(BLOCK_SIZE bsize, int dim) { + if (bsize == BLOCK_INVALID) { + return 0; + } + const int avg_dim = (block_size_wide[bsize] + block_size_high[bsize]) / 2; + return avg_dim > dim; +} + +int av1_prune_new_part(const SMSPartitionStats *old_part, + const SMSPartitionStats *new_part, int rdmult, + BLOCK_SIZE bsize, const SPEED_FEATURES *sf) { + RD_STATS old_rd_stat, new_rd_stat; + gather_part_rd_stats(&old_rd_stat, old_part, rdmult); + gather_part_rd_stats(&new_rd_stat, new_part, rdmult); + if (sf->part_sf.enable_fast_erp < 2 && is_avg_dim_greater_than(bsize, 32)) { + return old_rd_stat.rdcost < new_rd_stat.rdcost; + } + return old_rd_stat.rdcost < (int)(1.001 * new_rd_stat.rdcost); +} + +bool av1_prune_part_hv_with_sms(AV1_COMP *const cpi, TileDataEnc *tile_data, + MACROBLOCK *x, + const PartitionSearchState *part_search_state, + const RD_STATS *best_rdc, + const PartitionBlkParams *blk_params, + RECT_PART_TYPE rect_type, int part_rate) { + const AV1_COMMON *const cm = &cpi->common; + const int blk_offsets[NUM_RECT_PARTS][2] = { // HORZ + { blk_params->mi_step_h, 0 }, + // VERT + { 0, blk_params->mi_step_w } + }; + + SMSPartitionStats part_data; + const SimpleMotionData *blk1 = + av1_get_sms_data(cpi, &tile_data->tile_info, x, blk_params->mi_row, + blk_params->mi_col, blk_params->subsize); + const SimpleMotionData *blk2 = av1_get_sms_data( + cpi, &tile_data->tile_info, x, + blk_params->mi_row + blk_offsets[rect_type][0], + blk_params->mi_col + blk_offsets[rect_type][1], blk_params->subsize); + part_data.sms_data[0] = blk1; + part_data.sms_data[1] = blk2; + part_data.num_sub_parts = 2; + part_data.part_rate = part_rate; + + if (best_rdc->rdcost < INT64_MAX && + (blk_params->mi_row + 2 * blk_params->mi_step_h <= + cm->mi_params.mi_rows) && + (blk_params->mi_col + 2 * blk_params->mi_step_w <= + 
cm->mi_params.mi_cols) && + av1_prune_new_part(&part_search_state->none_data, &part_data, x->rdmult, + blk_params->bsize, &cpi->sf)) { + const PARTITION_TYPE second_level_part = + (rect_type == HORZ) ? PARTITION_VERT : PARTITION_HORZ; + const BLOCK_SIZE subsubsize = + get_partition_subsize(blk_params->subsize, second_level_part); + if (subsubsize == BLOCK_INVALID) { + return true; + } + + // Do one more check to deal with recursion + SMSPartitionStats subpart_data; + const SimpleMotionData *upleft = + av1_get_sms_data(cpi, &tile_data->tile_info, x, blk_params->mi_row, + blk_params->mi_col, subsubsize); + const SimpleMotionData *upright = av1_get_sms_data( + cpi, &tile_data->tile_info, x, blk_params->mi_row, + blk_params->mi_col + blk_params->mi_step_w, subsubsize); + const SimpleMotionData *downleft = + av1_get_sms_data(cpi, &tile_data->tile_info, x, + blk_params->mi_row + blk_params->mi_step_h, + blk_params->mi_col, subsubsize); + const SimpleMotionData *downright = av1_get_sms_data( + cpi, &tile_data->tile_info, x, + blk_params->mi_row + blk_params->mi_step_h, + blk_params->mi_col + blk_params->mi_step_w, subsubsize); + subpart_data.sms_data[0] = upleft; + subpart_data.sms_data[1] = upright; + subpart_data.sms_data[2] = downleft; + subpart_data.sms_data[3] = downright; + subpart_data.num_sub_parts = 4; + subpart_data.part_rate = 0; + if (av1_prune_new_part(&part_search_state->none_data, &subpart_data, + x->rdmult, blk_params->bsize, &cpi->sf)) { + return true; + } + } + return false; +} +#endif // CONFIG_EXT_RECUR_PARTITIONS
diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h index a386bf9..1587b67 100644 --- a/av1/encoder/partition_strategy.h +++ b/av1/encoder/partition_strategy.h
@@ -12,6 +12,7 @@ #ifndef AOM_AV1_ENCODER_PARTITION_STRATEGY_H_ #define AOM_AV1_ENCODER_PARTITION_STRATEGY_H_ +#include "av1/encoder/block.h" #include "av1/encoder/encodeframe.h" #include "av1/encoder/encodemb.h" #include "av1/encoder/encoder.h" @@ -191,6 +192,67 @@ int *horza_partition_allowed, int *horzb_partition_allowed, int *verta_partition_allowed, int *vertb_partition_allowed); +#if CONFIG_EXT_RECUR_PARTITIONS +SimpleMotionData *av1_get_sms_data_entry(SimpleMotionDataBufs *sms_bufs, + int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE sb_size); +SimpleMotionData *av1_get_sms_data(AV1_COMP *const cpi, + const TileInfo *const tile, MACROBLOCK *x, + int mi_row, int mi_col, BLOCK_SIZE bsize); + +static AOM_INLINE void av1_add_mode_search_context_to_cache( + SimpleMotionData *sms_data, PICK_MODE_CONTEXT *ctx) { + if (!sms_data->mode_cache[0] || + sms_data->mode_cache[0]->rd_stats.rdcost > ctx->rd_stats.rdcost) { + sms_data->mode_cache[0] = ctx; + } +} + +static INLINE void av1_set_best_mode_cache(MACROBLOCK *x, + PICK_MODE_CONTEXT *mode_cache[1]) { + if (mode_cache[0] && mode_cache[0]->rd_stats.rate != INT_MAX) { + x->inter_mode_cache = &mode_cache[0]->mic; + } else { + x->inter_mode_cache = NULL; + } +} + +typedef struct SMSPartitionStats { + const SimpleMotionData *sms_data[4]; + int num_sub_parts; + int part_rate; +} SMSPartitionStats; + +static INLINE void av1_init_sms_partition_stats(SMSPartitionStats *stats) { + memset(stats->sms_data, 0, sizeof(stats->sms_data)); + stats->num_sub_parts = 0; + stats->part_rate = INT_MAX; +} + +// Returns 1 if we think the old part is better and we should prune new +// partition, 0 otherwise. 
+int av1_prune_new_part(const SMSPartitionStats *old_part, + const SMSPartitionStats *new_part, int rdmult, + BLOCK_SIZE bsize, const SPEED_FEATURES *sf); + +void av1_cache_best_partition(SimpleMotionDataBufs *sms_bufs, int mi_row, + int mi_col, BLOCK_SIZE bsize, BLOCK_SIZE sb_size, + PARTITION_TYPE partition); + +void av1_copy_sms_part(const SimpleMotionData **part_dst, int *part_size_dst, + int *part_rate_dst, + const SimpleMotionData *const *part_src, + int part_size_src, int part_rate_src); + +struct PartitionBlkParams; +struct PartitionSearchState; +bool av1_prune_part_hv_with_sms( + AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x, + const struct PartitionSearchState *part_search_state, + const RD_STATS *best_rdc, const struct PartitionBlkParams *blk_params, + RECT_PART_TYPE rect_type, int part_rate); +#endif // CONFIG_EXT_RECUR_PARTITIONS + // A simplified version of set_offsets meant to be used for // simple_motion_search. static INLINE void set_offsets_for_motion_search(const AV1_COMP *const cpi, @@ -208,15 +270,15 @@ mi_row, mi_col); // Set up destination pointers. - av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0, - num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, mi_row, mi_col, 0, + num_planes, NULL); // Set up limit values for MV components. // Mv beyond the range do not produce new/different prediction block. av1_set_mv_limits(mi_params, &x->mv_limits, mi_row, mi_col, mi_height, mi_width, cpi->oxcf.border_in_pixels); - set_plane_n4(xd, mi_width, mi_height, num_planes); + set_plane_n4(xd, mi_width, mi_height, num_planes, NULL); xd->mi_row = mi_row; xd->mi_col = mi_col; @@ -231,7 +293,7 @@ GET_MV_SUBPEL((mi_params->mi_cols - mi_width - mi_col) * MI_SIZE); // Set up source buffers. 
- av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize); + av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, NULL); } static INLINE void init_simple_motion_search_mvs( @@ -250,6 +312,15 @@ } } +PARTITION_TYPE av1_get_prev_partition(MACROBLOCK *x, int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE sb_size); + +#if CONFIG_EXT_RECUR_PARTITIONS +static INLINE void av1_init_sms_data_bufs(SimpleMotionDataBufs *data_bufs) { + memset(data_bufs, 0, sizeof(*data_bufs)); +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + static INLINE int is_full_sb(const CommonModeInfoParams *const mi_params, int mi_row, int mi_col, BLOCK_SIZE sb_size) { const int sb_mi_wide = mi_size_wide[sb_size];
diff --git a/av1/encoder/pickccso.c b/av1/encoder/pickccso.c index 3b90267..b744b4d 100644 --- a/av1/encoder/pickccso.c +++ b/av1/encoder/pickccso.c
@@ -448,8 +448,8 @@ int64_t rdmult_temp = (int64_t)rdmult * (int64_t)rdmult_weight; if (rdmult_temp < INT_MAX) rdmult = (int)rdmult_temp; const int num_planes = av1_num_planes(cm); - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, &cm->cur_frame->buf, - 0, 0, 0, num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, 0, 0, 0, num_planes, + NULL); ccso_stride = xd->plane[0].dst.width; ccso_stride_ext = xd->plane[0].dst.width + (CCSO_PADDING_SIZE << 1); derive_ccso_filter(cm, AOM_PLANE_U, xd, org_uv[AOM_PLANE_U - 1], ext_rec_y,
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c index e88cf87..2fb36b0 100644 --- a/av1/encoder/pickcdef.c +++ b/av1/encoder/pickcdef.c
@@ -431,8 +431,7 @@ const int total_strengths = nb_cdef_strengths[pick_method]; DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]); const int num_planes = av1_num_planes(cm); - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0, - num_planes); + av1_setup_dst_planes(xd->plane, frame, 0, 0, 0, num_planes, NULL); uint64_t(*mse[2])[TOTAL_STRENGTHS]; mse[0] = aom_malloc(sizeof(**mse) * nvfb * nhfb); mse[1] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c index 0e4e0a0..9da1e9e 100644 --- a/av1/encoder/rd.c +++ b/av1/encoder/rd.c
@@ -114,6 +114,12 @@ fc->partition_cdf[i], NULL); #endif +#if CONFIG_EXT_RECUR_PARTITIONS + for (i = 0; i < PARTITION_CONTEXTS_REC; ++i) + av1_cost_tokens_from_cdf(mode_costs->partition_rec_cost[i], + fc->partition_rec_cdf[i], NULL); +#endif // CONFIG_EXT_RECUR_PARTITIONS + if (cm->current_frame.skip_mode_info.skip_mode_flag) { for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) { av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i], @@ -1269,19 +1275,11 @@ const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; for (int i = 0; i < num_planes; ++i) { -#if CONFIG_SDP - setup_pred_plane(dst + i, xd->mi[0]->sb_type[i > 0 ? 1 : 0], dst[i].buf, - i ? src->uv_crop_width : src->y_crop_width, - i ? src->uv_crop_height : src->y_crop_height, - dst[i].stride, mi_row, mi_col, i ? scale_uv : scale, - xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); -#else - setup_pred_plane(dst + i, xd->mi[0]->sb_type, dst[i].buf, - i ? src->uv_crop_width : src->y_crop_width, - i ? src->uv_crop_height : src->y_crop_height, - dst[i].stride, mi_row, mi_col, i ? scale_uv : scale, - xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); -#endif + setup_pred_plane( + dst + i, dst[i].buf, i ? src->uv_crop_width : src->y_crop_width, + i ? src->uv_crop_height : src->y_crop_height, dst[i].stride, mi_row, + mi_col, i ? scale_uv : scale, xd->plane[i].subsampling_x, + xd->plane[i].subsampling_y, &xd->mi[0]->chroma_ref_info); } }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 5a8b113..4c39cc3 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -755,10 +755,15 @@ if (plane && !xd->is_chroma_ref) break; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_SDP +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bs = get_mb_plane_block_size( + xd, mbmi, plane, pd->subsampling_x, pd->subsampling_y); +#elif CONFIG_SDP const BLOCK_SIZE bs = get_plane_block_size( mbmi->sb_type[plane > 0], pd->subsampling_x, pd->subsampling_y); #else + // TODO(chiyotsai, yuec): This appears to be wrong when EXT_RECUR_PARTITIONS + // is on? const BLOCK_SIZE bs = get_plane_block_size(mbmi->sb_type, pd->subsampling_x, pd->subsampling_y); #endif @@ -3983,7 +3988,7 @@ } if (num_planes > 1) { - av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); + av1_txfm_uvrd(cpi, x, &rd_stats_uv, INT64_MAX); } else { av1_init_rd_stats(&rd_stats_uv); } @@ -4242,6 +4247,37 @@ return 0; } +static AOM_INLINE int is_ref_frame_used_by_compound_ref( + int ref_frame, int skip_ref_frame_mask) { + for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) { + if (!(skip_ref_frame_mask & (1 << r))) { + const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES]; + if (rf[0] == ref_frame || rf[1] == ref_frame) { + return 1; + } + } + } + return 0; +} + +#if CONFIG_EXT_RECUR_PARTITIONS +static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame, + const MB_MODE_INFO *mi_cache) { + if (!mi_cache) { + return 0; + } + + if (ref_frame < REF_FRAMES) { + return (ref_frame == mi_cache->ref_frame[0] || + ref_frame == mi_cache->ref_frame[1]); + } + + // if we are here, then the current mode is compound. + MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame); + return ref_frame == cached_ref_type; +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + // Please add/modify parameter setting in this function, making it consistent // and easy to read and maintain. 
static AOM_INLINE void set_params_rd_pick_inter_mode( @@ -4271,18 +4307,14 @@ if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) { if (mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) { - if (skip_ref_frame_mask & (1 << ref_frame)) { - int skip = 1; - for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) { - if (!(skip_ref_frame_mask & (1 << r))) { - const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES]; - if (rf[0] == ref_frame || rf[1] == ref_frame) { - skip = 0; - break; - } - } - } - if (skip) continue; + if (skip_ref_frame_mask & (1 << ref_frame) && + !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) +#if CONFIG_EXT_RECUR_PARTITIONS + && !(should_reuse_mode(x, REUSE_INTER_MODE_IN_INTERFRAME_FLAG) && + is_ref_frame_used_in_cache(ref_frame, x->inter_mode_cache)) +#endif // CONFIG_EXT_RECUR_PARTITIONS + ) { + continue; } } assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL); @@ -4308,7 +4340,12 @@ if (mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) { - if (skip_ref_frame_mask & (1 << ref_frame)) { + if (skip_ref_frame_mask & (1 << ref_frame) +#if CONFIG_EXT_RECUR_PARTITIONS + && !(should_reuse_mode(x, REUSE_INTER_MODE_IN_INTERFRAME_FLAG) && + is_ref_frame_used_in_cache(ref_frame, x->inter_mode_cache)) +#endif // CONFIG_EXT_RECUR_PARTITIONS + ) { continue; } } @@ -4332,7 +4369,7 @@ if (cpi->oxcf.motion_mode_cfg.enable_obmc && !cpi->sf.inter_sf.disable_obmc && !prune_obmc) { if (check_num_overlappable_neighbors(mbmi) && - is_motion_variation_allowed_bsize(bsize)) { + is_motion_variation_allowed_bsize(bsize, mi_row, mi_col)) { int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 }; @@ -4346,8 +4383,8 @@ dst_width2, dst_height2, args->left_pred_stride); const int num_planes = av1_num_planes(cm); - av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, - 
mi_col, 0, num_planes); + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, mi_row, mi_col, 0, + num_planes, &mbmi->chroma_ref_info); calc_target_weighted_pred( cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0], args->left_pred_buf[0], args->left_pred_stride[0]); @@ -4523,6 +4560,84 @@ return picked_ref_frames_mask; } +#if CONFIG_EXT_RECUR_PARTITIONS +static INLINE int is_mode_intra(PREDICTION_MODE mode) { + return mode < INTRA_MODE_END; +} + +// Reuse the prediction mode in cache. +// Returns 0 if no pruning is done, 1 if we are skipping the current mod +// completely, 2 if we skip compound only, but still try single motion modes +static INLINE int skip_inter_mode_with_cached_mode( + const MACROBLOCK *x, PREDICTION_MODE mode, + const MV_REFERENCE_FRAME *ref_frame) { + const MB_MODE_INFO *cached_mi = x->inter_mode_cache; + + // If there is no cache, then no pruning is possible. + if (!cached_mi) { + return 0; + } + + const PREDICTION_MODE cached_mode = cached_mi->mode; + const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame; + const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME; + + // If the cached mode is intra, then we just need to match the mode. + if (should_reuse_mode(x, REUSE_INTRA_MODE_IN_INTERFRAME_FLAG) && + is_mode_intra(cached_mode) && mode != cached_mode) { + return 1; + } + + // Returns 0 here if we are not reusing inter_modes + if (!should_reuse_mode(x, REUSE_INTER_MODE_IN_INTERFRAME_FLAG) || + !cached_mi) { + return 0; + } + + // If the cached mode is single inter mode, then we match the mode and + // reference frame. + if (cached_mode_is_single) { + if (mode != cached_mode || ref_frame[0] != cached_frame[0]) { + return 1; + } + } else { + // If the cached mode is compound, then we need to consider several cases. + const int mode_is_single = ref_frame[1] <= INTRA_FRAME; + if (mode_is_single) { + // If the mode is single, we know the modes can't match. 
But we might + // still want to search it if compound mode depends on the current mode. + int skip_motion_mode_only = 0; +#if CONFIG_NEW_INTER_MODES + if (cached_mode == NEW_NEARMV) { +#else + if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) { +#endif + skip_motion_mode_only = (ref_frame[0] == cached_frame[0]); +#if CONFIG_NEW_INTER_MODES + } else if (cached_mode == NEAR_NEWMV) { +#else + } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) { +#endif + skip_motion_mode_only = (ref_frame[0] == cached_frame[1]); + } else if (cached_mode == NEW_NEWMV) { + skip_motion_mode_only = (ref_frame[0] == cached_frame[0] || + ref_frame[0] == cached_frame[1]); + } + + return 1 + skip_motion_mode_only; + } else { + // If both modes are compound, then everything must match. + if (mode != cached_mode || ref_frame[0] != cached_frame[0] || + ref_frame[1] != cached_frame[1]) { + return 1; + } + } + } + + return 0; +} +#endif // CONFIG_EXT_RECUR_PARTITIONS + // Case 1: return 0, means don't skip this mode // Case 2: return 1, means skip this mode completely // Case 3: return 2, means skip compound only, but still try single motion modes @@ -4546,6 +4661,13 @@ if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) { return 1; } +#if CONFIG_EXT_RECUR_PARTITIONS + const int cached_skip_ret = + skip_inter_mode_with_cached_mode(x, mode, ref_frame); + if (cached_skip_ret > 0) { + return cached_skip_ret; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0]; // If no valid mode has been found so far in PARTITION_NONE when finding a @@ -4576,6 +4698,18 @@ } } } +#if CONFIG_EXT_RECUR_PARTITIONS + // If we are reusing the prediction from cache, and the current frame is + // required by the cache, then we cannot prune it. 
+ if (should_reuse_mode(x, REUSE_INTER_MODE_IN_INTERFRAME_FLAG) && + is_ref_frame_used_in_cache(ref_type, x->inter_mode_cache)) { + skip_ref = 0; + // If the cache only needs the current reference type for compound + // prediction, then we can skip motion mode search. + skip_motion_mode = (ref_type <= ALTREF_FRAME && + x->inter_mode_cache->ref_frame[1] > INTRA_FRAME); + } +#endif // CONFIG_EXT_RECUR_PARTITIONS if (skip_ref) return 1; } @@ -5899,17 +6033,26 @@ // Only try palette mode when the best mode so far is an intra mode. #if CONFIG_SDP - const int try_palette = - cpi->oxcf.tool_cfg.enable_palette && - av1_allow_palette(features->allow_screen_content_tools, - mbmi->sb_type[PLANE_TYPE_Y]) && - !is_inter_mode(search_state.best_mbmode.mode); + int try_palette = cpi->oxcf.tool_cfg.enable_palette && + av1_allow_palette(features->allow_screen_content_tools, + mbmi->sb_type[PLANE_TYPE_Y]) && + !is_inter_mode(search_state.best_mbmode.mode) && + rd_cost->rate < INT_MAX; #else - const int try_palette = + int try_palette = cpi->oxcf.tool_cfg.enable_palette && av1_allow_palette(features->allow_screen_content_tools, mbmi->sb_type) && - !is_inter_mode(search_state.best_mbmode.mode); + !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate < INT_MAX; #endif +#if CONFIG_EXT_RECUR_PARTITIONS + const MB_MODE_INFO *cached_mode = x->inter_mode_cache; + if (should_reuse_mode(x, REUSE_INTRA_MODE_IN_INTERFRAME_FLAG) && + cached_mode && + !(cached_mode->mode == DC_PRED && + cached_mode->palette_mode_info.palette_size[0] > 0)) { + try_palette = 0; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS RD_STATS this_rd_cost; int this_skippable = 0; if (try_palette) { @@ -6110,7 +6253,8 @@ mbmi->motion_mode = SIMPLE_TRANSLATION; av1_count_overlappable_neighbors(cm, xd); - if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) { + if (is_motion_variation_allowed_bsize(bsize, mi_row, mi_col) && + !has_second_ref(mbmi)) { int pts[SAMPLES_ARRAY_SIZE], 
pts_inref[SAMPLES_ARRAY_SIZE]; mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref); // Select the samples according to motion vector difference
diff --git a/av1/encoder/reconinter_enc.c b/av1/encoder/reconinter_enc.c index 288ab4c..4514c5e 100644 --- a/av1/encoder/reconinter_enc.c +++ b/av1/encoder/reconinter_enc.c
@@ -43,32 +43,54 @@ (void)mc_buf; const struct scale_factors *sf = inter_pred_params->scale_factors; - struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf; - int ssx = inter_pred_params->subsampling_x; - int ssy = inter_pred_params->subsampling_y; - int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS; - orig_pos_y += src_mv->row * (1 << (1 - ssy)); - int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS; - orig_pos_x += src_mv->col * (1 << (1 - ssx)); - int pos_y = sf->scale_value_y(orig_pos_y, sf); - int pos_x = sf->scale_value_x(orig_pos_x, sf); - pos_x += SCALE_EXTRA_OFF; - pos_y += SCALE_EXTRA_OFF; +#if CONFIG_EXT_RECUR_PARTITIONS + const int is_scaled = av1_is_scaled(sf); + if (is_scaled || !xd) { +#endif // CONFIG_EXT_RECUR_PARTITIONS + int ssx = inter_pred_params->subsampling_x; + int ssy = inter_pred_params->subsampling_y; + int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS; + orig_pos_y += src_mv->row * (1 << (1 - ssy)); + int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS; + orig_pos_x += src_mv->col * (1 << (1 - ssx)); + int pos_y = sf->scale_value_y(orig_pos_y, sf); + int pos_x = sf->scale_value_x(orig_pos_x, sf); + pos_x += SCALE_EXTRA_OFF; + pos_y += SCALE_EXTRA_OFF; - const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy); - const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx); - const int bottom = (pre_buf->height + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS; - const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS; - pos_y = clamp(pos_y, top, bottom); - pos_x = clamp(pos_x, left, right); + const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy); + const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx); + const int bottom = (pre_buf->height + AOM_INTERP_EXTEND) + << SCALE_SUBPEL_BITS; + const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS; + pos_y = clamp(pos_y, top, bottom); + pos_x = clamp(pos_x, left, right); - subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK; - subpel_params->subpel_y = 
pos_y & SCALE_SUBPEL_MASK; - subpel_params->xs = sf->x_step_q4; - subpel_params->ys = sf->y_step_q4; - *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride + - (pos_x >> SCALE_SUBPEL_BITS); + subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK; + subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK; + subpel_params->xs = sf->x_step_q4; + subpel_params->ys = sf->y_step_q4; + *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride + + (pos_x >> SCALE_SUBPEL_BITS); +#if CONFIG_EXT_RECUR_PARTITIONS + } else { + int pos_x = inter_pred_params->pix_col << SUBPEL_BITS; + int pos_y = inter_pred_params->pix_row << SUBPEL_BITS; + const int bw = inter_pred_params->block_width; + const int bh = inter_pred_params->block_height; + const MV mv_q4 = clamp_mv_to_umv_border_sb( + xd, src_mv, bw, bh, inter_pred_params->subsampling_x, + inter_pred_params->subsampling_y); + subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS; + subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS; + subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS; + pos_x += mv_q4.col; + pos_y += mv_q4.row; + *pre = pre_buf->buf0 + (pos_y >> SUBPEL_BITS) * pre_buf->stride + + (pos_x >> SUBPEL_BITS); + } +#endif // CONFIG_EXT_RECUR_PARTITIONS *src_stride = pre_buf->stride; } @@ -147,21 +169,15 @@ int mi_col_offset, MB_MODE_INFO *ref_mbmi, struct build_prediction_ctxt *ctxt, const int num_planes) { -#if CONFIG_SDP - const BLOCK_SIZE ref_bsize = - AOMMAX(BLOCK_8X8, ref_mbmi->sb_type[PLANE_TYPE_Y]); -#else - const BLOCK_SIZE ref_bsize = AOMMAX(BLOCK_8X8, ref_mbmi->sb_type); -#endif const int ref_mi_row = xd->mi_row + mi_row_offset; const int ref_mi_col = xd->mi_col + mi_col_offset; for (int plane = 0; plane < num_planes; ++plane) { struct macroblockd_plane *const pd = &xd->plane[plane]; - setup_pred_plane(&pd->dst, ref_bsize, ctxt->tmp_buf[plane], - ctxt->tmp_width[plane], ctxt->tmp_height[plane], - ctxt->tmp_stride[plane], mi_row_offset, 
mi_col_offset, - NULL, pd->subsampling_x, pd->subsampling_y); + setup_pred_plane(&pd->dst, ctxt->tmp_buf[plane], ctxt->tmp_width[plane], + ctxt->tmp_height[plane], ctxt->tmp_stride[plane], + mi_row_offset, mi_col_offset, NULL, pd->subsampling_x, + pd->subsampling_y, NULL); } const MV_REFERENCE_FRAME frame = ref_mbmi->ref_frame[0]; @@ -176,7 +192,7 @@ "Reference frame has invalid dimensions"); av1_setup_pre_planes(xd, 0, &ref_buf->buf, ref_mi_row, ref_mi_col, sf, - num_planes); + num_planes, NULL); } static INLINE void build_obmc_prediction(MACROBLOCKD *xd, int rel_mi_row, @@ -291,13 +307,8 @@ dst_stride1); av1_build_prediction_by_left_preds(cm, xd, dst_buf2, dst_width2, dst_height2, dst_stride2); -#if CONFIG_SDP - av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type[PLANE_TYPE_Y], - &cm->cur_frame->buf, mi_row, mi_col, 0, num_planes); -#else - av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type, &cm->cur_frame->buf, - mi_row, mi_col, 0, num_planes); -#endif + av1_setup_dst_planes(xd->plane, &cm->cur_frame->buf, mi_row, mi_col, 0, + num_planes, &xd->mi[0]->chroma_ref_info); av1_build_obmc_inter_prediction(cm, xd, dst_buf1, dst_stride1, dst_buf2, dst_stride2); } @@ -305,7 +316,6 @@ void av1_build_inter_predictors_for_planes_single_buf( MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]) { - assert(bsize < BLOCK_SIZES_ALL); const MB_MODE_INFO *mi = xd->mi[0]; const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; @@ -318,8 +328,18 @@ for (int plane = plane_from; plane <= plane_to; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = get_mb_plane_block_size( + xd, mi, plane, pd->subsampling_x, pd->subsampling_y); +#if CONFIG_SDP + assert(plane_bsize == + get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y)); +#endif // CONFIG_SDP + (void)bsize; +#else const BLOCK_SIZE plane_bsize = 
get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP const int bw = block_size_wide[plane_bsize]; const int bh = block_size_high[plane_bsize]; @@ -443,10 +463,21 @@ uint8_t *ext_dst1[3], int ext_dst_stride1[3]) { int plane; - assert(bsize < BLOCK_SIZES_ALL); for (plane = plane_from; plane <= plane_to; ++plane) { +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = get_mb_plane_block_size( + xd, xd->mi[0], plane, xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y); +#if CONFIG_SDP + assert(plane_bsize == get_plane_block_size(bsize, + xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y)); +#endif // CONFIG_SDP + (void)bsize; +#else const BLOCK_SIZE plane_bsize = get_plane_block_size( bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP const int bw = block_size_wide[plane_bsize]; const int bh = block_size_high[plane_bsize]; build_wedge_inter_predictor_from_buf(
diff --git a/av1/encoder/reconinter_enc.h b/av1/encoder/reconinter_enc.h index fdc1f31..9347523 100644 --- a/av1/encoder/reconinter_enc.h +++ b/av1/encoder/reconinter_enc.h
@@ -54,6 +54,7 @@ void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd); +// TODO(any): Refactor bsize out of the function signature void av1_build_inter_predictors_for_planes_single_buf( MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]);
diff --git a/av1/encoder/segmentation.c b/av1/encoder/segmentation.c index a240b59..66b206c 100644 --- a/av1/encoder/segmentation.c +++ b/av1/encoder/segmentation.c
@@ -53,8 +53,9 @@ if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; xd->mi = mi; + assert(xd->mi && xd->mi[0]); set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, mi_params->mi_rows, - mi_params->mi_cols); + mi_params->mi_cols, &xd->mi[0]->chroma_ref_info); // Count the number of hits on each segment with no prediction const int segment_id = xd->mi[0]->segment_id; @@ -124,6 +125,19 @@ CSEGS(hbs, bs, 0, 0); CSEGS(hbs, bs, 0, hbs); break; +#if CONFIG_EXT_RECUR_PARTITIONS + case PARTITION_HORZ_3: + CSEGS(bs, qbs, 0, 0); + CSEGS(bs, hbs, qbs, 0); + if (mi_row + 3 * qbs < mi_params->mi_rows) CSEGS(bs, qbs, 3 * qbs, 0); + break; + + case PARTITION_VERT_3: + CSEGS(qbs, bs, 0, 0); + CSEGS(hbs, bs, 0, qbs); + if (mi_col + 3 * qbs < mi_params->mi_cols) CSEGS(qbs, bs, 0, 3 * qbs); + break; +#else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_HORZ_A: CSEGS(hbs, hbs, 0, 0); CSEGS(hbs, hbs, 0, hbs); @@ -150,14 +164,13 @@ CSEGS(bs, qbs, 2 * qbs, 0); if (mi_row + 3 * qbs < mi_params->mi_rows) CSEGS(bs, qbs, 3 * qbs, 0); break; - case PARTITION_VERT_4: CSEGS(qbs, bs, 0, 0); CSEGS(qbs, bs, 0, qbs); CSEGS(qbs, bs, 0, 2 * qbs); if (mi_col + 3 * qbs < mi_params->mi_cols) CSEGS(qbs, bs, 0, 3 * qbs); break; - +#endif // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_SPLIT: { const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); int n;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index cc42808..c47861d 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -153,7 +153,11 @@ else sf->part_sf.auto_max_partition_based_on_simple_motion = RELAXED_PRED; } else { +#if CONFIG_EXT_RECUR_PARTITIONS + sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; +#else // CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64; +#endif // CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED; } @@ -181,9 +185,17 @@ if (is_720p_or_larger) { sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; } else if (is_480p_or_larger) { +#if CONFIG_EXT_RECUR_PARTITIONS + sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; +#else // CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64; +#endif // CONFIG_EXT_RECUR_PARTITIONS } else { +#if CONFIG_EXT_RECUR_PARTITIONS + sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; +#else // CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32; +#endif // CONFIG_EXT_RECUR_PARTITIONS } if (!is_720p_or_larger) { @@ -197,6 +209,9 @@ } if (speed >= 2) { +#if CONFIG_EXT_RECUR_PARTITIONS + sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; +#else // CONFIG_EXT_RECUR_PARTITIONS if (is_720p_or_larger) { sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64; } else if (is_480p_or_larger) { @@ -204,6 +219,7 @@ } else { sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32; } +#endif // CONFIG_EXT_RECUR_PARTITIONS if (is_720p_or_larger) { sf->part_sf.partition_search_breakout_dist_thr = (1 << 24); @@ -309,12 +325,25 @@ sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3; sf->part_sf.less_rectangular_check_level = 1; +#if CONFIG_EXT_RECUR_PARTITIONS + sf->part_sf.enable_fast_erp = 0; + sf->part_sf.ml_prune_4_partition = 0; + sf->part_sf.ml_prune_ab_partition = 0; + + sf->part_sf.prune_part_3_with_part_none = 1; +#else // CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.ml_prune_4_partition = 1; 
sf->part_sf.ml_prune_ab_partition = 1; +#endif // CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.ml_prune_rect_partition = 1; sf->part_sf.prune_ext_partition_types_search_level = 1; sf->part_sf.simple_motion_search_prune_rect = 1; +#if CONFIG_EXT_RECUR_PARTITIONS + sf->inter_sf.reuse_erp_mode_flag = + (REUSE_PARTITION_MODE_FLAG | REUSE_INTER_MODE_IN_INTERFRAME_FLAG | + REUSE_INTRA_MODE_IN_INTERFRAME_FLAG); +#endif // CONFIG_EXT_RECUR_PARTITIONS sf->inter_sf.disable_wedge_search_var_thresh = 0; // TODO(debargha): Test, tweak and turn on either 1 or 2 sf->inter_sf.inter_mode_rd_model_estimation = 1; @@ -362,7 +391,11 @@ sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2; sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1; +#if CONFIG_EXT_RECUR_PARTITIONS + sf->part_sf.intra_cnn_split = 0; +#else // CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.intra_cnn_split = 1; +#endif // CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.simple_motion_search_early_term_none = 1; // TODO(Venkat): Clean-up frame type dependency for // simple_motion_search_split in partition search function and set the @@ -705,6 +738,10 @@ part_sf->prune_4_partition_using_split_info = 0; part_sf->prune_ab_partition_using_split_info = 0; part_sf->early_term_after_none_split = 0; +#if CONFIG_EXT_RECUR_PARTITIONS + part_sf->enable_fast_erp = 0; + part_sf->prune_part_3_with_part_none = 1; +#endif // CONFIG_EXT_RECUR_PARTITIONS } static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) { @@ -771,6 +808,9 @@ inter_sf->txfm_rd_gate_level = 0; inter_sf->prune_inter_modes_if_skippable = 0; inter_sf->disable_masked_comp = 0; +#if CONFIG_EXT_RECUR_PARTITIONS + inter_sf->reuse_erp_mode_flag = 0; +#endif // CONFIG_EXT_RECUR_PARTITIONS } static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) { @@ -897,7 +937,9 @@ part_sf->ml_prune_ab_partition = 0; part_sf->ml_prune_rect_partition = 0; part_sf->ml_early_term_after_part_split_level = 0; +#if !CONFIG_EXT_RECUR_PARTITIONS 
part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE; +#endif // !CONFIG_EXT_RECUR_PARTITIONS part_sf->intra_cnn_split = 0; part_sf->simple_motion_search_split = 0; part_sf->simple_motion_search_prune_rect = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h index d52d2fc..eb7db43 100644 --- a/av1/encoder/speed_features.h +++ b/av1/encoder/speed_features.h
@@ -310,6 +310,25 @@ } UENUM1BYTE(SUPERRES_AUTO_SEARCH_TYPE); /*!\endcond */ +#if CONFIG_EXT_RECUR_PARTITIONS +/*! \brief Used with \ref MACROBLOCK::reuse_inter_mode_cache_type to determine + * whether partition mode is reused. */ +#define REUSE_PARTITION_MODE_FLAG (1 << 0) + +/*! \brief Used with \ref MACROBLOCK::reuse_inter_mode_cache_type to determine + * whether the intra prediction_mode is reused. */ +#define REUSE_INTRA_MODE_IN_INTERFRAME_FLAG (1 << 1) + +/*! \brief Used with \ref MACROBLOCK::reuse_inter_mode_cache_type to determine + * whether the inter prediction_mode and ref frame are reused. */ +#define REUSE_INTER_MODE_IN_INTERFRAME_FLAG (1 << 2) + +/*! \brief Used with \ref MACROBLOCK::reuse_inter_mode_cache_type to signal + * reuse of inter and intra prediction_modes, as well as ref frame. */ +#define REUSE_INTERFRAME_FLAG \ + (REUSE_INTRA_MODE_IN_INTERFRAME_FLAG | REUSE_INTER_MODE_IN_INTERFRAME_FLAG) +#endif // CONFIG_EXT_RECUR_PARTITIONS + /*! * \brief Sequence/frame level speed vs quality features */ @@ -508,6 +527,13 @@ // Terminate partition search for child partition, // when NONE and SPLIT partition rd_costs are INT64_MAX. int early_term_after_none_split; + +#if CONFIG_EXT_RECUR_PARTITIONS + int enable_fast_erp; + + // Prunes PARTITION_3 if PARTITION_NONE is used instead of PARTITION_HORZ|VERT + int prune_part_3_with_part_none; +#endif // CONFIG_EXT_RECUR_PARTITIONS } PARTITION_SPEED_FEATURES; typedef struct MV_SPEED_FEATURES { @@ -758,6 +784,13 @@ // Enable/disable masked compound. int disable_masked_comp; + +#if CONFIG_EXT_RECUR_PARTITIONS + // Under ERP, determines whether to reuse partition mode and prediction mode + // if a block with the same (mi_row, mi_col, bsize) is visited more than once + // by the encoder. + int reuse_erp_mode_flag; +#endif // CONFIG_EXT_RECUR_PARTITIONS } INTER_MODE_SPEED_FEATURES; typedef struct INTERP_FILTER_SPEED_FEATURES {
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c index 1613bf8..de7a189 100644 --- a/av1/encoder/tokenize.c +++ b/av1/encoder/tokenize.c
@@ -141,7 +141,14 @@ const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; -#if CONFIG_SDP +#if CONFIG_SDP && CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize_base = get_bsize_base(xd, mbmi, plane); + const TX_SIZE plane_tx_size = + plane ? av1_get_max_uv_txsize(bsize_base, pd->subsampling_x, + pd->subsampling_y) + : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, + blk_col)]; +#elif CONFIG_SDP const TX_SIZE plane_tx_size = plane ? av1_get_max_uv_txsize(mbmi->sb_type[xd->tree_type == CHROMA_PART], pd->subsampling_x, pd->subsampling_y) @@ -149,24 +156,27 @@ blk_col)]; #else const TX_SIZE plane_tx_size = - plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y) + plane ? av1_get_max_uv_txsize(mbmi->chroma_ref_info.bsize_base, + pd->subsampling_x, pd->subsampling_y) : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, blk_col)]; #endif if (tx_size == plane_tx_size || plane) { -#if CONFIG_SDP - plane_bsize = - get_plane_block_size(mbmi->sb_type[xd->tree_type == CHROMA_PART], - pd->subsampling_x, pd->subsampling_y); +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + plane_bsize = get_mb_plane_block_size(xd, mbmi, plane, pd->subsampling_x, + pd->subsampling_y); +#if !CONFIG_EXT_RECUR_PARTITIONS + assert(plane_bsize == + get_plane_block_size(mbmi->sb_type[xd->tree_type == CHROMA_PART], + pd->subsampling_x, pd->subsampling_y)); +#endif // !CONFIG_EXT_RECUR_PARTITIONS #else plane_bsize = get_plane_block_size(mbmi->sb_type, pd->subsampling_x, pd->subsampling_y); -#endif +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP av1_update_and_record_txb_context(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg); - } else { #if CONFIG_NEW_TX_PARTITION TX_SIZE sub_txs[MAX_TX_PARTITIONS] = { 0 }; @@ -240,8 +250,12 @@ #if CONFIG_SDP if (mbmi->skip_txfm[xd->tree_type == CHROMA_PART]) { #else + assert(mbmi->sb_type < BLOCK_SIZES_ALL); 
if (mbmi->skip_txfm) { #endif +#if CONFIG_SDP + assert(bsize == mbmi->sb_type[av1_get_sdp_idx(xd->tree_type)]); +#endif // CONFIG_SDP av1_reset_entropy_context(xd, bsize, num_planes); return; } @@ -256,7 +270,20 @@ const struct macroblockd_plane *const pd = &xd->plane[plane]; const int ss_x = pd->subsampling_x; const int ss_y = pd->subsampling_y; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y); +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE plane_bsize = + get_mb_plane_block_size(xd, mbmi, plane, ss_x, ss_y); +#if CONFIG_SDP + const BLOCK_SIZE bsize_base = + plane ? mbmi->chroma_ref_info.bsize_base : bsize; + assert(plane_bsize == get_plane_block_size(bsize_base, ss_x, ss_y)); + (void)bsize_base; +#endif // CONFIG_SDP +#else + const BLOCK_SIZE bsize_base = + plane ? mbmi->chroma_ref_info.bsize_base : bsize; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize_base, ss_x, ss_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP assert(plane_bsize < BLOCK_SIZES_ALL); const int mi_width = mi_size_wide[plane_bsize]; const int mi_height = mi_size_high[plane_bsize];
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c index 6a2e5cc..3928c62 100644 --- a/av1/encoder/tpl_model.c +++ b/av1/encoder/tpl_model.c
@@ -275,9 +275,9 @@ set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd, mi_row, mi_col); set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width, - cm->mi_params.mi_rows, cm->mi_params.mi_cols); - set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], - av1_num_planes(cm)); + cm->mi_params.mi_rows, cm->mi_params.mi_cols, NULL); + set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], av1_num_planes(cm), + NULL); #if CONFIG_SDP xd->mi[0]->sb_type[xd->tree_type == CHROMA_PART] = bsize; #else @@ -783,6 +783,12 @@ tpl_reset_src_ref_frames(tpl_data); av1_tile_init(&xd->tile, cm, 0, 0); + // TODO(any): The tiles are not being set correctly by av1_tile_init above as + // it always assumes the first tile is used. We set the tile size here as a + // hack. + xd->tile.mi_row_end = cm->mi_params.mi_rows; + xd->tile.mi_col_end = cm->mi_params.mi_cols; + // Setup scaling factor av1_setup_scale_factors_for_frame( &tpl_data->sf, this_frame->y_crop_width, this_frame->y_crop_height,
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c index 9b7ae7f..f109006 100644 --- a/av1/encoder/tx_search.c +++ b/av1/encoder/tx_search.c
@@ -9,6 +9,7 @@ * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ +#include "av1/common/blockd.h" #include "av1/common/cfl.h" #include "av1/common/reconintra.h" #include "av1/encoder/block.h" @@ -907,8 +908,13 @@ for (int plane = 0; plane < num_planes; ++plane) { const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; +#if CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP + const BLOCK_SIZE bs = get_mb_plane_block_size( + xd, mbmi, plane, pd->subsampling_x, pd->subsampling_y); +#else const BLOCK_SIZE bs = get_plane_block_size(mbmi->sb_type, pd->subsampling_x, pd->subsampling_y); +#endif // CONFIG_EXT_RECUR_PARTITIONS || CONFIG_SDP unsigned int sse; if (x->skip_chroma_rd && plane) continue; @@ -1337,7 +1343,12 @@ tx_size_wide[tx_size] == tx_size_high[tx_size]); #else assert(cpi->sf.tx_sf.use_intra_txb_hash && +#if CONFIG_SDP + frame_is_intra_only(&cpi->common) && + !is_inter_block(xd->mi[0], xd->tree_type) && +#else frame_is_intra_only(&cpi->common) && !is_inter_block(xd->mi[0]) && +#endif plane == 0 && tx_size_wide[tx_size] == tx_size_high[tx_size]); #endif const uint32_t intra_hash = @@ -2421,6 +2432,11 @@ // Therefore transform domain distortion is not valid for these // transform sizes. 
(txsize_sqr_up_map[tx_size] != TX_64X64) && +#if CONFIG_IST + // Use pixel domain distortion for IST + // TODO(any): Make IST compatible with tx domain distortion + !cm->seq_params.enable_ist && +#endif // Use pixel domain distortion for DC only blocks !dc_only_blk; // Flag to indicate if an extra calculation of distortion in the pixel domain @@ -3482,7 +3498,7 @@ if (plane == AOM_PLANE_Y && xd->cfl.store_y) { #endif assert(!is_inter || plane_bsize < BLOCK_8X8); - cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize); + cfl_store_tx(xd, blk_row, blk_col, tx_size); } #if CONFIG_RD_DEBUG @@ -4086,7 +4102,7 @@ } int av1_txfm_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats, - BLOCK_SIZE bsize, int64_t ref_best_rd) { + int64_t ref_best_rd) { av1_init_rd_stats(rd_stats); if (ref_best_rd < 0) return 0; if (!x->e_mbd.is_chroma_ref) return 1; @@ -4100,8 +4116,13 @@ const int is_inter = is_inter_block(mbmi); #endif int64_t this_rd = 0, skip_txfm_rd = 0; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); +#if CONFIG_SDP || CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE plane_bsize = get_mb_plane_block_size( + xd, mbmi, AOM_PLANE_U, pd->subsampling_x, pd->subsampling_y); +#else + const BLOCK_SIZE plane_bsize = get_plane_block_size( + mbmi->chroma_ref_info.bsize_base, pd->subsampling_x, pd->subsampling_y); +#endif // CONFIG_SDP || CONFIG_EXT_RECUR_PARTITIONS if (is_inter) { for (int plane = 1; plane < MAX_MB_PLANE; ++plane) @@ -4273,7 +4294,7 @@ AOMMIN(non_skip_txfm_rdcosty, skip_txfm_rdcosty)); } const int is_cost_valid_uv = - av1_txfm_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_chroma_rd); + av1_txfm_uvrd(cpi, x, rd_stats_uv, ref_best_chroma_rd); if (!is_cost_valid_uv) return 0; av1_merge_rd_stats(rd_stats, rd_stats_uv); }
diff --git a/av1/encoder/tx_search.h b/av1/encoder/tx_search.h index 5a5d259..f5d145f 100644 --- a/av1/encoder/tx_search.h +++ b/av1/encoder/tx_search.h
@@ -213,13 +213,12 @@ * \param[in] x Pointer to structure holding the data for the current encoding macroblock * \param[in] rd_stats Pointer to struct to keep track of the RD stats - * \param[in] bsize Current macroblock size * \param[in] ref_best_rd Best RD cost seen for this block so far * \return An integer value is returned. 0: early termination triggered, no valid rd cost available; 1: rd cost values are valid. */ int av1_txfm_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats, - BLOCK_SIZE bsize, int64_t ref_best_rd); + int64_t ref_best_rd); /*!\brief Transform type search with fixed transform size. *
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake index ddab3cc..3399929 100644 --- a/build/cmake/aom_config_defaults.cmake +++ b/build/cmake/aom_config_defaults.cmake
@@ -139,7 +139,8 @@ "AV2 experiment flag to remove dist_wtd_comp tool.") set_aom_config_var(CONFIG_REMOVE_DUAL_FILTER 1 "AV2 experiment flag to remove dual filter.") - +set_aom_config_var(CONFIG_EXT_RECUR_PARTITIONS 0 NUMBER + "AV2 Fully recursive partitions experiment flag") set_aom_config_var(CONFIG_SDP 1 NUMBER "AV2 Semi-Decoupled Partitioning.") set_aom_config_var(CONFIG_EXTQUANT 1 "AV2 extended quantization experiment flag")
diff --git a/test/intrabc_test.cc b/test/intrabc_test.cc index b57eb6f..2081f8b 100644 --- a/test/intrabc_test.cc +++ b/test/intrabc_test.cc
@@ -159,9 +159,7 @@ for (const DvTestCase &dv_case : kDvCases) { const int mi_row = xd.tile.mi_row_start + dv_case.mi_row_offset; const int mi_col = xd.tile.mi_col_start + dv_case.mi_col_offset; - xd.is_chroma_ref = is_chroma_reference(mi_row, mi_col, dv_case.bsize, - xd.plane[1].subsampling_x, - xd.plane[1].subsampling_y); + xd.is_chroma_ref = 1; EXPECT_EQ(static_cast<int>(dv_case.valid), av1_is_dv_valid(dv_case.dv, &cm, &xd, mi_row, mi_col, dv_case.bsize, MAX_MIB_SIZE_LOG2));
diff --git a/test/sad_test.cc b/test/sad_test.cc index 963098e..c734789 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc
@@ -2077,8 +2077,8 @@ make_tuple(32, 32, &aom_sad32x32x4d_sse2, -1), make_tuple(32, 16, &aom_sad32x16x4d_sse2, -1), make_tuple(16, 32, &aom_sad16x32x4d_sse2, -1), - make_tuple(16, 16, &aom_sad16x16x4d_sse2, -1), make_tuple(16, 8, &aom_sad16x8x4d_sse2, -1), + make_tuple(16, 16, &aom_sad16x16x4d_sse2, -1), make_tuple(8, 16, &aom_sad8x16x4d_sse2, -1), make_tuple(8, 8, &aom_sad8x8x4d_sse2, -1), make_tuple(8, 4, &aom_sad8x4x4d_sse2, -1),
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc index 2fb783d..56a3686 100644 --- a/test/test_intra_pred_speed.cc +++ b/test/test_intra_pred_speed.cc
@@ -624,15 +624,15 @@ aom_dc_top_predictor_16x32_sse2, aom_dc_128_predictor_16x32_sse2, aom_v_predictor_16x32_sse2, aom_h_predictor_16x32_sse2, NULL, NULL, NULL, NULL) +INTRA_PRED_TEST(SSE2_5, TX_16X4, aom_dc_predictor_16x4_sse2, + aom_dc_left_predictor_16x4_sse2, aom_dc_top_predictor_16x4_sse2, + aom_dc_128_predictor_16x4_sse2, aom_v_predictor_16x4_sse2, + aom_h_predictor_16x4_sse2, NULL, NULL, NULL, NULL) INTRA_PRED_TEST(SSE2_4, TX_16X64, aom_dc_predictor_16x64_sse2, aom_dc_left_predictor_16x64_sse2, aom_dc_top_predictor_16x64_sse2, aom_dc_128_predictor_16x64_sse2, aom_v_predictor_16x64_sse2, aom_h_predictor_16x64_sse2, NULL, NULL, NULL, NULL) -INTRA_PRED_TEST(SSE2_5, TX_16X4, aom_dc_predictor_16x4_sse2, - aom_dc_left_predictor_16x4_sse2, aom_dc_top_predictor_16x4_sse2, - aom_dc_128_predictor_16x4_sse2, aom_v_predictor_16x4_sse2, - aom_h_predictor_16x4_sse2, NULL, NULL, NULL, NULL) #endif // HAVE_SSE2 #if HAVE_SSSE3
diff --git a/test/variance_test.cc b/test/variance_test.cc index e566f8f..3cc76cd 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc
@@ -2583,8 +2583,8 @@ SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0), SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0), SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0), - SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0), - SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2, + SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2, 0), + SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0))); #endif // HAVE_AVX2
diff --git a/tools/aom_entropy_optimizer.c b/tools/aom_entropy_optimizer.c index ff5ab22..ab279c2 100644 --- a/tools/aom_entropy_optimizer.c +++ b/tools/aom_entropy_optimizer.c
@@ -326,14 +326,34 @@ /* block partition */ cts_each_dim[0] = PARTITION_CONTEXTS; cts_each_dim[1] = EXT_PARTITION_TYPES; +#if CONFIG_EXT_RECUR_PARTITIONS + int part_types_each_ctx[PARTITION_CONTEXTS] = { + 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3 + }; +#else int part_types_each_ctx[PARTITION_CONTEXTS] = { 4, 4, 4, 4, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 8, 8, 8 }; +#endif // CONFIG_EXT_RECUR_PARTITIONS optimize_cdf_table_var_modes_2d( &fc.partition[0][0], probsfile, 2, cts_each_dim, part_types_each_ctx, "static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS]" "[CDF_SIZE(EXT_PARTITION_TYPES)]"); +#if CONFIG_EXT_RECUR_PARTITIONS + cts_each_dim[0] = PARTITION_CONTEXTS_REC; + cts_each_dim[1] = PARTITION_TYPES_REC; + int part_types_each_ctx_rec[PARTITION_CONTEXTS_REC] = { 2, 2, 2, 2, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, + 4, 4, 3, 3, 3, 3 }; + optimize_cdf_table_var_modes_2d( + &fc.partition_rec[0][0], probsfile, 2, cts_each_dim, + part_types_each_ctx_rec, + "static const aom_cdf_prob " + "default_partition_rec_cdf[PARTITION_CONTEXTS_REC]" + "[CDF_SIZE(PARTITION_TYPES_REC)]"); +#endif // CONFIG_EXT_RECUR_PARTITIONS + /* tx type */ cts_each_dim[0] = EXT_TX_SETS_INTRA; cts_each_dim[1] = EXT_TX_SIZES;