Merge "Extend warp_frame functions to average compound predictions" into nextgenv2
diff --git a/aom/aomdx.h b/aom/aomdx.h
index f9d1566..19256fb 100644
--- a/aom/aomdx.h
+++ b/aom/aomdx.h
@@ -37,6 +37,10 @@
extern aom_codec_iface_t *aom_codec_av1_dx(void);
/*!@} - end algorithm interface member group*/
+/** Data structure that stores bit accounting information for debugging.
+ */
+typedef struct Accounting Accounting;
+
/*!\enum aom_dec_control_id
* \brief AOM decoder control functions
*
@@ -103,6 +107,14 @@
*/
AV1_SET_SKIP_LOOP_FILTER,
+ /** control function to retrieve a pointer to the Accounting struct. When
+ * compiled without --enable-accounting, this returns AOM_CODEC_INCAPABLE.
+ * If called before a frame has been decoded, this returns AOM_CODEC_ERROR.
+ * The caller should ensure that AOM_CODEC_OK is returned before attempting
+ * to dereference the Accounting pointer.
+ */
+ AV1_GET_ACCOUNTING,
+
AOM_DECODER_CTRL_ID_MAX,
/** control function to set the range of tile decoding. A value that is
@@ -163,12 +175,14 @@
#define AOM_CTRL_AV1D_GET_FRAME_SIZE
AOM_CTRL_USE_TYPE(AV1_INVERT_TILE_DECODE_ORDER, int)
#define AOM_CTRL_AV1_INVERT_TILE_DECODE_ORDER
+AOM_CTRL_USE_TYPE(AV1_GET_ACCOUNTING, Accounting **)
+#define AOM_CTRL_AV1_GET_ACCOUNTING
AOM_CTRL_USE_TYPE(AV1_SET_DECODE_TILE_ROW, int)
#define AOM_CTRL_AV1_SET_DECODE_TILE_ROW
AOM_CTRL_USE_TYPE(AV1_SET_DECODE_TILE_COL, int)
#define AOM_CTRL_AV1_SET_DECODE_TILE_COL
/*!\endcond */
-/*! @} - end defgroup vp8_decoder */
+/*! @} - end defgroup aom_decoder */
#ifdef __cplusplus
} // extern "C"
diff --git a/aom_dsp/ans.c b/aom_dsp/ans.c
index 30f115c..6d705cd 100644
--- a/aom_dsp/ans.c
+++ b/aom_dsp/ans.c
@@ -39,7 +39,7 @@
const int out_syms = in_syms + 1;
assert(src_pdf != out_pdf);
- out_pdf[0] = node_prob << (10 - 8);
+ out_pdf[0] = node_prob << (RANS_PROB_BITS - ANS_P8_SHIFT);
adjustment -= out_pdf[0];
for (i = 0; i < in_syms; ++i) {
int p = (p1 * src_pdf[i] + round_fact) >> ANS_P8_SHIFT;
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index 28e7f12..4735199 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -205,6 +205,7 @@
ifeq ($(ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm
endif
+DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.h
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/txfm_common_avx2.h
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h
diff --git a/aom_dsp/x86/fwd_txfm_avx2.c b/aom_dsp/x86/fwd_txfm_avx2.c
index 670f864..d381a6e 100644
--- a/aom_dsp/x86/fwd_txfm_avx2.c
+++ b/aom_dsp/x86/fwd_txfm_avx2.c
@@ -17,6 +17,14 @@
#undef FDCT32x32_2D_AVX2
#undef FDCT32x32_HIGH_PRECISION
+// TODO(luoyi): The following macro hides an error. The type of the second
+// parameter of the function
+//   void FDCT32x32_2D_AVX2(const int16_t *, int16_t*, int);
+// differs from that of
+//   void aom_fdct32x32_avx2(const int16_t *, tran_low_t*, int);
+// In a CONFIG_AOM_HIGHBITDEPTH=1 build, the second parameter type should be
+// int32_t.
+// This function should be removed after the av1_fht32x32 scaling/rounding fix.
#define FDCT32x32_2D_AVX2 aom_fdct32x32_avx2
#define FDCT32x32_HIGH_PRECISION 1
#include "aom_dsp/x86/fwd_dct32x32_impl_avx2.h" // NOLINT
diff --git a/aom_dsp/x86/fwd_txfm_avx2.h b/aom_dsp/x86/fwd_txfm_avx2.h
new file mode 100644
index 0000000..2c3cfc8
--- /dev/null
+++ b/aom_dsp/x86/fwd_txfm_avx2.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_X86_FWD_TXFM_AVX2_H
+#define AOM_DSP_X86_FWD_TXFM_AVX2_H
+
+#include "./aom_config.h"
+
+static INLINE void storeu_output_avx2(const __m256i *coeff, tran_low_t *out) {
+#if CONFIG_AOM_HIGHBITDEPTH  // widen the 16 int16 lanes to 32 bits each
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i sign = _mm256_cmpgt_epi16(zero, *coeff);  // all ones if < 0
+ // Interleave each lane with its sign mask, i.e. sign-extend it to 32 bits.
+ __m256i x0 = _mm256_unpacklo_epi16(*coeff, sign);
+ __m256i x1 = _mm256_unpackhi_epi16(*coeff, sign);
+ // Regroup 128-bit halves: y0 = low halves of x0/x1, y1 = their high halves.
+ __m256i y0 = _mm256_permute2x128_si256(x0, x1, 0x20);
+ __m256i y1 = _mm256_permute2x128_si256(x0, x1, 0x31);
+ // Unaligned stores; the second one starts 8 elements past out.
+ _mm256_storeu_si256((__m256i *)out, y0);
+ _mm256_storeu_si256((__m256i *)(out + 8), y1);
+#else  // store the 16 lanes unchanged with one unaligned 256-bit store
+ _mm256_storeu_si256((__m256i *)out, *coeff);
+#endif
+}
+
+#endif // AOM_DSP_X86_FWD_TXFM_AVX2_H
diff --git a/aom_scale/generic/aom_scale.c b/aom_scale/generic/aom_scale.c
index 28604ac..9007459 100644
--- a/aom_scale/generic/aom_scale.c
+++ b/aom_scale/generic/aom_scale.c
@@ -68,7 +68,6 @@
unsigned int source_scale, unsigned int source_length,
unsigned char *dest, int dest_step,
unsigned int dest_scale, unsigned int dest_length) {
- const unsigned int source_pitch = source_step;
const unsigned char *const dest_end = dest + dest_length * dest_step;
(void)source_length;
(void)source_scale;
@@ -81,9 +80,9 @@
dest += dest_step;
while (dest < dest_end) {
- const unsigned int a = 3 * source[-source_pitch];
+ const unsigned int a = 3 * source[-source_step];
const unsigned int b = 10 * source[0];
- const unsigned int c = 3 * source[source_pitch];
+ const unsigned int c = 3 * source[source_step];
*dest = (unsigned char)((8 + a + b + c) >> 4);
source += source_step;
dest += dest_step;
diff --git a/aomenc.c b/aomenc.c
index 497c8d5..742f264 100644
--- a/aomenc.c
+++ b/aomenc.c
@@ -40,12 +40,12 @@
#include "aom/aomdx.h"
#endif
-#include "aom/aom_integer.h"
-#include "aom_ports/mem_ops.h"
-#include "aom_ports/aom_timer.h"
-#include "./rate_hist.h"
#include "./aomstats.h"
+#include "./rate_hist.h"
#include "./warnings.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem_ops.h"
#if CONFIG_WEBM_IO
#include "./webmenc.h"
#endif
@@ -1860,6 +1860,9 @@
uint64_t cx_time = 0;
int stream_cnt = 0;
int res = 0;
+#if CONFIG_AOM_HIGHBITDEPTH
+ int profile_updated = 0;
+#endif
memset(&input, 0, sizeof(input));
exec_name = argv_[0];
@@ -1963,6 +1966,39 @@
{ stream->config.cfg.g_input_bit_depth = input.bit_depth; });
}
+#if CONFIG_AOM_HIGHBITDEPTH
+  /* Automatically set the codec bit depth to match the input bit depth.
+   * Upgrade the profile if required. The warning is keyed on a per-stream
+   * flag so that a stream whose profile was left untouched is not reported
+   * as upgraded just because an earlier stream was. */
+  FOREACH_STREAM({
+    int stream_profile_updated = 0;
+    if (stream->config.cfg.g_input_bit_depth >
+        (unsigned int)stream->config.cfg.g_bit_depth) {
+      stream->config.cfg.g_bit_depth = stream->config.cfg.g_input_bit_depth;
+    }
+    if (stream->config.cfg.g_bit_depth > 8) {
+      /* Profiles 0 and 1 are promoted to 2 and 3 respectively. */
+      switch (stream->config.cfg.g_profile) {
+        case 0:
+          stream->config.cfg.g_profile = 2;
+          stream_profile_updated = 1;
+          break;
+        case 1:
+          stream->config.cfg.g_profile = 3;
+          stream_profile_updated = 1;
+          break;
+        default: break;
+      }
+    }
+    if (stream->config.cfg.g_profile > 1) {
+      stream->config.use_16bit_internal = 1;
+    }
+    if (stream_profile_updated) {
+      /* Keep the function-wide flag set once any stream was upgraded. */
+      profile_updated = 1;
+      fprintf(stderr,
+              "Warning: automatically upgrading to profile %d to "
+              "match input format.\n",
+              stream->config.cfg.g_profile);
+    }
+  });
+#endif
+
FOREACH_STREAM(set_stream_dimensions(stream, input.width, input.height));
FOREACH_STREAM(validate_stream_config(stream, &global));
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index fae7d04..bacb23c 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -932,6 +932,7 @@
return flags;
}
+/* Lower bound applied to data_sz (the cx_data buffer size) in
+ * encoder_encode(). Declared static so the name is not exported from this
+ * translation unit. */
+static const size_t kMinCompressedSize = 8192;
static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
const aom_image_t *img,
aom_codec_pts_t pts,
@@ -952,14 +953,16 @@
// failure condition, encoder setup is done fully in init() currently.
if (res == AOM_CODEC_OK) {
#if CONFIG_EXT_REFS
- data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img);
+ data_sz = ALIGN_POWER_OF_TWO(ctx->cfg.g_w, 5) *
+ ALIGN_POWER_OF_TWO(ctx->cfg.g_h, 5) * get_image_bps(img);
#else
// There's no codec control for multiple alt-refs so check the encoder
// instance for its status to determine the compressed data size.
- data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
+ data_sz = ALIGN_POWER_OF_TWO(ctx->cfg.g_w, 5) *
+ ALIGN_POWER_OF_TWO(ctx->cfg.g_h, 5) * get_image_bps(img) / 8 *
(cpi->multi_arf_allowed ? 8 : 2);
#endif // CONFIG_EXT_REFS
- if (data_sz < 4096) data_sz = 4096;
+ if (data_sz < kMinCompressedSize) data_sz = kMinCompressedSize;
if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
ctx->cx_data_sz = data_sz;
free(ctx->cx_data);
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 43cc3a2..2caed90 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -1083,6 +1083,24 @@
return AOM_CODEC_OK;
}
+/* Control handler for AV1_GET_ACCOUNTING.
+ *
+ * Expects one variadic argument of type Accounting ** and stores in it the
+ * address of the decoder's accounting data.
+ *
+ * Returns AOM_CODEC_INCAPABLE when built without CONFIG_ACCOUNTING,
+ * AOM_CODEC_INVALID_PARAM when the output pointer is NULL, AOM_CODEC_ERROR
+ * when ctx has no frame worker, and AOM_CODEC_OK on success. */
+static aom_codec_err_t ctrl_get_accounting(aom_codec_alg_priv_t *ctx,
+                                           va_list args) {
+#if !CONFIG_ACCOUNTING
+  (void)ctx;
+  (void)args;
+  return AOM_CODEC_INCAPABLE;
+#else
+  Accounting **acct = va_arg(args, Accounting **);
+  /* Reject a NULL destination instead of dereferencing it below. */
+  if (acct == NULL) return AOM_CODEC_INVALID_PARAM;
+  if (ctx->frame_workers) {
+    AVxWorker *const worker = ctx->frame_workers;
+    FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+    AV1Decoder *pbi = frame_worker_data->pbi;
+    *acct = &pbi->accounting;
+    return AOM_CODEC_OK;
+  }
+  return AOM_CODEC_ERROR;
+#endif
+}
static aom_codec_err_t ctrl_set_decode_tile_row(aom_codec_alg_priv_t *ctx,
va_list args) {
ctx->decode_tile_row = va_arg(args, int);
@@ -1119,6 +1137,7 @@
{ AV1D_GET_DISPLAY_SIZE, ctrl_get_render_size },
{ AV1D_GET_BIT_DEPTH, ctrl_get_bit_depth },
{ AV1D_GET_FRAME_SIZE, ctrl_get_frame_size },
+ { AV1_GET_ACCOUNTING, ctrl_get_accounting },
{ AV1_GET_NEW_FRAME_IMAGE, ctrl_get_new_frame_image },
{ -1, NULL },
diff --git a/av1/common/accounting.h b/av1/common/accounting.h
index 04be326..1fe1d9a 100644
--- a/av1/common/accounting.h
+++ b/av1/common/accounting.h
@@ -54,14 +54,16 @@
AccountingDictionary dictionary;
} AccountingSymbols;
-typedef struct {
+typedef struct Accounting Accounting;
+
+struct Accounting {
AccountingSymbols syms;
/** Size allocated for symbols (not all may be used). */
int num_syms_allocated;
int16_t hash_dictionary[AOM_ACCOUNTING_HASH_SIZE];
AccountingSymbolContext context;
uint32_t last_tell_frac;
-} Accounting;
+};
void aom_accounting_init(Accounting *accounting);
void aom_accounting_reset(Accounting *accounting);
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
index 6332fed..c5eb85d 100644
--- a/av1/common/blockd.c
+++ b/av1/common/blockd.c
@@ -95,21 +95,22 @@
TX_SIZE tx_size, int has_eob, int aoff, int loff) {
ENTROPY_CONTEXT *const a = pd->above_context + aoff;
ENTROPY_CONTEXT *const l = pd->left_context + loff;
- const int tx_size_in_blocks = 1 << tx_size;
+ const int txs_wide = tx_size_wide_unit[tx_size];
+ const int txs_high = tx_size_high_unit[tx_size];
// above
if (has_eob && xd->mb_to_right_edge < 0) {
int i;
const int blocks_wide =
pd->n4_w + (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
- int above_contexts = tx_size_in_blocks;
+ int above_contexts = txs_wide;
if (above_contexts + aoff > blocks_wide)
above_contexts = blocks_wide - aoff;
for (i = 0; i < above_contexts; ++i) a[i] = has_eob;
- for (i = above_contexts; i < tx_size_in_blocks; ++i) a[i] = 0;
+ for (i = above_contexts; i < txs_wide; ++i) a[i] = 0;
} else {
- memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * txs_wide);
}
// left
@@ -117,13 +118,13 @@
int i;
const int blocks_high =
pd->n4_h + (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- int left_contexts = tx_size_in_blocks;
+ int left_contexts = txs_high;
if (left_contexts + loff > blocks_high) left_contexts = blocks_high - loff;
for (i = 0; i < left_contexts; ++i) l[i] = has_eob;
- for (i = left_contexts; i < tx_size_in_blocks; ++i) l[i] = 0;
+ for (i = left_contexts; i < txs_high; ++i) l[i] = 0;
} else {
- memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * txs_high);
}
}
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index bc1970c..8d7c7f8 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -307,6 +307,8 @@
uint16_t n4_w, n4_h;
// log2 of n4_w, n4_h
uint8_t n4_wl, n4_hl;
+ // plane block width and height in pixels
+ uint8_t width, height;
#if CONFIG_AOM_QM
const qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index f068ee7..88bfb0a 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -41,7 +41,13 @@
0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)
};
-// Width/height lookup tables in units of varios block sizes
+// Width/height lookup tables in units of various block sizes
+static const uint8_t block_size_wide[BLOCK_SIZES] = {
+ 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, IF_EXT_PARTITION(64, 128, 128)
+};
+static const uint8_t block_size_high[BLOCK_SIZES] = {
+ 4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64, IF_EXT_PARTITION(128, 64, 128)
+};
static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = {
1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)
};
@@ -442,7 +448,37 @@
#endif // CONFIG_EXT_TX
};
-static const int tx_size_1d[TX_SIZES] = { 4, 8, 16, 32 };
+// Transform block width in pixels
+static const int tx_size_wide[TX_SIZES_ALL] = {
+ 4, 8, 16, 32,
+#if CONFIG_EXT_TX
+ 4, 8, 8, 16, 16, 32,
+#endif
+};
+
+// Transform block height in pixels
+static const int tx_size_high[TX_SIZES_ALL] = {
+ 4, 8, 16, 32,
+#if CONFIG_EXT_TX
+ 8, 4, 16, 8, 32, 16,
+#endif
+};
+
+// Transform block width in units of 4 pixels
+static const int tx_size_wide_unit[TX_SIZES_ALL] = {
+ 1, 2, 4, 8,
+#if CONFIG_EXT_TX
+ 1, 2, 2, 4, 4, 8,
+#endif
+};
+
+// Transform block height in units of 4 pixels
+static const int tx_size_high_unit[TX_SIZES_ALL] = {
+ 1, 2, 4, 8,
+#if CONFIG_EXT_TX
+ 2, 1, 4, 2, 8, 4,
+#endif
+};
static const int tx_size_2d[TX_SIZES_ALL] = {
16, 64, 256, 1024,
@@ -453,8 +489,6 @@
static const uint8_t tx_size_1d_log2[TX_SIZES] = { 2, 3, 4, 5 };
-static const int tx_size_1d_in_unit[TX_SIZES] = { 1, 2, 4, 8 };
-
// TODO(jingning): Temporary table during the construction.
static const int tx_size_1d_in_unit_log2[TX_SIZES] = { 0, 1, 2, 3 };
diff --git a/av1/common/entropymv.c b/av1/common/entropymv.c
index dfe798e..a80165e 100644
--- a/av1/common/entropymv.c
+++ b/av1/common/entropymv.c
@@ -61,8 +61,8 @@
{ { 0 }, { 0 } }, // class0_fp_cdf is computed in av1_init_mv_probs()
{ 0 }, // fp_cdf is computed from fp in av1_init_mv_probs()
#endif
- 160, // class0_hp bit
- 128, // hp
+ 160, // class0_hp bit
+ 128, // hp
},
{
// Horizontal component
@@ -79,8 +79,8 @@
{ { 0 }, { 0 } }, // class0_fp_cdf is computed in av1_init_mv_probs()
{ 0 }, // fp_cdf is computed from fp in av1_init_mv_probs()
#endif
- 160, // class0_hp bit
- 128, // hp
+ 160, // class0_hp bit
+ 128, // hp
} },
};
diff --git a/av1/common/enums.h b/av1/common/enums.h
index b02c814..0a1f7a3 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -58,6 +58,10 @@
#define MAX_TILE_COLS 64
#endif // CONFIG_EXT_TILE
+#if CONFIG_VAR_TX
+#define MAX_VARTX_DEPTH 2
+#endif
+
// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
// 00: Profile 0. 8-bit 4:2:0 only.
// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index b6e73cd..be1cbc1 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -531,6 +531,8 @@
xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y;
xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x;
xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y;
+ xd->plane[i].width = xd->plane[i].n4_w * 4;
+ xd->plane[i].height = xd->plane[i].n4_h * 4;
}
}
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index f808340..ad9b462 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -710,16 +710,16 @@
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, x_step, y_step, ref,
- &mi->mbmi.interp_filter, xs, ys, xd->bd);
+ mi->mbmi.interp_filter, xs, ys, xd->bd);
} else {
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, x_step, y_step, ref,
- &mi->mbmi.interp_filter, xs, ys);
+ mi->mbmi.interp_filter, xs, ys);
}
#else
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, x_step, y_step, ref,
- &mi->mbmi.interp_filter, xs, ys);
+ mi->mbmi.interp_filter, xs, ys);
#endif
}
}
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 96ffb08..cfd283f 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -228,7 +228,7 @@
TX_SIZE txsz, int y, int x, int ss_x) {
const int wl = mi_width_log2_lookup[bsize];
const int w = AOMMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1);
- const int step = 1 << txsz;
+ const int step = tx_size_wide_unit[txsz];
// TODO(bshacklett, huisu): Currently the RD loop traverses 4X8 blocks in
// inverted N order while in the bitstream the subblocks are stored in Z
@@ -238,41 +238,39 @@
// blocks in inverted N order, and then update this function appropriately.
if (bsize == BLOCK_4X8 && y == 1) return 0;
- if (!right_available) {
- return 0;
- } else {
- // Handle block size 4x8 and 4x4
- if (ss_x == 0 && num_4x4_blocks_wide_lookup[bsize] < 2 && x == 0) return 1;
+ if (!right_available) return 0;
- if (y == 0) {
- const int hl = mi_height_log2_lookup[bsize];
- const uint8_t *order;
- int my_order, tr_order;
+ // Handle block size 4x8 and 4x4
+ if (ss_x == 0 && num_4x4_blocks_wide_lookup[bsize] < 2 && x == 0) return 1;
+
+ if (y == 0) {
+ const int hl = mi_height_log2_lookup[bsize];
+ const uint8_t *order;
+ int my_order, tr_order;
#if CONFIG_EXT_PARTITION_TYPES
- if (partition == PARTITION_VERT_A)
- order = orders_verta[bsize];
- else
+ if (partition == PARTITION_VERT_A)
+ order = orders_verta[bsize];
+ else
#endif // CONFIG_EXT_PARTITION_TYPES
- order = orders[bsize];
+ order = orders[bsize];
- if (x + step < w) return 1;
+ if (x + step < w) return 1;
- mi_row = (mi_row & MAX_MIB_MASK) >> hl;
- mi_col = (mi_col & MAX_MIB_MASK) >> wl;
+ mi_row = (mi_row & MAX_MIB_MASK) >> hl;
+ mi_col = (mi_col & MAX_MIB_MASK) >> wl;
- // If top row of coding unit
- if (mi_row == 0) return 1;
+ // If top row of coding unit
+ if (mi_row == 0) return 1;
- // If rightmost column of coding unit
- if (((mi_col + 1) << wl) >= MAX_MIB_SIZE) return 0;
+ // If rightmost column of coding unit
+ if (((mi_col + 1) << wl) >= MAX_MIB_SIZE) return 0;
- my_order = order[((mi_row + 0) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 0];
- tr_order = order[((mi_row - 1) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 1];
+ my_order = order[((mi_row + 0) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 0];
+ tr_order = order[((mi_row - 1) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 1];
- return my_order > tr_order;
- } else {
- return x + step < w;
- }
+ return my_order > tr_order;
+ } else {
+ return x + step < w;
}
}
@@ -285,7 +283,7 @@
const int wl = mi_width_log2_lookup[bsize];
const int hl = mi_height_log2_lookup[bsize];
const int h = 1 << (hl + 1 - ss_y);
- const int step = 1 << txsz;
+ const int step = tx_size_wide_unit[txsz];
const uint8_t *order = orders[bsize];
int my_order, bl_order;
@@ -673,7 +671,7 @@
INTRA_FILTER filter_type) {
const int dx = get_dx(angle);
const int dy = get_dy(angle);
- const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int bs = tx_size_wide[tx_size];
assert(angle > 0 && angle < 270);
if (angle > 0 && angle < 90) {
@@ -1202,7 +1200,7 @@
DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]);
uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row;
- const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int bs = tx_size_wide[tx_size];
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
@@ -1363,7 +1361,7 @@
DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]);
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
- const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int bs = tx_size_wide[tx_size];
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
@@ -1509,25 +1507,21 @@
}
}
-void av1_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
+void av1_predict_intra_block(const MACROBLOCKD *xd, int wpx, int hpx,
TX_SIZE tx_size, PREDICTION_MODE mode,
const uint8_t *ref, int ref_stride, uint8_t *dst,
int dst_stride, int col_off, int row_off,
int plane) {
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int txw = num_4x4_blocks_wide_txsize_lookup[tx_size];
- const int txh = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int txw = tx_size_wide_unit[tx_size];
+ const int txh = tx_size_high_unit[tx_size];
const int have_top = row_off || xd->up_available;
const int have_left = col_off || xd->left_available;
const int x = col_off * 4;
const int y = row_off * 4;
- const int bw = pd->subsampling_x ? 1 << bwl_in : AOMMAX(2, 1 << bwl_in);
- const int bh = pd->subsampling_y ? 1 << bhl_in : AOMMAX(2, 1 << bhl_in);
const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
- const int wpx = 4 * bw;
- const int hpx = 4 * bh;
const int txwpx = 4 * txw;
const int txhpx = 4 * txh;
// Distance between the right edge of this prediction block to
@@ -1555,8 +1549,8 @@
#if CONFIG_PALETTE
if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
- const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
- const int stride = 4 * (1 << bwl_in);
+ const int bs = tx_size_wide[tx_size];
+ const int stride = wpx;
int r, c;
uint8_t *map = NULL;
#if CONFIG_AOM_HIGHBITDEPTH
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
index 7778874..23bad1c 100644
--- a/av1/common/reconintra.h
+++ b/av1/common/reconintra.h
@@ -21,7 +21,7 @@
void av1_init_intra_predictors(void);
-void av1_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
+// wpx and hpx are the width and height, in pixels, of the current plane block.
+void av1_predict_intra_block(const MACROBLOCKD *xd, int wpx, int hpx,
TX_SIZE tx_size, PREDICTION_MODE mode,
const uint8_t *ref, int ref_stride, uint8_t *dst,
int dst_stride, int aoff, int loff, int plane);
diff --git a/av1/common/scan.c b/av1/common/scan.c
index 945c65e..1281843 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -4305,7 +4305,7 @@
void av1_update_neighbors(int tx_size, const int16_t *scan,
const int16_t *iscan, int16_t *neighbors) {
- const int tx1d_size = tx_size_1d[tx_size];
+ const int tx1d_size = tx_size_wide[tx_size];
const int tx2d_size = tx_size_2d[tx_size];
int scan_idx;
for (scan_idx = 0; scan_idx < tx2d_size; ++scan_idx) {
@@ -4336,7 +4336,7 @@
void av1_update_sort_order(TX_SIZE tx_size, const uint32_t *non_zero_prob,
int16_t *sort_order) {
uint32_t temp[COEFF_IDX_SIZE];
- const int tx1d_size = tx_size_1d[tx_size];
+ const int tx1d_size = tx_size_wide[tx_size];
const int tx2d_size = tx_size_2d[tx_size];
int sort_idx;
assert(tx2d_size <= COEFF_IDX_SIZE);
@@ -4354,7 +4354,7 @@
int coeff_idx;
int scan_idx;
int sort_idx;
- const int tx1d_size = tx_size_1d[tx_size];
+ const int tx1d_size = tx_size_wide[tx_size];
const int tx2d_size = tx_size_2d[tx_size];
for (coeff_idx = 0; coeff_idx < tx2d_size; ++coeff_idx) {
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 1c453cd..165609a 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -251,34 +251,26 @@
static void inverse_transform_block(MACROBLOCKD *xd, int plane,
const TX_TYPE tx_type,
const TX_SIZE tx_size, uint8_t *dst,
- int stride, int eob) {
+ int stride, int16_t scan_line, int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- if (eob > 0) {
- tran_low_t *const dqcoeff = pd->dqcoeff;
- INV_TXFM_PARAM inv_txfm_param;
- inv_txfm_param.tx_type = tx_type;
- inv_txfm_param.tx_size = tx_size;
- inv_txfm_param.eob = eob;
- inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ INV_TXFM_PARAM inv_txfm_param;
+ inv_txfm_param.tx_type = tx_type;
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = eob;
+ inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
#if CONFIG_AOM_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- inv_txfm_param.bd = xd->bd;
- highbd_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
- } else {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ inv_txfm_param.bd = xd->bd;
+ highbd_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
+ } else {
#endif // CONFIG_AOM_HIGHBITDEPTH
- inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
+ inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
#if CONFIG_AOM_HIGHBITDEPTH
- }
-#endif // CONFIG_AOM_HIGHBITDEPTH
-
- // TODO(jingning): This cleans up different reset requests from various
- // experiments, but incurs unnecessary memset size.
- if (eob == 1)
- dqcoeff[0] = 0;
- else
- memset(dqcoeff, 0, tx_size_2d[tx_size] * sizeof(dqcoeff[0]));
}
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
}
static void predict_and_reconstruct_intra_block(AV1_COMMON *cm,
@@ -301,23 +293,26 @@
if (mbmi->sb_type < BLOCK_8X8)
if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
- av1_predict_intra_block(xd, pd->n4_wl, pd->n4_hl, tx_size, mode, dst,
+ av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
pd->dst.stride, dst, pd->dst.stride, col, row, plane);
if (!mbmi->skip) {
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
- const int eob = av1_decode_block_tokens(
- xd, plane, scan_order, col, row, tx_size, tx_type, r, mbmi->segment_id);
- inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
- eob);
+ int16_t max_scan_line = 0;
+ const int eob =
+ av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size,
+ tx_type, &max_scan_line, r, mbmi->segment_id);
+ if (eob)
+ inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
+ max_scan_line, eob);
}
}
#if CONFIG_VAR_TX
static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd,
aom_reader *r, MB_MODE_INFO *const mbmi,
- int plane, BLOCK_SIZE plane_bsize, int block,
+ int plane, BLOCK_SIZE plane_bsize,
int blk_row, int blk_col, TX_SIZE tx_size,
int *eob_total) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -339,15 +334,17 @@
if (tx_size == plane_tx_size) {
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, plane_tx_size);
+ int block_idx = (blk_row << 1) + blk_col;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, plane_tx_size);
const SCAN_ORDER *sc = get_scan(cm, plane_tx_size, tx_type, 1);
+ int16_t max_scan_line = 0;
const int eob =
av1_decode_block_tokens(xd, plane, sc, blk_col, blk_row, plane_tx_size,
- tx_type, r, mbmi->segment_id);
+ tx_type, &max_scan_line, r, mbmi->segment_id);
inverse_transform_block(
xd, plane, tx_type, plane_tx_size,
&pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
- pd->dst.stride, eob);
+ pd->dst.stride, max_scan_line, eob);
*eob_total += eob;
} else {
int bsl = b_width_log2_lookup[bsize];
@@ -359,13 +356,11 @@
for (i = 0; i < 4; ++i) {
const int offsetr = blk_row + ((i >> 1) << bsl);
const int offsetc = blk_col + ((i & 0x01) << bsl);
- int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
- decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize,
- block + i * step, offsetr, offsetc, tx_size - 1,
- eob_total);
+ decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, offsetr,
+ offsetc, tx_size - 1, eob_total);
}
}
}
@@ -385,12 +380,14 @@
int block_idx = (row << 1) + col;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
- const int eob = av1_decode_block_tokens(xd, plane, scan_order, col, row,
- tx_size, tx_type, r, segment_id);
-
- inverse_transform_block(xd, plane, tx_type, tx_size,
- &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
- pd->dst.stride, eob);
+ int16_t max_scan_line = 0;
+ const int eob =
+ av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size, tx_type,
+ &max_scan_line, r, segment_id);
+ if (eob)
+ inverse_transform_block(xd, plane, tx_type, tx_size,
+ &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
+ pd->dst.stride, max_scan_line, eob);
return eob;
}
#endif // !CONFIG_VAR_TX || CONFIG_SUPER_TX
@@ -1167,11 +1164,12 @@
const int bh = 1 << (bhl - 1);
const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
+ MB_MODE_INFO *mbmi;
+
#if CONFIG_ACCOUNTING
aom_accounting_set_context(&pbi->accounting, mi_col, mi_row);
#endif
#if CONFIG_SUPERTX
- MB_MODE_INFO *mbmi;
if (supertx_enabled) {
mbmi = set_mb_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis);
} else {
@@ -1183,8 +1181,8 @@
#endif
av1_read_mode_info(pbi, xd, supertx_enabled, mi_row, mi_col, r, x_mis, y_mis);
#else
- MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis,
- y_mis, bwl, bhl);
+ mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, bwl,
+ bhl);
#if CONFIG_EXT_PARTITION_TYPES
xd->mi[0]->mbmi.partition = partition;
#endif
@@ -1244,8 +1242,8 @@
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
const int num_4x4_w = pd->n4_w;
const int num_4x4_h = pd->n4_h;
- const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
- const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int stepr = tx_size_high_unit[tx_size];
+ const int stepc = tx_size_wide_unit[tx_size];
int row, col;
const int max_blocks_wide =
num_4x4_w + (xd->mb_to_right_edge >= 0
@@ -1327,16 +1325,14 @@
const BLOCK_SIZE plane_bsize =
get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd);
const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
- const int bw_var_tx = num_4x4_blocks_wide_txsize_lookup[max_tx_size];
- const int bh_var_tx = num_4x4_blocks_high_txsize_lookup[max_tx_size];
- const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
- int block = 0;
+        // Column (width) and row (height) steps, in transform units, for the
+        // loops below; width must come from the *_wide_* table and height from
+        // the *_high_* table so the steps stay right for non-square sizes.
+        const int bw_var_tx = tx_size_wide_unit[max_tx_size];
+        const int bh_var_tx = tx_size_high_unit[max_tx_size];
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (is_rect_tx(mbmi->tx_size)) {
const TX_SIZE tx_size =
plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
- const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
- const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int stepr = tx_size_high_unit[tx_size];
+ const int stepc = tx_size_wide_unit[tx_size];
const int max_blocks_wide =
num_4x4_w +
(xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >>
@@ -1352,21 +1348,18 @@
plane, row, col, tx_size);
} else {
#endif
- for (row = 0; row < num_4x4_h; row += bh_var_tx) {
- for (col = 0; col < num_4x4_w; col += bw_var_tx) {
- decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, block,
- row, col, max_tx_size, &eobtotal);
- block += step;
- }
- }
+ for (row = 0; row < num_4x4_h; row += bh_var_tx)
+ for (col = 0; col < num_4x4_w; col += bw_var_tx)
+ decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, row,
+ col, max_tx_size, &eobtotal);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
}
#endif
#else
const TX_SIZE tx_size =
plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
- const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
- const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int stepr = tx_size_high_unit[tx_size];
+ const int stepc = tx_size_wide_unit[tx_size];
const int max_blocks_wide =
num_4x4_w + (xd->mb_to_right_edge >= 0
? 0
@@ -1859,7 +1852,7 @@
if (bsize == BLOCK_64X64) {
if (cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) {
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain =
- aom_read_literal(r, DERING_REFINEMENT_BITS);
+ aom_read_literal(r, DERING_REFINEMENT_BITS, ACCT_STR);
} else {
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain =
0;
@@ -2793,7 +2786,9 @@
pbi->allocated_tiles = n_tiles;
}
#if CONFIG_ACCOUNTING
- aom_accounting_reset(&pbi->accounting);
+ if (pbi->acct_enabled) {
+ aom_accounting_reset(&pbi->accounting);
+ }
#endif
// Load all tile information into tile_data.
for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
@@ -2818,7 +2813,11 @@
&td->bit_reader, pbi->decrypt_cb, pbi->decrypt_state);
#endif
#if CONFIG_ACCOUNTING
- tile_data->bit_reader.accounting = &pbi->accounting;
+ if (pbi->acct_enabled) {
+ tile_data->bit_reader.accounting = &pbi->accounting;
+ } else {
+ tile_data->bit_reader.accounting = NULL;
+ }
#endif
av1_init_macroblockd(cm, &td->xd, td->dqcoeff);
#if CONFIG_PALETTE
@@ -2839,8 +2838,10 @@
const int col = inv_col_order ? tile_cols - 1 - tile_col : tile_col;
TileData *const td = pbi->tile_data + tile_cols * row + col;
#if CONFIG_ACCOUNTING
- tile_data->bit_reader.accounting->last_tell_frac =
- aom_reader_tell_frac(&tile_data->bit_reader);
+ if (pbi->acct_enabled) {
+ tile_data->bit_reader.accounting->last_tell_frac =
+ aom_reader_tell_frac(&tile_data->bit_reader);
+ }
#endif
av1_tile_set_col(&tile_info, cm, col);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 07c745d..3993e72 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -276,8 +276,8 @@
#if CONFIG_VAR_TX
static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
MB_MODE_INFO *mbmi, FRAME_COUNTS *counts,
- TX_SIZE tx_size, int blk_row, int blk_col,
- aom_reader *r) {
+ TX_SIZE tx_size, int depth, int blk_row,
+ int blk_col, aom_reader *r) {
int is_split = 0;
const int tx_row = blk_row >> 1;
const int tx_col = blk_col >> 1;
@@ -294,6 +294,19 @@
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+ if (depth == MAX_VARTX_DEPTH) {
+ int idx, idy;
+ inter_tx_size[0][0] = tx_size;
+ for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
+ for (idx = 0; idx < num_4x4_blocks_wide_txsize_lookup[tx_size] / 2; ++idx)
+ inter_tx_size[idy][idx] = tx_size;
+ mbmi->tx_size = tx_size;
+ if (counts) ++counts->txfm_partition[ctx][0];
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
+ return;
+ }
+
is_split = aom_read(r, cm->fc->txfm_partition_prob[ctx], ACCT_STR);
if (is_split) {
@@ -316,8 +329,8 @@
for (i = 0; i < 4; ++i) {
int offsetr = blk_row + ((i >> 1) << bsl);
int offsetc = blk_col + ((i & 0x01) << bsl);
- read_tx_size_vartx(cm, xd, mbmi, counts, tx_size - 1, offsetr, offsetc,
- r);
+ read_tx_size_vartx(cm, xd, mbmi, counts, tx_size - 1, depth + 1, offsetr,
+ offsetc, r);
}
} else {
int idx, idy;
@@ -1792,12 +1805,11 @@
const int width = num_4x4_blocks_wide_lookup[bsize];
const int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
- int tx_size_cat = inter_tx_size_cat_lookup[bsize];
#if CONFIG_EXT_TX && CONFIG_RECT_TX
int is_rect_tx_allowed = inter_block && is_rect_tx_allowed_bsize(bsize) &&
!xd->lossless[mbmi->segment_id];
int use_rect_tx = 0;
-
+ int tx_size_cat = inter_tx_size_cat_lookup[bsize];
if (is_rect_tx_allowed) {
use_rect_tx = aom_read(r, cm->fc->rect_tx_prob[tx_size_cat], ACCT_STR);
if (xd->counts) {
@@ -1812,16 +1824,11 @@
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
for (idy = 0; idy < height; idy += bs)
for (idx = 0; idx < width; idx += bs)
- read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, idy, idx,
- r);
+ read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size,
+ height != width, idy, idx, r);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
}
#endif
- if (xd->counts) {
- const int ctx = get_tx_size_context(xd);
- ++xd->counts->tx_size[tx_size_cat][ctx]
- [txsize_sqr_up_map[mbmi->tx_size]];
- }
} else {
if (inter_block)
mbmi->tx_size = read_tx_size_inter(cm, xd, !mbmi->skip, r);
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index bcc6a1b..c3099ba 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -99,10 +99,13 @@
cm->error.setjmp = 1;
- CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)aom_calloc(1, sizeof(*cm->fc)));
- CHECK_MEM_ERROR(
- cm, cm->frame_contexts,
- (FRAME_CONTEXT *)aom_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
+ CHECK_MEM_ERROR(cm, cm->fc,
+ (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
+ CHECK_MEM_ERROR(cm, cm->frame_contexts,
+ (FRAME_CONTEXT *)aom_memalign(
+ 32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
+ memset(cm->fc, 0, sizeof(*cm->fc));
+ memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts));
pbi->need_resync = 1;
once(initialize_dec);
@@ -131,6 +134,7 @@
av1_loop_restoration_precal();
#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_ACCOUNTING
+ pbi->acct_enabled = 1;
aom_accounting_init(&pbi->accounting);
#endif
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 7575260..fd68d13 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -104,6 +104,7 @@
int dec_tile_row, dec_tile_col;
#endif // CONFIG_EXT_TILE
#if CONFIG_ACCOUNTING
+ int acct_enabled;
Accounting accounting;
#endif
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index f2f74f5..9c01b93 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -61,7 +61,7 @@
dequant_val_type_nuq *dq_val,
#endif // CONFIG_NEW_QUANT
int ctx, const int16_t *scan, const int16_t *nb,
- aom_reader *r)
+ int16_t *max_scan_line, aom_reader *r)
#endif
{
FRAME_COUNTS *counts = xd->counts;
@@ -166,6 +166,9 @@
dqv_val = &dq_val[band][0];
#endif // CONFIG_NEW_QUANT
}
+
+ *max_scan_line = AOMMAX(*max_scan_line, scan[c]);
+
#if CONFIG_RANS
cdf = &coef_cdfs[band][ctx];
token = ONE_TOKEN +
@@ -327,7 +330,8 @@
int av1_decode_block_tokens(MACROBLOCKD *const xd, int plane,
const SCAN_ORDER *sc, int x, int y, TX_SIZE tx_size,
- TX_TYPE tx_type, aom_reader *r, int seg_id) {
+ TX_TYPE tx_type, int16_t *max_scan_line,
+ aom_reader *r, int seg_id) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int16_t *const dequant = pd->seg_dequant[seg_id];
const int ctx =
@@ -339,16 +343,16 @@
#endif // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
- const int eob =
- decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
- ctx, sc->scan, sc->neighbors, r, pd->seg_iqmatrix[seg_id]);
+ const int eob = decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size,
+ tx_type, dequant, ctx, sc->scan, sc->neighbors,
+ max_scan_line, r, pd->seg_iqmatrix[seg_id]);
#else
const int eob =
decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
#if CONFIG_NEW_QUANT
pd->seg_dequant_nuq[seg_id][dq],
#endif // CONFIG_NEW_QUANT
- ctx, sc->scan, sc->neighbors, r);
+ ctx, sc->scan, sc->neighbors, max_scan_line, r);
#endif // CONFIG_AOM_QM
av1_set_contexts(xd, pd, tx_size, eob > 0, x, y);
return eob;
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
index 9c08ff9..1eb1e6c 100644
--- a/av1/decoder/detokenize.h
+++ b/av1/decoder/detokenize.h
@@ -28,7 +28,7 @@
int av1_decode_block_tokens(MACROBLOCKD *const xd, int plane,
const SCAN_ORDER *sc, int x, int y, TX_SIZE tx_size,
- TX_TYPE tx_type,
+ TX_TYPE tx_type, int16_t *max_scan_line,
#if CONFIG_ANS
struct AnsDecoder *const r,
#else
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 5ae920b..df27fbc 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -70,37 +70,9 @@
};
#endif // CONFIG_EXT_INTER
#if CONFIG_PALETTE
-static const struct av1_token palette_size_encodings[] = {
- { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 62, 6 }, { 63, 6 },
-};
-static const struct av1_token
- palette_color_encodings[PALETTE_MAX_SIZE - 1][PALETTE_MAX_SIZE] = {
- { { 0, 1 }, { 1, 1 } }, // 2 colors
- { { 0, 1 }, { 2, 2 }, { 3, 2 } }, // 3 colors
- { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }, // 4 colors
- { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 } }, // 5 colors
- { { 0, 1 },
- { 2, 2 },
- { 6, 3 },
- { 14, 4 },
- { 30, 5 },
- { 31, 5 } }, // 6 colors
- { { 0, 1 },
- { 2, 2 },
- { 6, 3 },
- { 14, 4 },
- { 30, 5 },
- { 62, 6 },
- { 63, 6 } }, // 7 colors
- { { 0, 1 },
- { 2, 2 },
- { 6, 3 },
- { 14, 4 },
- { 30, 5 },
- { 62, 6 },
- { 126, 7 },
- { 127, 7 } }, // 8 colors
- };
+static struct av1_token palette_size_encodings[PALETTE_MAX_SIZE - 1];
+static struct av1_token palette_color_encodings[PALETTE_MAX_SIZE - 1]
+ [PALETTE_MAX_SIZE];
#endif // CONFIG_PALETTE
static const struct av1_token tx_size_encodings[TX_SIZES - 1][TX_SIZES] = {
{ { 0, 1 }, { 1, 1 } }, // Max tx_size is 8X8
@@ -145,8 +117,10 @@
#endif // CONFIG_LOOP_RESTORATION
void av1_encode_token_init(void) {
-#if CONFIG_EXT_TX
+#if CONFIG_EXT_TX || CONFIG_PALETTE
int s;
+#endif // CONFIG_EXT_TX || CONFIG_PALETTE
+#if CONFIG_EXT_TX
for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
av1_tokens_from_tree(ext_tx_inter_encodings[s], av1_ext_tx_inter_tree[s]);
}
@@ -163,6 +137,13 @@
av1_tokens_from_tree(inter_mode_encodings, av1_inter_mode_tree);
#endif
+#if CONFIG_PALETTE
+ av1_tokens_from_tree(palette_size_encodings, av1_palette_size_tree);
+ for (s = 0; s < PALETTE_MAX_SIZE - 1; ++s) {
+ av1_tokens_from_tree(palette_color_encodings[s], av1_palette_color_tree[s]);
+ }
+#endif // CONFIG_PALETTE
+
#if CONFIG_EXT_INTRA
av1_tokens_from_tree(intra_filter_encodings, av1_intra_filter_tree);
#endif // CONFIG_EXT_INTRA
@@ -365,7 +346,8 @@
#if CONFIG_VAR_TX
static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const MB_MODE_INFO *mbmi, TX_SIZE tx_size,
- int blk_row, int blk_col, aom_writer *w) {
+ int depth, int blk_row, int blk_col,
+ aom_writer *w) {
const int tx_row = blk_row >> 1;
const int tx_col = blk_col >> 1;
int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
@@ -378,6 +360,12 @@
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+ if (depth == MAX_VARTX_DEPTH) {
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
+ return;
+ }
+
if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) {
aom_write(w, 0, cm->fc->txfm_partition_prob[ctx]);
txfm_partition_update(xd->above_txfm_context + tx_col,
@@ -399,7 +387,8 @@
for (i = 0; i < 4; ++i) {
int offsetr = blk_row + ((i >> 1) << bsl);
int offsetc = blk_col + ((i & 0x01) << bsl);
- write_tx_size_vartx(cm, xd, mbmi, tx_size - 1, offsetr, offsetc, w);
+ write_tx_size_vartx(cm, xd, mbmi, tx_size - 1, depth + 1, offsetr,
+ offsetc, w);
}
}
}
@@ -1229,7 +1218,8 @@
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
for (idy = 0; idy < height; idy += bs)
for (idx = 0; idx < width; idx += bs)
- write_tx_size_vartx(cm, xd, mbmi, max_tx_size, idy, idx, w);
+ write_tx_size_vartx(cm, xd, mbmi, max_tx_size, height != width, idy,
+ idx, w);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 310325e..dcdf97e 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -81,7 +81,7 @@
MB_MODE_INFO_EXT *mbmi_ext;
int skip_block;
int select_tx_size;
- int q_index;
+ int qindex;
// The equivalent error at the current rdmult of one whole bit (not one
// bitcost unit).
@@ -98,6 +98,10 @@
int *m_search_count_ptr;
int *ex_search_count_ptr;
+#if CONFIG_VAR_TX
+ unsigned int txb_split_count;
+#endif
+
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.
BLOCK_SIZE min_partition_size;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 9dc6a2e..acca6f1 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -852,7 +852,6 @@
if (cyclic_refresh_segment_id_boosted(segment_id)) {
int q = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
- assert(q == xd->qindex[segment_id]);
set_vbp_thresholds(cpi, thresholds, q);
}
}
@@ -1606,7 +1605,6 @@
av1_init_plane_quantizers(cpi, x, segment_id);
aom_clear_system_state();
segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
- assert(segment_qindex == x->e_mbd.qindex[segment_id]);
return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
@@ -4675,7 +4673,6 @@
: cm->base_qindex;
xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
- xd->qindex[i] = qindex;
}
if (!cm->seg.enabled && xd->lossless[0]) x->optimize = 0;
@@ -4720,6 +4717,7 @@
cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL;
#if CONFIG_VAR_TX
+ x->txb_split_count = 0;
#if CONFIG_REF_MV
av1_zero(x->blk_skip_drl);
#endif
@@ -4853,7 +4851,10 @@
}
}
-#if !CONFIG_VAR_TX
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
+ cm->tx_mode = ALLOW_32X32;
+#else
if (cm->tx_mode == TX_MODE_SELECT) {
int count4x4 = 0;
int count8x8_lp = 0, count8x8_8x8p = 0;
@@ -4956,8 +4957,9 @@
}
#if CONFIG_VAR_TX
-static void update_txfm_count(MACROBLOCKD *xd, FRAME_COUNTS *counts,
- TX_SIZE tx_size, int blk_row, int blk_col) {
+static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
+ FRAME_COUNTS *counts, TX_SIZE tx_size,
+ int blk_row, int blk_col) {
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int tx_row = blk_row >> 1;
const int tx_col = blk_col >> 1;
@@ -4982,6 +4984,7 @@
int bh = num_4x4_blocks_high_lookup[bsize];
int i;
++counts->txfm_partition[ctx][1];
+ ++x->txb_split_count;
if (tx_size == TX_8X8) {
mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
@@ -4994,16 +4997,16 @@
for (i = 0; i < 4; ++i) {
int offsetr = (i >> 1) * bh / 2;
int offsetc = (i & 0x01) * bh / 2;
- update_txfm_count(xd, counts, tx_size - 1, blk_row + offsetr,
+ update_txfm_count(x, xd, counts, tx_size - 1, blk_row + offsetr,
blk_col + offsetc);
}
}
}
-static void tx_partition_count_update(const AV1_COMMON *const cm,
- MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
- int mi_row, int mi_col,
- FRAME_COUNTS *td_counts) {
+static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE plane_bsize, int mi_row,
+ int mi_col, FRAME_COUNTS *td_counts) {
+ MACROBLOCKD *xd = &x->e_mbd;
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
@@ -5017,7 +5020,7 @@
for (idy = 0; idy < mi_height; idy += bh)
for (idx = 0; idx < mi_width; idx += bh)
- update_txfm_count(xd, td_counts, max_tx_size, idy, idx);
+ update_txfm_count(x, xd, td_counts, max_tx_size, idy, idx);
}
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
@@ -5259,13 +5262,19 @@
}
if (!is_rect_tx_allowed(xd, mbmi) || !is_rect_tx(mbmi->tx_size)) {
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
- if (is_inter)
- tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
+ if (is_inter) {
+ tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts);
+ } else {
+ ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
+ if (mbmi->tx_size != max_txsize_lookup[bsize]) ++x->txb_split_count;
+ }
#if CONFIG_EXT_TX && CONFIG_RECT_TX
}
#endif
#endif
+#if !CONFIG_VAR_TX
++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
+#endif
} else {
int i, j;
TX_SIZE tx_size;
@@ -5288,7 +5297,12 @@
for (i = 0; i < mi_width; i++)
if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows)
mi_8x8[mis * j + i]->mbmi.tx_size = tx_size;
+
+#if CONFIG_VAR_TX
+ if (mbmi->tx_size != max_txsize_lookup[bsize]) ++x->txb_split_count;
+#endif
}
+
++td->counts->tx_size_totals[txsize_sqr_map[mbmi->tx_size]];
++td->counts
->tx_size_totals[txsize_sqr_map[get_uv_tx_size(mbmi, &xd->plane[1])]];
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 8914ba5..6b7e72c 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -95,8 +95,7 @@
#endif
const int shift = get_tx_scale(xd, tx_type, tx_size);
#if CONFIG_NEW_QUANT
- int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
- ref, plane_type);
+ int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
#else
const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
@@ -123,8 +122,7 @@
int shortcut = 0;
int next_shortcut = 0;
- assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
- (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+ assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
token_costs += band;
@@ -518,8 +516,7 @@
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
- is_inter, plane_type);
+ int dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *src_diff;
@@ -527,8 +524,7 @@
FWD_TXFM_PARAM fwd_txfm_param;
- assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
- (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+ assert((x->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
fwd_txfm_param.tx_type = tx_type;
fwd_txfm_param.tx_size = tx_size;
@@ -588,8 +584,7 @@
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter);
- int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
- is_inter, plane_type);
+ int dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -600,8 +595,7 @@
FWD_TXFM_PARAM fwd_txfm_param;
- assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
- (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+ assert((x->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
fwd_txfm_param.tx_type = tx_type;
fwd_txfm_param.tx_size = tx_size;
@@ -665,13 +659,11 @@
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *src_diff;
const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
- int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
- is_inter, plane_type);
+ int dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
FWD_TXFM_PARAM fwd_txfm_param;
- assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
- (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+ assert((x->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
fwd_txfm_param.tx_type = tx_type;
fwd_txfm_param.tx_size = tx_size;
@@ -730,13 +722,11 @@
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *src_diff;
const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
- int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
- is_inter, plane_type);
+ int dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
FWD_TXFM_PARAM fwd_txfm_param;
- assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
- (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+ assert((x->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
fwd_txfm_param.tx_type = tx_type;
fwd_txfm_param.tx_size = tx_size;
@@ -1077,16 +1067,14 @@
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
PREDICTION_MODE mode;
- const int bwl = b_width_log2_lookup[plane_bsize];
- const int bhl = b_height_log2_lookup[plane_bsize];
- const int diff_stride = 4 * (1 << bwl);
+ const int diff_stride = block_size_wide[plane_bsize];
uint8_t *src, *dst;
int16_t *src_diff;
uint16_t *eob = &p->eobs[block];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
- const int tx1d_width = num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
- const int tx1d_height = num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int tx1d_height = tx_size_high[tx_size];
ENTROPY_CONTEXT *a = NULL, *l = NULL;
int ctx;
@@ -1097,10 +1085,9 @@
dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
-
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
- av1_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, dst,
- dst_stride, blk_col, blk_row, plane);
+ av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
+ dst_stride, dst, dst_stride, blk_col, blk_row, plane);
#if CONFIG_AOM_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
aom_highbd_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index f1a6f72..33c536d 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2040,10 +2040,13 @@
cm->free_mi = av1_enc_free_mi;
cm->setup_mi = av1_enc_setup_mi;
- CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)aom_calloc(1, sizeof(*cm->fc)));
- CHECK_MEM_ERROR(
- cm, cm->frame_contexts,
- (FRAME_CONTEXT *)aom_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
+ CHECK_MEM_ERROR(cm, cm->fc,
+ (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
+ CHECK_MEM_ERROR(cm, cm->frame_contexts,
+ (FRAME_CONTEXT *)aom_memalign(
+ 32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
+ memset(cm->fc, 0, sizeof(*cm->fc));
+ memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts));
cpi->resize_state = 0;
cpi->resize_avg_qp = 0;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 0c66905..1bf7ff4 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -751,12 +751,10 @@
#endif // CONFIG_EXT_REFS
static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols) {
- // TODO(JBB): double check we can't exceed this token count if we have a
- // 32x32 transform crossing a boundary at a multiple of 16.
- // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
- // resolution. We assume up to 1 token per pixel, and then allow
- // a head room of 1 EOSB token per 8x8 block per plane.
- return mb_rows * mb_cols * (16 * 16 + 4) * 3;
+ // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
+ // resolution, up to 1 token per pixel, and a head room of 1 EOSB token per
+ // 4x4 block per plane plus EOSB_TOKEN per plane (16 + 1 per 16x16 unit).
+ return mb_rows * mb_cols * (16 * 16 + 17) * 3;
}
// Get the allocated token size for a tile. It does the same calculation as in
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 466cb9c..dc97ddf 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -579,13 +579,15 @@
set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize],
mb_col << 1, num_8x8_blocks_wide_lookup[bsize],
cm->mi_rows, cm->mi_cols);
+ set_plane_n4(xd, num_8x8_blocks_wide_lookup[bsize],
+ num_8x8_blocks_high_lookup[bsize],
+ mi_width_log2_lookup[bsize], mi_height_log2_lookup[bsize]);
// Do intra 16x16 prediction.
xd->mi[0]->mbmi.segment_id = 0;
#if CONFIG_SUPERTX
xd->mi[0]->mbmi.segment_id_supertx = 0;
#endif // CONFIG_SUPERTX
- xd->qindex[xd->mi[0]->mbmi.segment_id] = qindex;
xd->lossless[xd->mi[0]->mbmi.segment_id] = (qindex == 0);
xd->mi[0]->mbmi.mode = DC_PRED;
xd->mi[0]->mbmi.tx_size =
diff --git a/av1/encoder/mbgraph.c b/av1/encoder/mbgraph.c
index 9bbed2b..1fd1682 100644
--- a/av1/encoder/mbgraph.c
+++ b/av1/encoder/mbgraph.c
@@ -149,7 +149,7 @@
unsigned int err;
xd->mi[0]->mbmi.mode = mode;
- av1_predict_intra_block(xd, 2, 2, TX_16X16, mode, x->plane[0].src.buf,
+ av1_predict_intra_block(xd, 16, 16, TX_16X16, mode, x->plane[0].src.buf,
x->plane[0].src.stride, xd->plane[0].dst.buf,
xd->plane[0].dst.stride, 0, 0, 0);
err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
diff --git a/av1/encoder/quantize.c b/av1/encoder/quantize.c
index db2fdb8..771f94b 100644
--- a/av1/encoder/quantize.c
+++ b/av1/encoder/quantize.c
@@ -1293,11 +1293,11 @@
}
x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
- x->q_index = qindex;
+ x->qindex = qindex;
set_error_per_bit(x, rdmult);
- av1_initialize_me_consts(cpi, x, x->q_index);
+ av1_initialize_me_consts(cpi, x, qindex);
}
void av1_frame_init_quantizer(AV1_COMP *cpi) {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 627352b..a1bac85 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1944,8 +1944,9 @@
int16_t *const src_diff =
av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
xd->mi[0]->bmi[block].as_mode = mode;
- av1_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
- dst_stride, col + idx, row + idy, 0);
+ av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
+ dst_stride, dst, dst_stride, col + idx,
+ row + idy, 0);
aom_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
dst_stride, xd->bd);
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
@@ -2064,8 +2065,9 @@
int16_t *const src_diff =
av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
xd->mi[0]->bmi[block].as_mode = mode;
- av1_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
- dst_stride, col + idx, row + idy, 0);
+ av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
+ dst_stride, dst, dst_stride, col + idx,
+ row + idy, 0);
aom_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
@@ -3020,11 +3022,11 @@
static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
int blk_col, int plane, int block, TX_SIZE tx_size,
- BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
- ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above,
- TXFM_CONTEXT *tx_left, int *rate, int64_t *dist,
- int64_t *bsse, int *skip, int64_t ref_best_rd,
- int *is_cost_valid) {
+ int depth, BLOCK_SIZE plane_bsize,
+ ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
+ TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
+ int *rate, int64_t *dist, int64_t *bsse, int *skip,
+ int64_t ref_best_rd, int *is_cost_valid) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = &x->plane[plane];
@@ -3116,13 +3118,13 @@
*skip = 0;
}
- if (tx_size > TX_4X4)
+ if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
*rate += av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
tmp_eob = p->eobs[block];
}
- if (tx_size > TX_4X4) {
+ if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bsl = b_height_log2_lookup[bsize];
int sub_step = num_4x4_blocks_txsize_lookup[tx_size - 1];
@@ -3141,9 +3143,10 @@
int offsetr = (i >> 1) << bsl;
int offsetc = (i & 0x01) << bsl;
select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
- block + i * sub_step, tx_size - 1, plane_bsize, ta, tl,
- tx_above, tx_left, &this_rate, &this_dist, &this_bsse,
- &this_skip, ref_best_rd - tmp_rd, &this_cost_valid);
+ block + i * sub_step, tx_size - 1, depth + 1, plane_bsize,
+ ta, tl, tx_above, tx_left, &this_rate, &this_dist,
+ &this_bsse, &this_skip, ref_best_rd - tmp_rd,
+ &this_cost_valid);
sum_rate += this_rate;
sum_dist += this_dist;
sum_bsse += this_bsse;
@@ -3219,9 +3222,10 @@
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bh) {
select_tx_block(cpi, x, idy, idx, 0, block,
- max_txsize_lookup[plane_bsize], plane_bsize, ctxa, ctxl,
- tx_above, tx_left, &pnrate, &pndist, &pnsse, &pnskip,
- ref_best_rd - this_rd, &is_cost_valid);
+ max_txsize_lookup[plane_bsize], mi_height != mi_width,
+ plane_bsize, ctxa, ctxl, tx_above, tx_left, &pnrate,
+ &pndist, &pnsse, &pnskip, ref_best_rd - this_rd,
+ &is_cost_valid);
*rate += pnrate;
*distortion += pndist;
*sse += pnsse;
@@ -7586,7 +7590,7 @@
// Y cost and distortion
av1_subtract_plane(x, bsize, 0);
#if CONFIG_VAR_TX
- if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
+ if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
bsize, ref_best_rd);
} else {
@@ -8989,10 +8993,6 @@
clamp_mv2(&cur_mv.as_mv, xd);
if (!mv_check_bounds(x, &cur_mv.as_mv)) {
- InterpFilter dummy_single_inter_filter[MB_MODE_COUNT]
- [TOTAL_REFS_PER_FRAME] = {
- { 0 }
- };
int dummy_single_skippable[MB_MODE_COUNT]
[TOTAL_REFS_PER_FRAME] = { { 0 } };
#if CONFIG_EXT_INTER
@@ -9020,8 +9020,7 @@
#else
dummy_single_newmv,
#endif
- dummy_single_inter_filter, dummy_single_skippable, &tmp_sse,
- best_rd);
+ single_inter_filter, dummy_single_skippable, &tmp_sse, best_rd);
}
for (i = 0; i < mbmi->ref_mv_idx; ++i) {
@@ -9376,7 +9375,7 @@
int best_rate_nocoef;
#endif
int64_t distortion2 = 0, distortion_y = 0, dummy_rd = best_rd, this_rd;
- int skippable = 0, rate_overhead = 0;
+ int skippable = 0, rate_overhead_palette = 0;
TX_SIZE best_tx_size, uv_tx;
TX_TYPE best_tx_type;
PALETTE_MODE_INFO palette_mode_info;
@@ -9384,13 +9383,12 @@
x->palette_buffer->best_palette_color_map;
uint8_t *const color_map = xd->plane[0].color_index_map;
- rate_overhead = 0;
mbmi->mode = DC_PRED;
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE;
palette_mode_info.palette_size[0] = 0;
- rate_overhead = rd_pick_palette_intra_sby(
+ rate_overhead_palette = rd_pick_palette_intra_sby(
cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
&palette_mode_info, best_palette_color_map, &best_tx_size,
&best_tx_type, &mode_selected, &dummy_rd);
@@ -9439,7 +9437,7 @@
#endif // CONFIG_FILTER_INTRA
skippable = skippable && skip_uvs[uv_tx];
distortion2 = distortion_y + dist_uvs[uv_tx];
- rate2 = rate_y + rate_overhead + rate_uv_intra[uv_tx];
+ rate2 = rate_y + rate_overhead_palette + rate_uv_intra[uv_tx];
rate2 += ref_costs_single[INTRA_FRAME];
if (skippable) {
@@ -9785,6 +9783,7 @@
#else // CONFIG_GLOBAL_MOTION
mbmi->mv[0].as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
+ mbmi->tx_size = max_txsize_lookup[bsize];
x->skip = 1;
#if CONFIG_REF_MV
diff --git a/av1/encoder/x86/hybrid_fwd_txfm_avx2.c b/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
index 928af13..f4bd142 100644
--- a/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
+++ b/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
@@ -14,6 +14,7 @@
#include "./av1_rtcd.h"
#include "./aom_dsp_rtcd.h"
+#include "aom_dsp/x86/fwd_txfm_avx2.h"
#include "aom_dsp/txfm_common.h"
#include "aom_dsp/x86/txfm_common_avx2.h"
@@ -273,24 +274,11 @@
in[15] = _mm256_slli_epi16(in[15], 2);
}
-static INLINE void write_buffer_16x16(const __m256i *in, int stride,
- tran_low_t *output) {
- _mm256_storeu_si256((__m256i *)output, in[0]);
- _mm256_storeu_si256((__m256i *)(output + stride), in[1]);
- _mm256_storeu_si256((__m256i *)(output + 2 * stride), in[2]);
- _mm256_storeu_si256((__m256i *)(output + 3 * stride), in[3]);
- _mm256_storeu_si256((__m256i *)(output + 4 * stride), in[4]);
- _mm256_storeu_si256((__m256i *)(output + 5 * stride), in[5]);
- _mm256_storeu_si256((__m256i *)(output + 6 * stride), in[6]);
- _mm256_storeu_si256((__m256i *)(output + 7 * stride), in[7]);
- _mm256_storeu_si256((__m256i *)(output + 8 * stride), in[8]);
- _mm256_storeu_si256((__m256i *)(output + 9 * stride), in[9]);
- _mm256_storeu_si256((__m256i *)(output + 10 * stride), in[10]);
- _mm256_storeu_si256((__m256i *)(output + 11 * stride), in[11]);
- _mm256_storeu_si256((__m256i *)(output + 12 * stride), in[12]);
- _mm256_storeu_si256((__m256i *)(output + 13 * stride), in[13]);
- _mm256_storeu_si256((__m256i *)(output + 14 * stride), in[14]);
- _mm256_storeu_si256((__m256i *)(output + 15 * stride), in[15]);
+static INLINE void write_buffer_16x16(const __m256i *in, tran_low_t *output) {
+ int i;
+ for (i = 0; i < 16; ++i) {  // one storeu_output_avx2 call per input vector
+ storeu_output_avx2(&in[i], output + (i << 4));  // destination offset 16 * i
+ }
}
static void right_shift_16x16(__m256i *in) {
@@ -1253,7 +1241,7 @@
default: assert(0); break;
}
mm256_transpose_16x16(in);
- write_buffer_16x16(in, 16, output);
+ write_buffer_16x16(in, output);
_mm256_zeroupper();
}
@@ -1623,12 +1611,13 @@
}
static INLINE void write_buffer_32x32(const __m256i *in0, const __m256i *in1,
- int stride, tran_low_t *output) {
+ tran_low_t *output) {
int i = 0;
+ const int stride = 32;
tran_low_t *coeff = output;
while (i < 32) {
- _mm256_storeu_si256((__m256i *)coeff, in0[i]);
- _mm256_storeu_si256((__m256i *)(coeff + 16), in1[i]);
+ storeu_output_avx2(&in0[i], coeff);
+ storeu_output_avx2(&in1[i], coeff + 16);
coeff += stride;
i += 1;
}
@@ -1885,6 +1874,6 @@
default: assert(0); break;
}
nr_right_shift_32x32(in0, in1);
- write_buffer_32x32(in0, in1, 32, output);
+ write_buffer_32x32(in0, in1, output);
_mm256_zeroupper();
}
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 9811955..b4bb14f 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -533,10 +533,10 @@
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_AOM_HIGHBITDEPTH
- const uint32_t diff =
+ const int diff =
bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
- const uint32_t diff = dst[j] - src[j];
+ const int diff = dst[j] - src[j];
#endif // CONFIG_AOM_HIGHBITDEPTH
const uint32_t error = diff * diff;
EXPECT_GE(1u, error) << "Error: 16x16 IDCT has error " << error
@@ -589,10 +589,10 @@
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_AOM_HIGHBITDEPTH
- const uint32_t diff =
+ const int diff =
bit_depth_ == AOM_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
- const uint32_t diff = dst[j] - ref[j];
+ const int diff = dst[j] - ref[j];
#endif // CONFIG_AOM_HIGHBITDEPTH
const uint32_t error = diff * diff;
EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error "
diff --git a/test/fht32x32_test.cc b/test/fht32x32_test.cc
index 3d07b44..1f85761 100644
--- a/test/fht32x32_test.cc
+++ b/test/fht32x32_test.cc
@@ -90,8 +90,14 @@
IhtFunc inv_txfm_;
};
+// TODO(luoyi): Because of the range check in the DCT_DCT path of
+// av1_fht32x32_avx2, aom_fdct32x32_avx2 is used when the input is out of
+// range, but that function does not support CONFIG_AOM_HIGHBITDEPTH. Fix the
+// scaling/rounding of av1_fht32x32_avx2, then enable these tests for it.
+#if !CONFIG_AOM_HIGHBITDEPTH
TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans32x32HT, MemCheck) { RunMemCheck(); }
+#endif
#if CONFIG_AOM_HIGHBITDEPTH
class AV1HighbdTrans32x32HT
diff --git a/test/transform_test_base.h b/test/transform_test_base.h
index 195058e..540136c 100644
--- a/test/transform_test_base.h
+++ b/test/transform_test_base.h
@@ -90,11 +90,11 @@
for (int j = 0; j < num_coeffs_; ++j) {
#if CONFIG_AOM_HIGHBITDEPTH
- const uint32_t diff =
+ const int diff =
bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
ASSERT_EQ(AOM_BITS_8, bit_depth_);
- const uint32_t diff = dst[j] - src[j];
+ const int diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
if (max_error < error) max_error = error;
@@ -309,10 +309,10 @@
for (int j = 0; j < num_coeffs_; ++j) {
#if CONFIG_AOM_HIGHBITDEPTH
- const uint32_t diff =
+ const int diff =
bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
- const uint32_t diff = dst[j] - src[j];
+ const int diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
EXPECT_GE(static_cast<uint32_t>(limit), error)