Merge "Reduced y_dequant, uv_dequant size" into experimental
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index a8139cb..151a38b 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -8,6 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "./vpx_config.h"
@@ -16,10 +20,6 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
}
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
namespace {
typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
@@ -46,27 +46,27 @@
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
-static uint8_t clip_pixel(int x) {
+uint8_t clip_pixel(int x) {
return x < 0 ? 0 :
x > 255 ? 255 :
x;
}
-static void filter_block2d_8_c(const uint8_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter,
- const int16_t *VFilter,
- uint8_t *dst_ptr,
- unsigned int dst_stride,
- unsigned int output_width,
- unsigned int output_height) {
+void filter_block2d_8_c(const uint8_t *src_ptr,
+ const unsigned int src_stride,
+ const int16_t *HFilter,
+ const int16_t *VFilter,
+ uint8_t *dst_ptr,
+ unsigned int dst_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
// Between passes, we use an intermediate buffer whose height is extended to
// have enough horizontally filtered values as input for the vertical pass.
// This buffer is allocated to be big enough for the largest block type we
// support.
const int kInterp_Extend = 4;
const unsigned int intermediate_height =
- (kInterp_Extend - 1) + output_height + kInterp_Extend;
+ (kInterp_Extend - 1) + output_height + kInterp_Extend;
/* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
* where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
@@ -87,15 +87,15 @@
for (i = 0; i < intermediate_height; ++i) {
for (j = 0; j < output_width; ++j) {
// Apply filter...
- int temp = ((int)src_ptr[0] * HFilter[0]) +
- ((int)src_ptr[1] * HFilter[1]) +
- ((int)src_ptr[2] * HFilter[2]) +
- ((int)src_ptr[3] * HFilter[3]) +
- ((int)src_ptr[4] * HFilter[4]) +
- ((int)src_ptr[5] * HFilter[5]) +
- ((int)src_ptr[6] * HFilter[6]) +
- ((int)src_ptr[7] * HFilter[7]) +
- (VP9_FILTER_WEIGHT >> 1); // Rounding
+ const int temp = (src_ptr[0] * HFilter[0]) +
+ (src_ptr[1] * HFilter[1]) +
+ (src_ptr[2] * HFilter[2]) +
+ (src_ptr[3] * HFilter[3]) +
+ (src_ptr[4] * HFilter[4]) +
+ (src_ptr[5] * HFilter[5]) +
+ (src_ptr[6] * HFilter[6]) +
+ (src_ptr[7] * HFilter[7]) +
+ (VP9_FILTER_WEIGHT >> 1); // Rounding
// Normalize back to 0-255...
*output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
@@ -115,15 +115,15 @@
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
// Apply filter...
- int temp = ((int)src_ptr[0] * VFilter[0]) +
- ((int)src_ptr[1] * VFilter[1]) +
- ((int)src_ptr[2] * VFilter[2]) +
- ((int)src_ptr[3] * VFilter[3]) +
- ((int)src_ptr[4] * VFilter[4]) +
- ((int)src_ptr[5] * VFilter[5]) +
- ((int)src_ptr[6] * VFilter[6]) +
- ((int)src_ptr[7] * VFilter[7]) +
- (VP9_FILTER_WEIGHT >> 1); // Rounding
+ const int temp = (src_ptr[0] * VFilter[0]) +
+ (src_ptr[1] * VFilter[1]) +
+ (src_ptr[2] * VFilter[2]) +
+ (src_ptr[3] * VFilter[3]) +
+ (src_ptr[4] * VFilter[4]) +
+ (src_ptr[5] * VFilter[5]) +
+ (src_ptr[6] * VFilter[6]) +
+ (src_ptr[7] * VFilter[7]) +
+ (VP9_FILTER_WEIGHT >> 1); // Rounding
// Normalize back to 0-255...
*dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
@@ -135,12 +135,12 @@
}
}
-static void block2d_average_c(uint8_t *src,
- unsigned int src_stride,
- uint8_t *output_ptr,
- unsigned int output_stride,
- unsigned int output_width,
- unsigned int output_height) {
+void block2d_average_c(uint8_t *src,
+ unsigned int src_stride,
+ uint8_t *output_ptr,
+ unsigned int output_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
@@ -150,15 +150,15 @@
}
}
-static void filter_average_block2d_8_c(const uint8_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter,
- const int16_t *VFilter,
- uint8_t *dst_ptr,
- unsigned int dst_stride,
- unsigned int output_width,
- unsigned int output_height) {
- uint8_t tmp[64*64];
+void filter_average_block2d_8_c(const uint8_t *src_ptr,
+ const unsigned int src_stride,
+ const int16_t *HFilter,
+ const int16_t *VFilter,
+ uint8_t *dst_ptr,
+ unsigned int dst_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
+ uint8_t tmp[64 * 64];
assert(output_width <= 64);
assert(output_height <= 64);
@@ -173,10 +173,9 @@
static void SetUpTestCase() {
// Force input_ to be unaligned, output to be 16 byte aligned.
input_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, kOuterBlockSize * kOuterBlockSize + 1))
- + 1;
+ vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
output_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, kOuterBlockSize * kOuterBlockSize));
+ vpx_memalign(kDataAlignment, kOutputBufferSize));
}
static void TearDownTestCase() {
@@ -186,62 +185,63 @@
output_ = NULL;
}
- protected:
- static const int kDataAlignment = 16;
- static const int kOuterBlockSize = 128;
- static const int kInputStride = kOuterBlockSize;
- static const int kOutputStride = kOuterBlockSize;
- static const int kMaxDimension = 64;
+ protected:
+ static const int kDataAlignment = 16;
+ static const int kOuterBlockSize = 128;
+ static const int kInputStride = kOuterBlockSize;
+ static const int kOutputStride = kOuterBlockSize;
+ static const int kMaxDimension = 64;
+ static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
+ static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
- int Width() const { return GET_PARAM(0); }
- int Height() const { return GET_PARAM(1); }
- int BorderLeft() const {
- const int center = (kOuterBlockSize - Width()) / 2;
- return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
- }
- int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
+ int Width() const { return GET_PARAM(0); }
+ int Height() const { return GET_PARAM(1); }
+ int BorderLeft() const {
+ const int center = (kOuterBlockSize - Width()) / 2;
+ return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
+ }
+ int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
- bool IsIndexInBorder(int i) {
- return (i < BorderTop() * kOuterBlockSize ||
- i >= (BorderTop() + Height()) * kOuterBlockSize ||
- i % kOuterBlockSize < BorderLeft() ||
- i % kOuterBlockSize >= (BorderLeft() + Width()));
+ bool IsIndexInBorder(int i) {
+ return (i < BorderTop() * kOuterBlockSize ||
+ i >= (BorderTop() + Height()) * kOuterBlockSize ||
+ i % kOuterBlockSize < BorderLeft() ||
+ i % kOuterBlockSize >= (BorderLeft() + Width()));
+ }
+
+ virtual void SetUp() {
+ UUT_ = GET_PARAM(2);
+ /* Set up guard blocks for an inner block cetered in the outer block */
+ for (int i = 0; i < kOutputBufferSize; ++i) {
+ if (IsIndexInBorder(i))
+ output_[i] = 255;
+ else
+ output_[i] = 0;
}
- virtual void SetUp() {
- UUT_ = GET_PARAM(2);
- memset(input_, 0, sizeof(input_));
- /* Set up guard blocks for an inner block cetered in the outer block */
- for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
- if (IsIndexInBorder(i))
- output_[i] = 255;
- else
- output_[i] = 0;
- }
+ ::libvpx_test::ACMRandom prng;
+ for (int i = 0; i < kInputBufferSize; ++i)
+ input_[i] = prng.Rand8Extremes();
+ }
- ::libvpx_test::ACMRandom prng;
- for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i)
- input_[i] = prng.Rand8Extremes();
+ void CheckGuardBlocks() {
+ for (int i = 0; i < kOutputBufferSize; ++i) {
+ if (IsIndexInBorder(i))
+ EXPECT_EQ(255, output_[i]);
}
+ }
- void CheckGuardBlocks() {
- for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
- if (IsIndexInBorder(i))
- EXPECT_EQ(255, output_[i]);
- }
- }
+ uint8_t* input() const {
+ return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ }
- uint8_t* input() {
- return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
- }
+ uint8_t* output() const {
+ return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ }
- uint8_t* output() {
- return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
- }
-
- const ConvolveFunctions* UUT_;
- static uint8_t* input_;
- static uint8_t* output_;
+ const ConvolveFunctions* UUT_;
+ static uint8_t* input_;
+ static uint8_t* output_;
};
uint8_t* ConvolveTest::input_ = NULL;
uint8_t* ConvolveTest::output_ = NULL;
@@ -309,7 +309,7 @@
vp9_sub_pel_filters_8lp
};
const int kNumFilterBanks = sizeof(kTestFilterList) /
- sizeof(kTestFilterList[0]);
+ sizeof(kTestFilterList[0]);
const int kNumFilters = 16;
TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
@@ -525,7 +525,6 @@
make_tuple(64, 32, &convolve8_c),
make_tuple(32, 64, &convolve8_c),
make_tuple(64, 64, &convolve8_c)));
-}
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(
@@ -548,3 +547,4 @@
make_tuple(32, 64, &convolve8_ssse3),
make_tuple(64, 64, &convolve8_ssse3)));
#endif
+} // namespace
diff --git a/test/superframe_test.cc b/test/superframe_test.cc
index 52faddb..062ec6c 100644
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@@ -30,7 +30,7 @@
}
virtual void TearDown() {
- delete modified_buf_;
+ delete[] modified_buf_;
}
virtual bool Continue() const {
@@ -59,7 +59,7 @@
buffer[pkt->data.frame.sz - index_sz] == marker) {
// frame is a superframe. strip off the index.
if (modified_buf_)
- delete modified_buf_;
+ delete[] modified_buf_;
modified_buf_ = new uint8_t[pkt->data.frame.sz - index_sz];
memcpy(modified_buf_, pkt->data.frame.buf,
pkt->data.frame.sz - index_sz);
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 426699e..7d323eed 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -453,7 +453,7 @@
static INLINE void update_partition_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type,
BLOCK_SIZE_TYPE sb_size) {
- int bsl = mi_width_log2(sb_size), bs = 1 << bsl;
+ int bsl = mi_width_log2(sb_size), bs;
int bwl = mi_width_log2(sb_type);
int bhl = mi_height_log2(sb_type);
int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
@@ -462,6 +462,12 @@
if (bsl == 0)
return;
+#if CONFIG_SB8X8
+ bs = 1 << (bsl - 1);
+#else
+ bs = 1 << bsl;
+#endif
+
// update the partition context at the end notes. set partition bits
// of block sizes larger than the current one to be one, and partition
// bits of smaller block sizes to be zero.
@@ -492,14 +498,26 @@
static INLINE int partition_plane_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type) {
- int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
+ int bsl = mi_width_log2(sb_type), bs;
int above = 0, left = 0, i;
int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
+#if CONFIG_SB8X8
+ bs = 1 << (bsl - 1);
+#else
+ bs = 1 << bsl;
+#endif
+
assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
assert(bsl >= 0);
assert(boffset >= 0);
+#if CONFIG_SB8X8
+ bs = 1 << (bsl - 1);
+#else
+ bs = 1 << bsl;
+#endif
+
for (i = 0; i < bs; i++)
above |= (xd->above_seg_context[i] & (1 << boffset));
for (i = 0; i < bs; i++)
@@ -511,6 +529,55 @@
return (left * 2 + above) + (bsl - 1) * PARTITION_PLOFFSET;
}
+static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
+ PARTITION_TYPE partition) {
+ BLOCK_SIZE_TYPE subsize;
+ switch (partition) {
+ case PARTITION_NONE:
+ subsize = bsize;
+ break;
+ case PARTITION_HORZ:
+ if (bsize == BLOCK_SIZE_SB64X64)
+ subsize = BLOCK_SIZE_SB64X32;
+ else if (bsize == BLOCK_SIZE_SB32X32)
+ subsize = BLOCK_SIZE_SB32X16;
+#if CONFIG_SB8X8
+ else if (bsize == BLOCK_SIZE_MB16X16)
+ subsize = BLOCK_SIZE_SB16X8;
+#endif
+ else
+ assert(0);
+ break;
+ case PARTITION_VERT:
+ if (bsize == BLOCK_SIZE_SB64X64)
+ subsize = BLOCK_SIZE_SB32X64;
+ else if (bsize == BLOCK_SIZE_SB32X32)
+ subsize = BLOCK_SIZE_SB16X32;
+#if CONFIG_SB8X8
+ else if (bsize == BLOCK_SIZE_MB16X16)
+ subsize = BLOCK_SIZE_SB8X16;
+#endif
+ else
+ assert(0);
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_SIZE_SB64X64)
+ subsize = BLOCK_SIZE_SB32X32;
+ else if (bsize == BLOCK_SIZE_SB32X32)
+ subsize = BLOCK_SIZE_MB16X16;
+#if CONFIG_SB8X8
+ else if (bsize == BLOCK_SIZE_MB16X16)
+ subsize = BLOCK_SIZE_SB8X8;
+#endif
+ else
+ assert(0);
+ break;
+ default:
+ assert(0);
+ }
+ return subsize;
+}
+
#define ACTIVE_HT 110 // quantization stepsize threshold
#define ACTIVE_HT8 300
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index c8dd6eb..87879ea 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -18,6 +18,7 @@
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_quant_common.h"
#if CONFIG_POSTPROC
#include "vp9/common/vp9_postproc.h"
@@ -31,13 +32,6 @@
void vp9_initialize_common(void);
-#define MINQ 0
-
-#define MAXQ 255
-#define QINDEX_BITS 8
-
-#define QINDEX_RANGE (MAXQ + 1)
-
#if CONFIG_MULTIPLE_ARF
#define NUM_REF_FRAMES 8
#define NUM_REF_FRAMES_LG2 3
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index 2e9e4ca..5907b4f 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -10,6 +10,7 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_seg_common.h"
static int16_t dc_qlookup[QINDEX_RANGE];
static int16_t ac_qlookup[QINDEX_RANGE];
@@ -44,3 +45,16 @@
int16_t vp9_ac_quant(int qindex, int delta) {
return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
}
+
+
+int vp9_get_qindex(MACROBLOCKD *xd, int segment_id, int base_qindex) {
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) {
+ const int data = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
+ return xd->mb_segment_abs_delta == SEGMENT_ABSDATA ?
+ data : // Abs value
+ clamp(base_qindex + data, 0, MAXQ); // Delta value
+ } else {
+ return base_qindex;
+ }
+}
+
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h
index 7daf15d..ded9426 100644
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -12,11 +12,17 @@
#define VP9_COMMON_VP9_QUANT_COMMON_H_
#include "vp9/common/vp9_blockd.h"
-#include "vp9/common/vp9_onyxc_int.h"
+
+#define MINQ 0
+#define MAXQ 255
+#define QINDEX_RANGE (MAXQ - MINQ + 1)
+#define QINDEX_BITS 8
void vp9_init_quant_tables();
int16_t vp9_dc_quant(int qindex, int delta);
int16_t vp9_ac_quant(int qindex, int delta);
+int vp9_get_qindex(MACROBLOCKD *mb, int segment_id, int base_qindex);
+
#endif // VP9_COMMON_VP9_QUANT_COMMON_H_
diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c
index 4119450..4ab4f39 100644
--- a/vp9/common/vp9_recon.c
+++ b/vp9/common/vp9_recon.c
@@ -31,9 +31,10 @@
void vp9_recon_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
int stride) {
assert(pred_ptr == dst_ptr);
- recon(4, 4, diff_ptr, 16, dst_ptr, stride);
+ recon(4, 4, diff_ptr, 16 >> CONFIG_SB8X8, dst_ptr, stride);
}
+#if !CONFIG_SB8X8
void vp9_recon_uv_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
int stride) {
assert(pred_ptr == dst_ptr);
@@ -51,6 +52,7 @@
assert(pred_ptr == dst_ptr);
recon(4, 8, diff_ptr, 8, dst_ptr, stride);
}
+#endif
static void recon_plane(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, int plane) {
const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index a0155d9..daeb6b5 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -545,34 +545,35 @@
void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE bsize) {
- const int bwl = mi_width_log2(bsize), bw = MI_SIZE << bwl;
- const int bhl = mi_height_log2(bsize), bh = MI_SIZE << bhl;
+ const int bwl = b_width_log2(bsize), bw = 4 << bwl;
+ const int bhl = b_height_log2(bsize), bh = 4 << bhl;
vp9_build_intra_predictors(xd->plane[0].dst.buf, xd->plane[0].dst.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
xd->mode_info_context->mbmi.mode,
bw, bh,
xd->up_available, xd->left_available,
- xd->right_available);
+ 0 /*xd->right_available*/);
}
void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE bsize) {
- const int bwl = mi_width_log2(bsize), bw = MI_UV_SIZE << bwl;
- const int bhl = mi_height_log2(bsize), bh = MI_UV_SIZE << bhl;
+ const int bwl = b_width_log2(bsize), bw = 2 << bwl;
+ const int bhl = b_height_log2(bsize), bh = 2 << bhl;
vp9_build_intra_predictors(xd->plane[1].dst.buf, xd->plane[1].dst.stride,
xd->plane[1].dst.buf, xd->plane[1].dst.stride,
xd->mode_info_context->mbmi.uv_mode,
bw, bh, xd->up_available,
- xd->left_available, xd->right_available);
+ xd->left_available, 0 /*xd->right_available*/);
vp9_build_intra_predictors(xd->plane[2].dst.buf, xd->plane[1].dst.stride,
xd->plane[2].dst.buf, xd->plane[1].dst.stride,
xd->mode_info_context->mbmi.uv_mode,
bw, bh, xd->up_available,
- xd->left_available, xd->right_available);
+ xd->left_available, 0 /*xd->right_available*/);
}
+#if !CONFIG_SB8X8
void vp9_intra8x8_predict(MACROBLOCKD *xd,
int block4x4_idx,
int mode,
@@ -587,14 +588,18 @@
mode, 8, 8, have_top, have_left,
have_right);
}
+#endif
#if !CONFIG_NEWBINTRAMODES
void vp9_intra4x4_predict(MACROBLOCKD *xd,
int block_idx,
int mode,
uint8_t *predictor, int pre_stride) {
- const int have_top = (block_idx >> 2) || xd->up_available;
- const int have_left = (block_idx & 3) || xd->left_available;
- const int have_right = ((block_idx & 3) != 3);
+ const int have_top =
+ (block_idx >> (2 >> CONFIG_SB8X8)) || xd->up_available;
+ const int have_left =
+ (block_idx & (3 >> CONFIG_SB8X8)) || xd->left_available;
+ const int have_right =
+ ((block_idx & (3 >> CONFIG_SB8X8)) != (3 >> CONFIG_SB8X8));
vp9_build_intra_predictors(predictor, pre_stride,
predictor, pre_stride,
@@ -602,6 +607,7 @@
have_right);
}
#endif
+#if !CONFIG_SB8X8
void vp9_intra_uv4x4_predict(MACROBLOCKD *xd,
int block4x4_idx,
int mode,
@@ -616,3 +622,4 @@
mode, 4, 4, have_top, have_left,
have_right);
}
+#endif
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 745cc69..b1acc04 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -63,6 +63,8 @@
prototype void vp9_recon_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
specialize vp9_recon_b
+if [ "$CONFIG_SB8X8" != "yes" ]; then
+
prototype void vp9_recon_uv_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
specialize vp9_recon_uv_b
@@ -76,6 +78,8 @@
specialize vp9_recon4b
# specialize vp9_recon4b sse2
+fi
+
prototype void vp9_recon_sb "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize"
specialize vp9_recon_sb
@@ -97,12 +101,16 @@
prototype void vp9_intra4x4_predict "struct macroblockd *xd, int block, int b_mode, uint8_t *predictor, int pre_stride"
specialize vp9_intra4x4_predict;
+if [ "$CONFIG_SB8X8" != "yes" ]; then
+
prototype void vp9_intra8x8_predict "struct macroblockd *xd, int block, int b_mode, uint8_t *predictor, int pre_stride"
specialize vp9_intra8x8_predict;
prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, int block, int b_mode, uint8_t *predictor, int pre_stride"
specialize vp9_intra_uv4x4_predict;
+fi
+
if [ "$CONFIG_VP9_DECODER" = "yes" ]; then
prototype void vp9_add_residual_4x4 "const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_4x4 sse2
@@ -342,6 +350,12 @@
vp9_variance8x8_sse2=vp9_variance8x8_wmt
vp9_variance8x8_mmx=vp9_variance8x8_mmx
+prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance8x4
+
+prototype unsigned int vp9_variance4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_variance4x8
+
prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance4x4 mmx sse2
vp9_variance4x4_sse2=vp9_variance4x4_wmt
diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c
index 9ed3e2d..02b7053 100644
--- a/vp9/common/vp9_seg_common.c
+++ b/vp9/common/vp9_seg_common.c
@@ -107,4 +107,10 @@
}
#endif
+const vp9_tree_index vp9_segment_tree[14] = {
+ 2, 4, 6, 8, 10, 12,
+ 0, -1, -2, -3, -4, -5, -6, -7
+};
+
+
// TBD? Functions to read and write segment data with range / validity checking
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index 4550dd1..53d22a3 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -59,5 +59,8 @@
void vp9_implicit_segment_map_update(VP9_COMMON * cm);
#endif
+
+extern const vp9_tree_index vp9_segment_tree[14];
+
#endif // VP9_COMMON_VP9_SEG_COMMON_H_
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 474250c..f1b214d 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -76,23 +76,7 @@
}
static int read_mb_segid(vp9_reader *r, MACROBLOCKD *xd) {
- const vp9_prob *const p = xd->mb_segment_tree_probs;
- int ret_val;
-
- if (vp9_read(r, p[0])) {
- if (vp9_read(r, p[4])) {
- ret_val = 6 + vp9_read(r, p[6]);
- } else {
- ret_val = 4 + vp9_read(r, p[5]);
- }
- } else {
- if (vp9_read(r, p[1])) {
- ret_val = 2 + vp9_read(r, p[3]);
- } else {
- ret_val = vp9_read(r, p[2]);
- }
- }
- return ret_val;
+ return treed_read(r, vp9_segment_tree, xd->mb_segment_tree_probs);
}
static void set_segment_id(VP9_COMMON *cm, MB_MODE_INFO *mbmi,
@@ -146,7 +130,11 @@
m->mbmi.mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
// luma mode
+#if CONFIG_SB8X8
+ m->mbmi.mode = m->mbmi.sb_type > BLOCK_SIZE_SB8X8 ?
+#else
m->mbmi.mode = m->mbmi.sb_type > BLOCK_SIZE_MB16X16 ?
+#endif
read_kf_sb_ymode(r, cm->sb_kf_ymode_prob[cm->kf_ymode_probs_index]):
read_kf_mb_ymode(r, cm->kf_ymode_prob[cm->kf_ymode_probs_index]);
@@ -154,11 +142,10 @@
if (m->mbmi.mode == I4X4_PRED) {
int i;
- for (i = 0; i < 16; ++i) {
+ for (i = 0; i < (16 >> (2 * CONFIG_SB8X8)); ++i) {
const B_PREDICTION_MODE a = above_block_mode(m, i, mis);
const B_PREDICTION_MODE l = xd->left_available || (i & 3) ?
left_block_mode(m, i) : B_DC_PRED;
-
m->bmi[i].as_mode.first = read_kf_bmode(r, cm->kf_bmode_prob[a][l]);
}
}
@@ -202,7 +189,11 @@
} else if (cm->txfm_mode >= ALLOW_32X32 &&
m->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
m->mbmi.txfm_size = TX_32X32;
- } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) {
+ } else if (cm->txfm_mode >= ALLOW_16X16 &&
+#if CONFIG_SB8X8
+ m->mbmi.sb_type >= BLOCK_SIZE_MB16X16 &&
+#endif
+ m->mbmi.mode <= TM_PRED) {
m->mbmi.txfm_size = TX_16X16;
} else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != I4X4_PRED) {
m->mbmi.txfm_size = TX_8X8;
@@ -685,7 +676,12 @@
if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
} else {
- mbmi->mode = mbmi->sb_type > BLOCK_SIZE_MB16X16 ?
+ mbmi->mode =
+#if CONFIG_SB8X8
+ mbmi->sb_type > BLOCK_SIZE_SB8X8 ?
+#else
+ mbmi->sb_type > BLOCK_SIZE_MB16X16 ?
+#endif
read_sb_mv_ref(r, mv_ref_p)
: read_mv_ref(r, mv_ref_p);
vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref_frame]);
@@ -875,7 +871,11 @@
}
*/
-#if !CONFIG_SB8X8
+#if CONFIG_SB8X8
+ mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
+ if (mbmi->second_ref_frame > 0)
+ mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
+#else
{
/* Fill (uniform) modes, mvs of jth subset.
Must do it here because ensuing subsets can
@@ -961,7 +961,12 @@
// required for left and above block mv
mv0->as_int = 0;
- if (mbmi->sb_type > BLOCK_SIZE_MB16X16) {
+#if CONFIG_SB8X8
+ if (mbmi->sb_type > BLOCK_SIZE_SB8X8)
+#else
+ if (mbmi->sb_type > BLOCK_SIZE_MB16X16)
+#endif
+ {
mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
cm->fc.sb_ymode_counts[mbmi->mode]++;
} else {
@@ -979,7 +984,7 @@
if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
#endif
cm->fc.bmode_counts[m]++;
- } while (++j < 16);
+ } while (++j < (16 >> (2 * CONFIG_SB8X8)));
}
#if !CONFIG_SB8X8
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 522ea9f..c9b65b6 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -177,22 +177,10 @@
}
}
-static int get_qindex(MACROBLOCKD *mb, int segment_id, int base_qindex) {
- // Set the Q baseline allowing for any segment level adjustment
- if (vp9_segfeature_active(mb, segment_id, SEG_LVL_ALT_Q)) {
- const int data = vp9_get_segdata(mb, segment_id, SEG_LVL_ALT_Q);
- return mb->mb_segment_abs_delta == SEGMENT_ABSDATA ?
- data : // Abs value
- clamp(base_qindex + data, 0, MAXQ); // Delta value
- } else {
- return base_qindex;
- }
-}
-
static void mb_init_dequantizer(VP9_COMMON *pc, MACROBLOCKD *xd) {
int i;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- xd->q_index = get_qindex(xd, segment_id, pc->base_qindex);
+ xd->q_index = vp9_get_qindex(xd, segment_id, pc->base_qindex);
xd->plane[0].dequant = pc->y_dequant[xd->q_index];
for (i = 1; i < MAX_MB_PLANE; i++)
@@ -251,9 +239,10 @@
}
#endif
-static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx) {
+static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx,
+ BLOCK_SIZE_TYPE bsize) {
struct macroblockd_plane *const y = &xd->plane[0];
- uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, idx,
+ uint8_t* const dst = raster_block_offset_uint8(xd, bsize, 0, idx,
xd->plane[0].dst.buf,
xd->plane[0].dst.stride);
if (tx_type != DCT_DCT) {
@@ -284,7 +273,7 @@
vp9_intra8x8_predict(xd, ib, i8x8mode, dst, xd->plane[0].dst.stride);
for (j = 0; j < 4; j++) {
tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
- dequant_add_y(xd, tx_type, ib + iblock[j]);
+ dequant_add_y(xd, tx_type, ib + iblock[j], BLOCK_SIZE_MB16X16);
}
dst = raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 1, i,
xd->plane[1].dst.buf,
@@ -368,7 +357,7 @@
vp9_intra4x4_predict(xd, i, b_mode, dst, xd->plane[0].dst.stride);
// TODO(jingning): refactor to use foreach_transformed_block_in_plane_
tx_type = get_tx_type_4x4(xd, i);
- dequant_add_y(xd, tx_type, i);
+ dequant_add_y(xd, tx_type, i, bsize);
}
#if CONFIG_NEWBINTRAMODES
if (!xd->mode_info_context->mbmi.mb_skip_coeff)
@@ -572,10 +561,12 @@
set_refs(pbi, mi_row, mi_col);
#if CONFIG_SB8X8
- if (bsize >= BLOCK_SIZE_SB8X8)
- decode_sb(pbi, xd, mi_row, mi_col, r, bsize);
+ if (bsize == BLOCK_SIZE_SB8X8 &&
+ (xd->mode_info_context->mbmi.mode == SPLITMV ||
+ xd->mode_info_context->mbmi.mode == I4X4_PRED))
+ decode_atom(pbi, xd, mi_row, mi_col, r, bsize);
else
- decode_atom(pbi, xd, mi_row, mi_col, r, BLOCK_SIZE_SB8X8);
+ decode_sb(pbi, xd, mi_row, mi_col, r, bsize);
#else
// TODO(jingning): merge decode_sb_ and decode_mb_
if (bsize > BLOCK_SIZE_MB16X16) {
@@ -612,7 +603,11 @@
if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols)
return;
+#if CONFIG_SB8X8
+ if (bsize > BLOCK_SIZE_SB8X8) {
+#else
if (bsize > BLOCK_SIZE_MB16X16) {
+#endif
int pl;
// read the partition information
xd->left_seg_context =
@@ -624,34 +619,35 @@
pc->fc.partition_counts[pl][partition]++;
}
+ subsize = get_subsize(bsize, partition);
switch (partition) {
case PARTITION_NONE:
- subsize = bsize;
decode_modes_b(pbi, mi_row, mi_col, r, subsize);
break;
case PARTITION_HORZ:
- subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB64X32 :
- BLOCK_SIZE_SB32X16;
decode_modes_b(pbi, mi_row, mi_col, r, subsize);
if ((mi_row + bs) < pc->mi_rows)
decode_modes_b(pbi, mi_row + bs, mi_col, r, subsize);
break;
case PARTITION_VERT:
- subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X64 :
- BLOCK_SIZE_SB16X32;
decode_modes_b(pbi, mi_row, mi_col, r, subsize);
if ((mi_col + bs) < pc->mi_cols)
decode_modes_b(pbi, mi_row, mi_col + bs, r, subsize);
break;
case PARTITION_SPLIT:
- subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X32 :
- BLOCK_SIZE_MB16X16;
for (n = 0; n < 4; n++) {
int j = n >> 1, i = n & 0x01;
if (subsize == BLOCK_SIZE_SB32X32)
xd->sb_index = n;
+#if CONFIG_SB8X8
+ else if (subsize == BLOCK_SIZE_MB16X16)
+ xd->mb_index = n;
+ else
+ xd->b_index = n;
+#else
else
xd->mb_index = n;
+#endif
decode_modes_sb(pbi, mi_row + j * bs, mi_col + i * bs, r, subsize);
}
break;
@@ -659,7 +655,11 @@
assert(0);
}
// update partition context
+#if CONFIG_SB8X8
+ if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
+#else
if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_SB32X32))
+#endif
return;
xd->left_seg_context = pc->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 3c0bab2..7eb35da 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -512,60 +512,9 @@
// It should only be called if a segment map update is indicated.
static void write_mb_segid(vp9_writer *bc,
const MB_MODE_INFO *mi, const MACROBLOCKD *xd) {
- // Encode the MB segment id.
- int seg_id = mi->segment_id;
-
- if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
- switch (seg_id) {
- case 0:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[2]);
- break;
- case 1:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[2]);
- break;
- case 2:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[3]);
- break;
- case 3:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[3]);
- break;
- case 4:
- vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[4]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[5]);
- break;
- case 5:
- vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[4]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[5]);
- break;
- case 6:
- vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[4]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[6]);
- break;
- case 7:
- vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[4]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[6]);
- break;
-
- // TRAP.. This should not happen
- default:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[2]);
- break;
- }
- }
+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map)
+ treed_write(bc, vp9_segment_tree, xd->mb_segment_tree_probs,
+ mi->segment_id, 3);
}
// This function encodes the reference frame
@@ -722,7 +671,11 @@
active_section = 6;
#endif
+#if CONFIG_SB8X8
+ if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
+#else
if (m->mbmi.sb_type > BLOCK_SIZE_MB16X16)
+#endif
write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
else
write_ymode(bc, mode, pc->fc.ymode_prob);
@@ -761,7 +714,11 @@
// If segment skip is not enabled code the mode.
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
+#if CONFIG_SB8X8
+ if (mi->sb_type > BLOCK_SIZE_SB8X8) {
+#else
if (mi->sb_type > BLOCK_SIZE_MB16X16) {
+#endif
write_sb_mv_ref(bc, mode, mv_ref_p);
} else {
write_mv_ref(bc, mode, mv_ref_p);
@@ -944,7 +901,11 @@
vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP));
}
+#if CONFIG_SB8X8
+ if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
+#else
if (m->mbmi.sb_type > BLOCK_SIZE_MB16X16)
+#endif
sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
else
kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]);
@@ -960,7 +921,6 @@
#ifdef ENTROPY_STATS
++intra_mode_stats [A] [L] [bm];
#endif
-
write_kf_bmode(bc, bm, c->kf_bmode_prob[a][l]);
} while (++i < (16 >> (CONFIG_SB8X8 * 2)));
}
@@ -1210,7 +1170,11 @@
else
assert(0);
+#if CONFIG_SB8X8
+ if (bsize > BLOCK_SIZE_SB8X8) {
+#else
if (bsize > BLOCK_SIZE_MB16X16) {
+#endif
int pl;
xd->left_seg_context =
cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
@@ -1221,34 +1185,23 @@
vp9_partition_encodings + partition);
}
+ subsize = get_subsize(bsize, partition);
+
switch (partition) {
case PARTITION_NONE:
- subsize = bsize;
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
break;
case PARTITION_HORZ:
- subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB64X32 :
- BLOCK_SIZE_SB32X16;
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
if ((mi_row + bh) < cm->mi_rows)
write_modes_b(cpi, m + bh * mis, bc, tok, tok_end, mi_row + bh, mi_col);
break;
case PARTITION_VERT:
- subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X64 :
- BLOCK_SIZE_SB16X32;
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
if ((mi_col + bw) < cm->mi_cols)
write_modes_b(cpi, m + bw, bc, tok, tok_end, mi_row, mi_col + bw);
break;
case PARTITION_SPLIT:
- // TODO(jingning): support recursive partitioning down to 16x16 as for
- // now. need to merge in 16x8, 8x16, 8x8, and smaller partitions.
- if (bsize == BLOCK_SIZE_SB64X64)
- subsize = BLOCK_SIZE_SB32X32;
- else if (bsize == BLOCK_SIZE_SB32X32)
- subsize = BLOCK_SIZE_MB16X16;
- else
- assert(0);
for (n = 0; n < 4; n++) {
int j = n >> 1, i = n & 0x01;
write_modes_sb(cpi, m + j * bs * mis + i * bs, bc, tok, tok_end,
@@ -1260,7 +1213,11 @@
}
// update partition context
+#if CONFIG_SB8X8
+ if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
+#else
if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_SB32X32))
+#endif
return;
xd->left_seg_context = cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 40ad680..83c1102 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -29,7 +29,7 @@
B_PREDICTION_MODE mode;
int_mv mv;
int_mv second_mv;
- } bmi[16];
+ } bmi[16 >> (2 * CONFIG_SB8X8)];
} PARTITION_INFO;
// Structure to hold snapshot of coding context during the mode picking process
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 95bba21..3537e27 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -395,17 +395,6 @@
x->partition_info->bmi[15 >> (CONFIG_SB8X8 * 2)].mv.as_int;
mbmi->mv[1].as_int =
x->partition_info->bmi[15 >> (CONFIG_SB8X8 * 2)].second_mv.as_int;
-#if CONFIG_SB8X8
- vpx_memcpy(x->partition_info + mis, &ctx->partition_info,
- sizeof(PARTITION_INFO));
- vpx_memcpy(x->partition_info + 1, &ctx->partition_info,
- sizeof(PARTITION_INFO));
- vpx_memcpy(x->partition_info + mis + 1, &ctx->partition_info,
- sizeof(PARTITION_INFO));
- xd->mode_info_context[1].mbmi =
- xd->mode_info_context[mis].mbmi =
- xd->mode_info_context[1 + mis].mbmi = *mbmi;
-#endif
}
x->skip = ctx->skip;
@@ -499,12 +488,15 @@
mbmi->best_mv.as_int = best_mv.as_int;
mbmi->best_second_mv.as_int = best_second_mv.as_int;
vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
-#if CONFIG_SB8X8
- xd->mode_info_context[1].mbmi =
- xd->mode_info_context[mis].mbmi =
- xd->mode_info_context[1 + mis].mbmi = *mbmi;
-#endif
}
+#if CONFIG_SB8X8
+ if (bsize > BLOCK_SIZE_SB8X8 && mbmi->mode == NEWMV) {
+ int i, j;
+ for (j = 0; j < bh; ++j)
+ for (i = 0; i < bw; ++i)
+ xd->mode_info_context[mis * j + i].mbmi = *mbmi;
+ }
+#endif
#if CONFIG_COMP_INTERINTRA_PRED
if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV &&
mbmi->second_ref_frame <= INTRA_FRAME) {
@@ -1024,6 +1016,7 @@
int j;
ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE];
+ sb_partitioning[i] = BLOCK_SIZE_MB16X16;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
@@ -1044,7 +1037,6 @@
}
/* Encode MBs in raster order within the SB */
- sb_partitioning[i] = BLOCK_SIZE_MB16X16;
for (j = 0; j < 4; j++) {
const int x_idx_m = x_idx + ((j & 1) << CONFIG_SB8X8);
const int y_idx_m = y_idx + ((j >> 1) << CONFIG_SB8X8);
@@ -1052,6 +1044,8 @@
#if CONFIG_SB8X8
int r2, d2, mb16_rate = 0, mb16_dist = 0, k;
ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE];
+
+ mb_partitioning[i][j] = BLOCK_SIZE_SB8X8;
#endif
if (mi_row + y_idx_m >= cm->mi_rows ||
@@ -1077,13 +1071,12 @@
sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
}
- mb_partitioning[i][j] = BLOCK_SIZE_SB8X8;
for (k = 0; k < 4; k++) {
xd->b_index = k;
// try 8x8 coding
- pick_sb_modes(cpi, mi_row + y_idx_m + (k & 1),
- mi_col + x_idx_m + (k >> 1),
+ pick_sb_modes(cpi, mi_row + y_idx_m + (k >> 1),
+ mi_col + x_idx_m + (k & 1),
tp, &r, &d, BLOCK_SIZE_SB8X8,
&x->sb8_context[xd->sb_index][xd->mb_index]
[xd->b_index]);
@@ -1093,7 +1086,8 @@
[xd->b_index],
BLOCK_SIZE_SB8X8, 0);
encode_superblock(cpi, tp,
- 0, mi_row + y_idx_m, mi_col + x_idx_m,
+ 0, mi_row + y_idx_m + (k >> 1),
+ mi_col + x_idx_m + (k & 1),
BLOCK_SIZE_SB8X8);
}
set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
@@ -1140,8 +1134,8 @@
r2 += x->partition_cost[pl][PARTITION_VERT];
if (RDCOST(x->rdmult, x->rddiv, r2, d2) <
RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) {
- mb16_rate = r;
- mb16_dist = d;
+ mb16_rate = r2;
+ mb16_dist = d2;
mb_partitioning[i][j] = BLOCK_SIZE_SB8X16;
}
for (p = 0; p < MAX_MB_PLANE; p++) {
@@ -1185,8 +1179,8 @@
r2 += x->partition_cost[pl][PARTITION_HORZ];
if (RDCOST(x->rdmult, x->rddiv, r2, d2) <
RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) {
- mb16_rate = r;
- mb16_dist = d;
+ mb16_rate = r2;
+ mb16_dist = d2;
mb_partitioning[i][j] = BLOCK_SIZE_SB16X8;
}
for (p = 0; p < MAX_MB_PLANE; p++) {
@@ -1226,7 +1220,7 @@
// Dummy encode, do not do the tokenization
#if CONFIG_SB8X8
- encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0,
+ encode_sb(cpi, tp, mi_row + y_idx_m, mi_col + x_idx_m, 0,
BLOCK_SIZE_MB16X16, mb_partitioning[i][j], NULL, NULL);
#else
encode_macroblock(cpi, tp, 0, mi_row + y_idx_m,
@@ -2082,7 +2076,12 @@
#endif
#endif
- if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_MB16X16) {
+#if CONFIG_SB8X8
+ if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_SB8X8)
+#else
+ if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_MB16X16)
+#endif
+ {
++cpi->sb_ymode_count[m];
} else {
++cpi->ymode_count[m];
@@ -2441,7 +2440,8 @@
vp9_subtract_sbuv(x, bsize);
vp9_transform_sbuv_4x4(x, bsize);
vp9_quantize_sbuv_4x4(x, bsize);
- vp9_optimize_sbuv(cm, x, bsize);
+ if (x->optimize)
+ vp9_optimize_sbuv(cm, x, bsize);
vp9_inverse_transform_sbuv_4x4(xd, bsize);
vp9_recon_sbuv(xd, bsize);
@@ -2614,6 +2614,9 @@
sz = TX_16X16;
if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16)
sz = TX_8X8;
+ if (sz == TX_8X8 && (xd->mode_info_context->mbmi.mode == SPLITMV ||
+ xd->mode_info_context->mbmi.mode == I4X4_PRED))
+ sz = TX_4X4;
for (y = 0; y < bh; y++) {
for (x = 0; x < bw; x++) {
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index ceca60d..738d6e6 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1647,6 +1647,12 @@
BFP(BLOCK_8X8, vp9_sad8x8, vp9_variance8x8, vp9_sub_pixel_variance8x8,
NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
+#if CONFIG_SB8X8
+ BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+ BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+#endif
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 541127e..9d1e984 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -283,6 +283,8 @@
enum BlockSize {
#if CONFIG_SB8X8
BLOCK_4X4,
+ BLOCK_4X8,
+ BLOCK_8X4,
BLOCK_8X8,
BLOCK_8X16,
BLOCK_16X8,
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 5b401d7..6815289 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -320,27 +320,10 @@
void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
int i;
- int qindex;
MACROBLOCKD *xd = &x->e_mbd;
int zbin_extra;
int segment_id = xd->mode_info_context->mbmi.segment_id;
-
- // Select the baseline MB Q index allowing for any segment level change.
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) {
- if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
- // Abs Value
- qindex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
- } else {
- // Delta Value
- qindex = cpi->common.base_qindex +
- vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
-
- // Clamp to valid range
- qindex = clamp(qindex, 0, MAXQ);
- }
- } else {
- qindex = cpi->common.base_qindex;
- }
+ const int qindex = vp9_get_qindex(xd, segment_id, cpi->common.base_qindex);
// Y
zbin_extra = (cpi->common.y_dequant[qindex][1] *
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index fd7a4bb..fb74cbd 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -22,10 +22,6 @@
#define prototype_quantize_mb(sym) \
void (sym)(MACROBLOCK *x)
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/vp9_quantize_x86.h"
-#endif
-
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2,
int y_blocks);
void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index da78be1..f971d91 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -13,8 +13,8 @@
#include <math.h>
#include <limits.h>
#include <assert.h>
-#include "vp9/common/vp9_pragmas.h"
+#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
@@ -34,7 +34,6 @@
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
-
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
@@ -42,8 +41,6 @@
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"
-#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
-
#define INVALID_MV 0x80008000
/* Factor to weigh the rate for switchable interp filters */
@@ -157,11 +154,9 @@
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < REF_TYPES; j++)
for (k = 0; k < COEF_BANDS; k++)
- for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
- vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
- p[i][j][k][l],
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++)
+ vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l],
vp9_coef_tree);
- }
}
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
@@ -184,7 +179,7 @@
for (i = 0; i < QINDEX_RANGE; i++) {
sad_per_bit16lut[i] =
(int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
- sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742);
+ sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
}
}
@@ -208,7 +203,7 @@
// for key frames, golden frames and arf frames.
// if (cpi->common.refresh_golden_frame ||
// cpi->common.refresh_alt_ref_frame)
- qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex);
+ qindex = clamp(qindex, 0, MAXQ);
cpi->RDMULT = compute_rd_mult(qindex);
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
@@ -1874,7 +1869,7 @@
raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
x->plane[0].src.buf,
x->plane[0].src.stride);
- assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0xf) == 0);
+ assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
x->e_mbd.plane[0].pre[0].buf =
raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
x->e_mbd.plane[0].pre[0].buf,
@@ -2890,24 +2885,24 @@
/* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
* according to the closeness of 2 MV. */
/* block 8X16 */
- sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
- (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
+ sr = MAX(abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row) >> 3,
+ abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col) >> 3);
cal_step_param(sr, &bsi.sv_istep[0]);
- sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
- (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
+ sr = MAX(abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row) >> 3,
+ abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col) >> 3);
cal_step_param(sr, &bsi.sv_istep[1]);
rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
seg_mvs[PARTITIONING_8X16], txfm_cache);
/* block 16X8 */
- sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
- (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
+ sr = MAX(abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row) >> 3,
+ abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col) >> 3);
cal_step_param(sr, &bsi.sv_istep[0]);
- sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
- (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
+ sr = MAX(abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row) >> 3,
+ abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col) >> 3);
cal_step_param(sr, &bsi.sv_istep[1]);
rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
@@ -3295,6 +3290,11 @@
case BLOCK_32X16: return BLOCK_16X8;
case BLOCK_16X32: return BLOCK_8X16;
case BLOCK_16X16: return BLOCK_8X8;
+#if CONFIG_SB8X8
+ case BLOCK_16X8: return BLOCK_8X4;
+ case BLOCK_8X16: return BLOCK_4X8;
+ case BLOCK_8X8: return BLOCK_4X4;
+#endif
default:
assert(0);
return -1;
@@ -3310,6 +3310,11 @@
case BLOCK_SIZE_SB32X16: return BLOCK_32X16;
case BLOCK_SIZE_SB16X32: return BLOCK_16X32;
case BLOCK_SIZE_MB16X16: return BLOCK_16X16;
+#if CONFIG_SB8X8
+ case BLOCK_SIZE_SB16X8: return BLOCK_16X8;
+ case BLOCK_SIZE_SB8X16: return BLOCK_8X16;
+ case BLOCK_SIZE_SB8X8: return BLOCK_8X8;
+#endif
default:
assert(0);
return -1;
@@ -4860,6 +4865,8 @@
cpi->common.y_dc_delta_q);
#if CONFIG_SB8X8
int_mv seg_mvs[4][MAX_REF_FRAMES - 1];
+ union b_mode_info best_bmodes[4];
+ PARTITION_INFO best_partition;
#endif
#if CONFIG_SB8X8
@@ -4927,7 +4934,9 @@
if (cpi->Speed == 0
|| (cpi->Speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
mbmi->mode = DC_PRED;
- for (i = 0; i <= ((bsize < BLOCK_SIZE_SB64X64) ? TX_16X16 : TX_32X32);
+ for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
+ (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
+ (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32)));
i++) {
mbmi->txfm_size = i;
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i],
@@ -5097,6 +5106,8 @@
bsize, txfm_cache);
uv_tx = mbmi->txfm_size;
+ if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8)
+ uv_tx = TX_4X4;
if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16)
uv_tx = TX_8X8;
else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32)
@@ -5397,6 +5408,15 @@
*returndistortion = distortion2;
best_rd = this_rd;
vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+#if CONFIG_SB8X8
+ vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
+
+ if (this_mode == I4X4_PRED || this_mode == SPLITMV) {
+ for (i = 0; i < 4; i++) {
+ best_bmodes[i] = xd->mode_info_context->bmi[i];
+ }
+ }
+#endif
}
#if 0
// Testing this mode gave rise to an improvement in best error score.
@@ -5563,7 +5583,28 @@
// macroblock modes
vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+#if CONFIG_SB8X8
+ if (best_mbmode.mode == I4X4_PRED) {
+ for (i = 0; i < 4; i++) {
+ xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
+ }
+ }
+ if (best_mbmode.mode == SPLITMV) {
+ for (i = 0; i < 4; i++)
+ xd->mode_info_context->bmi[i].as_mv[0].as_int =
+ best_bmodes[i].as_mv[0].as_int;
+ if (mbmi->second_ref_frame > 0)
+ for (i = 0; i < 4; i++)
+ xd->mode_info_context->bmi[i].as_mv[1].as_int =
+ best_bmodes[i].as_mv[1].as_int;
+
+ vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
+
+ mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
+ mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
+ }
+#endif
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
if (best_pred_rd[i] == INT64_MAX)
best_pred_diff[i] = INT_MIN;
@@ -5585,7 +5626,12 @@
end:
set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
scale_factor);
- store_coding_context(x, ctx, best_mode_index, NULL,
+ store_coding_context(x, ctx, best_mode_index,
+#if CONFIG_SB8X8
+ &best_partition,
+#else
+ NULL,
+#endif
&mbmi->ref_mvs[mbmi->ref_frame][0],
&mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
mbmi->second_ref_frame][0],
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index e04980c..86af268 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -57,61 +57,57 @@
}
// Based on set of segment counts calculate a probability tree
-static void calc_segtree_probs(MACROBLOCKD *xd,
- int *segcounts,
+static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts,
vp9_prob *segment_tree_probs) {
// Work out probabilities of each segment
- segment_tree_probs[0] =
- get_binary_prob(segcounts[0] + segcounts[1] + segcounts[2] + segcounts[3],
- segcounts[4] + segcounts[5] + segcounts[6] + segcounts[7]);
- segment_tree_probs[1] =
- get_binary_prob(segcounts[0] + segcounts[1], segcounts[2] + segcounts[3]);
- segment_tree_probs[2] = get_binary_prob(segcounts[0], segcounts[1]);
- segment_tree_probs[3] = get_binary_prob(segcounts[2], segcounts[3]);
- segment_tree_probs[4] =
- get_binary_prob(segcounts[4] + segcounts[5], segcounts[6] + segcounts[7]);
+ const int c01 = segcounts[0] + segcounts[1];
+ const int c23 = segcounts[2] + segcounts[3];
+ const int c45 = segcounts[4] + segcounts[5];
+ const int c67 = segcounts[6] + segcounts[7];
+
+ segment_tree_probs[0] = get_binary_prob(c01 + c23, c45 + c67);
+ segment_tree_probs[1] = get_binary_prob(c01, c23);
+ segment_tree_probs[2] = get_binary_prob(c45, c67);
+ segment_tree_probs[3] = get_binary_prob(segcounts[0], segcounts[1]);
+ segment_tree_probs[4] = get_binary_prob(segcounts[2], segcounts[3]);
segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]);
segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]);
}
// Based on set of segment counts and probabilities calculate a cost estimate
-static int cost_segmap(MACROBLOCKD *xd,
- int *segcounts,
- vp9_prob *probs) {
- int cost;
- int count1, count2;
+static int cost_segmap(MACROBLOCKD *xd, int *segcounts, vp9_prob *probs) {
+ const int c01 = segcounts[0] + segcounts[1];
+ const int c23 = segcounts[2] + segcounts[3];
+ const int c45 = segcounts[4] + segcounts[5];
+ const int c67 = segcounts[6] + segcounts[7];
+ const int c0123 = c01 + c23;
+ const int c4567 = c45 + c67;
// Cost the top node of the tree
- count1 = segcounts[0] + segcounts[1] + segcounts[2] + segcounts[3];
- count2 = segcounts[3] + segcounts[4] + segcounts[5] + segcounts[6];
- cost = count1 * vp9_cost_zero(probs[0]) +
- count2 * vp9_cost_one(probs[0]);
+ int cost = c0123 * vp9_cost_zero(probs[0]) +
+ c4567 * vp9_cost_one(probs[0]);
// Cost subsequent levels
- if (count1 > 0) {
- count1 = segcounts[0] + segcounts[1];
- count2 = segcounts[2] + segcounts[3];
- cost += count1 * vp9_cost_zero(probs[1]) +
- count2 * vp9_cost_one(probs[1]);
+ if (c0123 > 0) {
+ cost += c01 * vp9_cost_zero(probs[1]) +
+ c23 * vp9_cost_one(probs[1]);
- if (count1 > 0)
- cost += segcounts[0] * vp9_cost_zero(probs[2]) +
- segcounts[1] * vp9_cost_one(probs[2]);
- if (count2 > 0)
- cost += segcounts[2] * vp9_cost_zero(probs[3]) +
- segcounts[3] * vp9_cost_one(probs[3]);
+ if (c01 > 0)
+ cost += segcounts[0] * vp9_cost_zero(probs[3]) +
+ segcounts[1] * vp9_cost_one(probs[3]);
+ if (c23 > 0)
+ cost += segcounts[2] * vp9_cost_zero(probs[4]) +
+ segcounts[3] * vp9_cost_one(probs[4]);
}
- if (count2 > 0) {
- count1 = segcounts[4] + segcounts[5];
- count2 = segcounts[6] + segcounts[7];
- cost += count1 * vp9_cost_zero(probs[4]) +
- count2 * vp9_cost_one(probs[4]);
+ if (c4567 > 0) {
+ cost += c45 * vp9_cost_zero(probs[2]) +
+ c67 * vp9_cost_one(probs[2]);
- if (count1 > 0)
+ if (c45 > 0)
cost += segcounts[4] * vp9_cost_zero(probs[5]) +
segcounts[5] * vp9_cost_one(probs[5]);
- if (count2 > 0)
+ if (c67 > 0)
cost += segcounts[6] * vp9_cost_zero(probs[6]) +
segcounts[7] * vp9_cost_one(probs[6]);
}
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index c4c70df..c2a6004 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -239,6 +239,32 @@
return (var - (((unsigned int)avg * avg) >> 6));
}
+unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 5));
+}
+
+unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 5));
+}
+
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
diff --git a/vp9/encoder/x86/vp9_quantize_mmx.asm b/vp9/encoder/x86/vp9_quantize_mmx.asm
deleted file mode 100644
index 22e2356..0000000
--- a/vp9/encoder/x86/vp9_quantize_mmx.asm
+++ /dev/null
@@ -1,286 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;int vp9_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
-; short *qcoeff_ptr,short *dequant_ptr,
-; short *scan_mask, short *round_ptr,
-; short *quant_ptr, short *dqcoeff_ptr);
-global sym(vp9_fast_quantize_b_impl_mmx) PRIVATE
-sym(vp9_fast_quantize_b_impl_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- push rsi
- push rdi
- ; end prolog
-
-
- mov rsi, arg(0) ;coeff_ptr
- movq mm0, [rsi]
-
- mov rax, arg(1) ;zbin_ptr
- movq mm1, [rax]
-
- movq mm3, mm0
- psraw mm0, 15
-
- pxor mm3, mm0
- psubw mm3, mm0 ; abs
-
- movq mm2, mm3
- pcmpgtw mm1, mm2
-
- pandn mm1, mm2
- movq mm3, mm1
-
- mov rdx, arg(6) ;quant_ptr
- movq mm1, [rdx]
-
- mov rcx, arg(5) ;round_ptr
- movq mm2, [rcx]
-
- paddw mm3, mm2
- pmulhuw mm3, mm1
-
- pxor mm3, mm0
- psubw mm3, mm0 ;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
- movq mm0, mm3
-
- movq [rdi], mm3
-
- mov rax, arg(3) ;dequant_ptr
- movq mm2, [rax]
-
- pmullw mm3, mm2
- mov rax, arg(7) ;dqcoeff_ptr
-
- movq [rax], mm3
-
- ; next 8
- movq mm4, [rsi+8]
-
- mov rax, arg(1) ;zbin_ptr
- movq mm5, [rax+8]
-
- movq mm7, mm4
- psraw mm4, 15
-
- pxor mm7, mm4
- psubw mm7, mm4 ; abs
-
- movq mm6, mm7
- pcmpgtw mm5, mm6
-
- pandn mm5, mm6
- movq mm7, mm5
-
- movq mm5, [rdx+8]
- movq mm6, [rcx+8]
-
- paddw mm7, mm6
- pmulhuw mm7, mm5
-
- pxor mm7, mm4
- psubw mm7, mm4;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
-
- movq mm1, mm7
- movq [rdi+8], mm7
-
- mov rax, arg(3) ;dequant_ptr
- movq mm6, [rax+8]
-
- pmullw mm7, mm6
- mov rax, arg(7) ;dqcoeff_ptr
-
- movq [rax+8], mm7
-
-
- ; next 8
- movq mm4, [rsi+16]
-
- mov rax, arg(1) ;zbin_ptr
- movq mm5, [rax+16]
-
- movq mm7, mm4
- psraw mm4, 15
-
- pxor mm7, mm4
- psubw mm7, mm4 ; abs
-
- movq mm6, mm7
- pcmpgtw mm5, mm6
-
- pandn mm5, mm6
- movq mm7, mm5
-
- movq mm5, [rdx+16]
- movq mm6, [rcx+16]
-
- paddw mm7, mm6
- pmulhuw mm7, mm5
-
- pxor mm7, mm4
- psubw mm7, mm4;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
-
- movq mm1, mm7
- movq [rdi+16], mm7
-
- mov rax, arg(3) ;dequant_ptr
- movq mm6, [rax+16]
-
- pmullw mm7, mm6
- mov rax, arg(7) ;dqcoeff_ptr
-
- movq [rax+16], mm7
-
-
- ; next 8
- movq mm4, [rsi+24]
-
- mov rax, arg(1) ;zbin_ptr
- movq mm5, [rax+24]
-
- movq mm7, mm4
- psraw mm4, 15
-
- pxor mm7, mm4
- psubw mm7, mm4 ; abs
-
- movq mm6, mm7
- pcmpgtw mm5, mm6
-
- pandn mm5, mm6
- movq mm7, mm5
-
- movq mm5, [rdx+24]
- movq mm6, [rcx+24]
-
- paddw mm7, mm6
- pmulhuw mm7, mm5
-
- pxor mm7, mm4
- psubw mm7, mm4;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
-
- movq mm1, mm7
- movq [rdi+24], mm7
-
- mov rax, arg(3) ;dequant_ptr
- movq mm6, [rax+24]
-
- pmullw mm7, mm6
- mov rax, arg(7) ;dqcoeff_ptr
-
- movq [rax+24], mm7
-
-
-
- mov rdi, arg(4) ;scan_mask
- mov rsi, arg(2) ;qcoeff_ptr
-
- pxor mm5, mm5
- pxor mm7, mm7
-
- movq mm0, [rsi]
- movq mm1, [rsi+8]
-
- movq mm2, [rdi]
- movq mm3, [rdi+8];
-
- pcmpeqw mm0, mm7
- pcmpeqw mm1, mm7
-
- pcmpeqw mm6, mm6
- pxor mm0, mm6
-
- pxor mm1, mm6
- psrlw mm0, 15
-
- psrlw mm1, 15
- pmaddwd mm0, mm2
-
- pmaddwd mm1, mm3
- movq mm5, mm0
-
- paddd mm5, mm1
-
- movq mm0, [rsi+16]
- movq mm1, [rsi+24]
-
- movq mm2, [rdi+16]
- movq mm3, [rdi+24];
-
- pcmpeqw mm0, mm7
- pcmpeqw mm1, mm7
-
- pcmpeqw mm6, mm6
- pxor mm0, mm6
-
- pxor mm1, mm6
- psrlw mm0, 15
-
- psrlw mm1, 15
- pmaddwd mm0, mm2
-
- pmaddwd mm1, mm3
- paddd mm5, mm0
-
- paddd mm5, mm1
- movq mm0, mm5
-
- psrlq mm5, 32
- paddd mm0, mm5
-
- ; eob adjustment begins here
- movq rcx, mm0
- and rcx, 0xffff
-
- xor rdx, rdx
- sub rdx, rcx ; rdx=-rcx
-
- bsr rax, rcx
- inc rax
-
- sar rdx, 31
- and rax, rdx
- ; Substitute the sse assembly for the old mmx mixed assembly/C. The
- ; following is kept as reference
- ; movq rcx, mm0
- ; bsr rax, rcx
- ;
- ; mov eob, rax
- ; mov eee, rcx
- ;
- ;if(eee==0)
- ;{
- ; eob=-1;
- ;}
- ;else if(eee<0)
- ;{
- ; eob=15;
- ;}
- ;d->eob = eob+1;
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.asm b/vp9/encoder/x86/vp9_quantize_sse2.asm
deleted file mode 100644
index 700e64b..0000000
--- a/vp9/encoder/x86/vp9_quantize_sse2.asm
+++ /dev/null
@@ -1,379 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-; void vp9_regular_quantize_b_sse2 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp9_regular_quantize_b_sse2) PRIVATE
-sym(vp9_regular_quantize_b_sse2):
- push rbp
- mov rbp, rsp
- SAVE_XMM 7
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %if LIBVPX_YASM_WIN64
- push rdi
- push rsi
- %endif
-%endif
-
- ALIGN_STACK 16, rax
- %define zrun_zbin_boost 0 ; 8
- %define abs_minus_zbin 8 ; 32
- %define temp_qcoeff 40 ; 32
- %define qcoeff 72 ; 32
- %define stack_size 104
- sub rsp, stack_size
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rdx, [rdi + vp9_block_coeff] ; coeff_ptr
- mov rcx, [rdi + vp9_block_zbin] ; zbin_ptr
- movd xmm7, [rdi + vp9_block_zbin_extra] ; zbin_oq_value
-
- ; z
- movdqa xmm0, [rdx]
- movdqa xmm4, [rdx + 16]
- mov rdx, [rdi + vp9_block_round] ; round_ptr
-
- pshuflw xmm7, xmm7, 0
- punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value
-
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz
- psraw xmm0, 15
- psraw xmm4, 15
-
- ; (z ^ sz)
- pxor xmm1, xmm0
- pxor xmm5, xmm4
-
- ; x = abs(z)
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- movdqa xmm2, [rcx]
- movdqa xmm3, [rcx + 16]
- mov rcx, [rdi + vp9_block_quant] ; quant_ptr
-
- ; *zbin_ptr + zbin_oq_value
- paddw xmm2, xmm7
- paddw xmm3, xmm7
-
- ; x - (*zbin_ptr + zbin_oq_value)
- psubw xmm1, xmm2
- psubw xmm5, xmm3
- movdqa [rsp + abs_minus_zbin], xmm1
- movdqa [rsp + abs_minus_zbin + 16], xmm5
-
- ; add (zbin_ptr + zbin_oq_value) back
- paddw xmm1, xmm2
- paddw xmm5, xmm3
-
- movdqa xmm2, [rdx]
- movdqa xmm6, [rdx + 16]
-
- movdqa xmm3, [rcx]
- movdqa xmm7, [rcx + 16]
-
- ; x + round
- paddw xmm1, xmm2
- paddw xmm5, xmm6
-
- ; y = x * quant_ptr >> 16
- pmulhw xmm3, xmm1
- pmulhw xmm7, xmm5
-
- ; y += x
- paddw xmm1, xmm3
- paddw xmm5, xmm7
-
- movdqa [rsp + temp_qcoeff], xmm1
- movdqa [rsp + temp_qcoeff + 16], xmm5
-
- pxor xmm6, xmm6
- ; zero qcoeff
- movdqa [rsp + qcoeff], xmm6
- movdqa [rsp + qcoeff + 16], xmm6
-
- mov rdx, [rdi + vp9_block_zrun_zbin_boost] ; zbin_boost_ptr
- mov rax, [rdi + vp9_block_quant_shift] ; quant_shift_ptr
- mov [rsp + zrun_zbin_boost], rdx
-
-%macro ZIGZAG_LOOP 1
- ; x
- movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2]
-
- ; if (x >= zbin)
- sub cx, WORD PTR[rdx] ; x - zbin
- lea rdx, [rdx + 2] ; zbin_boost_ptr++
- jl .rq_zigzag_loop_%1 ; x < zbin
-
- movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]
-
- ; downshift by quant_shift[rc]
- movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc]
- sar edi, cl ; also sets Z bit
- je .rq_zigzag_loop_%1 ; !y
- mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc]
- mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost
-.rq_zigzag_loop_%1:
-%endmacro
-; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c
-ZIGZAG_LOOP 0
-ZIGZAG_LOOP 1
-ZIGZAG_LOOP 4
-ZIGZAG_LOOP 8
-ZIGZAG_LOOP 5
-ZIGZAG_LOOP 2
-ZIGZAG_LOOP 3
-ZIGZAG_LOOP 6
-ZIGZAG_LOOP 9
-ZIGZAG_LOOP 12
-ZIGZAG_LOOP 13
-ZIGZAG_LOOP 10
-ZIGZAG_LOOP 7
-ZIGZAG_LOOP 11
-ZIGZAG_LOOP 14
-ZIGZAG_LOOP 15
-
- movdqa xmm2, [rsp + qcoeff]
- movdqa xmm3, [rsp + qcoeff + 16]
-
- mov rcx, [rsi + vp9_blockd_dequant] ; dequant_ptr
- mov rdi, [rsi + vp9_blockd_dqcoeff] ; dqcoeff_ptr
-
- ; y ^ sz
- pxor xmm2, xmm0
- pxor xmm3, xmm4
- ; x = (y ^ sz) - sz
- psubw xmm2, xmm0
- psubw xmm3, xmm4
-
- ; dequant
- movdqa xmm0, [rcx]
- movdqa xmm1, [rcx + 16]
-
- mov rcx, [rsi + vp9_blockd_qcoeff] ; qcoeff_ptr
-
- pmullw xmm0, xmm2
- pmullw xmm1, xmm3
-
- movdqa [rcx], xmm2 ; store qcoeff
- movdqa [rcx + 16], xmm3
- movdqa [rdi], xmm0 ; store dqcoeff
- movdqa [rdi + 16], xmm1
-
- ; select the last value (in zig_zag order) for EOB
- pcmpeqw xmm2, xmm6
- pcmpeqw xmm3, xmm6
- ; !
- pcmpeqw xmm6, xmm6
- pxor xmm2, xmm6
- pxor xmm3, xmm6
- ; mask inv_zig_zag
- pand xmm2, [GLOBAL(inv_zig_zag)]
- pand xmm3, [GLOBAL(inv_zig_zag + 16)]
- ; select the max value
- pmaxsw xmm2, xmm3
- pshufd xmm3, xmm2, 00001110b
- pmaxsw xmm2, xmm3
- pshuflw xmm3, xmm2, 00001110b
- pmaxsw xmm2, xmm3
- pshuflw xmm3, xmm2, 00000001b
- pmaxsw xmm2, xmm3
- movd eax, xmm2
- and eax, 0xff
- mov [rsi + vp9_blockd_eob], eax
-
- ; begin epilog
- add rsp, stack_size
- pop rsp
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- %endif
-%endif
- RESTORE_GOT
- RESTORE_XMM
- pop rbp
- ret
-
-; void vp9_fast_quantize_b_sse2 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp9_fast_quantize_b_sse2) PRIVATE
-sym(vp9_fast_quantize_b_sse2):
- push rbp
- mov rbp, rsp
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %if LIBVPX_YASM_WIN64
- push rdi
- push rsi
- %else
- ; these registers are used for passing arguments
- %endif
-%endif
-
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp9_block_coeff]
- mov rcx, [rdi + vp9_block_round]
- mov rdx, [rdi + vp9_block_quant_fast]
-
- ; z = coeff
- movdqa xmm0, [rax]
- movdqa xmm4, [rax + 16]
-
- ; dup z so we can save sz
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz = z >> 15
- psraw xmm0, 15
- psraw xmm4, 15
-
- ; x = abs(z) = (z ^ sz) - sz
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- ; x += round
- paddw xmm1, [rcx]
- paddw xmm5, [rcx + 16]
-
- mov rax, [rsi + vp9_blockd_qcoeff]
- mov rcx, [rsi + vp9_blockd_dequant]
- mov rdi, [rsi + vp9_blockd_dqcoeff]
-
- ; y = x * quant >> 16
- pmulhw xmm1, [rdx]
- pmulhw xmm5, [rdx + 16]
-
- ; x = (y ^ sz) - sz
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- ; qcoeff = x
- movdqa [rax], xmm1
- movdqa [rax + 16], xmm5
-
- ; x * dequant
- movdqa xmm2, xmm1
- movdqa xmm3, xmm5
- pmullw xmm2, [rcx]
- pmullw xmm3, [rcx + 16]
-
- ; dqcoeff = x * dequant
- movdqa [rdi], xmm2
- movdqa [rdi + 16], xmm3
-
- pxor xmm4, xmm4 ;clear all bits
- pcmpeqw xmm1, xmm4
- pcmpeqw xmm5, xmm4
-
- pcmpeqw xmm4, xmm4 ;set all bits
- pxor xmm1, xmm4
- pxor xmm5, xmm4
-
- pand xmm1, [GLOBAL(inv_zig_zag)]
- pand xmm5, [GLOBAL(inv_zig_zag + 16)]
-
- pmaxsw xmm1, xmm5
-
- ; now down to 8
- pshufd xmm5, xmm1, 00001110b
-
- pmaxsw xmm1, xmm5
-
- ; only 4 left
- pshuflw xmm5, xmm1, 00001110b
-
- pmaxsw xmm1, xmm5
-
- ; okay, just 2!
- pshuflw xmm5, xmm1, 00000001b
-
- pmaxsw xmm1, xmm5
-
- movd eax, xmm1
- and eax, 0xff
- mov [rsi + vp9_blockd_eob], eax
-
- ; begin epilog
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- %endif
-%endif
-
- RESTORE_GOT
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-inv_zig_zag:
- dw 0x0001, 0x0002, 0x0006, 0x0007
- dw 0x0003, 0x0005, 0x0008, 0x000d
- dw 0x0004, 0x0009, 0x000c, 0x000e
- dw 0x000a, 0x000b, 0x000f, 0x0010
diff --git a/vp9/encoder/x86/vp9_quantize_sse4.asm b/vp9/encoder/x86/vp9_quantize_sse4.asm
deleted file mode 100644
index 4c14e5f..0000000
--- a/vp9/encoder/x86/vp9_quantize_sse4.asm
+++ /dev/null
@@ -1,253 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-; void vp9_regular_quantize_b_sse4 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp9_regular_quantize_b_sse4) PRIVATE
-sym(vp9_regular_quantize_b_sse4):
-
-%if ABI_IS_32BIT
- push rbp
- mov rbp, rsp
- GET_GOT rbx
- push rdi
- push rsi
-
- ALIGN_STACK 16, rax
- %define qcoeff 0 ; 32
- %define stack_size 32
- sub rsp, stack_size
-%else
- %if LIBVPX_YASM_WIN64
- SAVE_XMM 8, u
- push rdi
- push rsi
- %endif
-%endif
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp9_block_coeff]
- mov rcx, [rdi + vp9_block_zbin]
- mov rdx, [rdi + vp9_block_round]
- movd xmm7, [rdi + vp9_block_zbin_extra]
-
- ; z
- movdqa xmm0, [rax]
- movdqa xmm1, [rax + 16]
-
- ; duplicate zbin_oq_value
- pshuflw xmm7, xmm7, 0
- punpcklwd xmm7, xmm7
-
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
-
- ; sz
- psraw xmm0, 15
- psraw xmm1, 15
-
- ; (z ^ sz)
- pxor xmm2, xmm0
- pxor xmm3, xmm1
-
- ; x = abs(z)
- psubw xmm2, xmm0
- psubw xmm3, xmm1
-
- ; zbin
- movdqa xmm4, [rcx]
- movdqa xmm5, [rcx + 16]
-
- ; *zbin_ptr + zbin_oq_value
- paddw xmm4, xmm7
- paddw xmm5, xmm7
-
- movdqa xmm6, xmm2
- movdqa xmm7, xmm3
-
- ; x - (*zbin_ptr + zbin_oq_value)
- psubw xmm6, xmm4
- psubw xmm7, xmm5
-
- ; round
- movdqa xmm4, [rdx]
- movdqa xmm5, [rdx + 16]
-
- mov rax, [rdi + vp9_block_quant_shift]
- mov rcx, [rdi + vp9_block_quant]
- mov rdx, [rdi + vp9_block_zrun_zbin_boost]
-
- ; x + round
- paddw xmm2, xmm4
- paddw xmm3, xmm5
-
- ; quant
- movdqa xmm4, [rcx]
- movdqa xmm5, [rcx + 16]
-
- ; y = x * quant_ptr >> 16
- pmulhw xmm4, xmm2
- pmulhw xmm5, xmm3
-
- ; y += x
- paddw xmm2, xmm4
- paddw xmm3, xmm5
-
- pxor xmm4, xmm4
-%if ABI_IS_32BIT
- movdqa [rsp + qcoeff], xmm4
- movdqa [rsp + qcoeff + 16], xmm4
-%else
- pxor xmm8, xmm8
-%endif
-
- ; quant_shift
- movdqa xmm5, [rax]
-
- ; zrun_zbin_boost
- mov rax, rdx
-
-%macro ZIGZAG_LOOP 5
- ; x
- pextrw ecx, %4, %2
-
- ; if (x >= zbin)
- sub cx, WORD PTR[rdx] ; x - zbin
- lea rdx, [rdx + 2] ; zbin_boost_ptr++
- jl .rq_zigzag_loop_%1 ; x < zbin
-
- pextrw edi, %3, %2 ; y
-
- ; downshift by quant_shift[rc]
- pextrb ecx, xmm5, %1 ; quant_shift[rc]
- sar edi, cl ; also sets Z bit
- je .rq_zigzag_loop_%1 ; !y
-%if ABI_IS_32BIT
- mov WORD PTR[rsp + qcoeff + %1 *2], di
-%else
- pinsrw %5, edi, %2 ; qcoeff[rc]
-%endif
- mov rdx, rax ; reset to b->zrun_zbin_boost
-.rq_zigzag_loop_%1:
-%endmacro
-; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c
-ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
-
- mov rcx, [rsi + vp9_blockd_dequant]
- mov rdi, [rsi + vp9_blockd_dqcoeff]
-
-%if ABI_IS_32BIT
- movdqa xmm4, [rsp + qcoeff]
- movdqa xmm5, [rsp + qcoeff + 16]
-%else
- %define xmm5 xmm8
-%endif
-
- ; y ^ sz
- pxor xmm4, xmm0
- pxor xmm5, xmm1
- ; x = (y ^ sz) - sz
- psubw xmm4, xmm0
- psubw xmm5, xmm1
-
- ; dequant
- movdqa xmm0, [rcx]
- movdqa xmm1, [rcx + 16]
-
- mov rcx, [rsi + vp9_blockd_qcoeff]
-
- pmullw xmm0, xmm4
- pmullw xmm1, xmm5
-
- ; store qcoeff
- movdqa [rcx], xmm4
- movdqa [rcx + 16], xmm5
-
- ; store dqcoeff
- movdqa [rdi], xmm0
- movdqa [rdi + 16], xmm1
-
- ; select the last value (in zig_zag order) for EOB
- pxor xmm6, xmm6
- pcmpeqw xmm4, xmm6
- pcmpeqw xmm5, xmm6
-
- packsswb xmm4, xmm5
- pshufb xmm4, [GLOBAL(zig_zag1d)]
- pmovmskb edx, xmm4
- xor rdi, rdi
- mov eax, -1
- xor dx, ax
- bsr eax, edx
- sub edi, edx
- sar edi, 31
- add eax, 1
- and eax, edi
-
- mov [rsi + vp9_blockd_eob], eax
-
- ; begin epilog
-%if ABI_IS_32BIT
- add rsp, stack_size
- pop rsp
-
- pop rsi
- pop rdi
- RESTORE_GOT
- pop rbp
-%else
- %undef xmm5
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- RESTORE_XMM
- %endif
-%endif
-
- ret
-
-SECTION_RODATA
-align 16
-; vp9/common/vp9_entropy.c: vp9_default_zig_zag1d
-zig_zag1d:
- db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
deleted file mode 100644
index 1fa0521..0000000
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ /dev/null
@@ -1,137 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-; void vp9_fast_quantize_b_ssse3 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-;
-
-global sym(vp9_fast_quantize_b_ssse3) PRIVATE
-sym(vp9_fast_quantize_b_ssse3):
- push rbp
- mov rbp, rsp
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %if LIBVPX_YASM_WIN64
- push rdi
- push rsi
- %endif
-%endif
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp9_block_coeff]
- mov rcx, [rdi + vp9_block_round]
- mov rdx, [rdi + vp9_block_quant_fast]
-
- ; coeff
- movdqa xmm0, [rax]
- movdqa xmm4, [rax + 16]
-
- ; round
- movdqa xmm2, [rcx]
- movdqa xmm3, [rcx + 16]
-
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz = z >> 15
- psraw xmm0, 15
- psraw xmm4, 15
-
- pabsw xmm1, xmm1
- pabsw xmm5, xmm5
-
- paddw xmm1, xmm2
- paddw xmm5, xmm3
-
- ; quant_fast
- pmulhw xmm1, [rdx]
- pmulhw xmm5, [rdx + 16]
-
- mov rax, [rsi + vp9_blockd_qcoeff]
- mov rdi, [rsi + vp9_blockd_dequant]
- mov rcx, [rsi + vp9_blockd_dqcoeff]
-
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- movdqa [rax], xmm1
- movdqa [rax + 16], xmm5
-
- movdqa xmm2, [rdi]
- movdqa xmm3, [rdi + 16]
-
- pxor xmm4, xmm4
- pmullw xmm2, xmm1
- pmullw xmm3, xmm5
-
- pcmpeqw xmm1, xmm4 ;non zero mask
- pcmpeqw xmm5, xmm4 ;non zero mask
- packsswb xmm1, xmm5
- pshufb xmm1, [GLOBAL(zz_shuf)]
-
- pmovmskb edx, xmm1
-
- xor rdi, rdi
- mov eax, -1
- xor dx, ax ;flip the bits for bsr
- bsr eax, edx
-
- movdqa [rcx], xmm2 ;store dqcoeff
- movdqa [rcx + 16], xmm3 ;store dqcoeff
-
- sub edi, edx ;check for all zeros in bit mask
- sar edi, 31 ;0 or -1
- add eax, 1
- and eax, edi ;if the bit mask was all zero,
- ;then eob = 0
- mov [rsi + vp9_blockd_eob], eax
-
- ; begin epilog
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- %endif
-%endif
-
- RESTORE_GOT
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-zz_shuf:
- db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp9/encoder/x86/vp9_quantize_x86.h b/vp9/encoder/x86/vp9_quantize_x86.h
deleted file mode 100644
index d1db173..0000000
--- a/vp9/encoder/x86/vp9_quantize_x86.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-#ifndef VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_
-#define VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_
-
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-#if HAVE_MMX
-
-#endif /* HAVE_MMX */
-
-
-#if HAVE_SSE2
-extern prototype_quantize_block(vp9_regular_quantize_b_sse2);
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp9_quantize_quantb
-#define vp9_quantize_quantb vp9_regular_quantize_b_sse2
-#endif /* !CONFIG_RUNTIME_CPU_DETECT */
-
-#endif /* HAVE_SSE2 */
-
-
-#if HAVE_SSE4_1
-extern prototype_quantize_block(vp9_regular_quantize_b_sse4);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp9_quantize_quantb
-#define vp9_quantize_quantb vp9_regular_quantize_b_sse4
-
-#endif /* !CONFIG_RUNTIME_CPU_DETECT */
-
-#endif /* HAVE_SSE4_1 */
-
-#endif /* QUANTIZE_X86_H */
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 63c6ed8..39f836f 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -17,15 +17,6 @@
VP9_CX_SRCS-yes += vp9_cx_iface.c
-# encoder
-#INCLUDES += algo/vpx_common/vpx_mem/include
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += common
-#INCLUDES += algo/vpx_ref/cpu_id/include
-#INCLUDES += common
-#INCLUDES += encoder
-
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_boolhuff.c
VP9_CX_SRCS-yes += encoder/vp9_dct.c
@@ -81,7 +72,6 @@
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_mcomp_x86.h
-VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_x86.h
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_x86_csystemdependent.c
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
@@ -94,17 +84,13 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_fwalsh_sse2.asm
-#VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_ssse3.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_impl_ssse3.asm
-#VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
-#VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_quantize_sse4.asm
-VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_mmx.asm
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm
VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm
diff --git a/vpxdec.c b/vpxdec.c
index df0b819..41c654f 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -49,8 +49,8 @@
static const char *exec_name;
-#define VP8_FOURCC (0x30385056)
-#define VP9_FOURCC (0x30395056)
+#define VP8_FOURCC (0x00385056)
+#define VP9_FOURCC (0x00395056)
static const struct {
char const *name;
const vpx_codec_iface_t *(*iface)(void);