1 LR 3 neighbors 1 table
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c
index 8cfb826..143e811 100644
--- a/aom_dsp/entdec.c
+++ b/aom_dsp/entdec.c
@@ -279,10 +279,11 @@
dif -= vw;
ret++;
} else {
+ bit++;
break;
}
}
- return od_ec_dec_bypass_normalize(dec, dif, bit + 1, ret);
+ return od_ec_dec_bypass_normalize(dec, dif, bit, ret);
}
#endif // CONFIG_BYPASS_IMPROVEMENT
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 41976c2..30186d1 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -94,9 +94,11 @@
"${AOM_ROOT}/av1/common/txb_common.c"
"${AOM_ROOT}/av1/common/txb_common.h"
"${AOM_ROOT}/av1/common/warped_motion.c"
- "${AOM_ROOT}/av1/common/warped_motion.h")
-list(APPEND AOM_AV1_COMMON_SOURCES "${AOM_ROOT}/av1/common/pef.h")
-list(APPEND AOM_AV1_COMMON_SOURCES "${AOM_ROOT}/av1/common/pef.c")
+ "${AOM_ROOT}/av1/common/warped_motion.h"
+ "${AOM_ROOT}/av1/common/hr_coding.h"
+ "${AOM_ROOT}/av1/common/hr_coding.c")
+ list(APPEND AOM_AV1_COMMON_SOURCES "${AOM_ROOT}/av1/common/pef.h")
+ list(APPEND AOM_AV1_COMMON_SOURCES "${AOM_ROOT}/av1/common/pef.c")
if(CONFIG_LPF_MASK)
list(APPEND AOM_AV1_COMMON_SOURCES "${AOM_ROOT}/av1/common/loopfiltermask.c")
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index 440656d..5dc7649 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -83,8 +83,11 @@
#define NUM_BASE_LEVELS 2
#define BR_CDF_SIZE (4)
+#if CONFIG_ADAPTIVE_HR
+#define COEFF_BASE_RANGE (1 * (BR_CDF_SIZE - 1))
+#else
#define COEFF_BASE_RANGE (4 * (BR_CDF_SIZE - 1))
-
+#endif // CONFIG_ADAPTIVE_HR
#define COEFF_CONTEXT_BITS 3
#define COEFF_CONTEXT_MASK ((1 << COEFF_CONTEXT_BITS) - 1)
#define MAX_BASE_BR_RANGE (COEFF_BASE_RANGE + NUM_BASE_LEVELS + 1)
diff --git a/av1/common/hr_coding.c b/av1/common/hr_coding.c
new file mode 100644
index 0000000..782e6e7
--- /dev/null
+++ b/av1/common/hr_coding.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2023, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include "av1/common/hr_coding.h"
+#include "aom/internal/aom_codec_internal.h"
+
+// Writes `level` to `w` as a k-th order Exp-Golomb code.
+//
+// The coded value is x = level + (1 << k). With length = get_msb(x) + 1 the
+// codeword is (length - 1 - k) zero bits followed by x written in `length`
+// bits. The assert guarantees that the zero-prefix length is non-negative.
+void write_exp_golomb(aom_writer *w, int level, int k) {
+  const int x = level + (1 << k);
+  const int length = get_msb(x) + 1;
+  assert(length > k);
+
+#if CONFIG_BYPASS_IMPROVEMENT
+  aom_write_literal(w, 0, length - 1 - k);
+  aom_write_literal(w, x, length);
+#else
+  // The loop index is declared in each loop header; nothing else in this
+  // function declares `i`, so the bit-by-bit path would not compile otherwise.
+  for (int i = 0; i < length - 1 - k; ++i) aom_write_bit(w, 0);
+  for (int i = length - 1; i >= 0; --i) aom_write_bit(w, (x >> i) & 0x01);
+#endif  // CONFIG_BYPASS_IMPROVEMENT
+}
+
+// Reads a k-th order Exp-Golomb code from `r` and returns the decoded value
+// (the inverse of write_exp_golomb()).
+//
+// A prefix length above 20 is reported via aom_internal_error() on
+// xd->error_info with AOM_CODEC_CORRUPT_FRAME.
+int read_exp_golomb(MACROBLOCKD *xd, aom_reader *r, int k) {
+#if CONFIG_BYPASS_IMPROVEMENT
+ // NOTE(review): 21 is presumably the longest run aom_read_unary() will
+ // consume, so that 21 signals an over-long prefix -- confirm.
+ int length = aom_read_unary(r, 21, ACCT_INFO("hr"));
+ if (length > 20) {
+ aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
+ "Invalid length in read_exp_golomb");
+ }
+ // The suffix carries k extra bits on top of the prefix length.
+ length += k;
+ int x = 1 << length;
+ x += aom_read_literal(r, length, ACCT_INFO("hr"));
+#else
+ int x = 1;
+ int length = 0;
+ int i = 0;
+ // Count bits up to and including the first 1 bit.
+ while (!i) {
+ i = aom_read_bit(r, ACCT_INFO("hr"));
+ ++length;
+ if (length > 20) {
+ aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
+ "Invalid length in read_exp_golomb");
+ break;
+ }
+ }
+ length += k;
+ // Append the remaining (length - 1) bits below the leading 1 held in x.
+ for (i = 0; i < length - 1; ++i) {
+ x <<= 1;
+ x += aom_read_bit(r, ACCT_INFO("hr"));
+ }
+#endif // CONFIG_BYPASS_IMPROVEMENT
+
+ // Remove the (1 << k) offset that the writer adds before coding.
+ return x - (1 << k);
+}
+
+#if CONFIG_ADAPTIVE_HR
+
+// Threshold table used to map a context value to a Rice parameter.
+typedef struct adaptive_table {
+  const int *table;         // Context thresholds, in ascending order.
+  int initial_param;        // Parameter used when context < table[0].
+  unsigned int table_size;  // Number of entries in `table`.
+} adaptive_table;
+
+// Number of elements of a true array (not valid for pointers).
+#define HR_TABLE_LEN(t) ((unsigned int)(sizeof(t) / sizeof((t)[0])))
+
+// The tables are read-only and private to this file (hr_coding.h does not
+// declare them), so they are `static const`.
+static const int intra_table[] = { 10, 20, 40, 75, 135 };
+static const int inter_table[] = { 20, 35, 55, 95, 170 };
+static const int idtx_table[] = { 25, 50, 130 };
+
+static const adaptive_table tables[] = {
+  // Intra table
+  { .table = intra_table,
+    .initial_param = 1,
+    .table_size = HR_TABLE_LEN(intra_table) },
+  // Inter table
+  { .table = inter_table,
+    .initial_param = 1,
+    .table_size = HR_TABLE_LEN(inter_table) },
+  // IDTX table (only for inter)
+  { .table = idtx_table,
+    .initial_param = 1,
+    .table_size = HR_TABLE_LEN(idtx_table) },
+};
+
+// Returns the Rice parameter m for `info->context`: the table's
+// initial_param plus the number of leading thresholds that are <= context.
+//
+// Only tables[0] (the intra table) is consulted; no field of `info` other
+// than `context` is read.
+int get_adaptive_param(adaptive_hr_info *info) {
+  const adaptive_table *adp_table = &tables[0];
+  int m = adp_table->initial_param;
+
+  for (unsigned int i = 0; i < adp_table->table_size; ++i) {
+    if (info->context < adp_table->table[i]) break;
+    ++m;
+  }
+
+  return m;
+}
+
+// Writes `level` as a truncated Rice code with parameter `m`.
+//
+// While the quotient (level >> m) is below `cmax` the codeword is `quotient`
+// zero bits, a single 1 bit and the low `m` bits of `level`. Otherwise `cmax`
+// zero bits are written and the excess level - (cmax << m) follows as a k-th
+// order Exp-Golomb code.
+void write_truncated_rice(aom_writer *w, int level, int m, int k, int cmax) {
+  const int quotient = level >> m;
+
+  if (quotient < cmax) {
+    const int remainder = level & ((1 << m) - 1);
+    aom_write_literal(w, 0, quotient);
+    aom_write_literal(w, 1, 1);
+    aom_write_literal(w, remainder, m);
+    return;
+  }
+
+  // Escape: all-zero prefix of cmax bits, then the Exp-Golomb coded excess.
+  aom_write_literal(w, 0, cmax);
+  write_exp_golomb(w, level - (cmax << m), k);
+}
+
+// Reads a truncated Rice code with parameter `m` (the inverse of
+// write_truncated_rice()). A prefix equal to `cmax` selects the k-th order
+// Exp-Golomb escape; any other prefix is followed by an `m`-bit remainder.
+int read_truncated_rice(MACROBLOCKD *xd, aom_reader *r, int m, int k,
+                        int cmax) {
+  const int prefix = aom_read_unary(r, cmax, ACCT_INFO("hr"));
+
+  if (prefix != cmax) {
+    const int remainder = aom_read_literal(r, m, ACCT_INFO("hr"));
+    return remainder + (prefix << m);
+  }
+
+  return read_exp_golomb(xd, r, k) + (cmax << m);
+}
+
+// Returns the number of bits write_truncated_rice() emits for `level` with
+// the same (m, k, cmax) parameters.
+int get_truncated_rice_length(int level, int m, int k, int cmax) {
+  const int prefix = level >> m;
+
+  return (prefix < cmax)
+             ? prefix + 1 + m
+             : cmax + get_exp_golomb_length(level - (cmax << m), k);
+}
+
+// Returns the truncated Rice code length of `level` in bits and stores a
+// length delta in `*diff`.
+//
+// From the cases below, `*diff` is the length increase relative to coding
+// (level - 1); level 0 reports its full length.
+int get_truncated_rice_length_diff(int level, int m, int k, int cmax,
+                                   int *diff) {
+  const int prefix = level >> m;
+
+  if (prefix < cmax) {
+    if (level == 0) {
+      *diff = m + 1;
+    } else {
+      // The length grows by one bit only when the unary prefix grows, i.e.
+      // when `level` is an exact multiple of (1 << m).
+      *diff = (level == (prefix << m)) ? 1 : 0;
+    }
+    return prefix + 1 + m;
+  }
+
+  const int excess = level - (cmax << m);
+  if (excess != 0) return cmax + get_exp_golomb_length_diff(excess, k, diff);
+
+  // First escaped level: Exp-Golomb length of 0 is k + 1 bits.
+  const int first_escape_len = k + 1;
+  *diff = first_escape_len - m;
+  return cmax + first_escape_len;
+}
+
+// Exp-Golomb order of the escape code that accompanies Rice parameter `m`.
+// Kept in one place so writer, reader and cost estimation cannot diverge.
+static INLINE int adaptive_hr_escape_order(int m) { return m + 1; }
+
+// Unary prefix length at which the truncated Rice code switches to the escape
+// code for Rice parameter `m`.
+static INLINE int adaptive_hr_max_prefix(int m) { return AOMMIN(m + 4, 6); }
+
+// Writes `level` with the truncated Rice code selected by `info`.
+void write_adaptive_hr(aom_writer *w, int level, adaptive_hr_info *info) {
+  const int m = get_adaptive_param(info);
+  write_truncated_rice(w, level, m, adaptive_hr_escape_order(m),
+                       adaptive_hr_max_prefix(m));
+}
+
+// Reads a value written by write_adaptive_hr() with an equivalent `info`.
+int read_adaptive_hr(MACROBLOCKD *xd, aom_reader *r, adaptive_hr_info *info) {
+  const int m = get_adaptive_param(info);
+  return read_truncated_rice(xd, r, m, adaptive_hr_escape_order(m),
+                             adaptive_hr_max_prefix(m));
+}
+
+// Returns the number of bits write_adaptive_hr() emits for `level`.
+int get_adaptive_hr_length(int level, adaptive_hr_info *info) {
+  const int m = get_adaptive_param(info);
+  return get_truncated_rice_length(level, m, adaptive_hr_escape_order(m),
+                                   adaptive_hr_max_prefix(m));
+}
+
+// Same as get_adaptive_hr_length(), and also stores in `*diff` the delta
+// computed by get_truncated_rice_length_diff().
+int get_adaptive_hr_length_diff(int level, adaptive_hr_info *info, int *diff) {
+  const int m = get_adaptive_param(info);
+  return get_truncated_rice_length_diff(level, m, adaptive_hr_escape_order(m),
+                                        adaptive_hr_max_prefix(m), diff);
+}
+
+#endif // CONFIG_ADAPTIVE_HR
diff --git a/av1/common/hr_coding.h b/av1/common/hr_coding.h
new file mode 100644
index 0000000..4b56c37
--- /dev/null
+++ b/av1/common/hr_coding.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#ifndef AOM_AV1_COMMON_HR_CODING_H_
+#define AOM_AV1_COMMON_HR_CODING_H_
+
+#include "config/aom_config.h"
+
+#include "av1/common/blockd.h"
+#include "aom_dsp/bitwriter.h"
+#include "aom_dsp/bitreader.h"
+
+// Writes `level` to `w` as a k-th order Exp-Golomb code.
+void write_exp_golomb(aom_writer *w, int level, int k);
+// Reads a k-th order Exp-Golomb code from `r` and returns the decoded value.
+// An over-long prefix is reported through xd->error_info.
+int read_exp_golomb(MACROBLOCKD *xd, aom_reader *r, int k);
+
+// Returns the length in bits of the k-th order Exp-Golomb code of `level`.
+static INLINE int get_exp_golomb_length(int level, int k) {
+  const int x = level + (1 << k);
+  return 2 * get_msb(x) + 1 - k;
+}
+
+// Returns the k-th order Exp-Golomb code length of `level` and stores a
+// length delta in `*diff`: the full length for level 0, 2 when
+// level + (1 << k) is a power of two, and 0 otherwise.
+static INLINE int get_exp_golomb_length_diff(int level, int k, int *diff) {
+  const int x = level + (1 << k);
+
+  if (level != 0) {
+    const int is_pow2 = (x & (x - 1)) == 0;
+    *diff = is_pow2 ? 2 : 0;
+    return 2 * get_msb(x) + 1 - k;
+  }
+
+  *diff = k + 1;
+  return k + 1;
+}
+
+#if CONFIG_ADAPTIVE_HR
+
+// Side information about the coefficient whose high-range remainder is being
+// coded. Callers fill it in before calling the *_adaptive_hr() functions.
+typedef struct adaptive_hr_info {
+ // Neighbor-derived magnitude; callers set it from get_hr_ctx() or
+ // get_hr_ctx_skip(). This is the field get_adaptive_param() reads.
+ int context;
+ // Transform size and type of the current transform block.
+ TX_SIZE tx_size;
+ TX_TYPE tx_type;
+ // Callers set this from xd->qindex[] of the block's segment.
+ int qindex;
+ // Callers set this from is_inter_block().
+ bool is_inter;
+ // True when the coefficient is at scan index 0.
+ bool is_dc;
+ // True when the coefficient is at scan index eob - 1.
+ bool is_eob;
+} adaptive_hr_info;
+
+// Truncated Rice code with parameter `m`: a unary prefix of at most `cmax`
+// bits plus an `m`-bit remainder, escaping to a k-th order Exp-Golomb code
+// once the prefix reaches `cmax`.
+void write_truncated_rice(aom_writer *w, int level, int m, int k, int cmax);
+int read_truncated_rice(MACROBLOCKD *xd, aom_reader *r, int m, int k, int cmax);
+// Code length in bits for `level`; the _diff variant also stores a length
+// delta in `*diff`.
+int get_truncated_rice_length(int level, int m, int k, int cmax);
+int get_truncated_rice_length_diff(int level, int m, int k, int cmax,
+ int *diff);
+
+// Same operations with (m, k, cmax) derived from `info` through
+// get_adaptive_param().
+void write_adaptive_hr(aom_writer *w, int level, adaptive_hr_info *info);
+int read_adaptive_hr(MACROBLOCKD *xd, aom_reader *r, adaptive_hr_info *info);
+int get_adaptive_hr_length(int level, adaptive_hr_info *info);
+int get_adaptive_hr_length_diff(int level, adaptive_hr_info *info, int *diff);
+
+#endif // CONFIG_ADAPTIVE_HR
+
+#endif // AOM_AV1_COMMON_HR_CODING_H_
diff --git a/av1/common/txb_common.h b/av1/common/txb_common.h
index cc1829e..c35cce4 100644
--- a/av1/common/txb_common.h
+++ b/av1/common/txb_common.h
@@ -269,6 +269,7 @@
const TX_CLASS tx_class) {
int mag;
// Note: AOMMIN(level, 5) is useless for decoder since level < 5.
+
mag = clip_max5[levels[1]]; // { 0, 1 }
mag += clip_max5[levels[(1 << bwl) + TX_PAD_HOR]]; // { 1, 0 }
if (tx_class == TX_CLASS_2D) {
@@ -292,8 +293,8 @@
static AOM_FORCE_INLINE int get_nz_mag(const uint8_t *const levels,
const int bwl, const TX_CLASS tx_class) {
int mag;
-
// Note: AOMMIN(level, 3) is useless for decoder since level < 3.
+
mag = clip_max3[levels[1]]; // { 0, 1 }
mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR]]; // { 1, 0 }
@@ -314,6 +315,27 @@
return mag;
}
+#if CONFIG_ADAPTIVE_HR
+// Sums the levels of three neighbors of the position `levels` points at,
+// without clipping the individual values. Offsets are noted as { row, col }.
+static AOM_FORCE_INLINE int get_nz_mag_noclip(const uint8_t *const levels,
+                                              const int bwl,
+                                              const TX_CLASS tx_class) {
+  const int stride = (1 << bwl) + TX_PAD_HOR;
+
+  // The third neighbor depends on the transform class.
+  int third_offset;
+  if (tx_class == TX_CLASS_2D) {
+    third_offset = stride + 1;  // { 1, 1 }
+  } else if (tx_class == TX_CLASS_VERT) {
+    third_offset = (2 << bwl) + (2 << TX_PAD_HOR_LOG2);  // { 2, 0 }
+  } else {
+    third_offset = 2;  // { 0, 2 }
+  }
+
+  return levels[1]          // { 0, 1 }
+         + levels[stride]   // { 1, 0 }
+         + levels[third_offset];
+}
+#endif // CONFIG_ADAPTIVE_HR
+
#define NZ_MAP_CTX_0 SIG_COEF_CONTEXTS_2D
#define NZ_MAP_CTX_5 (NZ_MAP_CTX_0 + 5)
#define NZ_MAP_CTX_10 (NZ_MAP_CTX_0 + 10)
@@ -621,6 +643,23 @@
}
}
+#if CONFIG_ADAPTIVE_HR
+// Context for high-range coding of the coefficient at `coeff_idx`: 0 for the
+// end-of-block coefficient, otherwise the unclipped neighbor magnitude at its
+// padded position.
+static AOM_FORCE_INLINE int get_hr_ctx(const uint8_t *levels, int coeff_idx,
+                                       int bwl, int is_eob, TX_CLASS tx_class) {
+  if (!is_eob) {
+    const uint8_t *const cur = levels + get_padded_idx(coeff_idx, bwl);
+    return get_nz_mag_noclip(cur, bwl, tx_class);
+  }
+  return 0;
+}
+
+// Same as get_hr_ctx(), but the position is resolved with
+// get_padded_idx_left().
+static AOM_FORCE_INLINE int get_hr_ctx_skip(const uint8_t *levels,
+                                            int coeff_idx, int bwl, int is_eob,
+                                            TX_CLASS tx_class) {
+  if (!is_eob) {
+    const uint8_t *const cur = levels + get_padded_idx_left(coeff_idx, bwl);
+    return get_nz_mag_noclip(cur, bwl, tx_class);
+  }
+  return 0;
+}
+#endif // CONFIG_ADAPTIVE_HR
+
static INLINE void set_dc_sign(int *cul_level, int dc_val) {
if (dc_val < 0)
*cul_level |= 1 << COEFF_CONTEXT_BITS;
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c
index 3959564..3313fb7 100644
--- a/av1/decoder/decodetxb.c
+++ b/av1/decoder/decodetxb.c
@@ -18,40 +18,9 @@
#include "av1/common/scan.h"
#include "av1/common/txb_common.h"
#include "av1/common/reconintra.h"
+#include "av1/common/hr_coding.h"
#include "av1/decoder/decodemv.h"
-static int read_golomb(MACROBLOCKD *xd, aom_reader *r) {
- int x = 1;
- int length = 0;
-
-#if CONFIG_BYPASS_IMPROVEMENT
- length = aom_read_unary(r, 21, ACCT_INFO("length"));
- if (length > 20) {
- aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
- "Invalid length in read_golomb");
- }
- x = 1 << length;
- x += aom_read_literal(r, length, ACCT_INFO());
-#else
- int i = 0;
- while (!i) {
- i = aom_read_bit(r, ACCT_INFO());
- ++length;
- if (length > 20) {
- aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
- "Invalid length in read_golomb");
- break;
- }
- }
- for (i = 0; i < length - 1; ++i) {
- x <<= 1;
- x += aom_read_bit(r, ACCT_INFO());
- }
-#endif // CONFIG_BYPASS_IMPROVEMENT
-
- return x - 1;
-}
-
static INLINE int rec_eob_pos(const int eob_token, const int extra) {
int eob = av1_eob_group_start[eob_token];
if (eob > 2) {
@@ -431,6 +400,16 @@
cm->features.reduced_tx_set_used);
const qm_val_t *iqmatrix =
av1_get_iqmatrix(&cm->quant_params, xd, plane, tx_size, tx_type);
+
+#if CONFIG_ADAPTIVE_HR
+ const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)];
+ adaptive_hr_info hr_info = { .tx_size = tx_size,
+ .tx_type = tx_type,
+ .qindex = xd->qindex[mbmi->segment_id],
+ .is_inter =
+ is_inter_block(mbmi, xd->tree_type) };
+#endif // CONFIG_ADAPTIVE_HR
+
#if CONFIG_INSPECTION
for (int c = 0; c < width * height; c++) {
dequant_values[c] = get_dqv(dequant, c, iqmatrix);
@@ -480,7 +459,18 @@
ACCT_INFO("sign"));
signs[get_padded_idx(pos, bwl)] = sign > 0 ? -1 : 1;
if (level >= MAX_BASE_BR_RANGE) {
- level += read_golomb(xd, r);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob_data->eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx_skip(levels, pos, bwl, is_eob, tx_class);
+ level += read_adaptive_hr(xd, r, &hr_info);
+
+ levels[get_padded_idx_left(pos, bwl)] =
+ (uint8_t)(AOMMIN(level, UINT8_MAX));
+#else
+ level += read_exp_golomb(xd, r, 0);
+#endif // CONFIG_ADAPTIVE_HR
}
if (c == 0) dc_val = sign ? -level : level;
// Bitmasking to clamp level to valid range:
@@ -589,6 +579,15 @@
const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)];
const qm_val_t *iqmatrix =
av1_get_iqmatrix(&cm->quant_params, xd, plane, tx_size, tx_type);
+
+#if CONFIG_ADAPTIVE_HR
+ adaptive_hr_info hr_info = { .tx_size = tx_size,
+ .tx_type = tx_type,
+ .qindex = xd->qindex[mbmi->segment_id],
+ .is_inter =
+ is_inter_block(mbmi, xd->tree_type) };
+#endif // CONFIG_ADAPTIVE_HR
+
#if CONFIG_INSPECTION
for (int c = 0; c < width * height; c++) {
dequant_values[c] = get_dqv(dequant, c, iqmatrix);
@@ -762,7 +761,19 @@
}
if (is_hidden && c == 0) {
if (level >= (MAX_BASE_BR_RANGE << 1)) {
- level += (read_golomb(xd, r) << 1);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (*eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ // Use context divided by 2 since the coefficient is also divided by 2
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class) >> 1;
+ level += (read_adaptive_hr(xd, r, &hr_info) << 1);
+
+ levels[get_padded_idx(pos, bwl)] =
+ (uint8_t)(AOMMIN(level, UINT8_MAX));
+#else
+ level += (read_exp_golomb(xd, r, 0) << 1);
+#endif // CONFIG_ADAPTIVE_HR
}
} else {
const int row = pos >> bwl;
@@ -770,11 +781,33 @@
int limits = get_lf_limits(row, col, tx_class, plane);
if (limits) {
if (level >= LF_MAX_BASE_BR_RANGE) {
- level += read_golomb(xd, r);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (*eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ level += read_adaptive_hr(xd, r, &hr_info);
+
+ levels[get_padded_idx(pos, bwl)] =
+ (uint8_t)(AOMMIN(level, UINT8_MAX));
+#else
+ level += read_exp_golomb(xd, r, 0);
+#endif // CONFIG_ADAPTIVE_HR
}
} else {
if (level >= MAX_BASE_BR_RANGE) {
- level += read_golomb(xd, r);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (*eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ level += read_adaptive_hr(xd, r, &hr_info);
+
+ levels[get_padded_idx(pos, bwl)] =
+ (uint8_t)(AOMMIN(level, UINT8_MAX));
+#else
+ level += read_exp_golomb(xd, r, 0);
+#endif // CONFIG_ADAPTIVE_HR
}
}
}
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 9bc2c04..e8a24cb 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -18,6 +18,7 @@
#include "av1/common/pred_common.h"
#include "av1/common/scan.h"
#include "av1/common/reconintra.h"
+#include "av1/common/hr_coding.h"
#include "av1/encoder/bitstream.h"
#include "av1/encoder/cost.h"
#include "av1/encoder/encodeframe.h"
@@ -108,29 +109,6 @@
void av1_free_txb_buf(AV1_COMP *cpi) { aom_free(cpi->coeff_buffer_base); }
-static void write_golomb(aom_writer *w, int level) {
- int x = level + 1;
- int length = 0;
-
-#if CONFIG_BYPASS_IMPROVEMENT
- length = get_msb(x) + 1;
- assert(length > 0);
- aom_write_literal(w, 0, length - 1);
- aom_write_literal(w, x, length);
-#else
- int i = x;
- while (i) {
- i >>= 1;
- ++length;
- }
- assert(length > 0);
-
- for (i = 0; i < length - 1; ++i) aom_write_bit(w, 0);
-
- for (i = length - 1; i >= 0; --i) aom_write_bit(w, (x >> i) & 0x01);
-#endif
-}
-
static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
int shift) {
const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
@@ -298,35 +276,93 @@
return eob_cost;
}
-static const int golomb_bits_cost[32] = {
+#if CONFIG_ADAPTIVE_HR
+// Cost of the base-range part of `level` taken from `coeff_lps`, plus the
+// adaptive high-range code cost once `level` exceeds the base range.
+static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps,
+                              adaptive_hr_info *info) {
+  // First level that needs a high-range remainder.
+  const int hr_start = 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE;
+  const int br_idx = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
+
+  if (level < hr_start) return coeff_lps[br_idx];
+
+  const int remainder = level - hr_start;
+  return coeff_lps[br_idx] +
+         av1_cost_literal(get_adaptive_hr_length(remainder, info));
+}
+
+// Low-frequency variant of get_br_cost(): identical except that it uses
+// LF_NUM_BASE_LEVELS instead of NUM_BASE_LEVELS.
+static INLINE int get_br_lf_cost(tran_low_t level, const int *coeff_lps,
+                                 adaptive_hr_info *info) {
+  // First level that needs a high-range remainder.
+  const int hr_start = 1 + LF_NUM_BASE_LEVELS + COEFF_BASE_RANGE;
+  const int br_idx = AOMMIN(level - 1 - LF_NUM_BASE_LEVELS, COEFF_BASE_RANGE);
+
+  if (level < hr_start) return coeff_lps[br_idx];
+
+  const int remainder = level - hr_start;
+  return coeff_lps[br_idx] +
+         av1_cost_literal(get_adaptive_hr_length(remainder, info));
+}
+
+// Returns the same cost as get_br_cost() and accumulates a cost delta into
+// `*diff`.
+// NOTE(review): entries of `coeff_lps` past COEFF_BASE_RANGE presumably hold
+// base-range cost differences -- confirm against the table builder.
+static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps,
+                                        int *diff, adaptive_hr_info *info) {
+  const int hr_start = COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
+  const int br_idx = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
+  int cost = coeff_lps[br_idx];
+
+  if (level <= hr_start) *diff += coeff_lps[br_idx + COEFF_BASE_RANGE + 1];
+
+  if (level >= hr_start) {
+    int hr_diff_bits;
+    const int hr_bits =
+        get_adaptive_hr_length_diff(level - hr_start, info, &hr_diff_bits);
+    *diff += av1_cost_literal(hr_diff_bits);
+    cost += av1_cost_literal(hr_bits);
+  }
+
+  return cost;
+}
+
+// Low-frequency variant of get_br_cost_with_diff(): identical except that it
+// uses LF_NUM_BASE_LEVELS instead of NUM_BASE_LEVELS.
+static INLINE int get_br_lf_cost_with_diff(tran_low_t level,
+                                           const int *coeff_lps, int *diff,
+                                           adaptive_hr_info *info) {
+  const int hr_start = COEFF_BASE_RANGE + 1 + LF_NUM_BASE_LEVELS;
+  const int br_idx = AOMMIN(level - 1 - LF_NUM_BASE_LEVELS, COEFF_BASE_RANGE);
+  int cost = coeff_lps[br_idx];
+
+  if (level <= hr_start) *diff += coeff_lps[br_idx + COEFF_BASE_RANGE + 1];
+
+  if (level >= hr_start) {
+    int hr_diff_bits;
+    const int hr_bits =
+        get_adaptive_hr_length_diff(level - hr_start, info, &hr_diff_bits);
+    *diff += av1_cost_literal(hr_diff_bits);
+    cost += av1_cost_literal(hr_bits);
+  }
+
+  return cost;
+}
+#else
+static const int exp_golomb0_bits_cost[32] = {
0, 512, 512 * 3, 512 * 3, 512 * 5, 512 * 5, 512 * 5, 512 * 5,
512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7,
512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9,
512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9
};
-static const int golomb_cost_diff[32] = {
+static const int exp_golomb0_cost_diff[32] = {
0, 512, 512 * 2, 0, 512 * 2, 0, 0, 0, 512 * 2, 0, 0, 0, 0, 0, 0, 0,
512 * 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
-static INLINE int get_golomb_cost(int abs_qc) {
- if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
- const int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
- const int length = get_msb(r) + 1;
- return av1_cost_literal(2 * length - 1);
- }
- return 0;
-}
+static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps) {
+ const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
+ int cost = coeff_lps[base_range];
-// Golomb cost of coding bypass coded level values in the
-// low-frequency region.
-static INLINE int get_golomb_cost_lf(int abs_qc) {
- if (abs_qc >= 1 + LF_NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
- const int r = abs_qc - COEFF_BASE_RANGE - LF_NUM_BASE_LEVELS;
- const int length = get_msb(r) + 1;
- return av1_cost_literal(2 * length - 1);
+ if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
+ const int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS - 1;
+ cost += av1_cost_literal(get_exp_golomb_length(r, 0));
}
- return 0;
+
+ return cost;
}
// Base range cost of coding level values in the
@@ -334,7 +370,35 @@
static INLINE int get_br_lf_cost(tran_low_t level, const int *coeff_lps) {
const int base_range =
AOMMIN(level - 1 - LF_NUM_BASE_LEVELS, COEFF_BASE_RANGE);
- return coeff_lps[base_range] + get_golomb_cost_lf(level);
+ int cost = coeff_lps[base_range];
+
+ if (level >= 1 + LF_NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
+ const int r = level - COEFF_BASE_RANGE - LF_NUM_BASE_LEVELS - 1;
+ cost += av1_cost_literal(get_exp_golomb_length(r, 0));
+ }
+
+ return cost;
+}
+
+static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps,
+ int *diff) {
+ const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
+ int cost = coeff_lps[base_range];
+
+ if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS)
+ *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
+
+ if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) {
+ int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
+ if (r < 32) {
+ cost += exp_golomb0_bits_cost[r];
+ *diff += exp_golomb0_cost_diff[r];
+ } else {
+ cost += av1_cost_literal(get_exp_golomb_length(r - 1, 0));
+ *diff += (r & (r - 1)) == 0 ? 1024 : 0;
+ }
+ }
+ return cost;
}
// Calculates differential cost for base range coding in the low-frequency
@@ -343,47 +407,24 @@
const int *coeff_lps, int *diff) {
const int base_range =
AOMMIN(level - 1 - LF_NUM_BASE_LEVELS, COEFF_BASE_RANGE);
- int golomb_bits = 0;
+ int cost = coeff_lps[base_range];
+
if (level <= COEFF_BASE_RANGE + 1 + LF_NUM_BASE_LEVELS)
*diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
+
if (level >= COEFF_BASE_RANGE + 1 + LF_NUM_BASE_LEVELS) {
int r = level - COEFF_BASE_RANGE - LF_NUM_BASE_LEVELS;
if (r < 32) {
- golomb_bits = golomb_bits_cost[r];
- *diff += golomb_cost_diff[r];
+ cost += exp_golomb0_bits_cost[r];
+ *diff += exp_golomb0_cost_diff[r];
} else {
- golomb_bits = get_golomb_cost_lf(level);
+ cost += av1_cost_literal(get_exp_golomb_length(r - 1, 0));
*diff += (r & (r - 1)) == 0 ? 1024 : 0;
}
}
- return coeff_lps[base_range] + golomb_bits;
+ return cost;
}
-
-static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps,
- int *diff) {
- const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
- int golomb_bits = 0;
- if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS)
- *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
-
- if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) {
- int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
- if (r < 32) {
- golomb_bits = golomb_bits_cost[r];
- *diff += golomb_cost_diff[r];
- } else {
- golomb_bits = get_golomb_cost(level);
- *diff += (r & (r - 1)) == 0 ? 1024 : 0;
- }
- }
-
- return coeff_lps[base_range] + golomb_bits;
-}
-
-static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps) {
- const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
- return coeff_lps[base_range] + get_golomb_cost(level);
-}
+#endif // CONFIG_ADAPTIVE_HR
static INLINE int get_nz_map_ctx(const uint8_t *const levels,
const int coeff_idx, const int bwl,
@@ -451,7 +492,7 @@
}
for (int j = 0; j < width; j++) {
*si++ = (int8_t)(coeff[i * width + j] > 0) ? 1 : -1;
- *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
+ *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, UINT8_MAX);
}
// right 4 pad
for (int j = 0; j < TX_PAD_RIGHT; j++) {
@@ -476,7 +517,7 @@
*ls++ = 0;
}
for (int j = 0; j < width; j++) {
- *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
+ *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, UINT8_MAX);
}
}
}
@@ -491,7 +532,7 @@
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
- *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
+ *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, UINT8_MAX);
}
for (int j = 0; j < TX_PAD_HOR; j++) {
*ls++ = 0;
@@ -705,6 +746,16 @@
const int16_t *const scan = scan_order->scan;
const int bwl = get_txb_bwl(tx_size);
+#if CONFIG_ADAPTIVE_HR
+ const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)];
+ const MB_MODE_INFO *mbmi = xd->mi[0];
+ adaptive_hr_info hr_info = { .tx_size = tx_size,
+ .tx_type = tx_type,
+ .qindex = xd->qindex[mbmi->segment_id],
+ .is_inter =
+ is_inter_block(mbmi, xd->tree_type) };
+#endif // CONFIG_ADAPTIVE_HR
+
DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
const int txb_offset =
x->mbmi_ext_frame->cb_offset[plane] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
@@ -747,8 +798,18 @@
if (level) {
int idtx_sign_ctx = get_sign_ctx_skip(signs, levels, pos, bwl);
aom_write_symbol(w, sign, ec_ctx->idtx_sign_cdf[idtx_sign_ctx], 2);
- if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
- write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
+ if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS) {
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx_skip(levels, pos, bwl, is_eob, tx_class);
+ write_adaptive_hr(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS,
+ &hr_info);
+#else
+ write_exp_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS, 0);
+#endif // CONFIG_ADAPTIVE_HR
+ }
}
}
}
@@ -843,6 +904,15 @@
const int bwl = get_txb_bwl(tx_size);
+#if CONFIG_ADAPTIVE_HR
+ const MB_MODE_INFO *mbmi = xd->mi[0];
+ adaptive_hr_info hr_info = { .tx_size = tx_size,
+ .tx_type = tx_type,
+ .qindex = xd->qindex[mbmi->segment_id],
+ .is_inter =
+ is_inter_block(mbmi, xd->tree_type) };
+#endif // CONFIG_ADAPTIVE_HR
+
bool enable_parity_hiding = cm->features.allow_parity_hiding &&
!xd->lossless[xd->mi[0]->segment_id] &&
plane == PLANE_TYPE_Y &&
@@ -1039,19 +1109,54 @@
}
if (is_hidden && c == 0) {
int q_index = level >> 1;
- if (q_index > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
- write_golomb(w, q_index - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
+ if (q_index > COEFF_BASE_RANGE + NUM_BASE_LEVELS) {
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ // Use context divided by 2 since the coefficient is also divided by 2
+ hr_info.context =
+ get_hr_ctx(levels, scan[0], bwl, is_eob, tx_class) >> 1;
+ write_adaptive_hr(w, q_index - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS,
+ &hr_info);
+#else
+ write_exp_golomb(w, q_index - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS,
+ 0);
+#endif // CONFIG_ADAPTIVE_HR
+ }
} else {
const int pos = scan[c];
const int row = pos >> bwl;
const int col = pos - (row << bwl);
int limits = get_lf_limits(row, col, tx_class, plane);
if (limits) {
- if (level > COEFF_BASE_RANGE + LF_NUM_BASE_LEVELS)
- write_golomb(w, level - COEFF_BASE_RANGE - 1 - LF_NUM_BASE_LEVELS);
+ if (level > COEFF_BASE_RANGE + LF_NUM_BASE_LEVELS) {
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ write_adaptive_hr(
+ w, level - COEFF_BASE_RANGE - 1 - LF_NUM_BASE_LEVELS, &hr_info);
+#else
+ write_exp_golomb(
+ w, level - COEFF_BASE_RANGE - 1 - LF_NUM_BASE_LEVELS, 0);
+#endif // CONFIG_ADAPTIVE_HR
+ }
} else {
- if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
- write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
+ if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS) {
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ write_adaptive_hr(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS,
+ &hr_info);
+#else
+ write_exp_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS,
+ 0);
+#endif // CONFIG_ADAPTIVE_HR
+ }
}
}
}
@@ -1308,6 +1413,7 @@
int8_t signs_buf[TX_PAD_2D];
int8_t *const signs = set_signs(signs_buf, width);
av1_txb_init_levels_signs(qcoeff, width, height, levels_buf, signs_buf);
+
const int bob_code = p->bobs[block];
const int bob = av1_get_max_eob(tx_size) - bob_code;
#if !CONFIG_TX_SKIP_FLAG_MODE_DEP_CTX
@@ -1319,6 +1425,15 @@
cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used,
eob, bob_code, is_fsc);
+#if CONFIG_ADAPTIVE_HR
+ const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)];
+ adaptive_hr_info hr_info = { .tx_size = tx_size,
+ .tx_type = tx_type,
+ .qindex = xd->qindex[mbmi->segment_id],
+ .is_inter =
+ is_inter_block(mbmi, xd->tree_type) };
+#endif // CONFIG_ADAPTIVE_HR
+
const int eob_multi_size = txsize_log2_minus4[tx_size];
const LV_MAP_EOB_COST *const eob_costs =
&x->coeff_costs.eob_costs[eob_multi_size][PLANE_TYPE_Y];
@@ -1350,7 +1465,15 @@
if (v) {
if (level > NUM_BASE_LEVELS) {
const int ctx = get_br_ctx_skip(levels, pos, bwl);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx_skip(levels, pos, bwl, is_eob, tx_class);
+ cost += get_br_cost(level, lps_cost[ctx], &hr_info);
+#else
cost += get_br_cost(level, lps_cost[ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
}
}
@@ -1415,6 +1538,15 @@
int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
#endif // CONFIG_CONTEXT_DERIVATION
+#if CONFIG_ADAPTIVE_HR
+ const MB_MODE_INFO *mbmi = xd->mi[0];
+ adaptive_hr_info hr_info = { .tx_size = tx_size,
+ .tx_type = tx_type,
+ .qindex = xd->qindex[mbmi->segment_id],
+ .is_inter =
+ is_inter_block(mbmi, xd->tree_type) };
+#endif // CONFIG_ADAPTIVE_HR
+
av1_txb_init_levels(qcoeff, width, height, levels);
const int bob_code = p->bobs[block];
@@ -1463,12 +1595,28 @@
if (limits) {
if (level > LF_NUM_BASE_LEVELS) {
const int ctx = get_br_ctx_lf_eob(pos, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ cost += get_br_lf_cost(level, lps_lf_cost[ctx], &hr_info);
+#else
cost += get_br_lf_cost(level, lps_lf_cost[ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
} else {
if (level > NUM_BASE_LEVELS) {
const int ctx = 7; /* get_lf_ctx_eob */
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ cost += get_br_cost(level, lps_cost[ctx], &hr_info);
+#else
cost += get_br_cost(level, lps_cost[ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
}
if (c) {
@@ -1530,12 +1678,28 @@
if (limits) {
if (level > LF_NUM_BASE_LEVELS) {
const int ctx = get_br_lf_ctx(levels, pos, bwl, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ cost += get_br_lf_cost(level, lps_lf_cost[ctx], &hr_info);
+#else
cost += get_br_lf_cost(level, lps_lf_cost[ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
} else {
if (level > NUM_BASE_LEVELS) {
const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ cost += get_br_cost(level, lps_cost[ctx], &hr_info);
+#else
cost += get_br_cost(level, lps_cost[ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
}
}
@@ -1560,7 +1724,17 @@
if (q_index > NUM_BASE_LEVELS) {
const int ctx = get_par_br_ctx(levels, pos, bwl, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ // Use context divided by 2 since the coefficient is also divided by 2
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class) >> 1;
+ cost +=
+ get_br_cost(q_index, coeff_costs_ph->lps_ph_cost[ctx], &hr_info);
+#else
cost += get_br_cost(q_index, coeff_costs_ph->lps_ph_cost[ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
}
return cost;
@@ -1597,12 +1771,28 @@
if (limits) {
if (level > LF_NUM_BASE_LEVELS) {
const int ctx = get_br_lf_ctx(levels, pos, bwl, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ cost += get_br_lf_cost(level, lps_lf_cost[ctx], &hr_info);
+#else
cost += get_br_lf_cost(level, lps_lf_cost[ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
} else {
if (level > NUM_BASE_LEVELS) {
const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ bool is_eob = c == (eob - 1);
+ hr_info.is_dc = (c == 0);
+ hr_info.is_eob = is_eob;
+ hr_info.context = get_hr_ctx(levels, pos, bwl, is_eob, tx_class);
+ cost += get_br_cost(level, lps_cost[ctx], &hr_info);
+#else
cost += get_br_cost(level, lps_cost[ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
}
}
@@ -1866,7 +2056,12 @@
static AOM_FORCE_INLINE int get_two_coeff_cost_simple(
int plane, int ci, tran_low_t abs_qc, int coeff_ctx,
const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class,
- const uint8_t *levels, int *cost_low) {
+ const uint8_t *levels, int *cost_low
+#if CONFIG_ADAPTIVE_HR
+ ,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
+) {
// this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
// and not the last (scan_idx != eob - 1)
assert(ci > 0);
@@ -1925,16 +2120,32 @@
if (abs_qc > LF_NUM_BASE_LEVELS) {
const int br_ctx = get_br_lf_ctx(levels, ci, bwl, tx_class);
int brcost_diff = 0;
+#if CONFIG_ADAPTIVE_HR
+ hr_info->is_dc = false;
+ hr_info->is_eob = false;
+ hr_info->context = get_hr_ctx(levels, ci, bwl, false, tx_class);
+ cost += get_br_lf_cost_with_diff(abs_qc, txb_costs->lps_lf_cost[br_ctx],
+ &brcost_diff, hr_info);
+#else
cost += get_br_lf_cost_with_diff(abs_qc, txb_costs->lps_lf_cost[br_ctx],
&brcost_diff);
+#endif // CONFIG_ADAPTIVE_HR
diff += brcost_diff;
}
} else {
if (abs_qc > NUM_BASE_LEVELS) {
const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
int brcost_diff = 0;
+#if CONFIG_ADAPTIVE_HR
+ hr_info->is_dc = false;
+ hr_info->is_eob = false;
+ hr_info->context = get_hr_ctx(levels, ci, bwl, false, tx_class);
+ cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx],
+ &brcost_diff, hr_info);
+#else
cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx],
&brcost_diff);
+#endif // CONFIG_ADAPTIVE_HR
diff += brcost_diff;
}
}
@@ -1953,7 +2164,13 @@
int32_t *tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- int plane) {
+ int plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ const uint8_t *levels,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
+) {
int cost = 0;
const int row = ci >> bwl;
const int col = ci - (row << bwl);
@@ -1987,14 +2204,27 @@
}
if (limits) {
if (abs_qc > LF_NUM_BASE_LEVELS) {
- int br_ctx;
- br_ctx = get_br_ctx_lf_eob(ci, tx_class);
+ int br_ctx = get_br_ctx_lf_eob(ci, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ hr_info->is_eob = true;
+ hr_info->is_dc = (ci == 0);
+ hr_info->context = get_hr_ctx(levels, ci, bwl, true, tx_class);
+ cost += get_br_lf_cost(abs_qc, txb_costs->lps_lf_cost[br_ctx], hr_info);
+#else
cost += get_br_lf_cost(abs_qc, txb_costs->lps_lf_cost[br_ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
} else {
if (abs_qc > NUM_BASE_LEVELS) {
int br_ctx = 7; /* get_br_ctx_eob */
+#if CONFIG_ADAPTIVE_HR
+ hr_info->is_eob = true;
+ hr_info->is_dc = (ci == 0);
+ hr_info->context = get_hr_ctx(levels, ci, bwl, true, tx_class);
+ cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx], hr_info);
+#else
cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
}
}
@@ -2012,7 +2242,12 @@
int32_t *tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- int plane) {
+ int plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
+) {
int cost = 0;
if (is_last) {
const int row = ci >> bwl;
@@ -2067,7 +2302,15 @@
br_ctx = get_br_ctx_lf_eob(ci, tx_class);
else
br_ctx = get_br_lf_ctx(levels, ci, bwl, tx_class);
+
+#if CONFIG_ADAPTIVE_HR
+ hr_info->is_dc = (ci == 0);
+ hr_info->is_eob = is_last;
+ hr_info->context = get_hr_ctx(levels, ci, bwl, is_last, tx_class);
+ cost += get_br_lf_cost(abs_qc, txb_costs->lps_lf_cost[br_ctx], hr_info);
+#else
cost += get_br_lf_cost(abs_qc, txb_costs->lps_lf_cost[br_ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
} else {
if (abs_qc > NUM_BASE_LEVELS) {
@@ -2076,7 +2319,15 @@
br_ctx = 0; /*get_br_ctx_eob*/
else
br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
+
+#if CONFIG_ADAPTIVE_HR
+ hr_info->is_dc = (ci == 0);
+ hr_info->is_eob = is_last;
+ hr_info->context = get_hr_ctx(levels, ci, bwl, is_last, tx_class);
+ cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx], hr_info);
+#else
cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
}
}
@@ -2111,7 +2362,12 @@
int32_t *tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- int plane, coeff_info *coef_info, bool enable_parity_hiding) {
+ int plane, coeff_info *coef_info, bool enable_parity_hiding
+#if CONFIG_ADAPTIVE_HR
+ ,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
+) {
const int dqv = get_dqv(dequant, scan[si], iqmatrix);
const int ci = scan[si];
const tran_low_t qc = qcoeff[ci];
@@ -2142,7 +2398,12 @@
tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane);
+ plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
const int64_t rd = RDCOST(rdmult, rate, dist);
tran_low_t qc_low, dqc_low;
@@ -2172,14 +2433,19 @@
tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane);
+ plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
}
rd_low = RDCOST(rdmult, rate_low, dist_low);
if (rd_low < rd) {
qcoeff[ci] = qc_low;
dqcoeff[ci] = dqc_low;
- levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
+ levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, UINT8_MAX);
*accu_rate += rate_low;
*accu_dist += dist_low - dist0;
if (enable_parity_hiding)
@@ -2201,7 +2467,12 @@
const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels,
const qm_val_t *iqmatrix, coeff_info *coef_info, bool enable_parity_hiding,
- int plane) {
+ int plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
+) {
const int dqv = get_dqv(dequant, scan[si], iqmatrix);
(void)eob;
// this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
@@ -2238,7 +2509,13 @@
int rate_low = 0;
const int rate =
get_two_coeff_cost_simple(plane, ci, abs_qc, coeff_ctx, txb_costs, bwl,
- tx_class, levels, &rate_low);
+ tx_class, levels, &rate_low
+#if CONFIG_ADAPTIVE_HR
+ ,
+ hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
+
if (abs_dqc < abs_tqc) {
*accu_rate += rate;
return;
@@ -2263,7 +2540,7 @@
rate, false, coef_info, si);
qcoeff[ci] = qc_low;
dqcoeff[ci] = dqc_low;
- levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
+ levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, UINT8_MAX);
*accu_rate += rate_low;
} else {
*accu_rate += rate;
@@ -2292,7 +2569,12 @@
int32_t *tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- int plane, coeff_info *coef_info, bool enable_parity_hiding) {
+ int plane, coeff_info *coef_info, bool enable_parity_hiding
+#if CONFIG_ADAPTIVE_HR
+ ,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
+) {
const int bwl = get_txb_bwl(tx_size);
const int height = get_txb_high(tx_size);
const int dqv = get_dqv(dequant, scan[si], iqmatrix);
@@ -2337,7 +2619,12 @@
tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane);
+ plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
tran_low_t qc_low, dqc_low;
@@ -2366,7 +2653,12 @@
tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane);
+ plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
}
int rate_up_backup = rate;
@@ -2388,7 +2680,12 @@
tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane);
+ plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ levels, hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
int64_t dist_new_eob = dist;
int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
int rateeobup = rate_coeff_eob;
@@ -2403,7 +2700,12 @@
tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane);
+ plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ levels, hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
const int64_t dist_new_eob_low = dist_low;
const int64_t rd_new_eob_low =
RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
@@ -2451,7 +2753,7 @@
if (lower_level) {
qcoeff[ci] = qc_low;
dqcoeff[ci] = dqc_low;
- levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
+ levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, UINT8_MAX);
}
if (qcoeff[ci]) {
nz_ci[*nz_num] = ci;
@@ -2491,6 +2793,10 @@
,
int plane
#endif // CONFIG_CHROMA_TX_COEFF_CODING
+#if CONFIG_ADAPTIVE_HR
+ ,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
) {
tran_low_t abslevel = abs(level), q_index = abslevel >> 1;
int sign = level < 0;
@@ -2515,14 +2821,25 @@
0
#endif // CONFIG_CONTEXT_DERIVATION
,
- 0);
+ 0
+#if CONFIG_ADAPTIVE_HR
+ ,
+ hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
const int base_ctx_ph = get_base_ctx_ph(levels, pos, bwl, tx_class);
int rate_ph = txb_costs_ph->base_ph_cost[base_ctx_ph][AOMMIN(q_index, 3)];
if (q_index > NUM_BASE_LEVELS) {
- rate_ph += get_br_cost(
- q_index,
- txb_costs_ph->lps_ph_cost[get_par_br_ctx(levels, pos, bwl, tx_class)]);
+ int br_ctx = get_par_br_ctx(levels, pos, bwl, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ hr_info->is_eob = false;
+ hr_info->is_dc = (pos == 0);
+ hr_info->context = get_hr_ctx(levels, pos, bwl, false, tx_class) >> 1;
+ rate_ph += get_br_cost(q_index, txb_costs_ph->lps_ph_cost[br_ctx], hr_info);
+#else
+ rate_ph += get_br_cost(q_index, txb_costs_ph->lps_ph_cost[br_ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
if (abslevel) rate_ph += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
return rate_ph - *rate;
@@ -2544,7 +2861,12 @@
const LV_MAP_COEFF_COST *txb_costs_ph, int dc_sign_ctx, TX_CLASS tx_class,
uint8_t *levels, const int bwl, const int64_t rdmult,
const int32_t *dequant, const qm_val_t *iqmatrix, tune_cand *t_cand,
- int rate_cur) {
+ int rate_cur
+#if CONFIG_ADAPTIVE_HR
+ ,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
+) {
const int dqv = get_dqv(dequant, pos, iqmatrix);
tran_low_t abslevel = abs(qcoeff), abstqc = abs(tcoeff);
int64_t dist = get_coeff_dist(tcoeff, dqcoeff, shift);
@@ -2564,9 +2886,16 @@
if (abslevel_cand) {
rate_cand += txb_costs->dc_sign_cost[dc_sign_ctx][tcoeff < 0];
if (q_index > NUM_BASE_LEVELS) {
+ int br_ctx = get_par_br_ctx(levels, pos, bwl, tx_class);
+#if CONFIG_ADAPTIVE_HR
+ hr_info->is_eob = false;
+ hr_info->is_dc = (pos == 0);
+ hr_info->context = get_hr_ctx(levels, pos, bwl, false, tx_class) >> 1;
rate_cand +=
- get_br_cost(q_index, txb_costs_ph->lps_ph_cost[get_par_br_ctx(
- levels, pos, bwl, tx_class)]);
+ get_br_cost(q_index, txb_costs_ph->lps_ph_cost[br_ctx], hr_info);
+#else
+ rate_cand += get_br_cost(q_index, txb_costs_ph->lps_ph_cost[br_ctx]);
+#endif // CONFIG_ADAPTIVE_HR
}
}
int64_t cost_cand = RDCOST(rdmult, rate_cand, dist_cand);
@@ -2704,6 +3033,10 @@
,
int plane
#endif // CONFIG_CHROMA_TX_COEFF_CODING
+#if CONFIG_ADAPTIVE_HR
+ ,
+ adaptive_hr_info *hr_info
+#endif // CONFIG_ADAPTIVE_HR
) {
int nzsbb = 0, sum_abs1 = 0;
for (int scan_idx = eob - 1; scan_idx > 0; --scan_idx) {
@@ -2728,6 +3061,10 @@
,
plane
#endif // CONFIG_CHROMA_TX_COEFF_CODING
+#if CONFIG_ADAPTIVE_HR
+ ,
+ hr_info
+#endif // CONFIG_ADAPTIVE_HR
);
if (!needtune && nzsbb >= PHTHRESH) {
@@ -2742,7 +3079,12 @@
if (nzsbb >= PHTHRESH) {
cost_hide_par(qcoeff[hidepos], dqcoeff[hidepos], tcoeff[hidepos], shift,
txb_costs, hidepos, txb_costs_ph, dc_sign_ctx, tx_class,
- levels, bwl, rdmult, dequant, iqmatrix, &t_cand_dc, rate_cur);
+ levels, bwl, rdmult, dequant, iqmatrix, &t_cand_dc, rate_cur
+#if CONFIG_ADAPTIVE_HR
+ ,
+ hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
}
// we change the level candidates to check the cost change.
@@ -2768,7 +3110,8 @@
qcoeff[tune_pos] = best->qcoeff;
dqcoeff[tune_pos] = best->dqcoeff;
*accu_rate += best->rate;
- levels[get_padded_idx(tune_pos, bwl)] = AOMMIN(abs(best->qcoeff), INT8_MAX);
+ levels[get_padded_idx(tune_pos, bwl)] =
+ AOMMIN(abs(best->qcoeff), UINT8_MAX);
return true;
}
@@ -2828,6 +3171,14 @@
coef_info[scan_idx].upround = false;
}
+#if CONFIG_ADAPTIVE_HR
+ adaptive_hr_info hr_info = { .tx_size = tx_size,
+ .tx_type = tx_type,
+ .qindex = xd->qindex[mbmi->segment_id],
+ .is_inter =
+ is_inter_block(mbmi, xd->tree_type) };
+#endif // CONFIG_ADAPTIVE_HR
+
const int rshift =
(sharpness +
(cpi->oxcf.q_cfg.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4
@@ -2899,7 +3250,12 @@
xd->tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane, coef_info, enable_parity_hiding);
+ plane, coef_info, enable_parity_hiding
+#if CONFIG_ADAPTIVE_HR
+ ,
+ &hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
--si;
} else {
assert(abs_qc == 1);
@@ -2912,7 +3268,12 @@
xd->tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane);
+ plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ levels, &hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
const tran_low_t tqc = tcoeff[ci];
const tran_low_t dqc = dqcoeff[ci];
const int64_t dist = get_coeff_dist(tqc, dqc, shift);
@@ -2933,7 +3294,12 @@
xd->tmp_sign
#endif
,
- plane, coef_info, enable_parity_hiding);
+ plane, coef_info, enable_parity_hiding
+#if CONFIG_ADAPTIVE_HR
+ ,
+ &hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
}
if (si == -1 && nz_num <= max_nz_num) {
update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost,
@@ -2943,8 +3309,12 @@
for (; si >= 1; --si) {
update_coeff_simple(&accu_rate, si, eob, tx_class, bwl, rdmult, shift,
dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
- levels, iqmatrix, coef_info, enable_parity_hiding,
- plane);
+ levels, iqmatrix, coef_info, enable_parity_hiding, plane
+#if CONFIG_ADAPTIVE_HR
+ ,
+ &hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
}
// DC position
@@ -2960,7 +3330,12 @@
xd->tmp_sign
#endif // CONFIG_CONTEXT_DERIVATION
,
- plane, coef_info, enable_parity_hiding);
+ plane, coef_info, enable_parity_hiding
+#if CONFIG_ADAPTIVE_HR
+ ,
+ &hr_info
+#endif // CONFIG_ADAPTIVE_HR
+ );
}
if (enable_parity_hiding) {
@@ -2971,6 +3346,10 @@
,
plane
#endif // CONFIG_CHROMA_TX_COEFF_CODING
+#if CONFIG_ADAPTIVE_HR
+ ,
+ &hr_info
+#endif // CONFIG_ADAPTIVE_HR
);
}
diff --git a/av1/encoder/x86/encodetxb_avx2.c b/av1/encoder/x86/encodetxb_avx2.c
index 44174c0..b241437 100644
--- a/av1/encoder/x86/encodetxb_avx2.c
+++ b/av1/encoder/x86/encodetxb_avx2.c
@@ -170,7 +170,7 @@
const __m256i c1 = yy_loadu_256(cf + 8);
const __m256i c0c1 = _mm256_packs_epi32(c0, c1);
const __m256i abs01 = _mm256_abs_epi16(c0c1);
- const __m256i abs01_8 = _mm256_packs_epi16(y_zeros, abs01);
+ const __m256i abs01_8 = _mm256_packus_epi16(y_zeros, abs01);
const __m256i sig01 = _mm256_sign_epi16(one16, c0c1);
const __m256i sig01_8 = _mm256_packs_epi16(sig01, y_zeros);
const __m256i res_ = _mm256_shuffle_epi32(abs01_8, 0xd8);
@@ -194,7 +194,7 @@
const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
const __m256i absAB = _mm256_abs_epi16(coeffAB);
const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
+ const __m256i absABCD = _mm256_packus_epi16(absAB, absCD);
const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
const __m128i res0 = _mm256_castsi256_si128(res);
@@ -237,7 +237,7 @@
const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
const __m256i absAB = _mm256_abs_epi16(coeffAB);
const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
+ const __m256i absABCD = _mm256_packus_epi16(absAB, absCD);
const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
const __m256i sigAB = _mm256_sign_epi16(one16, coeffAB);
@@ -269,7 +269,7 @@
const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
const __m256i absAB = _mm256_abs_epi16(coeffAB);
const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
+ const __m256i absABCD = _mm256_packus_epi16(absAB, absCD);
const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
const __m256i sigAB = _mm256_sign_epi16(one16, coeffAB);
@@ -311,7 +311,7 @@
const __m256i c0 = yy_loadu_256(cf);
const __m256i c1 = yy_loadu_256(cf + 8);
const __m256i abs01 = _mm256_abs_epi16(_mm256_packs_epi32(c0, c1));
- const __m256i abs01_8 = _mm256_packs_epi16(abs01, y_zeros);
+ const __m256i abs01_8 = _mm256_packus_epi16(abs01, y_zeros);
const __m256i res_ = _mm256_shuffle_epi32(abs01_8, 0xd8);
const __m256i res = _mm256_permute4x64_epi64(res_, 0xd8);
yy_storeu_256(ls, res);
@@ -329,7 +329,7 @@
const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
const __m256i absAB = _mm256_abs_epi16(coeffAB);
const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
+ const __m256i absABCD = _mm256_packus_epi16(absAB, absCD);
const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
const __m128i res0 = _mm256_castsi256_si128(res);
@@ -356,7 +356,7 @@
const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
const __m256i absAB = _mm256_abs_epi16(coeffAB);
const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
+ const __m256i absABCD = _mm256_packus_epi16(absAB, absCD);
const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
xx_storeu_128(ls, _mm256_castsi256_si128(res));
@@ -377,7 +377,7 @@
const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
const __m256i absAB = _mm256_abs_epi16(coeffAB);
const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
+ const __m256i absABCD = _mm256_packus_epi16(absAB, absCD);
const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
yy_storeu_256(ls, res);
diff --git a/av1/encoder/x86/encodetxb_sse4.c b/av1/encoder/x86/encodetxb_sse4.c
index d1b2dfe..8a7efcb 100644
--- a/av1/encoder/x86/encodetxb_sse4.c
+++ b/av1/encoder/x86/encodetxb_sse4.c
@@ -61,7 +61,7 @@
const __m128i absZA = _mm_abs_epi16(coeffZA);
const __m128i coeffZB = _mm_packs_epi32(zeros, coeffB);
const __m128i absZB = _mm_abs_epi16(coeffZB);
- const __m128i coeffAB = _mm_packs_epi16(absZA, absZB);
+ const __m128i coeffAB = _mm_packus_epi16(absZA, absZB);
xx_storeu_128(ls, coeffAB);
ls += ((stride) << 1);
cf += (width << 1);
@@ -75,7 +75,7 @@
const __m128i coeffB = xx_loadu_128(cf + 4);
const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
const __m128i absAB = _mm_abs_epi16(coeffAB);
- const __m128i absAB8 = _mm_packs_epi16(absAB, zeros);
+ const __m128i absAB8 = _mm_packus_epi16(absAB, zeros);
xx_storeu_128(ls, absAB8);
ls += stride - TX_PAD_LEFT;
cf += width;
@@ -95,7 +95,7 @@
const __m128i coeffCD = _mm_packs_epi32(coeffC, coeffD);
const __m128i absAB = _mm_abs_epi16(coeffAB);
const __m128i absCD = _mm_abs_epi16(coeffCD);
- const __m128i absABCD = _mm_packs_epi16(absAB, absCD);
+ const __m128i absABCD = _mm_packus_epi16(absAB, absCD);
xx_storeu_128(ls + j, absABCD);
j += 16;
cf += 16;
@@ -148,7 +148,7 @@
const __m128i coeffB = xx_loadu_128(cf + 4);
const __m128i absZA = _mm_abs_epi16(_mm_packs_epi32(zeros, coeffA));
const __m128i absZB = _mm_abs_epi16(_mm_packs_epi32(zeros, coeffB));
- const __m128i coeffAB = _mm_packs_epi16(absZA, absZB);
+ const __m128i coeffAB = _mm_packus_epi16(absZA, absZB);
const __m128i signZA =
_mm_sign_epi16(one16, _mm_packs_epi32(coeffA, zeros));
const __m128i signZB =
@@ -167,7 +167,7 @@
const __m128i coeffB = xx_loadu_128(cf + 4);
const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
const __m128i absAB = _mm_abs_epi16(coeffAB);
- const __m128i absAB8 = _mm_packs_epi16(absAB, zeros);
+ const __m128i absAB8 = _mm_packus_epi16(absAB, zeros);
const __m128i signAB = _mm_sign_epi16(one16, coeffAB);
const __m128i signAB8 = _mm_packs_epi16(signAB, zeros);
xx_storeu_128(ls, zeros);
@@ -193,7 +193,7 @@
const __m128i coeffCD = _mm_packs_epi32(coeffC, coeffD);
const __m128i absAB = _mm_abs_epi16(coeffAB);
const __m128i absCD = _mm_abs_epi16(coeffCD);
- const __m128i absABCD = _mm_packs_epi16(absAB, absCD);
+ const __m128i absABCD = _mm_packus_epi16(absAB, absCD);
const __m128i signAB = _mm_sign_epi16(one16, coeffAB);
const __m128i signCD = _mm_sign_epi16(one16, coeffCD);
const __m128i signABCD = _mm_packs_epi16(signAB, signCD);
@@ -233,7 +233,7 @@
const __m128i coeffB = xx_loadu_128(cf + 4);
const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
const __m128i absAB = _mm_abs_epi16(coeffAB);
- const __m128i absAB8 = _mm_packs_epi16(absAB, zeros);
+ const __m128i absAB8 = _mm_packus_epi16(absAB, zeros);
const __m128i lsAB = _mm_unpacklo_epi32(absAB8, zeros);
xx_storeu_128(ls, lsAB);
ls += (stride << 1);
@@ -246,7 +246,7 @@
const __m128i coeffB = xx_loadu_128(cf + 4);
const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
const __m128i absAB = _mm_abs_epi16(coeffAB);
- const __m128i absAB8 = _mm_packs_epi16(absAB, zeros);
+ const __m128i absAB8 = _mm_packus_epi16(absAB, zeros);
xx_storeu_128(ls, absAB8);
ls += stride;
cf += width;
@@ -264,7 +264,7 @@
const __m128i coeffCD = _mm_packs_epi32(coeffC, coeffD);
const __m128i absAB = _mm_abs_epi16(coeffAB);
const __m128i absCD = _mm_abs_epi16(coeffCD);
- const __m128i absABCD = _mm_packs_epi16(absAB, absCD);
+ const __m128i absABCD = _mm_packus_epi16(absAB, absCD);
xx_storeu_128(ls + j, absABCD);
j += 16;
cf += 16;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 659c356..3d308c3 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -339,6 +339,7 @@
"Keep optical flow refined MVs in TMVP list.")
set_aom_config_var(CONFIG_AFFINE_REFINEMENT 1
"Decoder side affine motion refinement.")
+set_aom_config_var(CONFIG_ADAPTIVE_HR 1 "AV2 new adaptive HR coefficient level coding.")
#
# Variables in this section control optional features of the build system.
#