Remove obsoleted skip_block for highbd_quantize_b

1. Remove skip_block from function parameter list
2. Remove branch for skip_block == 1
3. Function list:
   aom_highbd_quantize_b_{c,sse2,avx2}
   aom_highbd_quantize_b_32x32_{c,sse2,avx2}
   aom_highbd_quantize_b_64x64_{c,sse2,avx2}

Change-Id: I7aef5798997eaa9997d84ae6d2b48d1cea556ed8

commit: 6b9719890853cdc0ddfaf718a65790e81c3ae78a [log] [tgz]
author: Peng Bin <binpengsmail@gmail.com> Fri Sep 07 15:21:40 2018 +0800
committer: Yunqing Wang <yunqingwang@google.com> Fri Sep 07 16:54:37 2018 +0000
tree: 646eeea953ea791a170071977f4e95dc07775417
parent: 16f45be468e52086428e12d2ab8347681237ccf5 [diff]
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 50af6fd..81c9b34 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl

@@ -529,13 +529,13 @@
 }  # CONFIG_AV1_ENCODER
 
 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-  add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/aom_highbd_quantize_b sse2 avx2/;
 
-  add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/aom_highbd_quantize_b_32x32 sse2/;
 
-  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
 
 }  # CONFIG_AV1_ENCODER
 

diff --git a/aom_dsp/quantize.c b/aom_dsp/quantize.c
index e1601cc..d8151f0 100644
--- a/aom_dsp/quantize.c
+++ b/aom_dsp/quantize.c

@@ -77,8 +77,8 @@
 }
 
 void highbd_quantize_b_helper_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
@@ -95,42 +95,40 @@
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
 
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = 0; i < n_coeffs; i++) {
-      const int rc = scan[i];
-      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
-      const int coeff = coeff_ptr[rc] * wt;
+  // Pre-scan pass
+  for (i = 0; i < n_coeffs; i++) {
+    const int rc = scan[i];
+    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int coeff = coeff_ptr[rc] * wt;
 
-      // If the coefficient is out of the base ZBIN range, keep it for
-      // quantization.
-      if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
-          coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
-        idx_arr[idx++] = i;
-    }
+    // If the coefficient is out of the base ZBIN range, keep it for
+    // quantization.
+    if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
+        coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
+      idx_arr[idx++] = i;
+  }
 
-    // Quantization pass: only process the coefficients selected in
-    // pre-scan pass. Note: idx can be zero.
-    for (i = 0; i < idx; i++) {
-      const int rc = scan[idx_arr[i]];
-      const int coeff = coeff_ptr[rc];
-      const int coeff_sign = (coeff >> 31);
-      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
-      const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      const int64_t tmp1 =
-          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
-      const int64_t tmpw = tmp1 * wt;
-      const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
-      const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
-                                   (16 - log_scale + AOM_QM_BITS));
-      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-      dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
-                AOM_QM_BITS;
-      const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
-      dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
-      if (abs_qcoeff) eob = idx_arr[i];
-    }
+  // Quantization pass: only process the coefficients selected in
+  // pre-scan pass. Note: idx can be zero.
+  for (i = 0; i < idx; i++) {
+    const int rc = scan[idx_arr[i]];
+    const int coeff = coeff_ptr[rc];
+    const int coeff_sign = (coeff >> 31);
+    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+    const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+    const int64_t tmp1 =
+        abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
+    const int64_t tmpw = tmp1 * wt;
+    const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
+    const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
+                                 (16 - log_scale + AOM_QM_BITS));
+    qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+    dequant =
+        (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+    const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
+    dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
+    if (abs_qcoeff) eob = idx_arr[i];
   }
   *eob_ptr = eob + 1;
 }
@@ -174,38 +172,38 @@
 }
 
 void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             int skip_block, const int16_t *zbin_ptr,
-                             const int16_t *round_ptr, const int16_t *quant_ptr,
+                             const int16_t *zbin_ptr, const int16_t *round_ptr,
+                             const int16_t *quant_ptr,
                              const int16_t *quant_shift_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
                              const int16_t *scan, const int16_t *iscan) {
-  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
-                             round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+                             quant_ptr, quant_shift_ptr, qcoeff_ptr,
                              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                              NULL, NULL, 0);
 }
 
 void aom_highbd_quantize_b_32x32_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan) {
-  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
-                             round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+                             quant_ptr, quant_shift_ptr, qcoeff_ptr,
                              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                              NULL, NULL, 1);
 }
 
 void aom_highbd_quantize_b_64x64_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan) {
-  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
-                             round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+  highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+                             quant_ptr, quant_shift_ptr, qcoeff_ptr,
                              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
                              NULL, NULL, 2);
 }

diff --git a/aom_dsp/quantize.h b/aom_dsp/quantize.h
index a47bd05..47e78f3 100644
--- a/aom_dsp/quantize.h
+++ b/aom_dsp/quantize.h

@@ -38,16 +38,16 @@
                       const int16_t *iscan);
 
 void highbd_quantize_b_helper_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
     const qm_val_t *iqm_ptr, const int log_scale);
 
 void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             int skip_block, const int16_t *zbin_ptr,
-                             const int16_t *round_ptr, const int16_t *quant_ptr,
+                             const int16_t *zbin_ptr, const int16_t *round_ptr,
+                             const int16_t *quant_ptr,
                              const int16_t *quant_shift_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,

diff --git a/aom_dsp/x86/highbd_quantize_intrin_avx2.c b/aom_dsp/x86/highbd_quantize_intrin_avx2.c
index dea113a..b968920 100644
--- a/aom_dsp/x86/highbd_quantize_intrin_avx2.c
+++ b/aom_dsp/x86/highbd_quantize_intrin_avx2.c

@@ -110,7 +110,7 @@
 }
 
 void aom_highbd_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                                int skip_block, const int16_t *zbin_ptr,
+                                const int16_t *zbin_ptr,
                                 const int16_t *round_ptr,
                                 const int16_t *quant_ptr,
                                 const int16_t *quant_shift_ptr,
@@ -120,12 +120,23 @@
   (void)scan;
   const unsigned int step = 8;
 
-  if (LIKELY(!skip_block)) {
-    __m256i qp[5], coeff;
-    init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp);
-    coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
+  __m256i qp[5], coeff;
+  init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp);
+  coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
 
-    __m256i eob = _mm256_setzero_si256();
+  __m256i eob = _mm256_setzero_si256();
+  quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+  coeff_ptr += step;
+  qcoeff_ptr += step;
+  dqcoeff_ptr += step;
+  iscan += step;
+  n_coeffs -= step;
+
+  update_qp(qp);
+
+  while (n_coeffs > 0) {
+    coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
     quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
 
     coeff_ptr += step;
@@ -133,40 +144,17 @@
     dqcoeff_ptr += step;
     iscan += step;
     n_coeffs -= step;
-
-    update_qp(qp);
-
-    while (n_coeffs > 0) {
-      coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
-      quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-      coeff_ptr += step;
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      iscan += step;
-      n_coeffs -= step;
-    }
-    {
-      __m256i eob_s;
-      eob_s = _mm256_shuffle_epi32(eob, 0xe);
-      eob = _mm256_max_epi16(eob, eob_s);
-      eob_s = _mm256_shufflelo_epi16(eob, 0xe);
-      eob = _mm256_max_epi16(eob, eob_s);
-      eob_s = _mm256_shufflelo_epi16(eob, 1);
-      eob = _mm256_max_epi16(eob, eob_s);
-      const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
-                                              _mm256_extractf128_si256(eob, 1));
-      *eob_ptr = _mm_extract_epi16(final_eob, 0);
-    }
-  } else {
-    do {
-      const __m256i zero = _mm256_setzero_si256();
-      _mm256_storeu_si256((__m256i *)qcoeff_ptr, zero);
-      _mm256_storeu_si256((__m256i *)dqcoeff_ptr, zero);
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      n_coeffs -= step;
-    } while (n_coeffs > 0);
-    *eob_ptr = 0;
+  }
+  {
+    __m256i eob_s;
+    eob_s = _mm256_shuffle_epi32(eob, 0xe);
+    eob = _mm256_max_epi16(eob, eob_s);
+    eob_s = _mm256_shufflelo_epi16(eob, 0xe);
+    eob = _mm256_max_epi16(eob, eob_s);
+    eob_s = _mm256_shufflelo_epi16(eob, 1);
+    eob = _mm256_max_epi16(eob, eob_s);
+    const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
+                                            _mm256_extractf128_si256(eob, 1));
+    *eob_ptr = _mm_extract_epi16(final_eob, 0);
   }
 }

diff --git a/aom_dsp/x86/highbd_quantize_intrin_sse2.c b/aom_dsp/x86/highbd_quantize_intrin_sse2.c
index 5570ca5..58e5f98 100644
--- a/aom_dsp/x86/highbd_quantize_intrin_sse2.c
+++ b/aom_dsp/x86/highbd_quantize_intrin_sse2.c

@@ -16,7 +16,7 @@
 #include "aom_ports/mem.h"
 
 void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
-                                int skip_block, const int16_t *zbin_ptr,
+                                const int16_t *zbin_ptr,
                                 const int16_t *round_ptr,
                                 const int16_t *quant_ptr,
                                 const int16_t *quant_shift_ptr,
@@ -41,50 +41,48 @@
   memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
 
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = ((int)count / 4) - 1; i >= 0; i--) {
-      __m128i coeffs, cmp1, cmp2;
-      int test;
-      coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
-      cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
-      cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
-      cmp1 = _mm_and_si128(cmp1, cmp2);
-      test = _mm_movemask_epi8(cmp1);
-      if (test == 0xffff)
-        non_zero_regs--;
-      else
-        break;
-    }
+  // Pre-scan pass
+  for (i = ((int)count / 4) - 1; i >= 0; i--) {
+    __m128i coeffs, cmp1, cmp2;
+    int test;
+    coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+    cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
+    cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
+    cmp1 = _mm_and_si128(cmp1, cmp2);
+    test = _mm_movemask_epi8(cmp1);
+    if (test == 0xffff)
+      non_zero_regs--;
+    else
+      break;
+  }
 
-    // Quantization pass:
-    for (i = 0; i < non_zero_regs; i++) {
-      __m128i coeffs, coeffs_sign, tmp1, tmp2;
-      int test;
-      int abs_coeff[4];
-      int coeff_sign[4];
+  // Quantization pass:
+  for (i = 0; i < non_zero_regs; i++) {
+    __m128i coeffs, coeffs_sign, tmp1, tmp2;
+    int test;
+    int abs_coeff[4];
+    int coeff_sign[4];
 
-      coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
-      coeffs_sign = _mm_srai_epi32(coeffs, 31);
-      coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);
-      tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
-      tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
-      tmp1 = _mm_or_si128(tmp1, tmp2);
-      test = _mm_movemask_epi8(tmp1);
-      _mm_storeu_si128((__m128i *)abs_coeff, coeffs);
-      _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);
+    coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+    coeffs_sign = _mm_srai_epi32(coeffs, 31);
+    coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);
+    tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
+    tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
+    tmp1 = _mm_or_si128(tmp1, tmp2);
+    test = _mm_movemask_epi8(tmp1);
+    _mm_storeu_si128((__m128i *)abs_coeff, coeffs);
+    _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);
 
-      for (j = 0; j < 4; j++) {
-        if (test & (1 << (4 * j))) {
-          int k = 4 * i + j;
-          const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
-          const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
-          const uint32_t abs_qcoeff =
-              (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
-          qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
-          dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
-          if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
-        }
+    for (j = 0; j < 4; j++) {
+      if (test & (1 << (4 * j))) {
+        int k = 4 * i + j;
+        const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
+        const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
+        const uint32_t abs_qcoeff =
+            (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
+        qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
+        dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
+        if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
       }
     }
   }
@@ -92,8 +90,8 @@
 }
 
 void aom_highbd_quantize_b_32x32_sse2(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan) {
@@ -116,38 +114,35 @@
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
 
-  if (!skip_block) {
-    // Pre-scan pass
-    for (i = 0; i < n_coeffs / 4; i++) {
-      __m128i coeffs, cmp1, cmp2;
-      int test;
-      coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
-      cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
-      cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
-      cmp1 = _mm_and_si128(cmp1, cmp2);
-      test = _mm_movemask_epi8(cmp1);
-      if (!(test & 0xf)) idx_arr[idx++] = i * 4;
-      if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1;
-      if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2;
-      if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3;
-    }
+  // Pre-scan pass
+  for (i = 0; i < n_coeffs / 4; i++) {
+    __m128i coeffs, cmp1, cmp2;
+    int test;
+    coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+    cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
+    cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
+    cmp1 = _mm_and_si128(cmp1, cmp2);
+    test = _mm_movemask_epi8(cmp1);
+    if (!(test & 0xf)) idx_arr[idx++] = i * 4;
+    if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1;
+    if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2;
+    if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3;
+  }
 
-    // Quantization pass: only process the coefficients selected in
-    // pre-scan pass. Note: idx can be zero.
-    for (i = 0; i < idx; i++) {
-      const int rc = idx_arr[i];
-      const int coeff = coeff_ptr[rc];
-      const int coeff_sign = (coeff >> 31);
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      const int64_t tmp1 =
-          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
-      const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
-      const uint32_t abs_qcoeff =
-          (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
-      qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
-      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
-      if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
-    }
+  // Quantization pass: only process the coefficients selected in
+  // pre-scan pass. Note: idx can be zero.
+  for (i = 0; i < idx; i++) {
+    const int rc = idx_arr[i];
+    const int coeff = coeff_ptr[rc];
+    const int coeff_sign = (coeff >> 31);
+    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+    const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+    const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
+    const uint32_t abs_qcoeff =
+        (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
+    qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
+    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+    if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
   }
   *eob_ptr = eob + 1;
 }

diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index d0477b3..f971e56 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c

@@ -392,28 +392,25 @@
                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
                                   const SCAN_ORDER *sc,
                                   const QUANT_PARAM *qparam) {
-  // obsolete skip_block
-  const int skip_block = 0;
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
   if (qm_ptr != NULL && iqm_ptr != NULL) {
-    highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
-                               p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
-                               qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-                               sc->scan, sc->iscan, qm_ptr, iqm_ptr,
-                               qparam->log_scale);
+    highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+                               p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+                               dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+                               sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
   } else {
     switch (qparam->log_scale) {
       case 0:
         if (LIKELY(n_coeffs >= 8)) {
-          aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
-                                p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
-                                qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
-                                eob_ptr, sc->scan, sc->iscan);
+          aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+                                p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+                                dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+                                sc->iscan);
         } else {
           // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
           // quantization
-          aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
+          aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, p->zbin_QTX,
                                   p->round_QTX, p->quant_QTX,
                                   p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
                                   p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
@@ -421,15 +418,15 @@
         break;
       case 1:
         aom_highbd_quantize_b_32x32(
-            coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_QTX,
-            p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-            p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
+            coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
+            p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
+            eob_ptr, sc->scan, sc->iscan);
         break;
       case 2:
         aom_highbd_quantize_b_64x64(
-            coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_QTX,
-            p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-            p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
+            coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
+            p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
+            eob_ptr, sc->scan, sc->iscan);
         break;
       default: assert(0);
     }

diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 97e73bf..d1c9ed7 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc

@@ -27,15 +27,7 @@
 namespace {
 using libaom_test::ACMRandom;
 
-#define QUAN_PARAM_LIST                                                   \
-  const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,         \
-      const int16_t *zbin_ptr, const int16_t *round_ptr,                  \
-      const int16_t *quant_ptr, const int16_t *quant_shift_ptr,           \
-      tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,                    \
-      const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, \
-      const int16_t *iscan
-
-#define QUAN_PARAM_LIST_NO_SKIP                                               \
+#define QUAN_PARAM_LIST                                                       \
   const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,    \
       const int16_t *round_ptr, const int16_t *quant_ptr,                     \
       const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,                 \
@@ -43,44 +35,34 @@
       const int16_t *scan, const int16_t *iscan
 
 typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);
-typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST_NO_SKIP, int log_scale);
-typedef void (*QuantizeFuncNoSkip)(QUAN_PARAM_LIST_NO_SKIP);
+typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);
 
 #define HBD_QUAN_FUNC                                                      \
   fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
      qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale)
 
-#define LBD_QUAN_FUNC_NO_SKIP                                              \
+#define LBD_QUAN_FUNC                                                      \
   fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
      qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)
 
 template <QuantizeFuncHbd fn>
 void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
-  (void)skip_block;
   const int log_scale = 0;
   HBD_QUAN_FUNC;
 }
 
 template <QuantizeFuncHbd fn>
 void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) {
-  (void)skip_block;
   const int log_scale = 1;
   HBD_QUAN_FUNC;
 }
 
 template <QuantizeFuncHbd fn>
 void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) {
-  (void)skip_block;
   const int log_scale = 2;
   HBD_QUAN_FUNC;
 }
 
-template <QuantizeFuncNoSkip fn>
-void lowbd_quan_wrapper(QUAN_PARAM_LIST) {
-  (void)skip_block;
-  LBD_QUAN_FUNC_NO_SKIP;
-}
-
 typedef enum { TYPE_B, TYPE_DC, TYPE_FP } QuantType;
 
 using ::testing::tuple;
@@ -125,7 +107,6 @@
   void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) {
     tran_low_t *coeff_ptr = coeff_;
     const intptr_t n_coeffs = coeff_num();
-    const int skip_block = 0;
 
     tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
     tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
@@ -158,13 +139,13 @@
 
       memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref));
 
-      quant_ref_(coeff_ptr, n_coeffs, skip_block, zbin, round, quant,
-                 quant_shift, qcoeff_ref, dqcoeff_ref, dequant, &eob[0],
-                 sc->scan, sc->iscan);
+      quant_ref_(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift,
+                 qcoeff_ref, dqcoeff_ref, dequant, &eob[0], sc->scan,
+                 sc->iscan);
 
-      ASM_REGISTER_STATE_CHECK(quant_(
-          coeff_ptr, n_coeffs, skip_block, zbin, round, quant, quant_shift,
-          qcoeff, dqcoeff, dequant, &eob[1], sc->scan, sc->iscan));
+      ASM_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, zbin, round, quant,
+                                      quant_shift, qcoeff, dqcoeff, dequant,
+                                      &eob[1], sc->scan, sc->iscan));
 
       for (int j = 0; j < n_coeffs; ++j) {
         ASSERT_EQ(qcoeff_ref[j], qcoeff[j])
@@ -286,7 +267,6 @@
 TEST_P(QuantizeTest, DISABLED_Speed) {
   tran_low_t *coeff_ptr = coeff_;
   const intptr_t n_coeffs = coeff_num();
-  const int skip_block = 0;
 
   tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
   tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
@@ -312,8 +292,8 @@
 
   aom_usec_timer_start(&timer);
   for (int n = 0; n < kNumTests; ++n) {
-    quant_(coeff_ptr, n_coeffs, skip_block, zbin, round_fp, quant_fp,
-           quant_shift, qcoeff, dqcoeff, dequant, eob, sc->scan, sc->iscan);
+    quant_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, qcoeff,
+           dqcoeff, dequant, eob, sc->scan, sc->iscan);
   }
   aom_usec_timer_mark(&timer);
 
@@ -325,33 +305,24 @@
 
 #if HAVE_AVX2
 const QuantizeParam kQParamArrayAvx2[] = {
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_16X16, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_16X16, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_4X16, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_4X16, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_16X4, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_16X4, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_32X8, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_32X8, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_8X32, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_8X32, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_32X32, TYPE_FP,
-             AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_16X64, TYPE_FP,
-             AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_64X16, TYPE_FP,
-             AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_64x64_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_64x64_avx2>, TX_64X64, TYPE_FP,
-             AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_32X32,
+             TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_16X64,
+             TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_64X16,
+             TYPE_FP, AOM_BITS_8),
+  make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2, TX_64X64,
+             TYPE_FP, AOM_BITS_8),
   make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
              &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, TX_16X16,
              TYPE_FP, AOM_BITS_8),
@@ -393,20 +364,15 @@
 
 #if HAVE_SSE2
 const QuantizeParam kQParamArraySSE2[] = {
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_16X16, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X16, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_4X16, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_4X16, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_16X4, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X4, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_8X32, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_8X32, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
-             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_32X8, TYPE_FP,
+  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_32X8, TYPE_FP,
              AOM_BITS_8),
   make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, TX_16X16,
              TYPE_B, AOM_BITS_8),
commit	6b9719890853cdc0ddfaf718a65790e81c3ae78a	[log] [tgz]
author	Peng Bin <binpengsmail@gmail.com>	Fri Sep 07 15:21:40 2018 +0800
committer	Yunqing Wang <yunqingwang@google.com>	Fri Sep 07 16:54:37 2018 +0000
tree	646eeea953ea791a170071977f4e95dc07775417
parent	16f45be468e52086428e12d2ab8347681237ccf5 [diff]