Remove obsoleted skip_block for quantize_fp 1. Remove skip_block from function parameter list 2. Remove branch for skip_block == 1 3. Function list av1_quantize_fp_{c,sse2,avx2} av1_quantize_fp_32x32{c,avx2} av1_quantize_fp_64x64{c,avx2} Change-Id: Ic0f336fb5caa5a246978dc14a832b2985a4a3102

commit: 437cbaef5fad93e5a4cd4cc798a0bec3f93b8a12 [log] [tgz]
author: Peng Bin <binpengsmail@gmail.com> Thu Apr 19 20:39:22 2018 +0800
committer: Angie Chiang <angiebird@google.com> Fri Apr 20 21:48:01 2018 +0000
tree: 552c06a2e3a593e0f15e36cc3e2e392796cb9baa
parent: c08f949e2f41bcdea4d288d7c63922792cc3c7e5 [diff]
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index acfe4ab..1278a90 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl

@@ -166,13 +166,13 @@
   add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
   specialize qw/av1_block_error avx2/;
 
-  add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/av1_quantize_fp sse2 avx2/;
 
-  add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/av1_quantize_fp_32x32 avx2/;
 
-  add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+  add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/av1_quantize_fp_64x64 avx2/;
 
   # fdct functions

diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index 757c9f0..0264951 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c

@@ -32,8 +32,8 @@
 }
 
 static void quantize_fp_helper_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
@@ -47,7 +47,7 @@
   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
 
-  if (!skip_block && qm_ptr == NULL && iqm_ptr == NULL) {
+  if (qm_ptr == NULL && iqm_ptr == NULL) {
     const int rounding0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
     {  // rc == 0
       const int coeff = coeff_ptr[0];
@@ -81,7 +81,7 @@
         }
       }
     }
-  } else if (!skip_block) {
+  } else {
     // Quantization pass: All coefficients with index >= zero_flag are
     // skippable. Note: zero_flag can be zero.
     for (i = 0; i < n_coeffs; i++) {
@@ -195,62 +195,58 @@
 }
 
 void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                       int skip_block, const int16_t *zbin_ptr,
-                       const int16_t *round_ptr, const int16_t *quant_ptr,
-                       const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-                       tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
-                       uint16_t *eob_ptr, const int16_t *scan,
-                       const int16_t *iscan) {
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
-                       quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                       dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
+                       const int16_t *zbin_ptr, const int16_t *round_ptr,
+                       const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                       const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                       const int16_t *scan, const int16_t *iscan) {
+  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
+                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
+                       eob_ptr, scan, iscan, NULL, NULL, 0);
 }
 
 void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             int skip_block, const int16_t *zbin_ptr,
-                             const int16_t *round_ptr, const int16_t *quant_ptr,
+                             const int16_t *zbin_ptr, const int16_t *round_ptr,
+                             const int16_t *quant_ptr,
                              const int16_t *quant_shift_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
                              const int16_t *scan, const int16_t *iscan) {
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
-                       quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                       dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
+  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
+                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
+                       eob_ptr, scan, iscan, NULL, NULL, 1);
 }
 
 void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             int skip_block, const int16_t *zbin_ptr,
-                             const int16_t *round_ptr, const int16_t *quant_ptr,
+                             const int16_t *zbin_ptr, const int16_t *round_ptr,
+                             const int16_t *quant_ptr,
                              const int16_t *quant_shift_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
                              const int16_t *scan, const int16_t *iscan) {
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
-                       quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                       dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
+  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
+                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
+                       eob_ptr, scan, iscan, NULL, NULL, 2);
 }
 
 void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                             const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
                             tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
                             const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
-  // obsolete skip_block
-  const int skip_block = 0;
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
   if (qm_ptr != NULL && iqm_ptr != NULL) {
-    quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
-                         p->round_fp_QTX, p->quant_fp_QTX, p->quant_shift_QTX,
-                         qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-                         sc->scan, sc->iscan, qm_ptr, iqm_ptr,
-                         qparam->log_scale);
+    quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
+                         p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
+                         dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+                         sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
   } else {
     switch (qparam->log_scale) {
       case 0:
         if (n_coeffs < 16) {
           // TODO(jingning): Need SIMD implementation for smaller block size
           // quantization.
-          quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
+          quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX,
                                p->round_fp_QTX, p->quant_fp_QTX,
                                p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
                                p->dequant_QTX, eob_ptr, sc->scan, sc->iscan,
@@ -258,34 +254,34 @@
         } else {
           if (qparam->tx_size == TX_4X16 || qparam->tx_size == TX_16X4 ||
               qparam->tx_size == TX_8X32 || qparam->tx_size == TX_32X8)
-            av1_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
-                              p->round_fp_QTX, p->quant_fp_QTX,
-                              p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-                              p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
+            av1_quantize_fp_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
+                              p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
+                              dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+                              sc->iscan);
           else
-            av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
-                            p->round_fp_QTX, p->quant_fp_QTX,
-                            p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-                            p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
+            av1_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
+                            p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
+                            dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+                            sc->iscan);
         }
         break;
       case 1:
         if (qparam->tx_size == TX_16X64 || qparam->tx_size == TX_64X16)
-          av1_quantize_fp_32x32_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
+          av1_quantize_fp_32x32_c(coeff_ptr, n_coeffs, p->zbin_QTX,
                                   p->round_fp_QTX, p->quant_fp_QTX,
                                   p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
                                   p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
         else
-          av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
+          av1_quantize_fp_32x32(coeff_ptr, n_coeffs, p->zbin_QTX,
                                 p->round_fp_QTX, p->quant_fp_QTX,
                                 p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
                                 p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
         break;
       case 2:
-        av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
-                              p->round_fp_QTX, p->quant_fp_QTX,
-                              p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-                              p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
+        av1_quantize_fp_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
+                              p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
+                              dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+                              sc->iscan);
         break;
       default: assert(0);
     }

diff --git a/av1/encoder/x86/av1_quantize_avx2.c b/av1/encoder/x86/av1_quantize_avx2.c
index fd7a081..b572d03 100644
--- a/av1/encoder/x86/av1_quantize_avx2.c
+++ b/av1/encoder/x86/av1_quantize_avx2.c

@@ -132,8 +132,8 @@
 }
 
 void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                          int skip_block, const int16_t *zbin_ptr,
-                          const int16_t *round_ptr, const int16_t *quant_ptr,
+                          const int16_t *zbin_ptr, const int16_t *round_ptr,
+                          const int16_t *quant_ptr,
                           const int16_t *quant_shift_ptr,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t *dequant_ptr, uint16_t *eob_ptr,
@@ -143,15 +143,26 @@
   (void)quant_shift_ptr;
   const unsigned int step = 16;
 
-  if (LIKELY(!skip_block)) {
-    __m256i qp[3];
-    __m256i coeff, thr;
-    const int log_scale = 0;
+  __m256i qp[3];
+  __m256i coeff, thr;
+  const int log_scale = 0;
 
-    init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
+  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
+  read_coeff(coeff_ptr, &coeff);
+
+  __m256i eob = _mm256_setzero_si256();
+  quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+  coeff_ptr += step;
+  qcoeff_ptr += step;
+  dqcoeff_ptr += step;
+  iscan_ptr += step;
+  n_coeffs -= step;
+
+  update_qp(log_scale, &thr, qp);
+
+  while (n_coeffs > 0) {
     read_coeff(coeff_ptr, &coeff);
-
-    __m256i eob = _mm256_setzero_si256();
     quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
 
     coeff_ptr += step;
@@ -159,30 +170,8 @@
     dqcoeff_ptr += step;
     iscan_ptr += step;
     n_coeffs -= step;
-
-    update_qp(log_scale, &thr, qp);
-
-    while (n_coeffs > 0) {
-      read_coeff(coeff_ptr, &coeff);
-      quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-      coeff_ptr += step;
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      iscan_ptr += step;
-      n_coeffs -= step;
-    }
-    *eob_ptr = quant_gather_eob(eob);
-  } else {
-    do {
-      write_zero(qcoeff_ptr);
-      write_zero(dqcoeff_ptr);
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      n_coeffs -= step;
-    } while (n_coeffs > 0);
-    *eob_ptr = 0;
   }
+  *eob_ptr = quant_gather_eob(eob);
 }
 
 static INLINE void quantize_32x32(const __m256i *thr, const __m256i *qp,
@@ -219,8 +208,8 @@
 }
 
 void av1_quantize_fp_32x32_avx2(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan_ptr, const int16_t *iscan_ptr) {
@@ -229,15 +218,26 @@
   (void)quant_shift_ptr;
   const unsigned int step = 16;
 
-  if (LIKELY(!skip_block)) {
-    __m256i qp[3];
-    __m256i coeff, thr;
-    const int log_scale = 1;
+  __m256i qp[3];
+  __m256i coeff, thr;
+  const int log_scale = 1;
 
-    init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
+  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
+  read_coeff(coeff_ptr, &coeff);
+
+  __m256i eob = _mm256_setzero_si256();
+  quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+  coeff_ptr += step;
+  qcoeff_ptr += step;
+  dqcoeff_ptr += step;
+  iscan_ptr += step;
+  n_coeffs -= step;
+
+  update_qp(log_scale, &thr, qp);
+
+  while (n_coeffs > 0) {
     read_coeff(coeff_ptr, &coeff);
-
-    __m256i eob = _mm256_setzero_si256();
     quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
 
     coeff_ptr += step;
@@ -245,31 +245,8 @@
     dqcoeff_ptr += step;
     iscan_ptr += step;
     n_coeffs -= step;
-
-    update_qp(log_scale, &thr, qp);
-
-    while (n_coeffs > 0) {
-      read_coeff(coeff_ptr, &coeff);
-      quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr,
-                     &eob);
-
-      coeff_ptr += step;
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      iscan_ptr += step;
-      n_coeffs -= step;
-    }
-    *eob_ptr = quant_gather_eob(eob);
-  } else {
-    do {
-      write_zero(qcoeff_ptr);
-      write_zero(dqcoeff_ptr);
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      n_coeffs -= step;
-    } while (n_coeffs > 0);
-    *eob_ptr = 0;
   }
+  *eob_ptr = quant_gather_eob(eob);
 }
 
 static INLINE void quantize_64x64(const __m256i *thr, const __m256i *qp,
@@ -310,8 +287,8 @@
 }
 
 void av1_quantize_fp_64x64_avx2(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan_ptr, const int16_t *iscan_ptr) {
@@ -320,15 +297,26 @@
   (void)quant_shift_ptr;
   const unsigned int step = 16;
 
-  if (LIKELY(!skip_block)) {
-    __m256i qp[3];
-    __m256i coeff, thr;
-    const int log_scale = 2;
+  __m256i qp[3];
+  __m256i coeff, thr;
+  const int log_scale = 2;
 
-    init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
+  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
+  read_coeff(coeff_ptr, &coeff);
+
+  __m256i eob = _mm256_setzero_si256();
+  quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+  coeff_ptr += step;
+  qcoeff_ptr += step;
+  dqcoeff_ptr += step;
+  iscan_ptr += step;
+  n_coeffs -= step;
+
+  update_qp(log_scale, &thr, qp);
+
+  while (n_coeffs > 0) {
     read_coeff(coeff_ptr, &coeff);
-
-    __m256i eob = _mm256_setzero_si256();
     quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
 
     coeff_ptr += step;
@@ -336,29 +324,6 @@
     dqcoeff_ptr += step;
     iscan_ptr += step;
     n_coeffs -= step;
-
-    update_qp(log_scale, &thr, qp);
-
-    while (n_coeffs > 0) {
-      read_coeff(coeff_ptr, &coeff);
-      quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr,
-                     &eob);
-
-      coeff_ptr += step;
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      iscan_ptr += step;
-      n_coeffs -= step;
-    }
-    *eob_ptr = quant_gather_eob(eob);
-  } else {
-    do {
-      write_zero(qcoeff_ptr);
-      write_zero(dqcoeff_ptr);
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      n_coeffs -= step;
-    } while (n_coeffs > 0);
-    *eob_ptr = 0;
   }
+  *eob_ptr = quant_gather_eob(eob);
 }

diff --git a/av1/encoder/x86/av1_quantize_sse2.c b/av1/encoder/x86/av1_quantize_sse2.c
index 4f7c095..0f10e38 100644
--- a/av1/encoder/x86/av1_quantize_sse2.c
+++ b/av1/encoder/x86/av1_quantize_sse2.c

@@ -68,8 +68,8 @@
 }
 
 void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                          int skip_block, const int16_t *zbin_ptr,
-                          const int16_t *round_ptr, const int16_t *quant_ptr,
+                          const int16_t *zbin_ptr, const int16_t *round_ptr,
+                          const int16_t *quant_ptr,
                           const int16_t *quant_shift_ptr,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t *dequant_ptr, uint16_t *eob_ptr,
@@ -88,39 +88,104 @@
   n_coeffs = -n_coeffs;
   zero = _mm_setzero_si128();
 
-  if (!skip_block) {
-    __m128i eob;
-    __m128i round, quant, dequant;
+  __m128i eob;
+  __m128i round, quant, dequant;
+  {
+    __m128i coeff0, coeff1;
+
+    // Setup global values
     {
-      __m128i coeff0, coeff1;
+      round = _mm_load_si128((const __m128i *)round_ptr);
+      quant = _mm_load_si128((const __m128i *)quant_ptr);
+      dequant = _mm_load_si128((const __m128i *)dequant_ptr);
+    }
 
-      // Setup global values
-      {
-        round = _mm_load_si128((const __m128i *)round_ptr);
-        quant = _mm_load_si128((const __m128i *)quant_ptr);
-        dequant = _mm_load_si128((const __m128i *)dequant_ptr);
-      }
+    {
+      __m128i coeff0_sign, coeff1_sign;
+      __m128i qcoeff0, qcoeff1;
+      __m128i qtmp0, qtmp1;
+      // Do DC and first 15 AC
+      read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1);
 
-      {
-        __m128i coeff0_sign, coeff1_sign;
-        __m128i qcoeff0, qcoeff1;
-        __m128i qtmp0, qtmp1;
-        // Do DC and first 15 AC
-        read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1);
+      // Poor man's sign extract
+      coeff0_sign = _mm_srai_epi16(coeff0, 15);
+      coeff1_sign = _mm_srai_epi16(coeff1, 15);
+      qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+      qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+      qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+      qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
 
-        // Poor man's sign extract
-        coeff0_sign = _mm_srai_epi16(coeff0, 15);
-        coeff1_sign = _mm_srai_epi16(coeff1, 15);
-        qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
-        qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
-        qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
-        qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+      qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+      round = _mm_unpackhi_epi64(round, round);
+      qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+      qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+      quant = _mm_unpackhi_epi64(quant, quant);
+      qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
 
+      // Reinsert signs
+      qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+      qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+      qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+      qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+      write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs);
+
+      coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+      dequant = _mm_unpackhi_epi64(dequant, dequant);
+      coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+      write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs);
+    }
+
+    {
+      // Scan for eob
+      __m128i zero_coeff0, zero_coeff1;
+      __m128i nzero_coeff0, nzero_coeff1;
+      __m128i iscan0, iscan1;
+      __m128i eob1;
+      zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+      zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+      nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+      nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+      iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+      iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+      // Add one to convert from indices to counts
+      iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+      iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+      eob = _mm_and_si128(iscan0, nzero_coeff0);
+      eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+      eob = _mm_max_epi16(eob, eob1);
+    }
+    n_coeffs += 8 * 2;
+  }
+
+  thr = _mm_srai_epi16(dequant, 1);
+
+  // AC only loop
+  while (n_coeffs < 0) {
+    __m128i coeff0, coeff1;
+    {
+      __m128i coeff0_sign, coeff1_sign;
+      __m128i qcoeff0, qcoeff1;
+      __m128i qtmp0, qtmp1;
+
+      read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1);
+
+      // Poor man's sign extract
+      coeff0_sign = _mm_srai_epi16(coeff0, 15);
+      coeff1_sign = _mm_srai_epi16(coeff1, 15);
+      qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+      qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+      qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+      qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+      nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
+               _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
+
+      if (nzflag) {
         qcoeff0 = _mm_adds_epi16(qcoeff0, round);
-        round = _mm_unpackhi_epi64(round, round);
         qcoeff1 = _mm_adds_epi16(qcoeff1, round);
         qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
-        quant = _mm_unpackhi_epi64(quant, quant);
         qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
 
         // Reinsert signs
@@ -132,121 +197,47 @@
         write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs);
 
         coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
-        dequant = _mm_unpackhi_epi64(dequant, dequant);
         coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
 
         write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs);
+      } else {
+        write_zero(qcoeff_ptr, n_coeffs);
+        write_zero(dqcoeff_ptr, n_coeffs);
       }
-
-      {
-        // Scan for eob
-        __m128i zero_coeff0, zero_coeff1;
-        __m128i nzero_coeff0, nzero_coeff1;
-        __m128i iscan0, iscan1;
-        __m128i eob1;
-        zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
-        zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
-        nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
-        nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-        iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
-        iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
-        // Add one to convert from indices to counts
-        iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
-        iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
-        eob = _mm_and_si128(iscan0, nzero_coeff0);
-        eob1 = _mm_and_si128(iscan1, nzero_coeff1);
-        eob = _mm_max_epi16(eob, eob1);
-      }
-      n_coeffs += 8 * 2;
     }
 
-    thr = _mm_srai_epi16(dequant, 1);
-
-    // AC only loop
-    while (n_coeffs < 0) {
-      __m128i coeff0, coeff1;
-      {
-        __m128i coeff0_sign, coeff1_sign;
-        __m128i qcoeff0, qcoeff1;
-        __m128i qtmp0, qtmp1;
-
-        read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1);
-
-        // Poor man's sign extract
-        coeff0_sign = _mm_srai_epi16(coeff0, 15);
-        coeff1_sign = _mm_srai_epi16(coeff1, 15);
-        qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
-        qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
-        qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
-        qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
-        nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
-                 _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
-
-        if (nzflag) {
-          qcoeff0 = _mm_adds_epi16(qcoeff0, round);
-          qcoeff1 = _mm_adds_epi16(qcoeff1, round);
-          qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
-          qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
-
-          // Reinsert signs
-          qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
-          qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
-          qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
-          qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
-          write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs);
-
-          coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
-          coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
-          write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs);
-        } else {
-          write_zero(qcoeff_ptr, n_coeffs);
-          write_zero(dqcoeff_ptr, n_coeffs);
-        }
-      }
-
-      if (nzflag) {
-        // Scan for eob
-        __m128i zero_coeff0, zero_coeff1;
-        __m128i nzero_coeff0, nzero_coeff1;
-        __m128i iscan0, iscan1;
-        __m128i eob0, eob1;
-        zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
-        zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
-        nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
-        nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-        iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
-        iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
-        // Add one to convert from indices to counts
-        iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
-        iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
-        eob0 = _mm_and_si128(iscan0, nzero_coeff0);
-        eob1 = _mm_and_si128(iscan1, nzero_coeff1);
-        eob0 = _mm_max_epi16(eob0, eob1);
-        eob = _mm_max_epi16(eob, eob0);
-      }
-      n_coeffs += 8 * 2;
+    if (nzflag) {
+      // Scan for eob
+      __m128i zero_coeff0, zero_coeff1;
+      __m128i nzero_coeff0, nzero_coeff1;
+      __m128i iscan0, iscan1;
+      __m128i eob0, eob1;
+      zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+      zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+      nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+      nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+      iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+      iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+      // Add one to convert from indices to counts
+      iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+      iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+      eob0 = _mm_and_si128(iscan0, nzero_coeff0);
+      eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+      eob0 = _mm_max_epi16(eob0, eob1);
+      eob = _mm_max_epi16(eob, eob0);
     }
+    n_coeffs += 8 * 2;
+  }
 
-    // Accumulate EOB
-    {
-      __m128i eob_shuffled;
-      eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
-      eob = _mm_max_epi16(eob, eob_shuffled);
-      eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
-      eob = _mm_max_epi16(eob, eob_shuffled);
-      eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
-      eob = _mm_max_epi16(eob, eob_shuffled);
-      *eob_ptr = _mm_extract_epi16(eob, 1);
-    }
-  } else {
-    do {
-      write_zero(dqcoeff_ptr, n_coeffs);
-      write_zero(qcoeff_ptr, n_coeffs);
-      n_coeffs += 8 * 2;
-    } while (n_coeffs < 0);
-    *eob_ptr = 0;
+  // Accumulate EOB
+  {
+    __m128i eob_shuffled;
+    eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+    eob = _mm_max_epi16(eob, eob_shuffled);
+    eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
+    eob = _mm_max_epi16(eob, eob_shuffled);
+    eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
+    eob = _mm_max_epi16(eob, eob_shuffled);
+    *eob_ptr = _mm_extract_epi16(eob, 1);
   }
 }

diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 71c8d94..5318f21 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc

@@ -34,14 +34,26 @@
       const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, \
       const int16_t *iscan
 
+#define QUAN_PARAM_LIST_NO_SKIP                                               \
+  const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,    \
+      const int16_t *round_ptr, const int16_t *quant_ptr,                     \
+      const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,                 \
+      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, \
+      const int16_t *scan, const int16_t *iscan
+
 typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);
 typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);
+typedef void (*QuantizeFuncNoSkip)(QUAN_PARAM_LIST_NO_SKIP);
 
 #define HBD_QUAN_FUNC                                                      \
   fn(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr,      \
      quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, \
      iscan, log_scale)
 
+#define LBD_QUAN_FUNC_NO_SKIP                                              \
+  fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
+     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)
+
 template <QuantizeFuncHbd fn>
 void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
   const int log_scale = 0;
@@ -60,6 +72,12 @@
   HBD_QUAN_FUNC;
 }
 
+template <QuantizeFuncNoSkip fn>
+void lowbd_quan_wrapper(QUAN_PARAM_LIST) {
+  (void)skip_block;
+  LBD_QUAN_FUNC_NO_SKIP;
+}
+
 typedef enum { TYPE_B, TYPE_DC, TYPE_FP } QuantType;
 
 typedef ::testing::tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, QuantType,
@@ -297,12 +315,15 @@
 
 #if HAVE_AVX2
 const QuantizeParam kQParamArrayAvx2[] = {
-  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_16X16, TYPE_FP,
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_16X16, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_32X32,
-             TYPE_FP, AOM_BITS_8),
-  make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2, TX_64X64,
-             TYPE_FP, AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_32X32, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_64x64_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_64x64_avx2>, TX_64X64, TYPE_FP,
+             AOM_BITS_8),
   make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
              &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, TX_16X16,
              TYPE_FP, AOM_BITS_8),
@@ -344,7 +365,8 @@
 
 #if HAVE_SSE2
 const QuantizeParam kQParamArraySSE2[] = {
-  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X16, TYPE_FP,
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_16X16, TYPE_FP,
              AOM_BITS_8),
   make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, TX_16X16,
              TYPE_B, AOM_BITS_8),
commit	437cbaef5fad93e5a4cd4cc798a0bec3f93b8a12	[log] [tgz]
author	Peng Bin <binpengsmail@gmail.com>	Thu Apr 19 20:39:22 2018 +0800
committer	Angie Chiang <angiebird@google.com>	Fri Apr 20 21:48:01 2018 +0000
tree	552c06a2e3a593e0f15e36cc3e2e392796cb9baa
parent	c08f949e2f41bcdea4d288d7c63922792cc3c7e5 [diff]