Remove obsolete skip_block for highbd_quantize_fp

1. Remove skip_block from the function parameter list.
2. Remove the branch for skip_block == 1.
3. Affected functions: av1_highbd_quantize_fp_{c,sse4_1,avx2}.

Change-Id: I84dbe12354c70d62f35b0be1d39bdd07254f51d7

commit: 0dcd23c81923f04d5a820f27b1cfb6101eae3e7c [log] [tgz]
author: Peng Bin <binpengsmail@gmail.com> Thu Apr 19 21:42:30 2018 +0800
committer: Angie Chiang <angiebird@google.com> Fri Apr 20 21:48:01 2018 +0000
tree: 950c4b031a5cf373a091392ebed00dc1c9fb9eeb
parent: 437cbaef5fad93e5a4cd4cc798a0bec3f93b8a12 [diff]
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 1278a90..6df0287 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl

@@ -227,7 +227,7 @@
 
   add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
 
-  add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+  add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
   specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
 
   add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";

diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index 0264951..0adbc2a 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c

@@ -113,8 +113,8 @@
 }
 
 static void highbd_quantize_fp_helper_c(
-    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
@@ -128,68 +128,61 @@
   (void)quant_shift_ptr;
   (void)iscan;
 
-  if (!skip_block) {
-    if (qm_ptr || iqm_ptr) {
-      // Quantization pass: All coefficients with index >= zero_flag are
-      // skippable. Note: zero_flag can be zero.
-      for (i = 0; i < count; i++) {
-        const int rc = scan[i];
-        const int coeff = coeff_ptr[rc];
-        const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
-        const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
-        const int dequant =
-            (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
-            AOM_QM_BITS;
-        const int coeff_sign = (coeff >> 31);
-        const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-        int abs_qcoeff = 0;
-        if (abs_coeff * wt >=
-            (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
-          const int64_t tmp =
-              abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
-          abs_qcoeff =
-              (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
-          qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-          const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
-          dqcoeff_ptr[rc] =
-              (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
-          if (abs_qcoeff) eob = i;
-        } else {
-          qcoeff_ptr[rc] = 0;
-          dqcoeff_ptr[rc] = 0;
-        }
-      }
-    } else {
-      const int log_scaled_round_arr[2] = {
-        ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
-        ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
-      };
-      for (i = 0; i < count; i++) {
-        const int rc = scan[i];
-        const int coeff = coeff_ptr[rc];
-        const int rc01 = (rc != 0);
-        const int coeff_sign = (coeff >> 31);
-        const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-        const int log_scaled_round = log_scaled_round_arr[rc01];
-        if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
-          const int quant = quant_ptr[rc01];
-          const int dequant = dequant_ptr[rc01];
-          const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
-          const int abs_qcoeff = (int)((tmp * quant) >> shift);
-          qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-          const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
-          if (abs_qcoeff) eob = i;
-          dqcoeff_ptr[rc] =
-              (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
-        } else {
-          qcoeff_ptr[rc] = 0;
-          dqcoeff_ptr[rc] = 0;
-        }
+  if (qm_ptr || iqm_ptr) {
+    // Quantization pass: All coefficients with index >= zero_flag are
+    // skippable. Note: zero_flag can be zero.
+    for (i = 0; i < count; i++) {
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
+      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+      const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+      const int dequant =
+          (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+          AOM_QM_BITS;
+      const int coeff_sign = (coeff >> 31);
+      const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      int abs_qcoeff = 0;
+      if (abs_coeff * wt >=
+          (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
+        const int64_t tmp =
+            abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
+        abs_qcoeff =
+            (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
+        qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+        const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
+        dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
+        if (abs_qcoeff) eob = i;
+      } else {
+        qcoeff_ptr[rc] = 0;
+        dqcoeff_ptr[rc] = 0;
       }
     }
   } else {
-    memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
-    memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
+    const int log_scaled_round_arr[2] = {
+      ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
+      ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
+    };
+    for (i = 0; i < count; i++) {
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
+      const int rc01 = (rc != 0);
+      const int coeff_sign = (coeff >> 31);
+      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      const int log_scaled_round = log_scaled_round_arr[rc01];
+      if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
+        const int quant = quant_ptr[rc01];
+        const int dequant = dequant_ptr[rc01];
+        const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
+        const int abs_qcoeff = (int)((tmp * quant) >> shift);
+        qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+        const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
+        if (abs_qcoeff) eob = i;
+        dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
+      } else {
+        qcoeff_ptr[rc] = 0;
+        dqcoeff_ptr[rc] = 0;
+      }
+    }
   }
   *eob_ptr = eob + 1;
 }
@@ -381,30 +374,27 @@
                                    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
                                    const SCAN_ORDER *sc,
                                    const QUANT_PARAM *qparam) {
-  // obsolete skip_block
-  const int skip_block = 0;
   const qm_val_t *qm_ptr = qparam->qmatrix;
   const qm_val_t *iqm_ptr = qparam->iqmatrix;
   if (qm_ptr != NULL && iqm_ptr != NULL) {
-    highbd_quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
-                                p->round_fp_QTX, p->quant_fp_QTX,
-                                p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-                                p->dequant_QTX, eob_ptr, sc->scan, sc->iscan,
-                                qm_ptr, iqm_ptr, qparam->log_scale);
+    highbd_quantize_fp_helper_c(
+        coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
+        p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
+        sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
   } else {
     if (n_coeffs < 16) {
       // TODO(jingning): Need SIMD implementation for smaller block size
       // quantization.
       av1_highbd_quantize_fp_c(
-          coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_fp_QTX,
-          p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-          p->dequant_QTX, eob_ptr, sc->scan, sc->iscan, qparam->log_scale);
+          coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
+          p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
+          sc->scan, sc->iscan, qparam->log_scale);
       return;
     }
-    av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
-                           p->round_fp_QTX, p->quant_fp_QTX, p->quant_shift_QTX,
-                           qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-                           sc->scan, sc->iscan, qparam->log_scale);
+    av1_highbd_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
+                           p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
+                           dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+                           sc->iscan, qparam->log_scale);
   }
 }
 
@@ -507,16 +497,18 @@
                      qparam->log_scale);
 }
 
-void av1_highbd_quantize_fp_c(
-    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan, int log_scale) {
-  highbd_quantize_fp_helper_c(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                              quant_ptr, quant_shift_ptr, qcoeff_ptr,
-                              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
-                              NULL, NULL, log_scale);
+void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
+                              const int16_t *zbin_ptr, const int16_t *round_ptr,
+                              const int16_t *quant_ptr,
+                              const int16_t *quant_shift_ptr,
+                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                              const int16_t *scan, const int16_t *iscan,
+                              int log_scale) {
+  highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
+                              quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
+                              dequant_ptr, eob_ptr, scan, iscan, NULL, NULL,
+                              log_scale);
 }
 
 static void invert_quant(int16_t *quant, int16_t *shift, int d) {

diff --git a/av1/encoder/x86/av1_highbd_quantize_avx2.c b/av1/encoder/x86/av1_highbd_quantize_avx2.c
index 4cb1f2e..8dc99cf 100644
--- a/av1/encoder/x86/av1_highbd_quantize_avx2.c
+++ b/av1/encoder/x86/av1_highbd_quantize_avx2.c

@@ -87,8 +87,8 @@
 }
 
 void av1_highbd_quantize_fp_avx2(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan, int log_scale) {
@@ -96,14 +96,23 @@
   (void)zbin_ptr;
   (void)quant_shift_ptr;
   const unsigned int step = 8;
+  __m256i qp[3], coeff;
 
-  if (LIKELY(!skip_block)) {
-    __m256i qp[3], coeff;
+  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qp);
+  coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
 
-    init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qp);
+  __m256i eob = _mm256_setzero_si256();
+  quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+  coeff_ptr += step;
+  qcoeff_ptr += step;
+  dqcoeff_ptr += step;
+  iscan += step;
+  n_coeffs -= step;
+
+  update_qp(qp);
+  while (n_coeffs > 0) {
     coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
-
-    __m256i eob = _mm256_setzero_si256();
     quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
 
     coeff_ptr += step;
@@ -111,39 +120,17 @@
     dqcoeff_ptr += step;
     iscan += step;
     n_coeffs -= step;
-
-    update_qp(qp);
-    while (n_coeffs > 0) {
-      coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
-      quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-      coeff_ptr += step;
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      iscan += step;
-      n_coeffs -= step;
-    }
-    {
-      __m256i eob_s;
-      eob_s = _mm256_shuffle_epi32(eob, 0xe);
-      eob = _mm256_max_epi16(eob, eob_s);
-      eob_s = _mm256_shufflelo_epi16(eob, 0xe);
-      eob = _mm256_max_epi16(eob, eob_s);
-      eob_s = _mm256_shufflelo_epi16(eob, 1);
-      eob = _mm256_max_epi16(eob, eob_s);
-      const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
-                                              _mm256_extractf128_si256(eob, 1));
-      *eob_ptr = _mm_extract_epi16(final_eob, 0);
-    }
-  } else {
-    do {
-      const __m256i zero = _mm256_setzero_si256();
-      _mm256_storeu_si256((__m256i *)qcoeff_ptr, zero);
-      _mm256_storeu_si256((__m256i *)dqcoeff_ptr, zero);
-      qcoeff_ptr += step;
-      dqcoeff_ptr += step;
-      n_coeffs -= step;
-    } while (n_coeffs > 0);
-    *eob_ptr = 0;
+  }
+  {
+    __m256i eob_s;
+    eob_s = _mm256_shuffle_epi32(eob, 0xe);
+    eob = _mm256_max_epi16(eob, eob_s);
+    eob_s = _mm256_shufflelo_epi16(eob, 0xe);
+    eob = _mm256_max_epi16(eob, eob_s);
+    eob_s = _mm256_shufflelo_epi16(eob, 1);
+    eob = _mm256_max_epi16(eob, eob_s);
+    const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
+                                            _mm256_extractf128_si256(eob, 1));
+    *eob_ptr = _mm_extract_epi16(final_eob, 0);
   }
 }

diff --git a/av1/encoder/x86/av1_highbd_quantize_sse4.c b/av1/encoder/x86/av1_highbd_quantize_sse4.c
index da8bb37..eaebb83 100644
--- a/av1/encoder/x86/av1_highbd_quantize_sse4.c
+++ b/av1/encoder/x86/av1_highbd_quantize_sse4.c

@@ -111,8 +111,8 @@
 }
 
 void av1_highbd_quantize_fp_sse4_1(
-    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan, int log_scale) {
@@ -124,7 +124,6 @@
   const int shift = 16 - log_scale;
   const int coeff_stride = 4;
   const int quan_stride = coeff_stride;
-  (void)skip_block;
   (void)zbin_ptr;
   (void)quant_shift_ptr;
   (void)scan;
@@ -132,31 +131,54 @@
   memset(quanAddr, 0, count * sizeof(quanAddr[0]));
   memset(dquanAddr, 0, count * sizeof(dquanAddr[0]));
 
-  if (!skip_block) {
+  coeff[0] = _mm_loadu_si128((__m128i const *)src);
+  const int round1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
+  const int round0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
+
+  qparam[0] = _mm_set_epi32(round1, round1, round1, round0);
+  qparam[1] = _mm_set_epi32(0, quant_ptr[1], 0, quant_ptr[0]);
+  qparam[2] = _mm_set_epi32(0, dequant_ptr[1], 0, dequant_ptr[0]);
+  qparam[3] = _mm_set_epi32(dequant_ptr[1], dequant_ptr[1], dequant_ptr[1],
+                            dequant_ptr[0]);
+
+  // DC and first 3 AC
+  quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
+                        &coeff_sign);
+
+  // update round/quan/dquan for AC
+  qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
+  qparam[1] = _mm_set_epi32(0, quant_ptr[1], 0, quant_ptr[1]);
+  qparam[2] = _mm_set_epi32(0, dequant_ptr[1], 0, dequant_ptr[1]);
+  qparam[3] = _mm_set1_epi32(dequant_ptr[1]);
+  quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
+                        quanAddr, dquanAddr);
+
+  // next 4 AC
+  coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
+  quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
+                        &coeff_sign);
+  quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
+                        quanAddr + quan_stride, dquanAddr + quan_stride);
+
+  find_eob(quanAddr, iscan, &eob);
+
+  count -= 8;
+
+  // loop for the rest of AC
+  while (count > 0) {
+    src += coeff_stride << 1;
+    quanAddr += quan_stride << 1;
+    dquanAddr += quan_stride << 1;
+    iscan += quan_stride << 1;
+
     coeff[0] = _mm_loadu_si128((__m128i const *)src);
-    const int round1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
-    const int round0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
+    coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
 
-    qparam[0] = _mm_set_epi32(round1, round1, round1, round0);
-    qparam[1] = _mm_set_epi32(0, quant_ptr[1], 0, quant_ptr[0]);
-    qparam[2] = _mm_set_epi32(0, dequant_ptr[1], 0, dequant_ptr[0]);
-    qparam[3] = _mm_set_epi32(dequant_ptr[1], dequant_ptr[1], dequant_ptr[1],
-                              dequant_ptr[0]);
-
-    // DC and first 3 AC
     quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
                           &coeff_sign);
-
-    // update round/quan/dquan for AC
-    qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
-    qparam[1] = _mm_set_epi32(0, quant_ptr[1], 0, quant_ptr[1]);
-    qparam[2] = _mm_set_epi32(0, dequant_ptr[1], 0, dequant_ptr[1]);
-    qparam[3] = _mm_set1_epi32(dequant_ptr[1]);
     quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
                           log_scale, quanAddr, dquanAddr);
 
-    // next 4 AC
-    coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
     quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
                           &coeff_sign);
     quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
@@ -166,34 +188,6 @@
     find_eob(quanAddr, iscan, &eob);
 
     count -= 8;
-
-    // loop for the rest of AC
-    while (count > 0) {
-      src += coeff_stride << 1;
-      quanAddr += quan_stride << 1;
-      dquanAddr += quan_stride << 1;
-      iscan += quan_stride << 1;
-
-      coeff[0] = _mm_loadu_si128((__m128i const *)src);
-      coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
-
-      quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff,
-                            dequant, &coeff_sign);
-      quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
-                            log_scale, quanAddr, dquanAddr);
-
-      quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff,
-                            dequant, &coeff_sign);
-      quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
-                            log_scale, quanAddr + quan_stride,
-                            dquanAddr + quan_stride);
-
-      find_eob(quanAddr, iscan, &eob);
-
-      count -= 8;
-    }
-    *eob_ptr = get_accumulated_eob(&eob);
-  } else {
-    *eob_ptr = 0;
   }
+  *eob_ptr = get_accumulated_eob(&eob);
 }

diff --git a/test/av1_quantize_test.cc b/test/av1_quantize_test.cc
index f594a64..e6bd3d6 100644
--- a/test/av1_quantize_test.cc
+++ b/test/av1_quantize_test.cc

@@ -22,8 +22,8 @@
 namespace {
 
 typedef void (*QuantizeFpFunc)(
-    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan, int log_scale);
@@ -63,7 +63,6 @@
     uint16_t ref_eob;
     int err_count_total = 0;
     int first_failure = -1;
-    int skip_block = 0;
     int count = params_.coeffCount;
     const TX_SIZE txSize = getTxSize(count);
     int log_scale = (txSize == TX_32X32);
@@ -93,12 +92,12 @@
         quant_ptr[j] = quant_ptr[1];
         round_ptr[j] = round_ptr[1];
       }
-      quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+      quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
                   quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
                   &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
 
       ASM_REGISTER_STATE_CHECK(
-          quanFunc(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+          quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
                    quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
                    scanOrder.scan, scanOrder.iscan, log_scale));
 
@@ -137,7 +136,6 @@
     DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
     uint16_t eob;
     uint16_t ref_eob;
-    int skip_block = 0;
     int count = params_.coeffCount;
     const TX_SIZE txSize = getTxSize(count);
     int log_scale = (txSize == TX_32X32);
@@ -171,12 +169,12 @@
         round_ptr[j] = round_ptr[1];
       }
 
-      quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+      quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
                   quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
                   &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
 
       ASM_REGISTER_STATE_CHECK(
-          quanFunc(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+          quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
                    quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
                    scanOrder.scan, scanOrder.iscan, log_scale));
       EXPECT_EQ(ref_eob, eob) << "eob error: "

diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 5318f21..ffd6a0e 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc

@@ -42,13 +42,12 @@
       const int16_t *scan, const int16_t *iscan
 
 typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);
-typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);
+typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST_NO_SKIP, int log_scale);
 typedef void (*QuantizeFuncNoSkip)(QUAN_PARAM_LIST_NO_SKIP);
 
 #define HBD_QUAN_FUNC                                                      \
-  fn(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr,      \
-     quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, \
-     iscan, log_scale)
+  fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
+     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale)
 
 #define LBD_QUAN_FUNC_NO_SKIP                                              \
   fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
@@ -56,18 +55,21 @@
 
 template <QuantizeFuncHbd fn>
 void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
+  (void)skip_block;
   const int log_scale = 0;
   HBD_QUAN_FUNC;
 }
 
 template <QuantizeFuncHbd fn>
 void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) {
+  (void)skip_block;
   const int log_scale = 1;
   HBD_QUAN_FUNC;
 }
 
 template <QuantizeFuncHbd fn>
 void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) {
+  (void)skip_block;
   const int log_scale = 2;
   HBD_QUAN_FUNC;
 }
@@ -80,8 +82,8 @@
 
 typedef enum { TYPE_B, TYPE_DC, TYPE_FP } QuantType;
 
-typedef ::testing::tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, QuantType,
-                         aom_bit_depth_t>
+using ::testing::tuple;
+typedef tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, QuantType, aom_bit_depth_t>
     QuantizeParam;
 
 typedef struct {
commit	0dcd23c81923f04d5a820f27b1cfb6101eae3e7c	[log] [tgz]
author	Peng Bin <binpengsmail@gmail.com>	Thu Apr 19 21:42:30 2018 +0800
committer	Angie Chiang <angiebird@google.com>	Fri Apr 20 21:48:01 2018 +0000
tree	950c4b031a5cf373a091392ebed00dc1c9fb9eeb
parent	437cbaef5fad93e5a4cd4cc798a0bec3f93b8a12 [diff]