Merge "removed the idct rtcd idct calls" into experimental
diff --git a/vp9/decoder/decodframe.c b/vp9/decoder/decodframe.c
index a579894..625053e 100644
--- a/vp9/decoder/decodframe.c
+++ b/vp9/decoder/decodframe.c
@@ -288,13 +288,7 @@
       xd->eobs[i] = 0;
     }
 
-    if (tx_size == TX_16X16) {
-      eobtotal = vp9_decode_mb_tokens_16x16(pbi, xd, bc);
-    } else if (tx_size == TX_8X8) {
-      eobtotal = vp9_decode_mb_tokens_8x8(pbi, xd, bc);
-    } else {
-      eobtotal = vp9_decode_mb_tokens_4x4(pbi, xd, bc);
-    }
+    eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
     if (eobtotal == 0) {  // skip loopfilter
       xd->mode_info_context->mbmi.mb_skip_coeff = 1;
       continue;
@@ -392,12 +386,8 @@
       xd->block[i].eob = 0;
       xd->eobs[i] = 0;
     }
-    if (tx_size == TX_16X16) {
-      eobtotal = vp9_decode_mb_tokens_16x16(pbi, xd, bc);
-    } else if (tx_size == TX_8X8) {
-      eobtotal = vp9_decode_mb_tokens_8x8(pbi, xd, bc);
-    } else if (mode != B_PRED) {
-      eobtotal = vp9_decode_mb_tokens_4x4(pbi, xd, bc);
+    if (mode != B_PRED) {
+      eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
     }
   }
 
diff --git a/vp9/decoder/detokenize.c b/vp9/decoder/detokenize.c
index 1194aa8..b6823e7 100644
--- a/vp9/decoder/detokenize.c
+++ b/vp9/decoder/detokenize.c
@@ -124,14 +124,14 @@
                         PLANE_TYPE type,
                         TX_TYPE tx_type,
                         int seg_eob, INT16 *qcoeff_ptr,
-                        const int *const scan, int block_type,
+                        const int *const scan, TX_SIZE txfm_size,
                         const int *coef_bands) {
   FRAME_CONTEXT *const fc = &dx->common.fc;
   int pt, c = (type == PLANE_TYPE_Y_NO_DC);
   vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][ENTROPY_NODES], *prob;
   unsigned int (*coef_counts)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
 
-  switch (block_type) {
+  switch (txfm_size) {
     default:
     case TX_4X4:
       if (tx_type == DCT_DCT) {
@@ -246,8 +246,7 @@
   return c;
 }
 
-
-int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) {
+static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) {
   int active = vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB);
   int eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
 
@@ -257,132 +256,117 @@
 }
 
 
-int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
-                               MACROBLOCKD* const xd,
-                               BOOL_DECODER* const bc) {
+static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
+                                      MACROBLOCKD* const xd,
+                                      BOOL_DECODER* const bc) {
   ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context;
   ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context;
-
   unsigned short* const eobs = xd->eobs;
-  PLANE_TYPE type;
-  int c, i, eobtotal = 0, seg_eob;
   const int segment_id = xd->mode_info_context->mbmi.segment_id;
-  INT16 *qcoeff_ptr = &xd->qcoeff[0];
-  TX_TYPE tx_type = get_tx_type(xd, &xd->block[0]);
-
-  type = PLANE_TYPE_Y_WITH_DC;
-  seg_eob = get_eob(xd, segment_id, 256);
+  int c, i, eobtotal = 0, seg_eob;
 
   // Luma block
-  {
-    const int* const scan = vp9_default_zig_zag1d_16x16;
-    eobs[0] = c = decode_coefs(pbi, xd, bc, A, L, type,
-                               tx_type, seg_eob, qcoeff_ptr,
-                               scan, TX_16X16, vp9_coef_bands_16x16);
-    A[1] = A[2] = A[3] = A[0];
-    L[1] = L[2] = L[3] = L[0];
-    eobtotal += c;
-  }
+  eobs[0] = c = decode_coefs(pbi, xd, bc, A, L, PLANE_TYPE_Y_WITH_DC,
+                             get_tx_type(xd, &xd->block[0]),
+                             get_eob(xd, segment_id, 256),
+                             xd->qcoeff, vp9_default_zig_zag1d_16x16,
+                             TX_16X16, vp9_coef_bands_16x16);
+  A[1] = A[2] = A[3] = A[0];
+  L[1] = L[2] = L[3] = L[0];
+  eobtotal += c;
 
   // 8x8 chroma blocks
-  qcoeff_ptr += 256;
-  type = PLANE_TYPE_UV;
-  tx_type = DCT_DCT;
   seg_eob = get_eob(xd, segment_id, 64);
   for (i = 16; i < 24; i += 4) {
     ENTROPY_CONTEXT* const a = A + vp9_block2above_8x8[i];
     ENTROPY_CONTEXT* const l = L + vp9_block2left_8x8[i];
-    const int* const scan = vp9_default_zig_zag1d_8x8;
 
-    eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, type,
-                               tx_type, seg_eob, qcoeff_ptr,
-                               scan, TX_8X8, vp9_coef_bands_8x8);
+    eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV,
+                               DCT_DCT, seg_eob, xd->block[i].qcoeff,
+                               vp9_default_zig_zag1d_8x8,
+                               TX_8X8, vp9_coef_bands_8x8);
     a[1] = a[0];
     l[1] = l[0];
-
     eobtotal += c;
-    qcoeff_ptr += 64;
   }
+
+  // no Y2 block
   vpx_memset(&A[8], 0, sizeof(A[8]));
   vpx_memset(&L[8], 0, sizeof(L[8]));
+
   return eobtotal;
 }
 
-int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
-                             MACROBLOCKD* const xd,
-                             BOOL_DECODER* const bc) {
+static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
+                                    MACROBLOCKD* const xd,
+                                    BOOL_DECODER* const bc) {
   ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;
   ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;
-
   unsigned short *const eobs = xd->eobs;
   PLANE_TYPE type;
   int c, i, eobtotal = 0, seg_eob;
   const int segment_id = xd->mode_info_context->mbmi.segment_id;
-  INT16 *qcoeff_ptr = &xd->qcoeff[0];
-  TX_TYPE tx_type = DCT_DCT;
 
-  int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
-                  xd->mode_info_context->mbmi.mode == SPLITMV) ? 16 : 24;
+  // 2nd order DC block
   if (xd->mode_info_context->mbmi.mode != B_PRED &&
       xd->mode_info_context->mbmi.mode != SPLITMV &&
       xd->mode_info_context->mbmi.mode != I8X8_PRED) {
     ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[24];
     ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[24];
-    const int *const scan = vp9_default_zig_zag1d;
-    type = PLANE_TYPE_Y2;
 
-    seg_eob = get_eob(xd, segment_id, 4);
-    eobs[24] = c = decode_coefs(pbi, xd, bc, a, l, type,
-                                tx_type, seg_eob, qcoeff_ptr + 24 * 16,
-                                scan, TX_8X8, vp9_coef_bands);
-
+    eobs[24] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_Y2,
+                                DCT_DCT, get_eob(xd, segment_id, 4),
+                                xd->block[24].qcoeff,
+                                vp9_default_zig_zag1d, TX_8X8, vp9_coef_bands);
     eobtotal += c - 4;
-
     type = PLANE_TYPE_Y_NO_DC;
-  } else
+  } else {
     type = PLANE_TYPE_Y_WITH_DC;
-
-  seg_eob = get_eob(xd, segment_id, 64);
-
-  for (i = 0; i < bufthred ; i += 4) {
-    ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i];
-    ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i];
-    const int *const scan = vp9_default_zig_zag1d_8x8;
-    tx_type = DCT_DCT;
-
-    if (i == 16)
-      type = PLANE_TYPE_UV;
-    if (type == PLANE_TYPE_Y_WITH_DC) {
-      tx_type = get_tx_type(xd, xd->block + i);
-    }
-
-    eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, type,
-                               tx_type, seg_eob, qcoeff_ptr,
-                               scan, TX_8X8, vp9_coef_bands_8x8);
-    a[1] = a[0];
-    l[1] = l[0];
-
-    eobtotal += c;
-    qcoeff_ptr += 64;
   }
 
-  if (bufthred == 16) {
-    type = PLANE_TYPE_UV;
-    tx_type = DCT_DCT;
-    seg_eob = get_eob(xd, segment_id, 16);
+  // luma blocks
+  seg_eob = get_eob(xd, segment_id, 64);
+  for (i = 0; i < 16; i += 4) {
+    ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i];
+    ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i];
 
-    // use 4x4 transform for U, V components in I8X8 prediction mode
+    eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, type,
+                               type == PLANE_TYPE_Y_WITH_DC ?
+                                 get_tx_type(xd, xd->block + i) : DCT_DCT,
+                               seg_eob, xd->block[i].qcoeff,
+                               vp9_default_zig_zag1d_8x8,
+                               TX_8X8, vp9_coef_bands_8x8);
+    a[1] = a[0];
+    l[1] = l[0];
+    eobtotal += c;
+  }
+
+  // chroma blocks
+  if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+      xd->mode_info_context->mbmi.mode == SPLITMV) {
+    // use 4x4 transform for U, V components in I8X8/splitmv prediction mode
+    seg_eob = get_eob(xd, segment_id, 16);
     for (i = 16; i < 24; i++) {
       ENTROPY_CONTEXT *const a = A + vp9_block2above[i];
       ENTROPY_CONTEXT *const l = L + vp9_block2left[i];
-      const int *scan = vp9_default_zig_zag1d;
 
-      eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, type,
-                                 tx_type, seg_eob, qcoeff_ptr,
-                                 scan, TX_4X4, vp9_coef_bands);
-
+      eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV,
+                                 DCT_DCT, seg_eob, xd->block[i].qcoeff,
+                                 vp9_default_zig_zag1d, TX_4X4, vp9_coef_bands);
       eobtotal += c;
-      qcoeff_ptr += 16;
+    }
+  } else {
+    for (i = 16; i < 24; i += 4) {
+      ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i];
+      ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i];
+
+      eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV,
+                                 DCT_DCT, seg_eob, xd->block[i].qcoeff,
+                                 vp9_default_zig_zag1d_8x8,
+                                 TX_8X8, vp9_coef_bands_8x8);
+      a[1] = a[0];
+      l[1] = l[0];
+      eobtotal += c;
     }
   }
 
@@ -435,9 +419,9 @@
   return eobtotal;
 }
 
-int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx,
-                             MACROBLOCKD* const xd,
-                             BOOL_DECODER* const bc) {
+static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx,
+                                    MACROBLOCKD* const xd,
+                                    BOOL_DECODER* const bc) {
   int i, eobtotal = 0;
   PLANE_TYPE type;
 
@@ -456,3 +440,21 @@
 
   return eobtotal + vp9_decode_mb_tokens_4x4_uv(dx, xd, bc);
 }
+
+int vp9_decode_mb_tokens(VP9D_COMP* const dx,  // dispatch on mbmi.txfm_size
+                         MACROBLOCKD* const xd,
+                         BOOL_DECODER* const bc) {
+  const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
+  int eobtotal;  // set by exactly one of the size-specific decoders below
+
+  if (tx_size == TX_16X16) {
+    eobtotal = vp9_decode_mb_tokens_16x16(dx, xd, bc);
+  } else if (tx_size == TX_8X8) {
+    eobtotal = vp9_decode_mb_tokens_8x8(dx, xd, bc);
+  } else {
+    assert(tx_size == TX_4X4);  // any other size is unexpected here
+    eobtotal = vp9_decode_mb_tokens_4x4(dx, xd, bc);
+  }
+
+  return eobtotal;  // passed through unchanged from the chosen decoder
+}
diff --git a/vp9/decoder/detokenize.h b/vp9/decoder/detokenize.h
index a8f78f4..9f00d29 100644
--- a/vp9/decoder/detokenize.h
+++ b/vp9/decoder/detokenize.h
@@ -20,16 +20,10 @@
                          BOOL_DECODER* const bc,
                          PLANE_TYPE type, int i);
 
-int vp9_decode_mb_tokens_4x4(VP9D_COMP* const, MACROBLOCKD* const,
-                             BOOL_DECODER* const);
+int vp9_decode_mb_tokens(VP9D_COMP* const dx, MACROBLOCKD* const xd,
+                         BOOL_DECODER* const bc);  // returns eobtotal
 
 int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd,
                                 BOOL_DECODER* const bc);
 
-int vp9_decode_mb_tokens_8x8(VP9D_COMP* const, MACROBLOCKD* const,
-                             BOOL_DECODER* const);
-
-int vp9_decode_mb_tokens_16x16(VP9D_COMP* const, MACROBLOCKD* const,
-                               BOOL_DECODER* const);
-
 #endif /* DETOKENIZE_H */
diff --git a/vp9/encoder/rdopt.c b/vp9/encoder/rdopt.c
index 5dee848..a3e5b75 100644
--- a/vp9/encoder/rdopt.c
+++ b/vp9/encoder/rdopt.c
@@ -4620,7 +4620,8 @@
 #endif
 
     // Test best rd so far against threshold for trying this mode.
-    if (best_rd <= cpi->rd_threshes[mode_index]) {
+    if (best_rd <= cpi->rd_threshes[mode_index] ||
+        cpi->rd_threshes[mode_index] == INT_MAX) {
       continue;
     }