Merge "Adding high-bitdepth intra prediction functions"
diff --git a/test/external_frame_buffer_test.cc b/test/external_frame_buffer_test.cc
index 44eba33..70b3009 100644
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -71,6 +71,7 @@
     if (ext_fb_list_[idx].size < min_size) {
       delete [] ext_fb_list_[idx].data;
       ext_fb_list_[idx].data = new uint8_t[min_size];
+      memset(ext_fb_list_[idx].data, 0, min_size);
       ext_fb_list_[idx].size = min_size;
     }
 
diff --git a/vp9/common/vp9_frame_buffers.c b/vp9/common/vp9_frame_buffers.c
index 733b3a9..34795b7 100644
--- a/vp9/common/vp9_frame_buffers.c
+++ b/vp9/common/vp9_frame_buffers.c
@@ -61,6 +61,10 @@
     if (!int_fb_list->int_fb[i].data)
       return -1;
 
+    // This memset is needed for fixing valgrind error from C loop filter
+    // due to access uninitialized memory in frame border. It could be
+    // removed if border is totally removed.
+    vpx_memset(int_fb_list->int_fb[i].data, 0, min_size);
     int_fb_list->int_fb[i].size = min_size;
   }
 
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 76ca1ae..df46f64 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -13,6 +13,7 @@
 
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_entropy.h"
 
 #include "vp9/decoder/vp9_detokenize.h"
 
@@ -31,29 +32,31 @@
 #define INCREMENT_COUNT(token)                              \
   do {                                                      \
      if (!cm->frame_parallel_decoding_mode)                 \
-       ++coef_counts[band][ctx][token];                     \
+       ++coef_counts[band][ctx][token];                      \
   } while (0)
 
-#define WRITE_COEF_CONTINUE(val, token)                  \
-  {                                                      \
-    v = (val * dqv) >> dq_shift;                         \
-    dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v;         \
-    token_cache[scan[c]] = vp9_pt_energy_class[token];   \
-    ++c;                                                 \
-    ctx = get_coef_context(nb, token_cache, c);          \
-    dqv = dq[1];                                         \
-    continue;                                            \
-  }
+static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) {
+  int i, val = 0;
+  for (i = 0; i < n; ++i)
+    val = (val << 1) | vp9_read(r, probs[i]);
+  return val;
+}
 
-#define ADJUST_COEF(prob, bits_count)                   \
-  do {                                                  \
-    val += (vp9_read(r, prob) << bits_count);           \
-  } while (0)
+static const vp9_tree_index coeff_subtree_high[TREE_SIZE(ENTROPY_TOKENS)] = {
+  2, 6,                                         /* 0 = LOW_VAL */
+  -TWO_TOKEN, 4,                                /* 1 = TWO */
+  -THREE_TOKEN, -FOUR_TOKEN,                    /* 2 = THREE */
+  8, 10,                                        /* 3 = HIGH_LOW */
+  -CATEGORY1_TOKEN, -CATEGORY2_TOKEN,           /* 4 = CAT_ONE */
+  12, 14,                                       /* 5 = CAT_THREEFOUR */
+  -CATEGORY3_TOKEN, -CATEGORY4_TOKEN,           /* 6 = CAT_THREE */
+  -CATEGORY5_TOKEN, -CATEGORY6_TOKEN            /* 7 = CAT_FIVE */
+};
 
 static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
-                       tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
-                       int ctx, const int16_t *scan, const int16_t *nb,
-                       vp9_reader *r) {
+                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
+                        int ctx, const int16_t *scan, const int16_t *nb,
+                        vp9_reader *r) {
   const int max_eob = 16 << (tx_size << 1);
   const FRAME_CONTEXT *const fc = &cm->fc;
   FRAME_COUNTS *const counts = &cm->counts;
@@ -69,11 +72,11 @@
   uint8_t token_cache[32 * 32];
   const uint8_t *band_translate = get_band_translate(tx_size);
   const int dq_shift = (tx_size == TX_32X32);
-  int v;
+  int v, token;
   int16_t dqv = dq[0];
 
   while (c < max_eob) {
-    int val;
+    int val = -1;
     band = *band_translate++;
     prob = coef_probs[band][ctx];
     if (!cm->frame_parallel_decoding_mode)
@@ -95,81 +98,46 @@
       prob = coef_probs[band][ctx];
     }
 
-    // ONE_CONTEXT_NODE_0_
     if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
       INCREMENT_COUNT(ONE_TOKEN);
-      WRITE_COEF_CONTINUE(1, ONE_TOKEN);
-    }
-
-    INCREMENT_COUNT(TWO_TOKEN);
-
-    prob = vp9_pareto8_full[prob[PIVOT_NODE] - 1];
-
-    if (!vp9_read(r, prob[LOW_VAL_CONTEXT_NODE])) {
-      if (!vp9_read(r, prob[TWO_CONTEXT_NODE])) {
-        WRITE_COEF_CONTINUE(2, TWO_TOKEN);
+      token = ONE_TOKEN;
+      val = 1;
+    } else {
+      INCREMENT_COUNT(TWO_TOKEN);
+      token = vp9_read_tree(r, coeff_subtree_high,
+                            vp9_pareto8_full[prob[PIVOT_NODE] - 1]);
+      switch (token) {
+        case TWO_TOKEN:
+        case THREE_TOKEN:
+        case FOUR_TOKEN:
+          val = token;
+          break;
+        case CATEGORY1_TOKEN:
+          val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r);
+          break;
+        case CATEGORY2_TOKEN:
+          val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r);
+          break;
+        case CATEGORY3_TOKEN:
+          val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r);
+          break;
+        case CATEGORY4_TOKEN:
+          val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r);
+          break;
+        case CATEGORY5_TOKEN:
+          val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r);
+          break;
+        case CATEGORY6_TOKEN:
+          val = CAT6_MIN_VAL + read_coeff(vp9_cat6_prob, 14, r);
+          break;
       }
-      if (!vp9_read(r, prob[THREE_CONTEXT_NODE])) {
-        WRITE_COEF_CONTINUE(3, THREE_TOKEN);
-      }
-      WRITE_COEF_CONTINUE(4, FOUR_TOKEN);
     }
-
-    if (!vp9_read(r, prob[HIGH_LOW_CONTEXT_NODE])) {
-      if (!vp9_read(r, prob[CAT_ONE_CONTEXT_NODE])) {
-        val = CAT1_MIN_VAL;
-        ADJUST_COEF(vp9_cat1_prob[0], 0);
-        WRITE_COEF_CONTINUE(val, CATEGORY1_TOKEN);
-      }
-      val = CAT2_MIN_VAL;
-      ADJUST_COEF(vp9_cat2_prob[0], 1);
-      ADJUST_COEF(vp9_cat2_prob[1], 0);
-      WRITE_COEF_CONTINUE(val, CATEGORY2_TOKEN);
-    }
-
-    if (!vp9_read(r, prob[CAT_THREEFOUR_CONTEXT_NODE])) {
-      if (!vp9_read(r, prob[CAT_THREE_CONTEXT_NODE])) {
-        val = CAT3_MIN_VAL;
-        ADJUST_COEF(vp9_cat3_prob[0], 2);
-        ADJUST_COEF(vp9_cat3_prob[1], 1);
-        ADJUST_COEF(vp9_cat3_prob[2], 0);
-        WRITE_COEF_CONTINUE(val, CATEGORY3_TOKEN);
-      }
-      val = CAT4_MIN_VAL;
-      ADJUST_COEF(vp9_cat4_prob[0], 3);
-      ADJUST_COEF(vp9_cat4_prob[1], 2);
-      ADJUST_COEF(vp9_cat4_prob[2], 1);
-      ADJUST_COEF(vp9_cat4_prob[3], 0);
-      WRITE_COEF_CONTINUE(val, CATEGORY4_TOKEN);
-    }
-
-    if (!vp9_read(r, prob[CAT_FIVE_CONTEXT_NODE])) {
-      val = CAT5_MIN_VAL;
-      ADJUST_COEF(vp9_cat5_prob[0], 4);
-      ADJUST_COEF(vp9_cat5_prob[1], 3);
-      ADJUST_COEF(vp9_cat5_prob[2], 2);
-      ADJUST_COEF(vp9_cat5_prob[3], 1);
-      ADJUST_COEF(vp9_cat5_prob[4], 0);
-      WRITE_COEF_CONTINUE(val, CATEGORY5_TOKEN);
-    }
-    val = 0;
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[0]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[1]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[2]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[3]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[4]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[5]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[6]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[7]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[8]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[9]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[10]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[11]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[12]);
-    val = (val << 1) | vp9_read(r, vp9_cat6_prob[13]);
-    val += CAT6_MIN_VAL;
-
-    WRITE_COEF_CONTINUE(val, CATEGORY6_TOKEN);
+    v = (val * dqv) >> dq_shift;
+    dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v;
+    token_cache[scan[c]] = vp9_pt_energy_class[token];
+    ++c;
+    ctx = get_coef_context(nb, token_cache, c);
+    dqv = dq[1];
   }
 
   return c;
diff --git a/vpx/vpx_frame_buffer.h b/vpx/vpx_frame_buffer.h
index e69df4b..41038b1 100644
--- a/vpx/vpx_frame_buffer.h
+++ b/vpx/vpx_frame_buffer.h
@@ -43,15 +43,15 @@
  *
  * This callback is invoked by the decoder to retrieve data for the frame
  * buffer in order for the decode call to complete. The callback must
- * allocate at least min_size in bytes and assign it to fb->data. Then the
- * callback must set fb->size to the allocated size. The application does not
- * need to align the allocated data. The callback is triggered when the
- * decoder needs a frame buffer to decode a compressed image into. This
- * function may be called more than once for every call to vpx_codec_decode.
- * The application may set fb->priv to some data which will be passed
- * back in the ximage and the release function call. |fb| is guaranteed to
- * not be NULL. On success the callback must return 0. Any failure the
- * callback must return a value less than 0.
+ * allocate at least min_size in bytes and assign it to fb->data. The callback
+ * must zero out all the data allocated. Then the callback must set fb->size
+ * to the allocated size. The application does not need to align the allocated
+ * data. The callback is triggered when the decoder needs a frame buffer to
+ * decode a compressed image into. This function may be called more than once
+ * for every call to vpx_codec_decode. The application may set fb->priv to
+ * some data which will be passed back in the ximage and the release function
+ * call. |fb| is guaranteed to not be NULL. On success the callback must
+ * return 0. Any failure the callback must return a value less than 0.
  *
  * \param[in] priv         Callback's private data
  * \param[in] new_size     Size in bytes needed by the buffer
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 70d7ac0..475d231 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -199,11 +199,6 @@
       if (fb->data == NULL || fb->size < external_frame_size)
         return -1;
 
-      // This memset is needed for fixing valgrind error from C loop filter
-      // due to access uninitialized memory in frame border. It could be
-      // removed if border is totally removed.
-      vpx_memset(fb->data, 0, fb->size);
-
       ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
     } else if (frame_size > (size_t)ybf->buffer_alloc_sz) {
       // Allocation to hold larger frame, or first allocation.
diff --git a/vpxdec.c b/vpxdec.c
index 091522f..cf23c29 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -384,7 +384,7 @@
 
   if (ext_fb_list->ext_fb[i].size < min_size) {
     free(ext_fb_list->ext_fb[i].data);
-    ext_fb_list->ext_fb[i].data = (uint8_t *)malloc(min_size);
+    ext_fb_list->ext_fb[i].data = (uint8_t *)calloc(min_size, sizeof(uint8_t));
     if (!ext_fb_list->ext_fb[i].data)
       return -1;