Allocate four |zcoeff_blk| for sub8x8 contexts.

The RD and load save/code grabs it as groups of four. In practice there
is no change to physical allocations becaquse this is backed by a 16-byte
memalign.

Change-Id: I01e89769872300e23227e03dd24a6e229f482025
diff --git a/vp9/encoder/vp9_context_tree.c b/vp9/encoder/vp9_context_tree.c
index 7945ee0..fbff9d7 100644
--- a/vp9/encoder/vp9_context_tree.c
+++ b/vp9/encoder/vp9_context_tree.c
@@ -26,7 +26,7 @@
   ctx->num_4x4_blk = num_blk;
 
   CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
-                  vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+                  vpx_calloc(num_blk, sizeof(uint8_t)));
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     for (k = 0; k < 3; ++k) {
       CHECK_MEM_ERROR(cm, ctx->coeff[i][k],