Group struct members by type for better data packing

Reorder the members of MB_MODE_INFO, FILTER_INTRA_MODE_INFO,
INTERINTER_COMPOUND_DATA, MB_MODE_INFO_EXT, PICK_MODE_CONTEXT,
PC_TREE_STATS and PC_TREE, grouping them by type to improve data
packing.

A reduction in memory footprint was observed, with no impact on
encoder/decoder speed.

stream                   cpu-used  encoder   decoder
BasketballDrill_832x480     1      ~0.42%    ~0.75%
parkrun_720p50              3      ~0.37%    ~0.99%

Change-Id: I1ff67dac462fc4248f9a45306771258d55c5a946
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index d6727b8..14dfd80 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -164,8 +164,8 @@
 } PALETTE_MODE_INFO;
 
 typedef struct {
-  uint8_t use_filter_intra;
   FILTER_INTRA_MODE filter_intra_mode;
+  uint8_t use_filter_intra;
 } FILTER_INTRA_MODE_INFO;
 
 static const PREDICTION_MODE fimode_to_intradir[FILTER_INTRA_MODES] = {
@@ -208,8 +208,8 @@
   int wedge_index;
   int wedge_sign;
   DIFFWTD_MASK_TYPE mask_type;
-  uint8_t *seg_mask;
   COMPOUND_TYPE type;
+  uint8_t *seg_mask;
 } INTERINTER_COMPOUND_DATA;
 
 #define INTER_TX_SIZE_BUF_LEN 16
@@ -219,30 +219,16 @@
   // Common for both INTER and INTRA blocks
   BLOCK_SIZE sb_type;
   PREDICTION_MODE mode;
-  TX_SIZE tx_size;
-  uint8_t inter_tx_size[INTER_TX_SIZE_BUF_LEN];
-  int8_t skip;
-  int8_t skip_mode;
-  int8_t segment_id;
-  int8_t seg_id_predicted;  // valid only when temporal_update is enabled
-
   // Only for INTRA blocks
   UV_PREDICTION_MODE uv_mode;
 
-  PALETTE_MODE_INFO palette_mode_info;
-  uint8_t use_intrabc;
-
   // Only for INTER blocks
   InterpFilters interp_filters;
-  MV_REFERENCE_FRAME ref_frame[2];
 
   TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
 
   FILTER_INTRA_MODE_INFO filter_intra_mode_info;
 
-  // The actual prediction angle is the base angle + (angle_delta * step).
-  int8_t angle_delta[PLANE_TYPES];
-
   // interintra members
   INTERINTRA_MODE interintra_mode;
   // TODO(debargha): Consolidate these flags
@@ -254,10 +240,7 @@
   MOTION_MODE motion_mode;
   int overlappable_neighbors[2];
   int_mv mv[2];
-  uint8_t ref_mv_idx;
   PARTITION_TYPE partition;
-  /* deringing gain *per-superblock* */
-  int8_t cdef_strength;
   int current_qindex;
   int delta_lf_from_base;
   int delta_lf[FRAME_LF_COUNT];
@@ -276,6 +259,20 @@
 
   int compound_idx;
   int comp_group_idx;
+  PALETTE_MODE_INFO palette_mode_info;
+  MV_REFERENCE_FRAME ref_frame[2];
+  int8_t skip;
+  int8_t skip_mode;
+  uint8_t inter_tx_size[INTER_TX_SIZE_BUF_LEN];
+  TX_SIZE tx_size;
+  int8_t segment_id;
+  int8_t seg_id_predicted;  // valid only when temporal_update is enabled
+  uint8_t use_intrabc;
+  // The actual prediction angle is the base angle + (angle_delta * step).
+  int8_t angle_delta[PLANE_TYPES];
+  /* deringing gain *per-superblock* */
+  int8_t cdef_strength;
+  uint8_t ref_mv_idx;
 } MB_MODE_INFO;
 
 static INLINE int is_intrabc_block(const MB_MODE_INFO *mbmi) {
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 1b04519..0471de5 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -74,16 +74,16 @@
 } CB_COEFF_BUFFER;
 
 typedef struct {
-  int16_t mode_context[MODE_CTX_REF_FRAMES];
   // TODO(angiebird): Reduce the buffer size according to sb_type
   tran_low_t *tcoeff[MAX_MB_PLANE];
   uint16_t *eobs[MAX_MB_PLANE];
   uint8_t *txb_skip_ctx[MAX_MB_PLANE];
   int *dc_sign_ctx[MAX_MB_PLANE];
-  uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
   CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
   int_mv global_mvs[REF_FRAMES];
   int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
+  int16_t mode_context[MODE_CTX_REF_FRAMES];
+  uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
 } MB_MODE_INFO_EXT;
 
 typedef struct {
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index cde3f2b..7adb83b 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -38,6 +38,8 @@
 typedef struct {
   MB_MODE_INFO mic;
   MB_MODE_INFO_EXT mbmi_ext;
+  int64_t dist;
+  int64_t rdcost;
   uint8_t *color_index_map[2];
   uint8_t *blk_skip;
 
@@ -62,11 +64,10 @@
   // TODO(jingning) Use RD_COST struct here instead. This involves a boarder
   // scope of refactoring.
   int rate;
-  int64_t dist;
-  int64_t rdcost;
+
   int rd_mode_is_ready;  // Flag to indicate whether rd pick mode decision has
                          // been made.
-
+  int mode_selected;
 #if CONFIG_ONE_PASS_SVM
   // Features for one pass svm early term
   int seg_feat;
@@ -86,21 +87,19 @@
   // mode as well as the mode of GLOBALMV, more ref/mode combos could be
   // skipped.
   MV_REFERENCE_FRAME ref_selected[2];
-  int mode_selected;
 } PICK_MODE_CONTEXT;
 
 typedef struct {
+  int64_t rdcost;
+  int64_t sub_block_rdcost[4];
   int valid;
   int split;
-  int skip;
-  int64_t rdcost;
   int sub_block_split[4];
   int sub_block_skip[4];
-  int64_t sub_block_rdcost[4];
+  int skip;
 } PC_TREE_STATS;
 
 typedef struct PC_TREE {
-  int index;
   PARTITION_TYPE partitioning;
   BLOCK_SIZE block_size;
   PICK_MODE_CONTEXT none;
@@ -112,9 +111,10 @@
   PICK_MODE_CONTEXT verticalb[3];
   PICK_MODE_CONTEXT horizontal4[4];
   PICK_MODE_CONTEXT vertical4[4];
-  CB_TREE_SEARCH cb_search_range;
   struct PC_TREE *split[4];
   PC_TREE_STATS pc_tree_stats;
+  CB_TREE_SEARCH cb_search_range;
+  int index;
 } PC_TREE;
 
 void av1_setup_pc_tree(struct AV1Common *cm, struct ThreadData *td);