Further work on ext-comp-refs for ref frame coding

(1) Work with var-refs to remove redundant bits in ref frame
    coding;
(2) Add a new uni-directional compound reference pair:
    (LAST_FRAME, LAST3_FRAME);
(3) Redesign the contexts for encoding uni-directional reference frame
    pairs;
(4) Use aom_entropy_optimizer to collect stats for all the default
    probability setups related to the coding of reference frames.

Compared against the baseline (default enabled tools, excluding ext-tx
and global-motion due to encoder speed concerns) with
one-sided-compound, the coding gain of
ext-comp-refs + var-refs - one-sided-compound is:

lowres: avg_psnr -0.385%; ovr_psnr -0.378%; ssim -0.344%
midres: avg_psnr -0.466%; ovr_psnr -0.447%; ssim -0.513%

AWCY - High Latency:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.2758 | -0.1526 | -0.0965 |  -0.2581 | -0.2492 | -0.2534 |    -0.2118

AWCY - Low Latency:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-1.0467 | -1.4500 | -0.9732 |  -0.9928 | -1.0407 | -1.0180 |    -1.0049

Compared against the baseline (default enabled tools, excluding ext-tx
and global-motion due to encoder speed concerns) without
one-sided-compound, the coding gain of
ext-comp-refs + var-refs - one-sided-compound is:

lowres: avg_psnr -0.875%; ovr_psnr -0.877%; ssim -0.895%
midres: avg_psnr -0.824%; ovr_psnr -0.802%; ssim -0.843%

Change-Id: I8de774c9a74c20632ea93ccb0c17779fa94431cb
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 7ecd203..dcf3d29 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1098,9 +1098,18 @@
       const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
                                                     ? UNIDIR_COMP_REFERENCE
                                                     : BIDIR_COMP_REFERENCE;
-
 #if USE_UNI_COMP_REFS
-      aom_write(w, comp_ref_type, av1_get_comp_reference_type_prob(cm, xd));
+#if CONFIG_VAR_REFS
+      if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm))
+        if (L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm) || BWD_AND_ALT(cm))
+#endif  // CONFIG_VAR_REFS
+          aom_write(w, comp_ref_type, av1_get_comp_reference_type_prob(cm, xd));
+#if CONFIG_VAR_REFS
+        else
+          assert(comp_ref_type == BIDIR_COMP_REFERENCE);
+      else
+        assert(comp_ref_type == UNIDIR_COMP_REFERENCE);
+#endif  // CONFIG_VAR_REFS
 #else   // !USE_UNI_COMP_REFS
       // NOTE: uni-directional comp refs disabled
       assert(comp_ref_type == BIDIR_COMP_REFERENCE);
@@ -1108,14 +1117,41 @@
 
       if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
         const int bit = mbmi->ref_frame[0] == BWDREF_FRAME;
-        aom_write(w, bit, av1_get_pred_prob_uni_comp_ref_p(cm, xd));
+#if CONFIG_VAR_REFS
+        if ((L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm)) && BWD_AND_ALT(cm))
+#endif  // CONFIG_VAR_REFS
+          aom_write(w, bit, av1_get_pred_prob_uni_comp_ref_p(cm, xd));
+
         if (!bit) {
-          const int bit1 = mbmi->ref_frame[1] == GOLDEN_FRAME;
-          aom_write(w, bit1, av1_get_pred_prob_uni_comp_ref_p1(cm, xd));
+          assert(mbmi->ref_frame[0] == LAST_FRAME);
+#if CONFIG_VAR_REFS
+          if (L_AND_L2(cm) && (L_AND_L3(cm) || L_AND_G(cm))) {
+#endif  // CONFIG_VAR_REFS
+            const int bit1 = mbmi->ref_frame[1] == LAST3_FRAME ||
+                             mbmi->ref_frame[1] == GOLDEN_FRAME;
+            aom_write(w, bit1, av1_get_pred_prob_uni_comp_ref_p1(cm, xd));
+
+            if (bit1) {
+#if CONFIG_VAR_REFS
+              if (L_AND_L3(cm) && L_AND_G(cm)) {
+#endif  // CONFIG_VAR_REFS
+                const int bit2 = mbmi->ref_frame[1] == GOLDEN_FRAME;
+                aom_write(w, bit2, av1_get_pred_prob_uni_comp_ref_p2(cm, xd));
+#if CONFIG_VAR_REFS
+              }
+#endif  // CONFIG_VAR_REFS
+            }
+#if CONFIG_VAR_REFS
+          }
+#endif  // CONFIG_VAR_REFS
+        } else {
+          assert(mbmi->ref_frame[1] == ALTREF_FRAME);
         }
 
         return;
       }
+
+      assert(comp_ref_type == BIDIR_COMP_REFERENCE);
 #endif  // CONFIG_EXT_COMP_REFS
 
 #if CONFIG_EXT_REFS
@@ -2311,7 +2347,7 @@
   m = xd->mi[0];
   if (is_inter_block(&m->mbmi)) {
 #define FRAME_TO_CHECK 1
-    if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 0) {
+    if (cm->current_video_frame == FRAME_TO_CHECK /* && cm->show_frame == 1*/) {
       const MB_MODE_INFO *const mbmi = &m->mbmi;
       const BLOCK_SIZE bsize = mbmi->sb_type;
 
@@ -4608,7 +4644,7 @@
         }
       }
     }
-#endif
+
     if (cm->reference_mode != SINGLE_REFERENCE) {
 #if CONFIG_EXT_COMP_REFS
       for (i = 0; i < COMP_REF_TYPE_CONTEXTS; i++)
@@ -4621,7 +4657,6 @@
                                     counts->uni_comp_ref[i][j], probwt);
 #endif  // CONFIG_EXT_COMP_REFS
 
-#if !CONFIG_NEW_MULTISYMBOL
       for (i = 0; i < REF_CONTEXTS; i++) {
 #if CONFIG_EXT_REFS
         for (j = 0; j < (FWD_REFS - 1); j++) {
@@ -4639,8 +4674,8 @@
         }
 #endif  // CONFIG_EXT_REFS
       }
-#endif  // CONFIG_NEW_MULTISYMBOL
     }
+#endif  // CONFIG_NEW_MULTISYMBOL
 
 #if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
     for (i = 0; i < COMP_INTER_MODE_CONTEXTS; i++)
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 32d244c..7d3d44e 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1613,17 +1613,21 @@
           // TODO(zoeliu): Temporarily turn off uni-directional comp refs
           assert(comp_ref_type == BIDIR_COMP_REFERENCE);
 #endif  // !USE_UNI_COMP_REFS
-          counts->comp_ref_type[av1_get_comp_reference_type_context(cm, xd)]
+          counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
                                [comp_ref_type]++;
 
           if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
             const int bit = (ref0 == BWDREF_FRAME);
-            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(cm, xd)][0]
+            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0]
                                 [bit]++;
             if (!bit) {
-              const int bit1 = (ref1 == GOLDEN_FRAME);
-              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(cm, xd)]
-                                  [1][bit1]++;
+              const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
+              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
+                                  [bit1]++;
+              if (bit1) {
+                counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)]
+                                    [2][ref1 == GOLDEN_FRAME]++;
+              }
             }
           } else {
 #endif  // CONFIG_EXT_COMP_REFS
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 91ee497..54f4102 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4184,7 +4184,7 @@
       map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[1]];
   const int gld_is_last2 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[1]];
   const int gld_is_last3 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[2]];
-#else
+#else  // !CONFIG_ONE_SIDED_COMPOUND
   const int bwd_is_last = map[cpi->bwd_fb_idx] == map[cpi->lst_fb_idxes[0]];
   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]];
 
@@ -4198,12 +4198,12 @@
 
   const int bwd_is_gld = map[cpi->bwd_fb_idx] == map[cpi->gld_fb_idx];
 
-#endif
+#endif  // CONFIG_ONE_SIDED_COMPOUND
   const int last2_is_alt = map[cpi->lst_fb_idxes[1]] == map[cpi->alt_fb_idx];
   const int last3_is_alt = map[cpi->lst_fb_idxes[2]] == map[cpi->alt_fb_idx];
   const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
   const int bwd_is_alt = map[cpi->bwd_fb_idx] == map[cpi->alt_fb_idx];
-#else
+#else   // !CONFIG_EXT_REFS
   const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
   const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
@@ -4229,12 +4229,7 @@
 
   if (last3_is_last || last3_is_last2 || last3_is_alt) flags &= ~AOM_LAST3_FLAG;
 
-#if CONFIG_EXT_COMP_REFS
-  if (gld_is_last2) flags &= ~AOM_GOLD_FLAG;
-  if (gld_is_last3) flags &= ~AOM_LAST3_FLAG;
-#else
   if (gld_is_last2 || gld_is_last3) flags &= ~AOM_GOLD_FLAG;
-#endif  // CONFIG_EXT_COMP_REFS
 
 #if CONFIG_ONE_SIDED_COMPOUND  // Changes LL & HL bitstream
   /* Allow biprediction between two identical frames (e.g. bwd_is_last = 1) */
@@ -4380,11 +4375,14 @@
   }
   printf(
       "\nFrame=%5d, encode_update_type[%5d]=%1d, show_existing_frame=%d, "
-      "y_stride=%4d, uv_stride=%4d, width=%4d, height=%4d\n",
+      "source_alt_ref_active=%d, refresh_alt_ref_frame=%d, rf_level=%d, "
+      "y_stride=%4d, uv_stride=%4d, cm->width=%4d, cm->height=%4d\n",
       cm->current_video_frame, cpi->twopass.gf_group.index,
       cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
-      cm->show_existing_frame, recon_buf->y_stride, recon_buf->uv_stride,
-      cm->width, cm->height);
+      cm->show_existing_frame, cpi->rc.source_alt_ref_active,
+      cpi->refresh_alt_ref_frame,
+      cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index],
+      recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height);
 
   // --- Y ---
   for (h = 0; h < cm->height; ++h) {
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 2fafa4d..1c9ed43 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -1061,6 +1061,7 @@
 
 #if CONFIG_EXT_COMP_REFS
   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] += 1000;
+  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] += 1000;
   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] += 1000;
   rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] += 1000;
 #endif  // CONFIG_EXT_COMP_REFS
@@ -1081,6 +1082,7 @@
   rd->thresh_mult[THR_COMP_NEARESTGB] += 1000;
 #if CONFIG_EXT_COMP_REFS
   rd->thresh_mult[THR_COMP_NEARESTLL2] += 1000;
+  rd->thresh_mult[THR_COMP_NEARESTLL3] += 1000;
   rd->thresh_mult[THR_COMP_NEARESTLG] += 1000;
   rd->thresh_mult[THR_COMP_NEARESTBA] += 1000;
 #endif  // CONFIG_EXT_COMP_REFS
@@ -1158,8 +1160,6 @@
   rd->thresh_mult[THR_COMP_ZERO_ZEROGB] += 2500;
 
 #if CONFIG_EXT_COMP_REFS
-  rd->thresh_mult[THR_COMP_NEAREST_NEARLL2] += 1200;
-  rd->thresh_mult[THR_COMP_NEAR_NEARESTLL2] += 1200;
   rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1200;
   rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 1500;
   rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] += 1500;
@@ -1168,8 +1168,14 @@
   rd->thresh_mult[THR_COMP_NEW_NEWLL2] += 2000;
   rd->thresh_mult[THR_COMP_ZERO_ZEROLL2] += 2500;
 
-  rd->thresh_mult[THR_COMP_NEAREST_NEARLG] += 1200;
-  rd->thresh_mult[THR_COMP_NEAR_NEARESTLG] += 1200;
+  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] += 1200;
+  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] += 1500;
+  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 1500;
+  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] += 1700;
+  rd->thresh_mult[THR_COMP_NEW_NEARLL3] += 1700;
+  rd->thresh_mult[THR_COMP_NEW_NEWLL3] += 2000;
+  rd->thresh_mult[THR_COMP_ZERO_ZEROLL3] += 2500;
+
   rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1200;
   rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 1500;
   rd->thresh_mult[THR_COMP_NEW_NEARESTLG] += 1500;
@@ -1178,8 +1184,6 @@
   rd->thresh_mult[THR_COMP_NEW_NEWLG] += 2000;
   rd->thresh_mult[THR_COMP_ZERO_ZEROLG] += 2500;
 
-  rd->thresh_mult[THR_COMP_NEAREST_NEARBA] += 1200;
-  rd->thresh_mult[THR_COMP_NEAR_NEARESTBA] += 1200;
   rd->thresh_mult[THR_COMP_NEAR_NEARBA] += 1200;
   rd->thresh_mult[THR_COMP_NEAREST_NEWBA] += 1500;
   rd->thresh_mult[THR_COMP_NEW_NEARESTBA] += 1500;
@@ -1216,6 +1220,8 @@
 #if CONFIG_EXT_COMP_REFS
   rd->thresh_mult[THR_COMP_NEARLL2] += 1500;
   rd->thresh_mult[THR_COMP_NEWLL2] += 2000;
+  rd->thresh_mult[THR_COMP_NEARLL3] += 1500;
+  rd->thresh_mult[THR_COMP_NEWLL3] += 2000;
   rd->thresh_mult[THR_COMP_NEARLG] += 1500;
   rd->thresh_mult[THR_COMP_NEWLG] += 2000;
   rd->thresh_mult[THR_COMP_NEARBA] += 1500;
@@ -1238,6 +1244,7 @@
 
 #if CONFIG_EXT_COMP_REFS
   rd->thresh_mult[THR_COMP_ZEROLL2] += 2500;
+  rd->thresh_mult[THR_COMP_ZEROLL3] += 2500;
   rd->thresh_mult[THR_COMP_ZEROLG] += 2500;
   rd->thresh_mult[THR_COMP_ZEROBA] += 2500;
 #endif  // CONFIG_EXT_COMP_REFS
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index c9de1cd..efa6450 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -158,6 +158,7 @@
   THR_COMP_NEAREST_NEARESTGB,
 #if CONFIG_EXT_COMP_REFS
   THR_COMP_NEAREST_NEARESTLL2,
+  THR_COMP_NEAREST_NEARESTLL3,
   THR_COMP_NEAREST_NEARESTLG,
   THR_COMP_NEAREST_NEARESTBA,
 #endif  // CONFIG_EXT_COMP_REFS
@@ -178,6 +179,7 @@
   THR_COMP_NEARESTGB,
 #if CONFIG_EXT_COMP_REFS
   THR_COMP_NEARESTLL2,
+  THR_COMP_NEARESTLL3,
   THR_COMP_NEARESTLG,
   THR_COMP_NEARESTBA,
 #endif  // CONFIG_EXT_COMP_REFS
@@ -265,8 +267,6 @@
   THR_COMP_ZERO_ZEROGB,
 
 #if CONFIG_EXT_COMP_REFS
-  THR_COMP_NEAR_NEARESTLL2,
-  THR_COMP_NEAREST_NEARLL2,
   THR_COMP_NEAR_NEARLL2,
   THR_COMP_NEW_NEARESTLL2,
   THR_COMP_NEAREST_NEWLL2,
@@ -275,8 +275,14 @@
   THR_COMP_NEW_NEWLL2,
   THR_COMP_ZERO_ZEROLL2,
 
-  THR_COMP_NEAR_NEARESTLG,
-  THR_COMP_NEAREST_NEARLG,
+  THR_COMP_NEAR_NEARLL3,
+  THR_COMP_NEW_NEARESTLL3,
+  THR_COMP_NEAREST_NEWLL3,
+  THR_COMP_NEW_NEARLL3,
+  THR_COMP_NEAR_NEWLL3,
+  THR_COMP_NEW_NEWLL3,
+  THR_COMP_ZERO_ZEROLL3,
+
   THR_COMP_NEAR_NEARLG,
   THR_COMP_NEW_NEARESTLG,
   THR_COMP_NEAREST_NEWLG,
@@ -285,8 +291,6 @@
   THR_COMP_NEW_NEWLG,
   THR_COMP_ZERO_ZEROLG,
 
-  THR_COMP_NEAR_NEARESTBA,
-  THR_COMP_NEAREST_NEARBA,
   THR_COMP_NEAR_NEARBA,
   THR_COMP_NEW_NEARESTBA,
   THR_COMP_NEAREST_NEWBA,
@@ -323,6 +327,8 @@
 #if CONFIG_EXT_COMP_REFS
   THR_COMP_NEARLL2,
   THR_COMP_NEWLL2,
+  THR_COMP_NEARLL3,
+  THR_COMP_NEWLL3,
   THR_COMP_NEARLG,
   THR_COMP_NEWLG,
   THR_COMP_NEARBA,
@@ -345,6 +351,7 @@
 
 #if CONFIG_EXT_COMP_REFS
   THR_COMP_ZEROLL2,
+  THR_COMP_ZEROLL3,
   THR_COMP_ZEROLG,
   THR_COMP_ZEROBA,
 #endif  // CONFIG_EXT_COMP_REFS
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 90fc215..b3e517e 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -266,6 +266,7 @@
 
 #if CONFIG_EXT_COMP_REFS
   { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
+  { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
   { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
   { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
 #endif  // CONFIG_EXT_COMP_REFS
@@ -287,6 +288,7 @@
 
 #if CONFIG_EXT_COMP_REFS
   { NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
+  { NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
   { NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
   { NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
 #endif  // CONFIG_EXT_COMP_REFS
@@ -380,6 +382,14 @@
   { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
   { ZERO_ZEROMV, { LAST_FRAME, LAST2_FRAME } },
 
+  { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
+  { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
+  { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+  { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
+  { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+  { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+  { ZERO_ZEROMV, { LAST_FRAME, LAST3_FRAME } },
+
   { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
   { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
   { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
@@ -424,6 +434,8 @@
 #if CONFIG_EXT_COMP_REFS
   { NEARMV, { LAST_FRAME, LAST2_FRAME } },
   { NEWMV, { LAST_FRAME, LAST2_FRAME } },
+  { NEARMV, { LAST_FRAME, LAST3_FRAME } },
+  { NEWMV, { LAST_FRAME, LAST3_FRAME } },
   { NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
   { NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
   { NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
@@ -446,6 +458,7 @@
 
 #if CONFIG_EXT_COMP_REFS
   { ZEROMV, { LAST_FRAME, LAST2_FRAME } },
+  { ZEROMV, { LAST_FRAME, LAST3_FRAME } },
   { ZEROMV, { LAST_FRAME, GOLDEN_FRAME } },
   { ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } },
 #endif  // CONFIG_EXT_COMP_REFS
@@ -6054,62 +6067,26 @@
                       ref_costs_single[ALTREF_FRAME] = base_cost;
 
 #if CONFIG_EXT_REFS
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (L,L2,L3,G) vs (BWD,ALT) branch node in
-      // tree
-      if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
-        ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
-        ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
-        ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
-        ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
-        ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
+      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
+      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
+      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
+      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
+      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
 
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (L,L2) vs (L3,G) branch node in tree
-      if (L_OR_L2(cm) && L3_OR_G(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
-        ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
-        ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
-        ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
+      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
+      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
+      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);
 
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (BWD) vs (ALT) branch node in tree
-      if (BWD_AND_ALT(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
-        ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
+      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
 
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (L) vs (L2) branch node in tree
-      if (L_AND_L2(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
-        ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
+      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);
 
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (L3) vs (G) branch node in tree
-      if (L3_AND_G(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
-        ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
+      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
 #else   // !CONFIG_EXT_REFS
       ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
       ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
@@ -6176,13 +6153,19 @@
 
       aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd);
       aom_prob uni_comp_ref_p1 = av1_get_pred_prob_uni_comp_ref_p1(cm, xd);
+      aom_prob uni_comp_ref_p2 = av1_get_pred_prob_uni_comp_ref_p2(cm, xd);
 
       ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
           base_cost + av1_cost_bit(comp_ref_type_p, 0) +
           av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 0);
+      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
+          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
+          av1_cost_bit(uni_comp_ref_p2, 0);
       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
           base_cost + av1_cost_bit(comp_ref_type_p, 0) +
-          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1);
+          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
+          av1_cost_bit(uni_comp_ref_p2, 1);
 
       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
           base_cost + av1_cost_bit(comp_ref_type_p, 0) +
@@ -6201,49 +6184,21 @@
 #endif  // CONFIG_EXT_REFS
 
 #if CONFIG_EXT_REFS
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (L,L2) vs (L3,G) branch node in tree
-      if (L_OR_L2(cm) && L3_OR_G(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
-        ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
-        ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
-        ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
+      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
+      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
+      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
 
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (L) vs (L2) branch node in tree
-      if (L_AND_L2(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
-        ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
+      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);
 
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (L3) vs (G) branch node in tree
-      if (L3_AND_G(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
-        ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
+      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);
 
-// NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
-//               more bit.
-#if CONFIG_VAR_REFS
-      // Test need to explicitly code (BWD) vs (ALT) branch node in tree
-      if (BWD_AND_ALT(cm)) {
-#endif  // CONFIG_VAR_REFS
-        ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
-        ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
-#if CONFIG_VAR_REFS
-      }
-#endif  // CONFIG_VAR_REFS
+      // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
+      //               more bit.
+      ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
+      ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
 #else   // !CONFIG_EXT_REFS
       ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
       ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
@@ -6257,6 +6212,7 @@
         ref_costs_comp[ref0][ALTREF_FRAME] = 512;
       }
       ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
+      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
 #else  // !CONFIG_EXT_COMP_REFS