Further work on ext-comp-refs for ref frame coding

(1) Work with var-refs to remove redundant bits in ref frame
    coding;
(2) Add a new uni-directional compound reference pair:
    (LAST_FRAME, LAST3_FRAME);
(3) Redesign the contexts for encoding uni-directional reference frame
    pairs;
(4) Use aom_entropy_optimizer to collect stats for all the default
    probability setups related to the coding of reference frames.

Compared against the baseline (default enabled tools excluding ext-tx
and global-motion for encoder speed concern) with one-sided-compound,
the coding gain of ext-comp-refs + var-refs - one-sided-compound is:

lowres: avg_psnr -0.385%; ovr_psnr -0.378% ssim -0.344%
midres: avg_psnr -0.466%; ovr_psnr -0.447% ssim -0.513%

AWCY - High Latency:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.2758 | -0.1526 | -0.0965 |  -0.2581 | -0.2492 | -0.2534 |    -0.2118

AWCY - Low Latency:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-1.0467 | -1.4500 | -0.9732 |  -0.9928 | -1.0407 | -1.0180 |    -1.0049

Compared against the baseline (default enabled tools excluding ext-tx
and global-motion for encoder speed concern) without
one-sided-compound, the coding gain of
ext-comp-refs + var-refs - one-sided-compound is:

lowres: avg_psnr -0.875%; ovr_psnr -0.877% ssim -0.895%
midres: avg_psnr -0.824%; ovr_psnr -0.802% ssim -0.843%

Change-Id: I8de774c9a74c20632ea93ccb0c17779fa94431cb
diff --git a/av1/common/pred_common.c b/av1/common/pred_common.c
index 01e7f41..4b98a8c 100644
--- a/av1/common/pred_common.c
+++ b/av1/common/pred_common.c
@@ -263,6 +263,9 @@
 #define IS_BACKWARD_REF_FRAME(ref_frame) ((ref_frame) == cm->comp_fixed_ref)
 #endif  // CONFIG_EXT_REFS
 
+#define CHECK_GOLDEN_OR_LAST3(ref_frame) \
+  (((ref_frame) == GOLDEN_FRAME) || ((ref_frame) == LAST3_FRAME))
+
 int av1_get_reference_mode_context(const AV1_COMMON *cm,
                                    const MACROBLOCKD *xd) {
   int ctx;
@@ -313,16 +316,15 @@
 #if CONFIG_EXT_COMP_REFS
 #define CHECK_BWDREF_OR_ALTREF(ref_frame) \
   ((ref_frame) == BWDREF_FRAME || (ref_frame) == ALTREF_FRAME)
-int av1_get_comp_reference_type_context(const AV1_COMMON *cm,
-                                        const MACROBLOCKD *xd) {
+// TODO(zoeliu): To try on the design of 3 contexts, instead of 5:
+//               COMP_REF_TYPE_CONTEXTS = 3
+int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) {
   int pred_context;
   const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
   const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
   const int above_in_image = xd->up_available;
   const int left_in_image = xd->left_available;
 
-  (void)cm;
-
   if (above_in_image && left_in_image) {  // both edges available
     const int above_intra = !is_inter_block(above_mbmi);
     const int left_intra = !is_inter_block(left_mbmi);
@@ -388,197 +390,165 @@
 }
 
 // Returns a context number for the given MB prediction signal
-// Signal the uni-directional compound reference frame pair as
-// either (BWDREF, ALTREF), or (LAST, LAST2)/(LAST, GOLDEN),
+//
+// Signal the uni-directional compound reference frame pair as either
+// (BWDREF, ALTREF), or (LAST, LAST2) / (LAST, LAST3) / (LAST, GOLDEN),
 // conditioning on the pair is known as uni-directional.
-int av1_get_pred_context_uni_comp_ref_p(const AV1_COMMON *cm,
-                                        const MACROBLOCKD *xd) {
+//
+// 3 contexts: Voting is used to compare the count of forward references with
+//             that of backward references from the spatial neighbors.
+int av1_get_pred_context_uni_comp_ref_p(const MACROBLOCKD *xd) {
   int pred_context;
   const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
   const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
   const int above_in_image = xd->up_available;
   const int left_in_image = xd->left_available;
 
-  (void)cm;
+  // Count of forward references (L, L2, L3, or G)
+  int frf_count = 0;
+  // Count of backward references (B or A)
+  int brf_count = 0;
 
-  if (above_in_image && left_in_image) {  // both edges available
-    const int above_intra = !is_inter_block(above_mbmi);
-    const int left_intra = !is_inter_block(left_mbmi);
-
-    if (above_intra && left_intra) {  // intra/intra
-      pred_context = 2;
-    } else if (above_intra || left_intra) {  // intra/inter
-      const MB_MODE_INFO *inter_mbmi = above_intra ? left_mbmi : above_mbmi;
-
-      if (!has_second_ref(inter_mbmi)) {  // single pred
-        pred_context =
-            1 + 2 * (!CHECK_BWDREF_OR_ALTREF(inter_mbmi->ref_frame[0]));
-      } else {                              // comp pred
-        if (has_uni_comp_refs(inter_mbmi))  // comp unidir
-          pred_context = 4 * (inter_mbmi->ref_frame[0] != BWDREF_FRAME);
-        else  // comp_bidir
-          pred_context = 2;
-      }
-    } else {  // inter/inter
-      const int a_sg = !has_second_ref(above_mbmi);
-      const int l_sg = !has_second_ref(left_mbmi);
-      const MV_REFERENCE_FRAME frfa = above_mbmi->ref_frame[0];
-      const MV_REFERENCE_FRAME frfl = left_mbmi->ref_frame[0];
-
-      if (CHECK_BWDREF_OR_ALTREF(frfa) && CHECK_BWDREF_OR_ALTREF(frfl)) {
-        pred_context = 0;
-      } else if (a_sg && l_sg) {  // single/single
-        pred_context = 2 + (!CHECK_BWDREF_OR_ALTREF(frfa) &&
-                            !CHECK_BWDREF_OR_ALTREF(frfl));
-      } else if (l_sg || a_sg) {  // single/comp
-        const MV_REFERENCE_FRAME frfc = a_sg ? frfl : frfa;
-        const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;
-        const int uni_rfc =
-            a_sg ? has_uni_comp_refs(left_mbmi) : has_uni_comp_refs(above_mbmi);
-
-        if (uni_rfc && (frfc == BWDREF_FRAME))
-          pred_context = 1;
-        else if (uni_rfc && (frfc == LAST_FRAME))
-          pred_context = 3 + (!CHECK_BWDREF_OR_ALTREF(rfs));
-        else if (CHECK_BWDREF_OR_ALTREF(rfs))
-          pred_context = 2;
-        else
-          pred_context = 3;
-      } else {  // comp/comp
-        const int a_uni_rfc = has_uni_comp_refs(above_mbmi);
-        const int l_uni_rfc = has_uni_comp_refs(left_mbmi);
-
-        if (a_uni_rfc && l_uni_rfc)
-          pred_context = 2 + 2 * (frfa != BWDREF_FRAME && frfl != BWDREF_FRAME);
-        else if (a_uni_rfc || l_uni_rfc)
-          pred_context = 1 + 2 * (frfa != BWDREF_FRAME && frfl != BWDREF_FRAME);
-        else
-          pred_context = 2;
-      }
+  if (above_in_image && is_inter_block(above_mbmi)) {
+    if (above_mbmi->ref_frame[0] <= GOLDEN_FRAME)
+      ++frf_count;
+    else
+      ++brf_count;
+    if (has_second_ref(above_mbmi)) {
+      if (above_mbmi->ref_frame[1] <= GOLDEN_FRAME)
+        ++frf_count;
+      else
+        ++brf_count;
     }
-  } else if (above_in_image || left_in_image) {  // one edge available
-    const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
-
-    if (!is_inter_block(edge_mbmi)) {  // intra
-      pred_context = 2;
-    } else {                             // inter
-      if (!has_second_ref(edge_mbmi)) {  // single pred
-        pred_context =
-            1 + 2 * (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]));
-      } else {                             // comp pred
-        if (has_uni_comp_refs(edge_mbmi))  // comp unidir
-          pred_context = 4 * (edge_mbmi->ref_frame[0] != BWDREF_FRAME);
-        else  // comp bidir
-          pred_context = 2;
-      }
-    }
-  } else {  // no edges available
-    pred_context = 2;
   }
 
+  if (left_in_image && is_inter_block(left_mbmi)) {
+    if (left_mbmi->ref_frame[0] <= GOLDEN_FRAME)
+      ++frf_count;
+    else
+      ++brf_count;
+    if (has_second_ref(left_mbmi)) {
+      if (left_mbmi->ref_frame[1] <= GOLDEN_FRAME)
+        ++frf_count;
+      else
+        ++brf_count;
+    }
+  }
+
+  pred_context =
+      (frf_count == brf_count) ? 1 : ((frf_count < brf_count) ? 0 : 2);
+
   assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
   return pred_context;
 }
 
 // Returns a context number for the given MB prediction signal
+//
 // Signal the uni-directional compound reference frame pair as
-// either (BWDREF, ALTREF), or (LAST, LAST2)/(LAST, GOLDEN),
-// conditioning on the pair is known as uni-directional.
-#define CHECK_LAST2_OR_GOLDEN(ref_frame) \
-  ((ref_frame) == LAST2_FRAME || (ref_frame) == GOLDEN_FRAME)
-int av1_get_pred_context_uni_comp_ref_p1(const AV1_COMMON *cm,
-                                         const MACROBLOCKD *xd) {
+// either (LAST, LAST2), or (LAST, LAST3) / (LAST, GOLDEN),
+// conditioning on the pair is known as one of the above three.
+//
+// 3 contexts: Voting is used to compare the count of LAST2_FRAME with the
+//             total count of LAST3/GOLDEN from the spatial neighbors.
+int av1_get_pred_context_uni_comp_ref_p1(const MACROBLOCKD *xd) {
   int pred_context;
   const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
   const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
   const int above_in_image = xd->up_available;
   const int left_in_image = xd->left_available;
 
-  (void)cm;
+  // Count of LAST2
+  int last2_count = 0;
+  // Count of LAST3 or GOLDEN
+  int last3_or_gld_count = 0;
 
-  if (above_in_image && left_in_image) {  // both edges available
-    const int above_intra = !is_inter_block(above_mbmi);
-    const int left_intra = !is_inter_block(left_mbmi);
-
-    if (above_intra && left_intra) {  // intra/intra
-      pred_context = 2;
-    } else if (above_intra || left_intra) {  // intra/inter
-      const MB_MODE_INFO *inter_mbmi = above_intra ? left_mbmi : above_mbmi;
-
-      if (!has_second_ref(inter_mbmi)) {  // single pred
-        if (CHECK_LAST2_OR_GOLDEN(inter_mbmi->ref_frame[0]))
-          pred_context = 1 + 2 * (inter_mbmi->ref_frame[0] == LAST2_FRAME);
-        else
-          pred_context = 2;
-      } else {  // comp pred
-        if (CHECK_LAST2_OR_GOLDEN(inter_mbmi->ref_frame[1]))
-          pred_context = 4 * (inter_mbmi->ref_frame[1] == LAST2_FRAME);
-        else
-          pred_context = 2;
-      }
-    } else {  // inter/inter
-      const int a_sg = !has_second_ref(above_mbmi);
-      const int l_sg = !has_second_ref(left_mbmi);
-      const MV_REFERENCE_FRAME frfa = above_mbmi->ref_frame[0];
-      const MV_REFERENCE_FRAME frfl = left_mbmi->ref_frame[0];
-      const MV_REFERENCE_FRAME brfa = above_mbmi->ref_frame[1];
-      const MV_REFERENCE_FRAME brfl = left_mbmi->ref_frame[1];
-
-      if (a_sg && l_sg) {  // single/single
-        if (CHECK_LAST2_OR_GOLDEN(frfa) && (frfa == frfl || frfl == LAST_FRAME))
-          pred_context = 1 + 2 * (frfa == LAST2_FRAME);
-        else if (frfa == LAST_FRAME && CHECK_LAST2_OR_GOLDEN(frfl))
-          pred_context = 1 + 2 * (frfl == LAST2_FRAME);
-        else
-          pred_context = 2;
-      } else if (l_sg || a_sg) {  // single/comp
-        const MV_REFERENCE_FRAME brfc = a_sg ? brfl : brfa;
-
-        if (CHECK_LAST2_OR_GOLDEN(brfc))
-          pred_context = 4 * (brfc == LAST2_FRAME);
-        if (CHECK_LAST2_OR_GOLDEN(frfa) && (frfa == frfl || frfl == LAST_FRAME))
-          pred_context = 1 + 2 * (frfa == LAST2_FRAME);
-        else if (frfa == LAST_FRAME && CHECK_LAST2_OR_GOLDEN(frfl))
-          pred_context = 1 + 2 * (frfl == LAST2_FRAME);
-        else
-          pred_context = 2;
-      } else {  // comp/comp
-        if (CHECK_LAST2_OR_GOLDEN(brfa) &&
-            (brfa == brfl || !CHECK_LAST2_OR_GOLDEN(brfl)))
-          pred_context = 4 * (brfa == LAST2_FRAME);
-        else if (!CHECK_LAST2_OR_GOLDEN(brfa) && CHECK_LAST2_OR_GOLDEN(brfl))
-          pred_context = 4 * (brfl == LAST2_FRAME);
-        else if (CHECK_LAST2_OR_GOLDEN(frfa) &&
-                 (frfa == frfl || frfl == LAST_FRAME))
-          pred_context = 1 + 2 * (frfa == LAST2_FRAME);
-        else if (frfa == LAST_FRAME && CHECK_LAST2_OR_GOLDEN(frfl))
-          pred_context = 1 + 2 * (frfl == LAST2_FRAME);
-        else
-          pred_context = 2;
-      }
+  if (above_in_image && is_inter_block(above_mbmi)) {
+    last2_count = (above_mbmi->ref_frame[0] == LAST2_FRAME) ? last2_count + 1
+                                                            : last2_count;
+    last3_or_gld_count = CHECK_GOLDEN_OR_LAST3(above_mbmi->ref_frame[0])
+                             ? last3_or_gld_count + 1
+                             : last3_or_gld_count;
+    if (has_second_ref(above_mbmi)) {
+      last2_count = (above_mbmi->ref_frame[1] == LAST2_FRAME) ? last2_count + 1
+                                                              : last2_count;
+      last3_or_gld_count = CHECK_GOLDEN_OR_LAST3(above_mbmi->ref_frame[1])
+                               ? last3_or_gld_count + 1
+                               : last3_or_gld_count;
     }
-  } else if (above_in_image || left_in_image) {  // one edge available
-    const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
-
-    if (!is_inter_block(edge_mbmi)) {  // intra
-      pred_context = 2;
-    } else {                             // inter
-      if (!has_second_ref(edge_mbmi)) {  // single pred
-        if (CHECK_LAST2_OR_GOLDEN(edge_mbmi->ref_frame[0]))
-          pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == LAST2_FRAME);
-        else
-          pred_context = 2;
-      } else {  // comp pred
-        if (CHECK_LAST2_OR_GOLDEN(edge_mbmi->ref_frame[1]))
-          pred_context = 4 * (edge_mbmi->ref_frame[1] == LAST2_FRAME);
-        else
-          pred_context = 2;
-      }
-    }
-  } else {  // no edges available
-    pred_context = 2;
   }
 
+  if (left_in_image && is_inter_block(left_mbmi)) {
+    last2_count = (left_mbmi->ref_frame[0] == LAST2_FRAME) ? last2_count + 1
+                                                           : last2_count;
+    last3_or_gld_count = CHECK_GOLDEN_OR_LAST3(left_mbmi->ref_frame[0])
+                             ? last3_or_gld_count + 1
+                             : last3_or_gld_count;
+    if (has_second_ref(left_mbmi)) {
+      last2_count = (left_mbmi->ref_frame[1] == LAST2_FRAME) ? last2_count + 1
+                                                             : last2_count;
+      last3_or_gld_count = CHECK_GOLDEN_OR_LAST3(left_mbmi->ref_frame[1])
+                               ? last3_or_gld_count + 1
+                               : last3_or_gld_count;
+    }
+  }
+
+  pred_context = (last2_count == last3_or_gld_count)
+                     ? 1
+                     : ((last2_count < last3_or_gld_count) ? 0 : 2);
+
+  assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
+  return pred_context;
+}
+
+// Returns a context number for the given MB prediction signal
+//
+// Signal the uni-directional compound reference frame pair as
+// either (LAST, LAST3) or (LAST, GOLDEN),
+// conditioning on the pair is known as one of the above two.
+//
+// 3 contexts: Voting is used to compare the count of LAST3_FRAME with the
+//             total count of GOLDEN_FRAME from the spatial neighbors.
+int av1_get_pred_context_uni_comp_ref_p2(const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int above_in_image = xd->up_available;
+  const int left_in_image = xd->left_available;
+
+  // Count of LAST3
+  int last3_count = 0;
+  // Count of GOLDEN
+  int gld_count = 0;
+
+  if (above_in_image && is_inter_block(above_mbmi)) {
+    last3_count = (above_mbmi->ref_frame[0] == LAST3_FRAME) ? last3_count + 1
+                                                            : last3_count;
+    gld_count =
+        (above_mbmi->ref_frame[0] == GOLDEN_FRAME) ? gld_count + 1 : gld_count;
+    if (has_second_ref(above_mbmi)) {
+      last3_count = (above_mbmi->ref_frame[1] == LAST3_FRAME) ? last3_count + 1
+                                                              : last3_count;
+      gld_count = (above_mbmi->ref_frame[1] == GOLDEN_FRAME) ? gld_count + 1
+                                                             : gld_count;
+    }
+  }
+
+  if (left_in_image && is_inter_block(left_mbmi)) {
+    last3_count = (left_mbmi->ref_frame[0] == LAST3_FRAME) ? last3_count + 1
+                                                           : last3_count;
+    gld_count =
+        (left_mbmi->ref_frame[0] == GOLDEN_FRAME) ? gld_count + 1 : gld_count;
+    if (has_second_ref(left_mbmi)) {
+      last3_count = (left_mbmi->ref_frame[1] == LAST3_FRAME) ? last3_count + 1
+                                                             : last3_count;
+      gld_count =
+          (left_mbmi->ref_frame[1] == GOLDEN_FRAME) ? gld_count + 1 : gld_count;
+    }
+  }
+
+  pred_context =
+      (last3_count == gld_count) ? 1 : ((last3_count < gld_count) ? 0 : 2);
+
   assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
   return pred_context;
 }
@@ -592,9 +562,6 @@
 #define CHECK_LAST_OR_LAST2(ref_frame) \
   ((ref_frame == LAST_FRAME) || (ref_frame == LAST2_FRAME))
 
-#define CHECK_GOLDEN_OR_LAST3(ref_frame) \
-  ((ref_frame == GOLDEN_FRAME) || (ref_frame == LAST3_FRAME))
-
 // Returns a context number for the given MB prediction signal
 // Signal the first reference frame for a compound mode be either
 // GOLDEN/LAST3, or LAST/LAST2.