Another fix of daala-dist for cb4x4

Daala-dist replaces the luma distortion of sub8x8 partitions with
its own distortion thus requires to split the luma distortion only.
Doing so, there has been a bug that INT_MAX64 value comes
when the sub8x8 parition is skipped. This happened because the existing
code does not initialize the rd_stats_y or tmp_rd_stats_y, i.e. rd_stat struct
for luma only in several places.

Change-Id: If229b53bb7a6cff0b8751138a32b1dcf02665624
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index a0eed78..cf98b35 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3551,6 +3551,11 @@
             &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]);
 #endif  // CONFIG_SUPERTX
 
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+        if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) {
+          assert(this_rdc.dist_y < INT64_MAX);
+        }
+#endif
         if (this_rdc.rate == INT_MAX) {
           sum_rdc.rdcost = INT64_MAX;
 #if CONFIG_SUPERTX
@@ -3565,7 +3570,10 @@
           sum_rate_nocoef += this_rate_nocoef;
 #endif  // CONFIG_SUPERTX
 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
-          sum_rdc.dist_y += this_rdc.dist_y;
+          if (bsize == BLOCK_8X8) {
+            assert(this_rdc.dist_y < INT64_MAX);
+            sum_rdc.dist_y += this_rdc.dist_y;
+          }
 #endif
         }
       }
@@ -3581,6 +3589,7 @@
                                     src_stride, x->decoded_8x8, 8, 8, 8, 1,
                                     use_activity_masking, x->qindex)
                      << 4;
+        assert(sum_rdc.dist_y < INT64_MAX);
         sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
         sum_rdc.rdcost =
             RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
@@ -4068,6 +4077,12 @@
   // checks occur in some sub function and thus are used...
   (void)best_rd;
   *rd_cost = best_rdc;
+
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+  if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
+    assert(rd_cost->dist_y < INT64_MAX);
+  }
+#endif  // CONFIG_DAALA_DIST && CONFIG_CB4X4
 #if CONFIG_SUPERTX
   *rate_nocoef = best_rate_nocoef;
 #endif  // CONFIG_SUPERTX
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 720a3df..953ce90 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9498,6 +9498,19 @@
       {
         RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
         av1_init_rd_stats(&rd_stats);
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+        // While av1 master uses rd_stats_y.rate through out the codebase,
+        // which is set when handle_inter_moden is called, the daala-dist code
+        // in rd_pick_partition() for cb4x4 and sub8x8 blocks need to know
+        // .dist_y which comes from rd_stats_y.dist and rd_stats_y.sse.
+        // The problem is rd_stats_y.dist and rd_stats_y.sse are sometimes not
+        // initialized when rd_stats.skip = 1,
+        // then instead rd_stats.dist and rd_stats.sse have the
+        // combined luma and chroma dist and sse.
+        // This can be seen inside motion_mode_rd(), which is called by
+        // handle_inter_mode().
+        if (bsize < BLOCK_8X8) av1_init_rd_stats(&rd_stats_y);
+#endif
         rd_stats.rate = rate2;
 
         // Point to variables that are maintained between loop iterations
@@ -9517,7 +9530,14 @@
         rate_y = rd_stats_y.rate;
         rate_uv = rd_stats_uv.rate;
 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
-        if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist;
+        if (bsize < BLOCK_8X8) {
+          if (rd_stats_y.rate != INT_MAX) {
+            assert(rd_stats_y.sse < INT64_MAX);
+            assert(rd_stats_y.dist < INT64_MAX);
+          }
+          total_sse_y = rd_stats_y.sse;
+          distortion2_y = rd_stats_y.dist;
+        }
 #endif
       }
 
@@ -9585,6 +9605,7 @@
           RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
 
           av1_invalid_rd_stats(&tmp_rd_stats);
+
           x->skip = 0;
 
           mbmi->ref_mv_idx = 1 + ref_idx;
@@ -9655,7 +9676,15 @@
 
             frame_mv[NEARMV][ref_frame] = cur_mv;
             av1_init_rd_stats(&tmp_rd_stats);
-
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+            // With the same reason as 'rd_stats_y' passed to above
+            // handle_inter_mode(), tmp_rd_stats_y.dist and
+            // tmp_rd_stats_y.sse are sometimes not initialized, esp. when
+            // tmp_rd_stats.skip = 1 and tmp_rd_stats.dist and .sse
+            // represent combined luma and chroma .dist and .sse,
+            // we should initialized tmp_rd_stats_y.
+            if (bsize < BLOCK_8X8) av1_init_rd_stats(&tmp_rd_stats_y);
+#endif
             // Point to variables that are not maintained between iterations
             args.single_newmv = dummy_single_newmv;
 #if CONFIG_EXT_INTER
@@ -9730,6 +9759,10 @@
             backup_skip = x->skip;
 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
             if (bsize < BLOCK_8X8) {
+              if (tmp_rd_stats_y.rate != INT_MAX) {
+                assert(tmp_rd_stats_y.sse < INT64_MAX);
+                assert(tmp_rd_stats_y.dist < INT64_MAX);
+              }
               total_sse_y = tmp_rd_stats_y.sse;
               distortion2_y = tmp_rd_stats_y.dist;
             }
@@ -9808,7 +9841,10 @@
           rate_y = 0;
           rate_uv = 0;
 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
-          if (bsize < BLOCK_8X8) distortion2_y = total_sse_y;
+          if (bsize < BLOCK_8X8) {
+            assert(total_sse_y < INT64_MAX);
+            distortion2_y = total_sse_y;
+          }
 #endif
         }
       } else {
@@ -9829,6 +9865,12 @@
 #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
     }
 
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+    if ((bsize < BLOCK_8X8) && (rate2 != INT_MAX)) {
+      assert(distortion2_y < INT64_MAX);
+    }
+#endif
+
     if (ref_frame == INTRA_FRAME) {
       // Keep record of best intra rd
       if (this_rd < best_intra_rd) {
@@ -9900,7 +9942,10 @@
                                             this_skip2 || skippable);
         best_rate_uv = rate_uv;
 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
-        if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y;
+        if (bsize < BLOCK_8X8) {
+          assert(distortion2_y < INT64_MAX);
+          rd_cost->dist_y = distortion2_y;
+        }
 #endif
 #if CONFIG_VAR_TX
         for (i = 0; i < MAX_MB_PLANE; ++i)
@@ -9909,7 +9954,11 @@
 #endif  // CONFIG_VAR_TX
       }
     }
-
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+    if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
+      assert(rd_cost->dist_y < INT64_MAX);
+    }
+#endif
     /* keep record of best compound/single-only prediction */
     if (!disable_skip && ref_frame != INTRA_FRAME) {
       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
@@ -10029,15 +10078,25 @@
       rd_cost->rate +=
           (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
       rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
-      if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist;
-#endif
       rd_cost->rdcost =
           RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
       best_skip2 = skip_blk;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+      if (bsize < BLOCK_8X8) {
+        assert(rd_cost->rate != INT_MAX);
+        assert(rd_cost->dist_y < INT64_MAX);
+        rd_cost->dist_y = rd_stats_y.dist;
+      }
+#endif
     }
   }
 
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+  if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
+    assert(rd_cost->dist_y < INT64_MAX);
+  }
+#endif
+
 #if CONFIG_PALETTE
   // Only try palette mode when the best mode so far is an intra mode.
   if (try_palette && !is_inter_mode(best_mbmode.mode)) {