New experiment DIST_8x8
A framework for computing a distortion at 8x8 luma block level
during RDO-based mode decision search. New 8x8 distortion metric can
be plugged in by way of this tool.
Existing daala_dist now uses this experiment as well.
Other possible applications that can make use of this experiment would be
a distortion metric that applies at the 8x8 pixel level, such as PSNR-HVS, SSIM, etc.
A rd_cost for final coding mode decision for a super block is
computed for a partition size 8x8 or larger. For a block larger than 8x8,
a distortion of each 8x8 block is independently computed then summed up.
The rd_cost for 8x8 block with new 8x8 distortion metric is computed
only when the mode decisions of its sub8x8 blocks are completed.
However, MSE distortion metric is used with sub8x8 mode decision. Thus,
early termination is also determined with the MSE based rd_cost.
Because the best rd_cost (i.e. the reference rd_cost) during sub8x8 prediction
or sub8x8 tx is based on new 8x8 distortion while each sub8x8 uses MSE,
the existing early termination cannot be used (and this can be one of the possible reasons
for the BD-Rate change with this revision).
For a sub8x8 prediction, the prediction mode for each sub8x8 block of an 8x8 block is
decided with existing MSE and then av1_dist_8x8() is applied to the 8x8 pixels.
(There is also av1_dist_8x8_diff, which can input diff signal directly)
For a sub8x8 tx in a block larger than 8x8, instead of computing MSE distortion for
each sub8x8 tx block, we wait until all sub8x8 tx blocks are encoded before av1_dist_8x8()
is applied to 8x8 pixels.
Sub8x8 prediction and transforms were the trickiest parts of this change.
Two kinds of distortion, for a) predicted pixels and b) decoded pixels
(i.e. predicted + possible reconstructed residue), are always computed during RDO.
In order to access those two signals a) and b) for an 8x8 block after
its sub8x8 mode decision is finished, a) and b) need to be properly stored for later retrieval.
The CB4X4 experiment makes the task of accessing the a) and b) signals for a sub8x8 block more difficult,
since the intermediate data (i.e. a and/or b) for sub8x8 block
are not easily accessible outside of the current partition unless reconstructed
with decided coding modes.
Change-Id: If60301a890c0674a3de1d8206965bbd6a6495bb7
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 9531abe..dbce6a8 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1315,10 +1315,10 @@
return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
-static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
- BLOCK_SIZE bsize, int bw, int bh,
- int mi_row, int mi_col) {
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+static void dist_8x8_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
+ BLOCK_SIZE bsize, int bw, int bh,
+ int mi_row, int mi_col) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
const int dst_stride = pd->dst.stride;
@@ -3729,7 +3729,7 @@
&this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) {
assert(this_rdc.dist_y < INT64_MAX);
}
@@ -3747,7 +3747,7 @@
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == BLOCK_8X8) {
assert(this_rdc.dist_y < INT64_MAX);
sum_rdc.dist_y += this_rdc.dist_y;
@@ -3757,11 +3757,10 @@
}
reached_last_index = (idx == 4);
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (reached_last_index && sum_rdc.rdcost != INT64_MAX &&
bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -3772,19 +3771,16 @@
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
- daala_dist =
- av1_daala_dist(xd, x->plane[0].src.buf - 4 * src_stride - 4,
- src_stride, decoded_8x8, 8, 8, 8, 8, 8, 1,
- use_activity_masking, x->qindex)
+ dist_8x8 =
+ av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride - 4,
+ src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8,
+ x->qindex)
<< 4;
assert(sum_rdc.dist_y < INT64_MAX);
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) {
@@ -3922,14 +3918,14 @@
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col,
subsize, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -3943,14 +3939,13 @@
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -3961,17 +3956,14 @@
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
- daala_dist = av1_daala_dist(xd, x->plane[0].src.buf - 4 * src_stride,
- src_stride, decoded_8x8, 8, 8, 8, 8, 8, 1,
- use_activity_masking, x->qindex)
- << 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ dist_8x8 = av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride,
+ src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8,
+ 8, x->qindex)
+ << 4;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
@@ -4106,14 +4098,14 @@
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step,
subsize, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -4127,14 +4119,13 @@
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -4145,17 +4136,14 @@
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
- daala_dist =
- av1_daala_dist(xd, x->plane[0].src.buf - 4, src_stride, decoded_8x8,
- 8, 8, 8, 8, 8, 1, use_activity_masking, x->qindex)
+ dist_8x8 =
+ av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4, src_stride,
+ decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
<< 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) {
@@ -4405,11 +4393,11 @@
(void)best_rd;
*rd_cost = best_rdc;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
assert(rd_cost->dist_y < INT64_MAX);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
*rate_nocoef = best_rate_nocoef;
#endif // CONFIG_SUPERTX
@@ -4435,13 +4423,13 @@
x->cfl_store_y = 0;
#endif
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
bsize == BLOCK_4X4 && pc_tree->index == 3) {
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
pc_tree, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == cm->sb_size) {
#if !CONFIG_PVQ && !CONFIG_LV_MAP
@@ -6053,11 +6041,11 @@
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) {
- daala_dist_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
- block_size_wide[bsize], block_size_high[bsize],
- mi_row, mi_col);
+ dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
+ block_size_wide[bsize], block_size_high[bsize],
+ mi_row, mi_col);
}
#endif