Modify size of ref_mv_stack and weight in PC_TREE

The size of ref_mv_stack and weight in MB_MODE_INFO_EXT, as used in PC_TREE, has been reduced from [29][8] to [29][4]. Similarly, in mbmi_ext_frame_base the size is reduced from [8] to [4]. This is the usable size of ref_mv_stack and weight for encoding decisions as well as bitstream preparation, once the mv_refs list has been populated by the call to av1_find_mv_refs().

Observed memory footprint reduction with similar encoding time:

Resolution   Tile   Memory reduction
                    Single Thread   Multi Thread
832x480      2x1    ~18%            ~24% (2 threads)
1280x720     2x2    ~14%            ~25% (4 threads)
1920x1080    4x2    ~9%             ~25% (8 threads)

Memory measuring command:
$ command time -v ./aomenc ...

Change-Id: I7a60f2debbc688b3da678f3509a120a5d1d689ad

commit: fa73e201d8d79749710ecf84fed92d3228905c32 [log] [tgz]
author: Ravi Chaudhary <ravi.chaudhary@ittiam.com> Mon Aug 19 12:41:26 2019 +0530
committer: Yunqing Wang <yunqingwang@google.com> Wed Aug 21 23:54:52 2019 +0000
tree: 6f1dedbca440c3b0acc161a3a8696df565f87ce6
parent: f20376cb0e4054b608c77c6db9d5f7fed5a87fca [diff]
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 20b9234..d5a9151 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h

@@ -553,6 +553,7 @@
 #define MAX_MV_REF_CANDIDATES 2
 
 #define MAX_REF_MV_STACK_SIZE 8
+#define USABLE_REF_MV_STACK_SIZE 4
 #define REF_CAT_LEVEL 640
 
 #define INTRA_INTER_CONTEXTS 4

diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index febac03..199074c 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h

@@ -84,8 +84,8 @@
 
 typedef struct {
   // TODO(angiebird): Reduce the buffer size according to sb_type
-  CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
-  uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+  CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][USABLE_REF_MV_STACK_SIZE];
+  uint16_t weight[MODE_CTX_REF_FRAMES][USABLE_REF_MV_STACK_SIZE];
   int_mv global_mvs[REF_FRAMES];
   // TODO(Ravi/Remya): Check if this variable is still needed at block level
   int cb_offset;
@@ -96,8 +96,8 @@
 // Structure to store winner reference mode information at frame level. This
 // frame level information will be used during bitstream preparation stage.
 typedef struct {
-  CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE];
-  uint16_t weight[MAX_REF_MV_STACK_SIZE];
+  CANDIDATE_MV ref_mv_stack[USABLE_REF_MV_STACK_SIZE];
+  uint16_t weight[USABLE_REF_MV_STACK_SIZE];
   // TODO(Ravi/Remya): Reduce the buffer size of global_mvs
   int_mv global_mvs[REF_FRAMES];
   int cb_offset;

diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index f1027fb..75a09bf 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c

@@ -641,9 +641,9 @@
   uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
   memcpy(x->mbmi_ext_frame->ref_mv_stack,
          x->mbmi_ext->ref_mv_stack[ref_frame_type],
-         sizeof(x->mbmi_ext->ref_mv_stack[MAX_REF_MV_STACK_SIZE]));
+         sizeof(x->mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
   memcpy(x->mbmi_ext_frame->weight, x->mbmi_ext->weight[ref_frame_type],
-         sizeof(x->mbmi_ext->weight[MAX_REF_MV_STACK_SIZE]));
+         sizeof(x->mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
   x->mbmi_ext_frame->mode_context = x->mbmi_ext->mode_context[ref_frame_type];
   x->mbmi_ext_frame->ref_mv_count = x->mbmi_ext->ref_mv_count[ref_frame_type];
   memcpy(x->mbmi_ext_frame->global_mvs, x->mbmi_ext->global_mvs,

diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 850b6c5..2c72345 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c

@@ -274,9 +274,11 @@
     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf,
                          num_planes);
     av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
-                     mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
-                     mbmi_ext->global_mvs, mi_row, mi_col,
-                     mbmi_ext->mode_context);
+                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+                     mi_row, mi_col, mbmi_ext->mode_context);
+    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
     av1_find_best_ref_mvs_from_stack(cm->allow_high_precision_mv, mbmi_ext,
                                      ref_frame, &frame_mv[NEARESTMV][ref_frame],
                                      &frame_mv[NEARMV][ref_frame], 0);

diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index d2d509e..2b5af99 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c

@@ -6901,10 +6901,11 @@
 
   // Gets an initial list of candidate vectors from neighbours and orders them
   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
-                   mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
-                   mbmi_ext->global_mvs, mi_row, mi_col,
-                   mbmi_ext->mode_context);
-
+                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+                   mi_row, mi_col, mbmi_ext->mode_context);
+  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
   // Further refinement that is encode side only to test the top few candidates
   // in full and choose the best as the center point for subsequent searches.
   // The current implementation doesn't support scaling.
@@ -10795,10 +10796,11 @@
   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
-                   mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
-                   mbmi_ext->global_mvs, mi_row, mi_col,
-                   mbmi_ext->mode_context);
-
+                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+                   mi_row, mi_col, mbmi_ext->mode_context);
+  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
   int_mv nearestmv, nearmv;
   av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                    0);
@@ -11120,9 +11122,11 @@
     }
     MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
     av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
-                     mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
-                     mbmi_ext->global_mvs, mi_row, mi_col,
-                     mbmi_ext->mode_context);
+                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+                     mi_row, mi_col, mbmi_ext->mode_context);
+    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
   }
 
   assert(this_mode == NEAREST_NEARESTMV);
@@ -11616,9 +11620,11 @@
       }
     }
     av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
-                     mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
-                     mbmi_ext->global_mvs, mi_row, mi_col,
-                     mbmi_ext->mode_context);
+                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+                     mi_row, mi_col, mbmi_ext->mode_context);
+    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
   }
 
   av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);

diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 99358c7..9fb7f5d 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h

@@ -205,6 +205,18 @@
   return sb_mi_size;
 }
 
+// Copies the first USABLE_REF_MV_STACK_SIZE entries of xd->weight[ref_frame]
+// and xd->ref_mv_stack[ref_frame] into the matching mbmi_ext rows; entries
+// beyond that in the full-size (8-entry) xd arrays are not carried over.
+static INLINE void av1_copy_usable_ref_mv_stack_and_weight(
+    const MACROBLOCKD *xd, MB_MODE_INFO_EXT *const mbmi_ext,
+    MV_REFERENCE_FRAME ref_frame) {
+  memcpy(mbmi_ext->weight[ref_frame], xd->weight[ref_frame],
+         USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0]));
+  memcpy(mbmi_ext->ref_mv_stack[ref_frame], xd->ref_mv_stack[ref_frame],
+         USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0]));
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
commit	fa73e201d8d79749710ecf84fed92d3228905c32	[log] [tgz]
author	Ravi Chaudhary <ravi.chaudhary@ittiam.com>	Mon Aug 19 12:41:26 2019 +0530
committer	Yunqing Wang <yunqingwang@google.com>	Wed Aug 21 23:54:52 2019 +0000
tree	6f1dedbca440c3b0acc161a3a8696df565f87ce6
parent	f20376cb0e4054b608c77c6db9d5f7fed5a87fca [diff]