Modify size of ref_mv_stack and weight in PC_TREE
The size of the ref_mv_stack and weight arrays in the
MB_MODE_INFO_EXT used in PC_TREE has been reduced from
[29][8] to [29][4]. Similarly, in mbmi_ext_frame_base the
size is reduced from [8] to [4]. This is the usable size
of ref_mv_stack and weight for encoding decisions as well
as bitstream preparation, once the mv_refs list has been
populated by the call to av1_find_mv_refs().
A reduction in memory footprint was observed, with similar
encoding time.
Resolution   Tile   Memory reduction
                    Single thread   Multi thread
832x480      2x1    ~18%            ~24% (2 threads)
1280x720     2x2    ~14%            ~25% (4 threads)
1920x1080    4x2    ~9%             ~25% (8 threads)
Memory measuring command:
$ command time -v ./aomenc ...
Change-Id: I7a60f2debbc688b3da678f3509a120a5d1d689ad
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 20b9234..d5a9151 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -553,6 +553,7 @@
#define MAX_MV_REF_CANDIDATES 2
#define MAX_REF_MV_STACK_SIZE 8
+#define USABLE_REF_MV_STACK_SIZE 4
#define REF_CAT_LEVEL 640
#define INTRA_INTER_CONTEXTS 4
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index febac03..199074c 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -84,8 +84,8 @@
typedef struct {
// TODO(angiebird): Reduce the buffer size according to sb_type
- CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
- uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+ CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][USABLE_REF_MV_STACK_SIZE];
+ uint16_t weight[MODE_CTX_REF_FRAMES][USABLE_REF_MV_STACK_SIZE];
int_mv global_mvs[REF_FRAMES];
// TODO(Ravi/Remya): Check if this variable is still needed at block level
int cb_offset;
@@ -96,8 +96,8 @@
// Structure to store winner reference mode information at frame level. This
// frame level information will be used during bitstream preparation stage.
typedef struct {
- CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE];
- uint16_t weight[MAX_REF_MV_STACK_SIZE];
+ CANDIDATE_MV ref_mv_stack[USABLE_REF_MV_STACK_SIZE];
+ uint16_t weight[USABLE_REF_MV_STACK_SIZE];
// TODO(Ravi/Remya): Reduce the buffer size of global_mvs
int_mv global_mvs[REF_FRAMES];
int cb_offset;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index f1027fb..75a09bf 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -641,9 +641,9 @@
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
memcpy(x->mbmi_ext_frame->ref_mv_stack,
x->mbmi_ext->ref_mv_stack[ref_frame_type],
- sizeof(x->mbmi_ext->ref_mv_stack[MAX_REF_MV_STACK_SIZE]));
+ sizeof(x->mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
memcpy(x->mbmi_ext_frame->weight, x->mbmi_ext->weight[ref_frame_type],
- sizeof(x->mbmi_ext->weight[MAX_REF_MV_STACK_SIZE]));
+ sizeof(x->mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
x->mbmi_ext_frame->mode_context = x->mbmi_ext->mode_context[ref_frame_type];
x->mbmi_ext_frame->ref_mv_count = x->mbmi_ext->ref_mv_count[ref_frame_type];
memcpy(x->mbmi_ext_frame->global_mvs, x->mbmi_ext->global_mvs,
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 850b6c5..2c72345 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -274,9 +274,11 @@
av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf,
num_planes);
av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
- mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
- mbmi_ext->global_mvs, mi_row, mi_col,
- mbmi_ext->mode_context);
+ xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+ mi_row, mi_col, mbmi_ext->mode_context);
+ // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+ av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
av1_find_best_ref_mvs_from_stack(cm->allow_high_precision_mv, mbmi_ext,
ref_frame, &frame_mv[NEARESTMV][ref_frame],
&frame_mv[NEARMV][ref_frame], 0);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index d2d509e..2b5af99 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -6901,10 +6901,11 @@
// Gets an initial list of candidate vectors from neighbours and orders them
av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
- mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
- mbmi_ext->global_mvs, mi_row, mi_col,
- mbmi_ext->mode_context);
-
+ xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+ mi_row, mi_col, mbmi_ext->mode_context);
+ // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+ av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
// Further refinement that is encode side only to test the top few candidates
// in full and choose the best as the center point for subsequent searches.
// The current implementation doesn't support scaling.
@@ -10795,10 +10796,11 @@
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
- mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
- mbmi_ext->global_mvs, mi_row, mi_col,
- mbmi_ext->mode_context);
-
+ xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+ mi_row, mi_col, mbmi_ext->mode_context);
+ // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+ av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
int_mv nearestmv, nearmv;
av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
0);
@@ -11120,9 +11122,11 @@
}
MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
- mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
- mbmi_ext->global_mvs, mi_row, mi_col,
- mbmi_ext->mode_context);
+ xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+ mi_row, mi_col, mbmi_ext->mode_context);
+ // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+ av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
}
assert(this_mode == NEAREST_NEARESTMV);
@@ -11616,9 +11620,11 @@
}
}
av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
- mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
- mbmi_ext->global_mvs, mi_row, mi_col,
- mbmi_ext->mode_context);
+ xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+ mi_row, mi_col, mbmi_ext->mode_context);
+ // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+ av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
}
av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 99358c7..9fb7f5d 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -205,6 +205,18 @@
return sb_mi_size;
}
+// Copies the first USABLE_REF_MV_STACK_SIZE entries of xd->ref_mv_stack and
+// xd->weight for the given ref_frame into the corresponding ref_frame rows of
+// mbmi_ext->ref_mv_stack and mbmi_ext->weight.
+static INLINE void av1_copy_usable_ref_mv_stack_and_weight(
+ const MACROBLOCKD *xd, MB_MODE_INFO_EXT *const mbmi_ext,
+ MV_REFERENCE_FRAME ref_frame) {
+ memcpy(mbmi_ext->weight[ref_frame], xd->weight[ref_frame],
+ USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0]));
+ memcpy(mbmi_ext->ref_mv_stack[ref_frame], xd->ref_mv_stack[ref_frame],
+ USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0]));
+}
+
#ifdef __cplusplus
} // extern "C"
#endif