Clean ups of the subpel search functions
Removes some unused code and speed features, and organizes the
interfaces for fractional mv step functions for use in new speed
features to come.
In the process a new speed feature - number of iterations per
step during the subpel search - is exposed.
No change when this parameter is set as the original value of 3.
Results:
subpel_iters_per_step = 3: baseline
subpel_iters_per_step = 2: psnr -0.067%, 1% speedup
subpel_iters_per_step = 1: psnr -0.331%, 3-4% speedup
Change-Id: I2eba8a21f6461be8caf56af04a5337257a5693a8
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 67b809f..314e195 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -59,7 +59,7 @@
x,
dst_mv, ref_mv,
x->errorperbit, &v_fn_ptr,
- NULL, NULL,
+ 0, cpi->sf.subpel_iters_per_step, NULL, NULL,
& distortion, &sse);
}
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 77d6554..014f54a 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -245,13 +245,15 @@
}, \
v = INT_MAX;)
-int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1) {
+int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1) {
uint8_t *z = x->plane[0].src.buf;
int src_stride = x->plane[0].src.stride;
MACROBLOCKD *xd = &x->e_mbd;
@@ -262,9 +264,9 @@
unsigned int left, right, up, down, diag;
unsigned int sse;
unsigned int whichdir;
- unsigned int halfiters = 4;
- unsigned int quarteriters = 4;
- unsigned int eighthiters = 4;
+ unsigned int halfiters = iters_per_step;
+ unsigned int quarteriters = iters_per_step;
+ unsigned int eighthiters = iters_per_step;
int thismse;
int maxc, minc, maxr, minr;
int y_stride;
@@ -289,7 +291,6 @@
tr = br;
tc = bc;
-
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
// central mv
@@ -303,7 +304,7 @@
// TODO: Each subsequent iteration checks at least one point in
// common with the last iteration could be 2 ( if diag selected)
- while (--halfiters) {
+ while (halfiters--) {
// 1/2 pel
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
@@ -337,41 +338,11 @@
// TODO: Each subsequent iteration checks at least one point in common with
// the last iteration could be 2 ( if diag selected) 1/4 pel
- hstep >>= 1;
- while (--quarteriters) {
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(down, tr + hstep, tc);
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir) {
- case 0:
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
- break;
- case 1:
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
- }
-
- // no reason to check the same one again.
- if (tr == br && tc == bc)
- break;
-
- tr = br;
- tc = bc;
- }
-
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) {
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+ if (forced_stop != 2) {
hstep >>= 1;
- while (--eighthiters) {
+ while (quarteriters--) {
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
@@ -402,6 +373,42 @@
tc = bc;
}
}
+
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ forced_stop == 0) {
+ hstep >>= 1;
+ while (eighthiters--) {
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
+
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+ switch (whichdir) {
+ case 0:
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ break;
+ case 1:
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
+ break;
+ case 2:
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
+ break;
+ case 3:
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
+ break;
+ }
+
+ // no reason to check the same one again.
+ if (tr == br && tc == bc)
+ break;
+
+ tr = br;
+ tc = bc;
+ }
+ }
+
bestmv->as_mv.row = br;
bestmv->as_mv.col = bc;
@@ -418,14 +425,17 @@
vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
z, src_stride, &sse, second_pred)
-int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
- const uint8_t *second_pred, int w, int h) {
+int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1,
+ const uint8_t *second_pred,
+ int w, int h) {
uint8_t *z = x->plane[0].src.buf;
int src_stride = x->plane[0].src.stride;
MACROBLOCKD *xd = &x->e_mbd;
@@ -436,9 +446,9 @@
unsigned int left, right, up, down, diag;
unsigned int sse;
unsigned int whichdir;
- unsigned int halfiters = 4;
- unsigned int quarteriters = 4;
- unsigned int eighthiters = 4;
+ unsigned int halfiters = iters_per_step;
+ unsigned int quarteriters = iters_per_step;
+ unsigned int eighthiters = iters_per_step;
int thismse;
int maxc, minc, maxr, minr;
int y_stride;
@@ -485,7 +495,7 @@
// Each subsequent iteration checks at least one point in
// common with the last iteration could be 2 ( if diag selected)
- while (--halfiters) {
+ while (halfiters--) {
// 1/2 pel
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
@@ -519,41 +529,46 @@
// Each subsequent iteration checks at least one point in common with
// the last iteration could be 2 ( if diag selected) 1/4 pel
- hstep >>= 1;
- while (--quarteriters) {
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(down, tr + hstep, tc);
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+ if (forced_stop != 2) {
+ hstep >>= 1;
+ while (quarteriters--) {
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
- switch (whichdir) {
- case 0:
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+ switch (whichdir) {
+ case 0:
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ break;
+ case 1:
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
+ break;
+ case 2:
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
+ break;
+ case 3:
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
+ break;
+ }
+
+ // no reason to check the same one again.
+ if (tr == br && tc == bc)
break;
- case 1:
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
+
+ tr = br;
+ tc = bc;
}
-
- // no reason to check the same one again.
- if (tr == br && tc == bc)
- break;
-
- tr = br;
- tc = bc;
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) {
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ forced_stop == 0) {
hstep >>= 1;
- while (--eighthiters) {
+ while (eighthiters--) {
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
@@ -594,7 +609,6 @@
return besterr;
}
-
#undef MVC
#undef PRE
#undef DIST
@@ -603,627 +617,8 @@
#undef MIN
#undef MAX
-int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1) {
- int bestmse = INT_MAX;
- int_mv startmv;
- int_mv this_mv;
- int_mv orig_mv;
- int yrow_movedback = 0, ycol_movedback = 0;
- uint8_t *z = x->plane[0].src.buf;
- int src_stride = x->plane[0].src.stride;
- int left, right, up, down, diag;
- unsigned int sse;
- int whichdir;
- int thismse;
- int y_stride;
- MACROBLOCKD *xd = &x->e_mbd;
-
- uint8_t *y = xd->plane[0].pre[0].buf +
- (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
- bestmv->as_mv.col;
- y_stride = xd->plane[0].pre[0].stride;
-
- // central mv
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
- startmv = *bestmv;
- orig_mv = *bestmv;
-
- // calculate central point error
- bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
- *distortion = bestmse;
- bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-
- // go left then right and check error
- this_mv.as_mv.row = startmv.as_mv.row;
- this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
- thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (left < bestmse) {
- *bestmv = this_mv;
- bestmse = left;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- this_mv.as_mv.col += 8;
- thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
- right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (right < bestmse) {
- *bestmv = this_mv;
- bestmse = right;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- // go up then down and check error
- this_mv.as_mv.col = startmv.as_mv.col;
- this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
- thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
-
- if (up < bestmse) {
- *bestmv = this_mv;
- bestmse = up;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- this_mv.as_mv.row += 8;
- thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (down < bestmse) {
- *bestmv = this_mv;
- bestmse = down;
- *distortion = thismse;
- *sse1 = sse;
- }
-
-
- // now check 1 more diagonal
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
- // for(whichdir =0;whichdir<4;whichdir++)
- // {
- this_mv = startmv;
-
- switch (whichdir) {
- case 0:
- this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
- this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
- thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, src_stride,
- &sse);
- break;
- case 1:
- this_mv.as_mv.col += 4;
- this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
- thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, src_stride,
- &sse);
- break;
- case 2:
- this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
- this_mv.as_mv.row += 4;
- thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse);
- break;
- case 3:
- default:
- this_mv.as_mv.col += 4;
- this_mv.as_mv.row += 4;
- thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse);
- break;
- }
-
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (diag < bestmse) {
- *bestmv = this_mv;
- bestmse = diag;
- *distortion = thismse;
- *sse1 = sse;
- }
-
-// }
-
-
- // time to check quarter pels.
- if (bestmv->as_mv.row < startmv.as_mv.row) {
- y -= y_stride;
- yrow_movedback = 1;
- }
-
- if (bestmv->as_mv.col < startmv.as_mv.col) {
- y--;
- ycol_movedback = 1;
- }
-
- startmv = *bestmv;
-
-
-
- // go left then right and check error
- this_mv.as_mv.row = startmv.as_mv.row;
-
- if (startmv.as_mv.col & 7) {
- this_mv.as_mv.col = startmv.as_mv.col - 2;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
- src_stride, &sse);
- }
-
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (left < bestmse) {
- *bestmv = this_mv;
- bestmse = left;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- this_mv.as_mv.col += 4;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (right < bestmse) {
- *bestmv = this_mv;
- bestmse = right;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- // go up then down and check error
- this_mv.as_mv.col = startmv.as_mv.col;
-
- if (startmv.as_mv.row & 7) {
- this_mv.as_mv.row = startmv.as_mv.row - 2;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
- thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6),
- z, src_stride, &sse);
- }
-
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
-
- if (up < bestmse) {
- *bestmv = this_mv;
- bestmse = up;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- this_mv.as_mv.row += 4;
- thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
-
- if (down < bestmse) {
- *bestmv = this_mv;
- bestmse = down;
- *distortion = thismse;
- *sse1 = sse;
- }
-
-
- // now check 1 more diagonal
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-// for(whichdir=0;whichdir<4;whichdir++)
-// {
- this_mv = startmv;
-
- switch (whichdir) {
- case 0:
-
- if (startmv.as_mv.row & 7) {
- this_mv.as_mv.row -= 2;
-
- if (startmv.as_mv.col & 7) {
- this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride,
- SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse);
- }
- } else {
- this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
-
- if (startmv.as_mv.col & 7) {
- this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y - y_stride, y_stride,
- SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse);
- } else {
- this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - y_stride - 1, y_stride,
- SP(6), SP(6), z, src_stride, &sse);
- }
- }
-
- break;
- case 1:
- this_mv.as_mv.col += 2;
-
- if (startmv.as_mv.row & 7) {
- this_mv.as_mv.row -= 2;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
- thismse = vfp->svf(y - y_stride, y_stride,
- SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse);
- }
-
- break;
- case 2:
- this_mv.as_mv.row += 2;
-
- if (startmv.as_mv.col & 7) {
- this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
- src_stride, &sse);
- }
-
- break;
- case 3:
- this_mv.as_mv.col += 2;
- this_mv.as_mv.row += 2;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- break;
- }
-
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (diag < bestmse) {
- *bestmv = this_mv;
- bestmse = diag;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- if (!(xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)))
- return bestmse;
-
- /* Now do 1/8th pixel */
- if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) {
- y -= y_stride;
- yrow_movedback = 1;
- }
-
- if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) {
- y--;
- ycol_movedback = 1;
- }
-
- startmv = *bestmv;
-
- // go left then right and check error
- this_mv.as_mv.row = startmv.as_mv.row;
-
- if (startmv.as_mv.col & 7) {
- this_mv.as_mv.col = startmv.as_mv.col - 1;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
- thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- }
-
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (left < bestmse) {
- *bestmv = this_mv;
- bestmse = left;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- this_mv.as_mv.col += 2;
- thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (right < bestmse) {
- *bestmv = this_mv;
- bestmse = right;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- // go up then down and check error
- this_mv.as_mv.col = startmv.as_mv.col;
-
- if (startmv.as_mv.row & 7) {
- this_mv.as_mv.row = startmv.as_mv.row - 1;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
- thismse = vfp->svf(y - y_stride, y_stride,
- SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
- }
-
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
-
- if (up < bestmse) {
- *bestmv = this_mv;
- bestmse = up;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- this_mv.as_mv.row += 2;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (down < bestmse) {
- *bestmv = this_mv;
- bestmse = down;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- // now check 1 more diagonal
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-// for(whichdir=0;whichdir<4;whichdir++)
-// {
- this_mv = startmv;
-
- switch (whichdir) {
- case 0:
-
- if (startmv.as_mv.row & 7) {
- this_mv.as_mv.row -= 1;
-
- if (startmv.as_mv.col & 7) {
- this_mv.as_mv.col -= 1;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
- thismse = vfp->svf(y - 1, y_stride,
- SP(7), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- }
- } else {
- this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
-
- if (startmv.as_mv.col & 7) {
- this_mv.as_mv.col -= 1;
- thismse = vfp->svf(y - y_stride, y_stride,
- SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
- } else {
- this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
- thismse = vfp->svf(y - y_stride - 1, y_stride,
- SP(7), SP(7), z, src_stride, &sse);
- }
- }
-
- break;
- case 1:
- this_mv.as_mv.col += 1;
-
- if (startmv.as_mv.row & 7) {
- this_mv.as_mv.row -= 1;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
- thismse = vfp->svf(y - y_stride, y_stride,
- SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
- }
-
- break;
- case 2:
- this_mv.as_mv.row += 1;
-
- if (startmv.as_mv.col & 7) {
- this_mv.as_mv.col -= 1;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- } else {
- this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
- thismse = vfp->svf(y - 1, y_stride,
- SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse);
- }
-
- break;
- case 3:
- this_mv.as_mv.col += 1;
- this_mv.as_mv.row += 1;
- thismse = vfp->svf(y, y_stride,
- SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
- z, src_stride, &sse);
- break;
- }
-
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (diag < bestmse) {
- *bestmv = this_mv;
- bestmse = diag;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- return bestmse;
-}
-
#undef SP
-int vp9_find_best_half_pixel_step(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1) {
- int bestmse = INT_MAX;
- int_mv startmv;
- int_mv this_mv;
- uint8_t *z = x->plane[0].src.buf;
- int src_stride = x->plane[0].src.stride;
- int left, right, up, down, diag;
- unsigned int sse;
- int whichdir;
- int thismse;
- int y_stride;
- MACROBLOCKD *xd = &x->e_mbd;
-
- uint8_t *y = xd->plane[0].pre[0].buf +
- (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col;
- y_stride = xd->plane[0].pre[0].stride;
-
- // central mv
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
- startmv = *bestmv;
-
- // calculate central point error
- bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
- *distortion = bestmse;
- bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-
- // go left then right and check error
- this_mv.as_mv.row = startmv.as_mv.row;
- this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
- thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (left < bestmse) {
- *bestmv = this_mv;
- bestmse = left;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- this_mv.as_mv.col += 8;
- thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
- right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (right < bestmse) {
- *bestmv = this_mv;
- bestmse = right;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- // go up then down and check error
- this_mv.as_mv.col = startmv.as_mv.col;
- this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
- thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
-
- if (up < bestmse) {
- *bestmv = this_mv;
- bestmse = up;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- this_mv.as_mv.row += 8;
- thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (down < bestmse) {
- *bestmv = this_mv;
- bestmse = down;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- // now check 1 more diagonal -
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
- this_mv = startmv;
-
- switch (whichdir) {
- case 0:
- this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
- this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
- thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride,
- z, src_stride, &sse);
- break;
- case 1:
- this_mv.as_mv.col += 4;
- this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
- thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride,
- z, src_stride, &sse);
- break;
- case 2:
- this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
- this_mv.as_mv.row += 4;
- thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse);
- break;
- case 3:
- default:
- this_mv.as_mv.col += 4;
- this_mv.as_mv.row += 4;
- thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse);
- break;
- }
-
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (diag < bestmse) {
- *bestmv = this_mv;
- bestmse = diag;
- *distortion = thismse;
- *sse1 = sse;
- }
-
- return bestmse;
-}
-
#define CHECK_BOUNDS(range) \
{\
all_in = 1;\
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 5d0c0e8..b91e6fd 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -67,12 +67,19 @@
int_mv *center_mv,
int_mv *best_mv);
-typedef int (fractional_mv_step_fp) (MACROBLOCK *x, int_mv
- *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
- int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse);
-extern fractional_mv_step_fp vp9_find_best_sub_pixel_step_iteratively;
-extern fractional_mv_step_fp vp9_find_best_sub_pixel_step;
-extern fractional_mv_step_fp vp9_find_best_half_pixel_step;
+typedef int (fractional_mv_step_fp) (
+ MACROBLOCK *x,
+ int_mv *bestmv,
+ int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
+ int iters_per_step,
+ int *mvjcost,
+ int *mvcost[2],
+ int *distortion,
+ unsigned int *sse);
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_iterative;
typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x,
int_mv *ref_mv, int sad_per_bit,
@@ -95,14 +102,17 @@
int *mvjcost, int *mvcost[2],
int_mv *center_mv);
-int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred,
- int w, int h);
+int vp9_find_best_sub_pixel_comp_iterative(
+ MACROBLOCK *x,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
+ int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion, unsigned int *sse1,
+ const uint8_t *second_pred,
+ int w, int h);
int vp9_refining_search_8p_c(MACROBLOCK *x,
int_mv *ref_mv, int error_per_bit,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index d1db910..cf5ae52 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -713,9 +713,8 @@
sf->search_method = NSTEP;
sf->auto_filter = 1;
sf->recode_loop = 1;
- sf->quarter_pixel_search = 1;
- sf->half_pixel_search = 1;
- sf->iterative_sub_pixel = 1;
+ sf->subpel_search_method = SUBPEL_ITERATIVE;
+ sf->subpel_iters_per_step = 3;
sf->optimize_coefficients = !cpi->oxcf.lossless;
sf->reduce_first_step_size = 0;
sf->auto_mv_step_size = 0;
@@ -831,6 +830,7 @@
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
sf->auto_mv_step_size = 1;
sf->search_method = SQUARE;
+ sf->subpel_iters_per_step = 2;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -851,6 +851,7 @@
sf->disable_splitmv = 1;
sf->auto_mv_step_size = 1;
sf->search_method = BIGDIA;
+ sf->subpel_iters_per_step = 1;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -875,6 +876,7 @@
sf->disable_splitmv = 1;
sf->search_method = HEX;
+ sf->subpel_iters_per_step = 1;
}
/*
if (speed == 2) {
@@ -918,12 +920,11 @@
cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4;
- if (cpi->sf.iterative_sub_pixel == 1) {
- cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step_iteratively;
- } else if (cpi->sf.quarter_pixel_search) {
- cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step;
- } else if (cpi->sf.half_pixel_search) {
- cpi->find_fractional_mv_step = vp9_find_best_half_pixel_step;
+ if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) {
+ cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative;
+ } else {
+ // TODO(debargha): Other methods to come
+ assert(0);
}
cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 7eae700..1249107 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -232,14 +232,18 @@
FLAG_SKIP_INTRA_LOWVAR = 32,
} MODE_SEARCH_SKIP_LOGIC;
+typedef enum {
+ SUBPEL_ITERATIVE = 0,
+ // Other methods to come
+} SUBPEL_SEARCH_METHODS;
+
typedef struct {
int RD;
SEARCH_METHODS search_method;
int auto_filter;
int recode_loop;
- int iterative_sub_pixel;
- int half_pixel_search;
- int quarter_pixel_search;
+ SUBPEL_SEARCH_METHODS subpel_search_method;
+ int subpel_iters_per_step;
int thresh_mult[MAX_MODES];
int max_step_search_steps;
int reduce_first_step_size;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 238c981..90d35f8 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1967,6 +1967,7 @@
unsigned int sse;
cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
bsi->ref_mv, x->errorperbit, v_fn_ptr,
+ 0, cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&distortion, &sse);
@@ -2547,6 +2548,7 @@
cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
x->errorperbit,
&cpi->fn_ptr[block_size],
+ 0, cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&dis, &sse);
}
@@ -2673,13 +2675,15 @@
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
- &ref_mv[id],
- x->errorperbit,
- &cpi->fn_ptr[block_size],
- x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred,
- pw, ph);
+ bestsme = vp9_find_best_sub_pixel_comp_iterative(
+ x, &tmp_mv,
+ &ref_mv[id],
+ x->errorperbit,
+ &cpi->fn_ptr[block_size],
+ 0, cpi->sf.subpel_iters_per_step,
+ x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred,
+ pw, ph);
}
if (id)
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 11d601b..3052e8f 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -170,6 +170,7 @@
&best_ref_mv1,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
+ 0, cpi->sf.subpel_iters_per_step,
NULL, NULL,
&distortion, &sse);
}