Add Lucas-Kanade algorithm to optical flow. Change-Id: I4b60bdb1f946e1eed0aa89118d6f49524c836d32
diff --git a/av1/encoder/optical_flow.c b/av1/encoder/optical_flow.c index e54d276..eed1def 100644 --- a/av1/encoder/optical_flow.c +++ b/av1/encoder/optical_flow.c
@@ -26,6 +26,11 @@ return (frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; } +// Helper function to determine whether optical flow method is sparse. +static INLINE int is_sparse(const OPFL_PARAMS *opfl_params) { + return (opfl_params->flags & OPFL_FLAG_SPARSE) ? 1 : 0; +} + typedef struct LOCALMV { double row; double col; @@ -469,6 +474,151 @@ } } } + +// Computes optical flow at a single pyramid level, +// using Lucas-Kanade algorithm. +// Modifies mvs array. +void lucas_kanade(const YV12_BUFFER_CONFIG *frame_to_filter, + const YV12_BUFFER_CONFIG *ref_frame, const int level, + const LK_PARAMS *lk_params, const int num_ref_corners, + int *ref_corners, const int highres_frame_width, + const int bit_depth, LOCALMV *mvs) { + assert(lk_params->window_size > 0 && lk_params->window_size % 2 == 0); + const int n = lk_params->window_size; + // algorithm is sensitive to window size + double *i_x = (double *)aom_malloc(n * n * sizeof(double)); + double *i_y = (double *)aom_malloc(n * n * sizeof(double)); + double *i_t = (double *)aom_malloc(n * n * sizeof(double)); + const int expand_multiplier = (int)pow(2, level); + double sigma = 0.2 * n; + double *weights = (double *)aom_malloc(n * n * sizeof(double)); + // normalizing doesn't really affect anything since it's applied + // to every component of M and b + gaussian(sigma, n, 0, weights); + for (int i = 0; i < num_ref_corners; i++) { + const double x_coord = 1.0 * ref_corners[i * 2] / expand_multiplier; + const double y_coord = 1.0 * ref_corners[i * 2 + 1] / expand_multiplier; + int highres_x = ref_corners[i * 2]; + int highres_y = ref_corners[i * 2 + 1]; + int mv_idx = highres_y * (highres_frame_width) + highres_x; + LOCALMV mv_old = mvs[mv_idx]; + mv_old.row = mv_old.row / expand_multiplier; + mv_old.col = mv_old.col / expand_multiplier; + // using this instead of memset, since it's not completely + // clear if zero memset works on double arrays + for (int j = 0; j < n * n; j++) { + i_x[j] = 0; + i_y[j] = 0; + i_t[j] = 0; + } + gradients_over_window(frame_to_filter, ref_frame, x_coord, y_coord, n, + bit_depth, i_x, i_y, i_t, &mv_old); + double Mres1[1] = { 0 }, Mres2[1] = { 0 }, Mres3[1] = { 0 }; + double bres1[1] = { 0 }, bres2[1] = { 0 }; + for (int j = 0; j < n * n; j++) { + Mres1[0] += weights[j] * i_x[j] * i_x[j]; + Mres2[0] += weights[j] * i_x[j] * i_y[j]; + Mres3[0] += weights[j] * i_y[j] * i_y[j]; + bres1[0] += weights[j] * i_x[j] * i_t[j]; + bres2[0] += weights[j] * i_y[j] * i_t[j]; + } + double M[4] = { Mres1[0], Mres2[0], Mres2[0], Mres3[0] }; + double b[2] = { -1 * bres1[0], -1 * bres2[0] }; + double eig[2] = { 1, 1 }; + eigenvalues_2x2(M, eig); + double threshold = 0.1; + if (fabs(eig[0]) > threshold) { + // if M is not invertible, then displacement + // will default to zeros + double u[2] = { 0, 0 }; + linsolve(2, M, 2, b, u); + int mult = 1; + if (level != 0) + mult = expand_multiplier; // mv doubles when resolution doubles + LOCALMV mv = { .row = (mult * (u[0] + mv_old.row)), + .col = (mult * (u[1] + mv_old.col)) }; + mvs[mv_idx] = mv; + mvs[mv_idx] = mv; + } + } + aom_free(weights); + aom_free(i_t); + aom_free(i_x); + aom_free(i_y); +} + +// Apply optical flow iteratively at each pyramid level +void pyramid_optical_flow(const YV12_BUFFER_CONFIG *from_frame, + const YV12_BUFFER_CONFIG *to_frame, + const int bit_depth, const OPFL_PARAMS *opfl_params, + const OPTFLOW_METHOD method, LOCALMV *mvs) { + assert(opfl_params->pyramid_levels > 0 && + opfl_params->pyramid_levels <= MAX_PYRAMID_LEVELS); + int levels = opfl_params->pyramid_levels; + const int frame_height = from_frame->y_crop_height; + const int frame_width = from_frame->y_crop_width; + if ((frame_height / pow(2.0, levels - 1) < 50 || + frame_height / pow(2.0, levels - 1) < 50) && + levels > 1) + levels = levels - 1; + uint8_t *images1[MAX_PYRAMID_LEVELS]; + uint8_t *images2[MAX_PYRAMID_LEVELS]; + images1[0] = from_frame->y_buffer; + images2[0] = to_frame->y_buffer; + YV12_BUFFER_CONFIG *buffers1 = + aom_malloc(levels * sizeof(YV12_BUFFER_CONFIG)); + YV12_BUFFER_CONFIG *buffers2 = + aom_malloc(levels * sizeof(YV12_BUFFER_CONFIG)); + buffers1[0] = *from_frame; + buffers2[0] = *to_frame; + int fw = frame_width; + int fh = frame_height; + for (int i = 1; i < levels; i++) { + images1[i] = (uint8_t *)aom_calloc(fh / 2 * fw / 2, sizeof(uint8_t)); + images2[i] = (uint8_t *)aom_calloc(fh / 2 * fw / 2, sizeof(uint8_t)); + int stride; + if (i == 1) + stride = from_frame->y_stride; + else + stride = fw; + reduce(images1[i - 1], fh, fw, stride, images1[i]); + reduce(images2[i - 1], fh, fw, stride, images2[i]); + fh /= 2; + fw /= 2; + YV12_BUFFER_CONFIG a = { .y_buffer = images1[i], + .y_crop_width = fw, + .y_crop_height = fh, + .y_stride = fw }; + YV12_BUFFER_CONFIG b = { .y_buffer = images2[i], + .y_crop_width = fw, + .y_crop_height = fh, + .y_stride = fw }; + buffers1[i] = a; + buffers2[i] = b; + } + // Compute corners for specific frame + int maxcorners = from_frame->y_crop_width * from_frame->y_crop_height; + int *ref_corners = aom_malloc(maxcorners * 2 * sizeof(int)); + int num_ref_corners = 0; + if (is_sparse(opfl_params)) { + num_ref_corners = detect_corners(from_frame, to_frame, maxcorners, + ref_corners, bit_depth); + } + const int stop_level = 0; + for (int i = levels - 1; i >= stop_level; i--) { + if (method == LUCAS_KANADE) { + assert(is_sparse(opfl_params)); + lucas_kanade(&buffers1[i], &buffers2[i], i, opfl_params->lk_params, + num_ref_corners, ref_corners, buffers1[0].y_crop_width, + bit_depth, mvs); + } + } + for (int i = 1; i < levels; i++) { + aom_free(images1[i]); + aom_free(images2[i]); + } + aom_free(ref_corners); +} // Computes optical flow by applying algorithm at // multiple pyramid levels of images (lower-resolution, smoothed images) // This accounts for larger motions. @@ -490,11 +640,6 @@ const OPFL_PARAMS *opfl_params, const MV_FILTER_TYPE mv_filter, const OPTFLOW_METHOD method, MV *mvs) { - // parameters to be used in later implementations - (void)to_frame; - (void)bit_depth; - (void)opfl_params; - (void)method; const int frame_height = from_frame->y_crop_height; const int frame_width = from_frame->y_crop_width; // TODO(any): deal with the case where frames are not of the same dimensions @@ -520,6 +665,8 @@ localmvs[i] = localmv; } // Apply optical flow algorithm + pyramid_optical_flow(from_frame, to_frame, bit_depth, opfl_params, method, + localmvs); // Update original mvs array for (int j = 0; j < frame_height; j++) {
diff --git a/av1/encoder/optical_flow.h b/av1/encoder/optical_flow.h index 5056af3..9b7cd62 100644 --- a/av1/encoder/optical_flow.h +++ b/av1/encoder/optical_flow.h
@@ -28,9 +28,10 @@ MV_FILTER_MEDIAN } MV_FILTER_TYPE; +#define MAX_PYRAMID_LEVELS 5 // default options for optical flow #define OPFL_WINDOW_SIZE 15 -#define OPFL_PYRAMID_LEVELS 3 // total levels (max is 5) +#define OPFL_PYRAMID_LEVELS 3 // total levels // parameters specific to Lucas-Kanade typedef struct lk_params { @@ -42,8 +43,11 @@ typedef struct opfl_params { int pyramid_levels; LK_PARAMS *lk_params; + int flags; } OPFL_PARAMS; +#define OPFL_FLAG_SPARSE 1 + void init_opfl_params(OPFL_PARAMS *opfl_params) { opfl_params->pyramid_levels = OPFL_PYRAMID_LEVELS; opfl_params->lk_params = NULL;