| /* |
| * Copyright (c) 2021, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 3-Clause Clear License |
| * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear |
| * License was not distributed with this source code in the LICENSE file, you |
| * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the |
| * Alliance for Open Media Patent License 1.0 was not distributed with this |
| * source code in the PATENTS file, you can obtain it at |
| * aomedia.org/license/patent-license/. |
| */ |
| |
// NOTE: To build this utility in libaom, configure and build with the cmake
// flags -DCONFIG_TENSORFLOW_LITE=1 and -DENABLE_EXAMPLES=1.
| |
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
| |
| #include "common/tf_lite_includes.h" |
| #include "av1/common/resize.h" |
| #include "common/tools_common.h" |
| |
| #define Y4M_HDR_MAX_LEN 256 |
| #define Y4M_HDR_MAX_WORDS 16 |
| #define NUM_THREADS 8 |
| #define USE_XNNPACK 1 |
| |
| // TODO(any): Check why dynamic resizing does not work |
| #define USE_DYNAMIC_MODEL 0 |
| |
| #define MAX(a, b) ((a) < (b) ? (b) : (a)) |
| |
| // Usage: |
| // deep_flow_y4m |
| // <y4m_input> |
| // <frame_1> |
| // <frame_2> |
//       <flow_x_output>
//       <flow_y_output>
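//
// <frame_1> and <frame_2> are zero-based frame indices into the y4m input.
// Example invocation (illustrative file names):
//   deep_flow_y4m input.y4m 0 1 flow_x.bin flow_y.bin
// Each flow output is a raw binary dump of width x height doubles.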
| |
| namespace { |
| |
| #if USE_DYNAMIC_MODEL |
| #include "examples/deep_flow/pwcnet_l7_no_dense_s4_static_autoflow_ft_dynamic_16f.h" |
| #define MODEL_DATA pwcnet_l7_no_dense_s4_static_autoflow_ft_dynamic_16f |
| #else |
| #include "examples/deep_flow/pwcnet_l7_no_dense_s4_static_autoflow_ft_384x512_16f.h" |
| #define MODEL_DATA pwcnet_l7_no_dense_s4_static_autoflow_ft_384x512_16f |
| #endif // USE_DYNAMIC_MODEL |
| |
| // Tensor indices for the Resampler custom op. |
| constexpr int kInputTensorSourceIndex = 0; |
| constexpr int kInputTensorWarpIndex = 1; |
| constexpr int kOutputTensorDestinationIndex = 0; |
| |
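// Names of the input and output tensors in the embedded model's serving
// signature.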
| const std::string kImage1_tensor_name = "serving_default_image0:0"; |
| const std::string kImage2_tensor_name = "serving_default_image1:0"; |
| const std::string kOutput_tensor_name = "StatefulPartitionedCall:6"; |
| |
| // A Prepare function for the Resampler custom op. |
| // Checks dimensions and types of inputs and outputs of the node. |
| TfLiteStatus ResamplerPrepare(TfLiteContext *context, TfLiteNode *node) { |
| TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2); |
| TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1); |
| |
| const TfLiteTensor *source = |
| ::tflite::GetInput(context, node, kInputTensorSourceIndex); |
| TF_LITE_ENSURE(context, source != nullptr); |
| TF_LITE_ENSURE_EQ(context, ::tflite::NumDimensions(source), 4); |
| TF_LITE_ENSURE_EQ(context, source->type, kTfLiteFloat32); |
| |
| const TfLiteTensor *warp = |
| ::tflite::GetInput(context, node, kInputTensorWarpIndex); |
| TF_LITE_ENSURE(context, warp != nullptr); |
| TF_LITE_ENSURE_EQ(context, ::tflite::NumDimensions(warp), 4); |
| TF_LITE_ENSURE_EQ(context, warp->type, kTfLiteFloat32); |
| TF_LITE_ENSURE_EQ(context, warp->dims->data[3], 2); |
| |
| TfLiteTensor *output = |
| ::tflite::GetOutput(context, node, kOutputTensorDestinationIndex); |
| TF_LITE_ENSURE(context, output != nullptr); |
| TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); |
| TfLiteIntArray *output_size = TfLiteIntArrayCreate(4); |
| output_size->data[0] = source->dims->data[0]; |
| output_size->data[1] = source->dims->data[1]; |
| output_size->data[2] = source->dims->data[2]; |
| output_size->data[3] = source->dims->data[3]; |
| if (context->ResizeTensor(context, output, output_size) != kTfLiteOk) { |
| return kTfLiteError; |
| } |
| return kTfLiteOk; |
| } |
| |
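// Bilinearly samples a d-channel pixel from src at the fractional position
// (map_x, map_y), clamping sample coordinates to the image bounds, and
// writes the interpolated result to dst.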
static void remap_pixel_bilinear(float *dst, const float *src, float map_x,
                                 float map_y, int d, int width, int height) {
| int x0 = (int)floor(map_x); |
| int y0 = (int)floor(map_y); |
| int x1 = x0 + 1; |
| int y1 = y0 + 1; |
| float alphax = map_x - x0; |
| float alphay = map_y - y0; |
| float m00 = (float)(1.0 - alphax) * (float)(1.0 - alphay); |
| float m01 = (float)(1.0 - alphax) * (alphay); |
| float m10 = (alphax) * (float)(1.0 - alphay); |
| float m11 = (alphax) * (alphay); |
| x0 = (x0 < 0 ? 0 : x0 >= width ? width - 1 : x0); |
| y0 = (y0 < 0 ? 0 : y0 >= height ? height - 1 : y0); |
| x1 = (x1 < 0 ? 0 : x1 >= width ? width - 1 : x1); |
| y1 = (y1 < 0 ? 0 : y1 >= height ? height - 1 : y1); |
| for (int c = 0; c < d; ++c) { |
| const float v00 = src[y0 * width * d + x0 * d + c]; |
| const float v10 = src[y0 * width * d + x1 * d + c]; |
| const float v01 = src[y1 * width * d + x0 * d + c]; |
| const float v11 = src[y1 * width * d + x1 * d + c]; |
| dst[c] = m00 * v00 + m10 * v10 + m01 * v01 + m11 * v11; |
| } |
| } |
| |
| // Eval function for the custom Resampler op. |
| TfLiteStatus ResamplerEval(TfLiteContext *context, TfLiteNode *node) { |
| const TfLiteTensor *src = |
| ::tflite::GetInput(context, node, kInputTensorSourceIndex); |
| const TfLiteTensor *warp = |
| ::tflite::GetInput(context, node, kInputTensorWarpIndex); |
  TfLiteTensor *dst =
      ::tflite::GetOutput(context, node, kOutputTensorDestinationIndex);
| TF_LITE_ENSURE(context, src != nullptr); |
| TF_LITE_ENSURE(context, warp != nullptr); |
| TF_LITE_ENSURE(context, dst != nullptr); |
  const float *src_data = static_cast<const float *>(src->data.data);
  const float *warp_data = static_cast<const float *>(warp->data.data);
  float *dst_data = static_cast<float *>(dst->data.data);
| |
| const int b = src->dims->data[0]; |
| const int h = src->dims->data[1]; |
| const int w = src->dims->data[2]; |
| const int d = src->dims->data[3]; |
| for (int batch = 0; batch < b; ++batch) { |
| const size_t data_offset = h * w * d * batch; |
| const int warp_offset = h * w * 2 * batch; |
    const float *src_batch = src_data + data_offset;
    float *dst_batch = dst_data + data_offset;
    const float *warp_batch = warp_data + warp_offset;
| for (int i = 0; i < h * w; ++i) { |
| remap_pixel_bilinear(dst_batch, src_batch, warp_batch[0], warp_batch[1], |
| d, w, h); |
| dst_batch += d; |
| warp_batch += 2; |
| } |
| } |
| return kTfLiteOk; |
| } |
| |
| // Custom operation implementation. |
| TfLiteRegistration *ResamplerOp() { |
| static TfLiteRegistration reg = { |
| /*.init=*/ |
| [](TfLiteContext *, const char *, size_t) -> void * { |
| return new TfLitePaddingValues(); |
| }, |
| /*.free=*/ |
| [](TfLiteContext *, void *buffer) -> void { |
| delete reinterpret_cast<TfLitePaddingValues *>(buffer); |
| }, |
| /*.prepare=*/ResamplerPrepare, |
| /*.invoke=*/ResamplerEval, |
| /*.profiling_string=*/nullptr, |
| /*.builtin_code=*/0, |
| /*.custom_name=*/"Resampler.", 0 |
| }; |
  return &reg;
| } |
| |
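// Creates an XNNPack delegate running on at least one thread. The caller
// owns the returned delegate and must release it with
// TfLiteXNNPackDelegateDelete() after the interpreter has been destroyed.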
| static TfLiteDelegate *get_tflite_xnnpack_delegate(int num_threads) { |
| TfLiteXNNPackDelegateOptions xnnpack_options = |
| TfLiteXNNPackDelegateOptionsDefault(); |
| xnnpack_options.num_threads = MAX(num_threads, 1); |
| return TfLiteXNNPackDelegateCreate(&xnnpack_options); |
| } |
| |
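// Finds the indices of the two image input tensors by name; an index is
// left at -1 if the corresponding tensor is not found.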
| static void get_input_tensor_indices( |
| std::unique_ptr<tflite::Interpreter> &interpreter, int *image1_tensor_index, |
| int *image2_tensor_index) { |
| *image1_tensor_index = -1; |
| *image2_tensor_index = -1; |
| for (int i = 0; i < (int)interpreter->inputs().size(); ++i) { |
| const auto name = interpreter->GetInputName(i); |
| if (name == kImage1_tensor_name) { |
| *image1_tensor_index = i; |
| } else if (name == kImage2_tensor_name) { |
| *image2_tensor_index = i; |
| } |
| } |
| } |
| |
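// Finds the index of the flow output tensor by name; left at -1 if not
// found.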
| static void get_output_tensor_index( |
| std::unique_ptr<tflite::Interpreter> &interpreter, |
| int *output_tensor_index) { |
| *output_tensor_index = -1; |
| for (int i = 0; i < (int)interpreter->outputs().size(); ++i) { |
| const auto name = interpreter->GetOutputName(i); |
| if (name == kOutput_tensor_name) { |
| *output_tensor_index = i; |
| } |
| } |
| } |
| |
| // Builds and returns the TFlite interpreter. |
| static std::unique_ptr<tflite::Interpreter> get_tflite_interpreter( |
| int width, int height, int num_threads, TfLiteDelegate *xnnpack_delegate) { |
  // width and height are only used when USE_DYNAMIC_MODEL is enabled.
  (void)width;
  (void)height;
| const unsigned char *const model_tflite_data = MODEL_DATA; |
| |
| auto model = tflite::GetModel(model_tflite_data); |
| if (model == nullptr) return nullptr; |
| |
| tflite::ops::builtin::BuiltinOpResolver resolver; |
| resolver.AddCustom("Resampler", ResamplerOp()); |
| tflite::InterpreterBuilder builder(model, resolver); |
| std::unique_ptr<tflite::Interpreter> interpreter; |
| builder(&interpreter); |
| |
| tflite::ErrorReporter *reporter = tflite::DefaultErrorReporter(); |
| |
| if (xnnpack_delegate) { |
| if (interpreter->ModifyGraphWithDelegate(xnnpack_delegate) != kTfLiteOk) { |
| reporter->Report("Failed at modifying graph with XNNPack delegate"); |
| return nullptr; |
| } |
| } |
| #if USE_DYNAMIC_MODEL |
| // We only need to resize the input tensors. All other tensors (including |
| // output tensor) will be resized automatically. |
| // Dimension order: batch_size, height, width, num_channels. |
| // Note: height comes before width here! |
| const std::vector<int> in_dims = { 1, height, width, 3 }; |
| int image1_tensor_index = -1; |
| int image2_tensor_index = -1; |
| get_input_tensor_indices(interpreter, &image1_tensor_index, |
| &image2_tensor_index); |
| printf("input indices %d %d\n", image1_tensor_index, image2_tensor_index); |
| if (interpreter->ResizeInputTensor(interpreter->inputs()[image1_tensor_index], |
| in_dims) != kTfLiteOk) { |
| reporter->Report("Failed at input tensor resize"); |
| return nullptr; |
| } |
| if (interpreter->ResizeInputTensor(interpreter->inputs()[image2_tensor_index], |
| in_dims) != kTfLiteOk) { |
| reporter->Report("Failed at input tensor resize"); |
| return nullptr; |
| } |
| #endif // USE_DYNAMIC_MODEL |
| |
| if (interpreter->AllocateTensors() != kTfLiteOk) { |
| reporter->Report("Failed at tensor allocation"); |
| return nullptr; |
| } |
| interpreter->SetNumThreads(MAX(num_threads, 1)); |
| return interpreter; |
| } |
| |
| } // namespace |
| |
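// Warps the 8-bit source plane into dst using the per-pixel motion field
// (flow_x, flow_y) with bilinear interpolation; sample coordinates are
// clamped to the frame bounds.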
| static void bilinear_interp_lowbd(uint8_t *dst, int dst_stride, uint8_t *src, |
| int src_stride, double *flow_x, |
| double *flow_y, int flow_stride, int width, |
| int height) { |
| for (int i = 0; i < height; ++i) { |
| for (int j = 0; j < width; ++j) { |
| const double vx = flow_x[i * flow_stride + j]; |
| const double vy = flow_y[i * flow_stride + j]; |
| int x0 = j + (int)floor(vx); |
| int y0 = i + (int)floor(vy); |
| int x1 = x0 + 1; |
| int y1 = y0 + 1; |
| double alphax = j + vx - x0; |
| double alphay = i + vy - y0; |
| double m00 = (1.0 - alphax) * (1.0 - alphay); |
| double m01 = (1.0 - alphax) * (alphay); |
| double m10 = (alphax) * (1.0 - alphay); |
| double m11 = (alphax) * (alphay); |
| x0 = (x0 < 0 ? 0 : x0 >= width ? width - 1 : x0); |
| y0 = (y0 < 0 ? 0 : y0 >= height ? height - 1 : y0); |
| x1 = (x1 < 0 ? 0 : x1 >= width ? width - 1 : x1); |
| y1 = (y1 < 0 ? 0 : y1 >= height ? height - 1 : y1); |
| const int v00 = src[y0 * src_stride + x0]; |
| const int v10 = src[y0 * src_stride + x1]; |
| const int v01 = src[y1 * src_stride + x0]; |
| const int v11 = src[y1 * src_stride + x1]; |
| const int v = (int)rint(m00 * v00 + m10 * v10 + m01 * v01 + m11 * v11); |
| dst[i * dst_stride + j] = clip_pixel(v); |
| } |
| } |
| } |
| |
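// High bit-depth counterpart of bilinear_interp_lowbd().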
| static void bilinear_interp_highbd(uint16_t *dst, int dst_stride, uint16_t *src, |
| int src_stride, double *flow_x, |
| double *flow_y, int flow_stride, int width, |
| int height, int bd) { |
| for (int i = 0; i < height; ++i) { |
| for (int j = 0; j < width; ++j) { |
| const double vx = flow_x[i * flow_stride + j]; |
| const double vy = flow_y[i * flow_stride + j]; |
| int x0 = j + (int)floor(vx); |
| int y0 = i + (int)floor(vy); |
| int x1 = x0 + 1; |
| int y1 = y0 + 1; |
| double alphax = j + vx - x0; |
| double alphay = i + vy - y0; |
| double m00 = (1.0 - alphax) * (1.0 - alphay); |
| double m01 = (1.0 - alphax) * (alphay); |
| double m10 = (alphax) * (1.0 - alphay); |
| double m11 = (alphax) * (alphay); |
| x0 = (x0 < 0 ? 0 : x0 >= width ? width - 1 : x0); |
| y0 = (y0 < 0 ? 0 : y0 >= height ? height - 1 : y0); |
| x1 = (x1 < 0 ? 0 : x1 >= width ? width - 1 : x1); |
| y1 = (y1 < 0 ? 0 : y1 >= height ? height - 1 : y1); |
| const int v00 = src[y0 * src_stride + x0]; |
| const int v10 = src[y0 * src_stride + x1]; |
| const int v01 = src[y1 * src_stride + x0]; |
| const int v11 = src[y1 * src_stride + x1]; |
| const int v = (int)rint(m00 * v00 + m10 * v10 + m01 * v01 + m11 * v11); |
| dst[i * dst_stride + j] = clip_pixel_highbd(v, bd); |
| } |
| } |
| } |
| |
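// Returns the mean squared error between two 8-bit planes.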
| static double compute_mse_lowbd(uint8_t *src, int src_stride, uint8_t *rec, |
| int rec_stride, int width, int height) { |
| uint64_t sse = 0; |
| for (int i = 0; i < height; ++i) { |
| for (int j = 0; j < width; ++j) { |
| const int diff = src[i * src_stride + j] - rec[i * rec_stride + j]; |
| sse += diff * diff; |
| } |
| } |
| return (double)sse / (width * height); |
| } |
| |
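// Returns the mean squared error between two high bit-depth planes.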
| static double compute_mse_highbd(uint16_t *src, int src_stride, uint16_t *rec, |
| int rec_stride, int width, int height) { |
| uint64_t sse = 0; |
| for (int i = 0; i < height; ++i) { |
| for (int j = 0; j < width; ++j) { |
| const int diff = src[i * src_stride + j] - rec[i * rec_stride + j]; |
| sse += diff * diff; |
| } |
| } |
| return (double)sse / (width * height); |
| } |
| |
| static void usage_and_exit(char *prog) { |
| printf("Usage:\n"); |
| printf(" %s\n", prog); |
| printf(" <y4m_input>\n"); |
| printf(" <frame_1>\n"); |
| printf(" <frame_2>\n"); |
| printf(" <flow_x_output>\n"); |
| printf(" <flow_y_output>\n"); |
| printf(" \n"); |
| exit(EXIT_FAILURE); |
| } |
| |
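// Splits buf in place at each occurrence of delim, storing up to nmax word
// pointers in words. Returns the number of words found.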
| static int split_words(char *buf, char delim, int nmax, char **words) { |
| char *y = buf; |
| char *x; |
| int n = 0; |
| while ((x = strchr(y, delim)) != NULL) { |
| *x = 0; |
| words[n++] = y; |
| if (n == nmax) return n; |
| y = x + 1; |
| } |
| words[n++] = y; |
| assert(n > 0 && n <= nmax); |
| return n; |
| } |
| |
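// Parses width, height, bit depth and chroma subsampling from the y4m
// header words. subx/suby are set to 1 if chroma is subsampled in that
// direction. Returns 1 on success, 0 on a malformed header.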
| static int parse_info(char *hdrwords[], int nhdrwords, int *width, int *height, |
| int *bitdepth, int *subx, int *suby) { |
| *bitdepth = 8; |
| *subx = 1; |
| *suby = 1; |
| if (nhdrwords < 4) return 0; |
| if (strcmp(hdrwords[0], "YUV4MPEG2")) return 0; |
| if (sscanf(hdrwords[1], "W%d", width) != 1) return 0; |
| if (sscanf(hdrwords[2], "H%d", height) != 1) return 0; |
| if (hdrwords[3][0] != 'F') return 0; |
| for (int i = 4; i < nhdrwords; ++i) { |
| if (!strncmp(hdrwords[i], "C420", 4)) { |
| *subx = 1; |
| *suby = 1; |
| if (hdrwords[i][4] == 'p') *bitdepth = atoi(&hdrwords[i][5]); |
| } else if (!strncmp(hdrwords[i], "C422", 4)) { |
| *subx = 1; |
| *suby = 0; |
| if (hdrwords[i][4] == 'p') *bitdepth = atoi(&hdrwords[i][5]); |
| } else if (!strncmp(hdrwords[i], "C444", 4)) { |
| *subx = 0; |
| *suby = 0; |
| if (hdrwords[i][4] == 'p') *bitdepth = atoi(&hdrwords[i][5]); |
| } |
| } |
| return 1; |
| } |
| |
// Populates the tensor with a resized, normalized version of the luma
// plane, replicating the single channel across the model's three RGB input
// channels.
| static int fill_input_tensor_lowbd(const uint8_t *image, int width, int height, |
| int stride, TfLiteTensor *tensor) { |
| if (tensor->type != kTfLiteFloat32) { |
| printf("Expected float32 inputs.\n"); |
| return 0; |
| } |
| if (tensor->dims->size != 4) { |
| printf("Expected 4 dimensional inputs.\n"); |
| return 0; |
| } |
| if (tensor->dims->data[0] != 1) { |
| printf("Expected batch size 1.\n"); |
| return 0; |
| } |
| if (tensor->dims->data[3] != 3) { |
| printf("Expected RGB inputs.\n"); |
| return 0; |
| } |
| |
| const int h = tensor->dims->data[1]; |
| const int w = tensor->dims->data[2]; |
| uint8_t *image_resized = (uint8_t *)malloc(h * w * sizeof(*image_resized)); |
| av1_resize_plane(image, height, width, stride, image_resized, h, w, w); |
| |
| float *data = static_cast<float *>(tensor->data.data); |
| constexpr float kScale = 1 / 127.5f; |
| constexpr float kOffset = -1.f; |
| for (int y = 0; y < h; ++y) { |
| for (int x = 0; x < w; ++x) { |
| const float v = image_resized[y * w + x] * kScale + kOffset; |
| *data++ = v; |
| *data++ = v; |
| *data++ = v; |
| } |
| } |
| free(image_resized); |
| return 1; |
| } |
| |
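// High bit-depth counterpart of fill_input_tensor_lowbd().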
| static int fill_input_tensor_highbd(const uint16_t *image, int width, |
| int height, int stride, |
| TfLiteTensor *tensor, int bit_depth) { |
| if (tensor->type != kTfLiteFloat32) { |
| printf("Expected float32 inputs.\n"); |
| return 0; |
| } |
| if (tensor->dims->size != 4) { |
| printf("Expected 4 dimensional inputs.\n"); |
| return 0; |
| } |
| if (tensor->dims->data[0] != 1) { |
| printf("Expected batch size 1.\n"); |
| return 0; |
| } |
| if (tensor->dims->data[3] != 3) { |
| printf("Expected RGB inputs.\n"); |
| return 0; |
| } |
| |
| const int h = tensor->dims->data[1]; |
| const int w = tensor->dims->data[2]; |
| uint16_t *image_resized = (uint16_t *)malloc(h * w * sizeof(*image_resized)); |
| av1_highbd_resize_plane(image, height, width, stride, image_resized, h, w, w, |
| bit_depth); |
| |
| float *data = static_cast<float *>(tensor->data.data); |
  const float kScale = 2.0f / ((1 << bit_depth) - 1);
| const float kOffset = -1.f; |
| for (int y = 0; y < h; ++y) { |
| for (int x = 0; x < w; ++x) { |
| const float v = image_resized[y * w + x] * kScale + kOffset; |
| *data++ = v; |
| *data++ = v; |
| *data++ = v; |
| } |
| } |
| free(image_resized); |
| return 1; |
| } |
| |
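// Resizes the model-resolution flow tensor to the full frame size and
// converts it to pixel units: each component is multiplied by output_scale
// (20, matching the PWC-Net flow-scaling convention) times the resolution
// ratio in that direction. Returns 1 on success.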
| static int extract_output_flow(TfLiteTensor *tensor, double *flow_x, |
| double *flow_y, int width, int height, |
| int flow_stride) { |
  static const double output_scale = 20.0;
| if (tensor->type != kTfLiteFloat32) { |
| fprintf(stderr, "Expected float32 output.\n"); |
| return 0; |
| } |
| if (tensor->dims->size != 4) { |
| fprintf(stderr, "Expected 4 dimensional output.\n"); |
| return 0; |
| } |
| if (tensor->dims->data[0] != 1) { |
| fprintf(stderr, "Expected batch size 1.\n"); |
| return 0; |
| } |
| if (tensor->dims->data[3] != 2) { |
| fprintf(stderr, "Expected 2-channel output.\n"); |
| return 0; |
| } |
| |
| const int h = tensor->dims->data[1]; |
| const int w = tensor->dims->data[2]; |
| const float *tensor_data = static_cast<float *>(tensor->data.data); |
| |
| double *flow_x_model_res = |
| (double *)malloc(h * w * sizeof(*flow_x_model_res)); |
| double *flow_y_model_res = |
| (double *)malloc(h * w * sizeof(*flow_y_model_res)); |
| for (int y = 0; y < h; ++y) { |
| for (int x = 0; x < w; ++x) { |
| flow_x_model_res[y * w + x] = (double)*tensor_data++; |
| flow_y_model_res[y * w + x] = (double)*tensor_data++; |
| } |
| } |
| printf("Computed flow at size %dx%d\n", w, h); |
| |
| av1_resize_plane_double(flow_x_model_res, h, w, w, flow_x, height, width, |
| flow_stride); |
| av1_resize_plane_double(flow_y_model_res, h, w, w, flow_y, height, width, |
| flow_stride); |
| const double x_scale = output_scale * static_cast<double>(width) / w; |
| const double y_scale = output_scale * static_cast<double>(height) / h; |
| for (int i = 0; i < height; ++i) { |
| for (int j = 0; j < width; ++j) { |
| flow_x[i * flow_stride + j] *= x_scale; |
| flow_y[i * flow_stride + j] *= y_scale; |
| } |
| } |
| printf("Resized flow to size %dx%d\n", width, height); |
| return 1; |
| } |
| |
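// Runs the TFLite flow model on a pair of 8-bit luma planes and writes the
// estimated motion field into flow_x/flow_y, such that sampling ref at
// (x + flow_x, y + flow_y) approximates src at (x, y). Returns 1 on
// success, 0 on failure.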
| static int deep_flow_img_tflite_lowbd(const uint8_t *src, int width, int height, |
| int src_stride, const uint8_t *ref, |
| int ref_stride, double *flow_x, |
| double *flow_y, int flow_stride) { |
| static const int use_xnnpack = USE_XNNPACK; |
| |
| TfLiteDelegate *xnnpack_delegate = |
| use_xnnpack ? get_tflite_xnnpack_delegate(NUM_THREADS) : nullptr; |
| std::unique_ptr<tflite::Interpreter> interpreter = |
| get_tflite_interpreter(width, height, NUM_THREADS, xnnpack_delegate); |
| if (interpreter == nullptr) return 0; |
| |
| int image1_tensor_index = -1; |
| int image2_tensor_index = -1; |
| get_input_tensor_indices(interpreter, &image1_tensor_index, |
| &image2_tensor_index); |
| int output_tensor_index = -1; |
| get_output_tensor_index(interpreter, &output_tensor_index); |
| |
| // Prepare input. |
| if (!fill_input_tensor_lowbd( |
| src, width, height, src_stride, |
| interpreter->input_tensor(image1_tensor_index))) { |
| fprintf(stderr, "Could not load image1 input tensor.\n"); |
| return 0; |
| } |
| if (!fill_input_tensor_lowbd( |
| ref, width, height, ref_stride, |
| interpreter->input_tensor(image2_tensor_index))) { |
| fprintf(stderr, "Could not load image2 input tensor.\n"); |
| return 0; |
| } |
| |
| // Invoke TFlite inference. |
| tflite::ErrorReporter *reporter = tflite::DefaultErrorReporter(); |
| auto status = interpreter->Invoke(); |
| if (status != kTfLiteOk) { |
| reporter->Report("Failed at interpreter invocation"); |
| return 0; |
| } |
| |
| if (!extract_output_flow(interpreter->output_tensor(output_tensor_index), |
| flow_x, flow_y, width, height, flow_stride)) { |
| fprintf(stderr, "Could not extract output flow tensor.\n"); |
| return 0; |
| } |
| // IMPORTANT: release the interpreter before destroying the delegate. |
| interpreter.reset(); |
| if (xnnpack_delegate) TfLiteXNNPackDelegateDelete(xnnpack_delegate); |
| |
| return 1; |
| } |
| |
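// High bit-depth counterpart of deep_flow_img_tflite_lowbd().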
| static int deep_flow_img_tflite_highbd(const uint16_t *src, int width, |
| int height, int src_stride, |
| const uint16_t *ref, int ref_stride, |
| double *flow_x, double *flow_y, |
| int flow_stride, int bit_depth) { |
| static const int use_xnnpack = USE_XNNPACK; |
| |
| TfLiteDelegate *xnnpack_delegate = |
| use_xnnpack ? get_tflite_xnnpack_delegate(NUM_THREADS) : nullptr; |
| std::unique_ptr<tflite::Interpreter> interpreter = |
| get_tflite_interpreter(width, height, NUM_THREADS, xnnpack_delegate); |
| |
| if (interpreter == nullptr) return 0; |
| |
| int image1_tensor_index = -1; |
| int image2_tensor_index = -1; |
| get_input_tensor_indices(interpreter, &image1_tensor_index, |
| &image2_tensor_index); |
| int output_tensor_index = -1; |
| get_output_tensor_index(interpreter, &output_tensor_index); |
| |
| // Prepare input. |
| if (!fill_input_tensor_highbd(src, width, height, src_stride, |
| interpreter->input_tensor(image1_tensor_index), |
| bit_depth)) { |
| fprintf(stderr, "Could not load image1 input tensor.\n"); |
| return 0; |
| } |
| if (!fill_input_tensor_highbd(ref, width, height, ref_stride, |
| interpreter->input_tensor(image2_tensor_index), |
| bit_depth)) { |
| fprintf(stderr, "Could not load image2 input tensor.\n"); |
| return 0; |
| } |
| |
| // Invoke TFlite inference. |
| tflite::ErrorReporter *reporter = tflite::DefaultErrorReporter(); |
| auto status = interpreter->Invoke(); |
| if (status != kTfLiteOk) { |
| reporter->Report("Failed at interpreter invocation"); |
| return 0; |
| } |
| |
| if (!extract_output_flow(interpreter->output_tensor(output_tensor_index), |
| flow_x, flow_y, width, height, flow_stride)) { |
| fprintf(stderr, "Could not extract output flow tensor.\n"); |
| return 0; |
| } |
| // IMPORTANT: release the interpreter before destroying the delegate. |
| interpreter.reset(); |
| if (xnnpack_delegate) TfLiteXNNPackDelegateDelete(xnnpack_delegate); |
| return 1; |
| } |
| |
| int main(int argc, char *argv[]) { |
| int ywidth, yheight; |
| |
| if (argc < 6) { |
| printf("Not enough arguments\n"); |
| usage_and_exit(argv[0]); |
| } |
| if (!strcmp(argv[1], "-help") || !strcmp(argv[1], "-h") || |
| !strcmp(argv[1], "--help") || !strcmp(argv[1], "--h")) |
| usage_and_exit(argv[0]); |
| |
| char *y4m_input = argv[1]; |
| char *flow_x_output = argv[4]; |
| char *flow_y_output = argv[5]; |
| |
| char hdr[Y4M_HDR_MAX_LEN]; |
| int nhdrwords; |
| char *hdrwords[Y4M_HDR_MAX_WORDS]; |
  FILE *fin = fopen(y4m_input, "rb");
  if (!fin) {
    printf("Could not open y4m file %s\n", y4m_input);
    usage_and_exit(argv[0]);
  }
  if (!fgets(hdr, sizeof(hdr), fin)) {
    printf("Invalid y4m file %s\n", y4m_input);
    usage_and_exit(argv[0]);
  }
| nhdrwords = split_words(hdr, ' ', Y4M_HDR_MAX_WORDS, hdrwords); |
| |
| int subx, suby; |
| int bitdepth; |
  if (!parse_info(hdrwords, nhdrwords, &ywidth, &yheight, &bitdepth, &subx,
                  &suby)) {
| printf("Could not parse header from %s\n", y4m_input); |
| usage_and_exit(argv[0]); |
| } |
| const int bytes_per_pel = (bitdepth + 7) / 8; |
| int src_frame = atoi(argv[2]); |
| int ref_frame = atoi(argv[3]); |
| |
| const int uvwidth = subx ? (ywidth + 1) >> 1 : ywidth; |
| const int uvheight = suby ? (yheight + 1) >> 1 : yheight; |
| const int ysize = ywidth * yheight; |
| const int uvsize = uvwidth * uvheight; |
| |
| uint8_t *src_inbuf = |
| (uint8_t *)malloc(ysize * bytes_per_pel * sizeof(uint8_t)); |
| uint8_t *ref_inbuf = |
| (uint8_t *)malloc(ysize * bytes_per_pel * sizeof(uint8_t)); |
| uint8_t *rec_outbuf = |
| (uint8_t *)malloc(ysize * bytes_per_pel * sizeof(uint8_t)); |
| |
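  // Assumes each frame is preceded by a bare "FRAME\n" tag with no
  // frame-level parameters, so every frame record has a fixed size.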
| char frametag[] = "FRAME\n"; |
| const long after_hdr_pos = ftell(fin); |
| const int src_offset = src_frame * ((ysize + 2 * uvsize) * bytes_per_pel + 6); |
| const int ref_offset = ref_frame * ((ysize + 2 * uvsize) * bytes_per_pel + 6); |
| |
| char intag[8]; |
| |
| fseek(fin, after_hdr_pos + src_offset, SEEK_SET); |
| if (fread(intag, 6, 1, fin) != 1) { |
| fprintf(stderr, "FRAME not found for src frame in %s\n", y4m_input); |
| exit(1); |
| } |
| intag[6] = 0; |
| if (strcmp(intag, frametag)) { |
| fprintf(stderr, "could not read src frame from %s\n", y4m_input); |
| exit(1); |
| } |
| if (fread(src_inbuf, ysize * bytes_per_pel, 1, fin) != 1) { |
| fprintf(stderr, "could not read src frame from %s\n", y4m_input); |
| exit(1); |
| } |
| |
| fseek(fin, after_hdr_pos + ref_offset, SEEK_SET); |
| if (fread(intag, 6, 1, fin) != 1) { |
| fprintf(stderr, "FRAME not found for ref frame in %s\n", y4m_input); |
| exit(1); |
| } |
| intag[6] = 0; |
| if (strcmp(intag, frametag)) { |
| fprintf(stderr, "could not read ref frame from %s\n", y4m_input); |
| exit(1); |
| } |
| if (fread(ref_inbuf, ysize * bytes_per_pel, 1, fin) != 1) { |
| fprintf(stderr, "could not read ref frame from %s\n", y4m_input); |
| exit(1); |
| } |
| fclose(fin); |
| |
| double *flow_x = (double *)malloc(ysize * sizeof(*flow_x)); |
| double *flow_y = (double *)malloc(ysize * sizeof(*flow_y)); |
| const int flow_stride = ywidth; |
| int ret; |
| if (bytes_per_pel == 2) { |
| ret = deep_flow_img_tflite_highbd((uint16_t *)src_inbuf, ywidth, yheight, |
| ywidth, (uint16_t *)ref_inbuf, ywidth, |
| flow_x, flow_y, flow_stride, bitdepth); |
| |
| } else { |
| ret = deep_flow_img_tflite_lowbd(src_inbuf, ywidth, yheight, ywidth, |
| ref_inbuf, ywidth, flow_x, flow_y, |
| flow_stride); |
| } |
| |
| if (ret) { |
| if (bytes_per_pel == 2) { |
| bilinear_interp_highbd((uint16_t *)rec_outbuf, ywidth, |
| (uint16_t *)ref_inbuf, ywidth, flow_x, flow_y, |
| flow_stride, ywidth, yheight, bitdepth); |
| // Warp mse: mse after warping with flow field generated |
| const double mse_warp = |
| compute_mse_highbd((uint16_t *)src_inbuf, ywidth, |
| (uint16_t *)rec_outbuf, ywidth, ywidth, yheight); |
| // Diff mse: mse with frame difference for comparison |
| const double mse_diff = |
| compute_mse_highbd((uint16_t *)src_inbuf, ywidth, |
| (uint16_t *)ref_inbuf, ywidth, ywidth, yheight); |
| fprintf(stdout, "Flow compute SUCCESS: Mse %f / %f\n", mse_warp, |
| mse_diff); |
| } else { |
| bilinear_interp_lowbd(rec_outbuf, ywidth, ref_inbuf, ywidth, flow_x, |
| flow_y, flow_stride, ywidth, yheight); |
| // Warp mse: mse after warping with flow field generated |
| const double mse_warp = compute_mse_lowbd(src_inbuf, ywidth, rec_outbuf, |
| ywidth, ywidth, yheight); |
| // Diff mse: mse with frame difference for comparison |
| const double mse_diff = compute_mse_lowbd(src_inbuf, ywidth, ref_inbuf, |
| ywidth, ywidth, yheight); |
| fprintf(stdout, "Flow compute SUCCESS: Mse %f / %f\n", mse_warp, |
| mse_diff); |
| } |
    FILE *fout = fopen(flow_x_output, "wb");
    if (!fout) {
      fprintf(stderr, "Could not open %s for writing\n", flow_x_output);
      exit(1);
    }
    fwrite(flow_x, sizeof(*flow_x), ysize, fout);
    fclose(fout);
    fout = fopen(flow_y_output, "wb");
    if (!fout) {
      fprintf(stderr, "Could not open %s for writing\n", flow_y_output);
      exit(1);
    }
    fwrite(flow_y, sizeof(*flow_y), ysize, fout);
    fclose(fout);
| } else { |
| fprintf(stderr, "Flow compute FAILED\n"); |
| } |
| |
| free(src_inbuf); |
| free(ref_inbuf); |
| free(rec_outbuf); |
| free(flow_x); |
| free(flow_y); |
| |
  return ret ? EXIT_SUCCESS : EXIT_FAILURE;
| } |