Merge "Cache loop filter errors."
diff --git a/examples.mk b/examples.mk
index 2337d1e..a10ee52 100644
--- a/examples.mk
+++ b/examples.mk
@@ -64,6 +64,10 @@
vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
+ifeq ($(CONFIG_SHARED),no)
+UTILS-$(CONFIG_VP9_ENCODER) += resize_util.c
+endif
+
# XMA example disabled for now, not used in VP8
#UTILS-$(CONFIG_DECODERS) += example_xma.c
#example_xma.GUID = A955FC4A-73F1-44F7-135E-30D84D32F022
diff --git a/resize_util.c b/resize_util.c
new file mode 100644
index 0000000..b068f55
--- /dev/null
+++ b/resize_util.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "./vp9/encoder/vp9_resize.h"
+
+// Prints the command-line synopsis to stdout; progname is normally argv[0].
+static void usage(char *progname) {
+  printf("Usage:\n");
+  printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> "
+         "<output_yuv> [<frames>]\n", progname);
+}
+
+// Parses "<W>x<H>" (separator 'x' or 'X') into *width and *height.
+// Returns 1 on success. Returns 0 and leaves the outputs untouched when a
+// number is missing, outside 1..INT_MAX, or followed by trailing characters.
+static int parse_dim(char *v, int *width, int *height) {
+  char *sep, *end = v;
+  const long w = strtol(v, &sep, 10);
+  const long h = (*sep == 'x' || *sep == 'X') ? strtol(sep + 1, &end, 10) : 0;
+  if (w <= 0 || w > INT_MAX || h <= 0 || h > INT_MAX) return 0;
+  if (*end != '\0') return 0;  // e.g. "640x480abc"
+  *width = (int)w;
+  *height = (int)h;
+  return 1;
+}
+
+// Resizes raw planar YUV 4:2:0 video (Y, U, V plane order) with
+// vp9_resize_frame420(). Frames of <width>x<height> are read from
+// <input_yuv>, scaled to the target size and written to <output_yuv> until
+// the input runs out or the optional <frames> limit is reached.
+// Returns 0 on success and 1 on bad arguments, unopenable files, allocation
+// failure or a failed write.
+int main(int argc, char *argv[]) {
+  char *fin, *fout;
+  FILE *fpin, *fpout;
+  uint8_t *inbuf, *inbuf_u, *inbuf_v;
+  uint8_t *outbuf, *outbuf_u, *outbuf_v;
+  size_t in_luma, out_luma;
+  int f = 0, frames, failed = 0;
+  int width, height, target_width, target_height;
+
+  if (argc < 5) {
+    printf("Incorrect parameters:\n");
+    usage(argv[0]);
+    return 1;
+  }
+
+  fin = argv[1];
+  fout = argv[4];
+  if (!parse_dim(argv[2], &width, &height)) {
+    printf("Incorrect parameters: %s\n", argv[2]);
+    usage(argv[0]);
+    return 1;
+  }
+  if (!parse_dim(argv[3], &target_width, &target_height)) {
+    printf("Incorrect parameters: %s\n", argv[3]);
+    usage(argv[0]);
+    return 1;
+  }
+
+  fpin = fopen(fin, "rb");
+  if (fpin == NULL) {
+    printf("Can't open file %s to read\n", fin);
+    usage(argv[0]);
+    return 1;
+  }
+  fpout = fopen(fout, "wb");
+  if (fpout == NULL) {
+    fclose(fpin);  // don't leak the input stream on this error path
+    printf("Can't open file %s to write\n", fout);
+    usage(argv[0]);
+    return 1;
+  }
+  frames = (argc >= 6) ? atoi(argv[5]) : INT_MAX;
+
+  printf("Input size: %dx%d\n", width, height);
+  printf("Target size: %dx%d, Frames: ", target_width, target_height);
+  if (frames == INT_MAX)
+    printf("All\n");
+  else
+    printf("%d\n", frames);
+
+  // A frame is one luma plane followed by two chroma planes of a quarter of
+  // its size each. Sizes are computed in size_t so that large dimensions do
+  // not overflow int arithmetic.
+  in_luma = (size_t)width * height;
+  out_luma = (size_t)target_width * target_height;
+  inbuf = (uint8_t *)malloc(in_luma * 3 / 2);
+  outbuf = (uint8_t *)malloc(out_luma * 3 / 2);
+  if (inbuf == NULL || outbuf == NULL) {
+    printf("Failed to allocate frame buffers\n");
+    failed = 1;
+    goto cleanup;
+  }
+  inbuf_u = inbuf + in_luma;
+  inbuf_v = inbuf_u + in_luma / 4;
+  outbuf_u = outbuf + out_luma;
+  outbuf_v = outbuf_u + out_luma / 4;
+
+  // A short read (end of input or a trailing partial frame) ends the loop.
+  while (f < frames && fread(inbuf, in_luma * 3 / 2, 1, fpin) == 1) {
+    vp9_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2, height,
+                        width, outbuf, target_width, outbuf_u, outbuf_v,
+                        target_width / 2, target_height, target_width);
+    if (fwrite(outbuf, out_luma * 3 / 2, 1, fpout) != 1) {
+      printf("Failed to write frame %d to %s\n", f, fout);
+      failed = 1;
+      break;
+    }
+    f++;
+  }
+  printf("%d frames processed\n", f);
+
+cleanup:
+  fclose(fpin);
+  // fclose flushes buffered output, so a failure here means lost data.
+  if (fclose(fpout) != 0) {
+    printf("Failed to flush %s\n", fout);
+    failed = 1;
+  }
+  free(inbuf);
+  free(outbuf);
+  return failed;
+}
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 65d087a..baa943b 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -347,29 +347,32 @@
ldrb r12, [r12]
vdup.u8 d0, r12
+ ; preload 8 left
+ vld1.8 d30, [r3]
+
; Load above 8 pixels
vld1.64 {d2}, [r2]
+ vmovl.u8 q10, d30
+
; Compute above - ytop_left
vsubl.u8 q3, d2, d0
; Load left row by row and compute left + (above - ytop_left)
- vld1.u8 {d6}, [r3]
-
; 1st row and 2nd row
- vdup.u8 d0, d6[0]
- vdup.u8 d1, d6[1]
- vaddw.s16 q1, q3, d0
- vaddw.s16 q2, q3, d1
+ vdup.16 q0, d20[0]
+ vdup.16 q1, d20[1]
+ vadd.s16 q0, q3, q0
+ vadd.s16 q1, q3, q1
; 3rd row and 4th row
- vdup.u8 d0, d6[2]
- vdup.u8 d1, d6[3]
- vaddw.s16 q8, q3, d0
- vaddw.s16 q9, q3, d1
+ vdup.16 q8, d20[2]
+ vdup.16 q9, d20[3]
+ vadd.s16 q8, q3, q8
+ vadd.s16 q9, q3, q9
- vqshrun.s16 d0, q1, #0
- vqshrun.s16 d1, q2, #0
+ vqshrun.s16 d0, q0, #0
+ vqshrun.s16 d1, q1, #0
vqshrun.s16 d2, q8, #0
vqshrun.s16 d3, q9, #0
@@ -379,19 +382,19 @@
vst1.64 {d3}, [r0], r1
; 5th row and 6th row
- vdup.u8 d0, d6[4]
- vdup.u8 d1, d6[5]
- vaddw.s16 q1, q3, d0
- vaddw.s16 q2, q3, d1
+ vdup.16 q0, d21[0]
+ vdup.16 q1, d21[1]
+ vadd.s16 q0, q3, q0
+ vadd.s16 q1, q3, q1
- ; 7rd row and 8th row
- vdup.u8 d0, d6[6]
- vdup.u8 d1, d6[7]
- vaddw.s16 q8, q3, d0
- vaddw.s16 q9, q3, d1
+ ; 7th row and 8th row
+ vdup.16 q8, d21[2]
+ vdup.16 q9, d21[3]
+ vadd.s16 q8, q3, q8
+ vadd.s16 q9, q3, q9
- vqshrun.s16 d0, q1, #0
- vqshrun.s16 d1, q2, #0
+ vqshrun.s16 d0, q0, #0
+ vqshrun.s16 d1, q1, #0
vqshrun.s16 d2, q8, #0
vqshrun.s16 d3, q9, #0
diff --git a/vp9/encoder/vp9_resize.c b/vp9/encoder/vp9_resize.c
index f15abc0..0766b51 100644
--- a/vp9/encoder/vp9_resize.c
+++ b/vp9/encoder/vp9_resize.c
@@ -16,7 +16,6 @@
#include <string.h>
#include "vp9/common/vp9_common.h"
#include "vp9/encoder/vp9_resize.h"
-#include "vpx/vpx_integer.h"
#define FILTER_BITS 7
@@ -30,8 +29,44 @@
typedef int16_t interp_kernel[INTERP_TAPS];
-// Filters for interpolation - note this also filters integer pels.
-const interp_kernel vp9_filteredinterp_filters[(1 << SUBPEL_BITS)] = {
+// Filters for interpolation (0.5-band) - note this also filters integer pels.
+// One kernel per sub-pel phase: interpolate() indexes the chosen bank with
+// sub_pel. The taps of every row sum to 128 (1 << FILTER_BITS).
+// choose_interp_filter() returns this bank when
+// outlength * 16 < inlength * 9.
+const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
+ {-3, 0, 35, 64, 35, 0, -3, 0},
+ {-3, -1, 34, 64, 36, 1, -3, 0},
+ {-3, -1, 32, 64, 38, 1, -3, 0},
+ {-2, -2, 31, 63, 39, 2, -3, 0},
+ {-2, -2, 29, 63, 41, 2, -3, 0},
+ {-2, -2, 28, 63, 42, 3, -4, 0},
+ {-2, -3, 27, 63, 43, 4, -4, 0},
+ {-2, -3, 25, 62, 45, 5, -4, 0},
+ {-2, -3, 24, 62, 46, 5, -4, 0},
+ {-2, -3, 23, 61, 47, 6, -4, 0},
+ {-2, -3, 21, 60, 49, 7, -4, 0},
+ {-1, -4, 20, 60, 50, 8, -4, -1},
+ {-1, -4, 19, 59, 51, 9, -4, -1},
+ {-1, -4, 17, 58, 52, 10, -4, 0},
+ {-1, -4, 16, 57, 53, 12, -4, -1},
+ {-1, -4, 15, 56, 54, 13, -4, -1},
+ {-1, -4, 14, 55, 55, 14, -4, -1},
+ {-1, -4, 13, 54, 56, 15, -4, -1},
+ {-1, -4, 12, 53, 57, 16, -4, -1},
+ {0, -4, 10, 52, 58, 17, -4, -1},
+ {-1, -4, 9, 51, 59, 19, -4, -1},
+ {-1, -4, 8, 50, 60, 20, -4, -1},
+ {0, -4, 7, 49, 60, 21, -3, -2},
+ {0, -4, 6, 47, 61, 23, -3, -2},
+ {0, -4, 5, 46, 62, 24, -3, -2},
+ {0, -4, 5, 45, 62, 25, -3, -2},
+ {0, -4, 4, 43, 63, 27, -3, -2},
+ {0, -4, 3, 42, 63, 28, -2, -2},
+ {0, -3, 2, 41, 63, 29, -2, -2},
+ {0, -3, 2, 39, 63, 31, -2, -2},
+ {0, -3, 1, 38, 64, 32, -1, -3},
+ {0, -3, 1, 36, 64, 34, -1, -3}
+};
+
+// Filters for interpolation (0.625-band) - note this also filters integer pels.
+const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
{-1, -8, 33, 80, 33, -8, -1, 0},
{-1, -8, 30, 80, 35, -8, -1, 1},
{-1, -8, 28, 80, 37, -7, -2, 1},
@@ -66,10 +101,132 @@
{1, -1, -8, 35, 80, 30, -8, -1},
};
+// Filters for interpolation (0.75-band) - note this also filters integer pels.
+// One kernel per sub-pel phase: interpolate() indexes the chosen bank with
+// sub_pel. The taps of every row sum to 128 (1 << FILTER_BITS).
+// choose_interp_filter() returns this bank when
+// inlength * 11 <= outlength * 16 < inlength * 13.
+const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
+ {2, -11, 25, 96, 25, -11, 2, 0},
+ {2, -11, 22, 96, 28, -11, 2, 0},
+ {2, -10, 19, 95, 31, -11, 2, 0},
+ {2, -10, 17, 95, 34, -12, 2, 0},
+ {2, -9, 14, 94, 37, -12, 2, 0},
+ {2, -8, 12, 93, 40, -12, 1, 0},
+ {2, -8, 9, 92, 43, -12, 1, 1},
+ {2, -7, 7, 91, 46, -12, 1, 0},
+ {2, -7, 5, 90, 49, -12, 1, 0},
+ {2, -6, 3, 88, 52, -12, 0, 1},
+ {2, -5, 1, 86, 55, -12, 0, 1},
+ {2, -5, -1, 84, 58, -11, 0, 1},
+ {2, -4, -2, 82, 61, -11, -1, 1},
+ {2, -4, -4, 80, 64, -10, -1, 1},
+ {1, -3, -5, 77, 67, -9, -1, 1},
+ {1, -3, -6, 75, 70, -8, -2, 1},
+ {1, -2, -7, 72, 72, -7, -2, 1},
+ {1, -2, -8, 70, 75, -6, -3, 1},
+ {1, -1, -9, 67, 77, -5, -3, 1},
+ {1, -1, -10, 64, 80, -4, -4, 2},
+ {1, -1, -11, 61, 82, -2, -4, 2},
+ {1, 0, -11, 58, 84, -1, -5, 2},
+ {1, 0, -12, 55, 86, 1, -5, 2},
+ {1, 0, -12, 52, 88, 3, -6, 2},
+ {0, 1, -12, 49, 90, 5, -7, 2},
+ {0, 1, -12, 46, 91, 7, -7, 2},
+ {1, 1, -12, 43, 92, 9, -8, 2},
+ {0, 1, -12, 40, 93, 12, -8, 2},
+ {0, 2, -12, 37, 94, 14, -9, 2},
+ {0, 2, -12, 34, 95, 17, -10, 2},
+ {0, 2, -11, 31, 95, 19, -10, 2},
+ {0, 2, -11, 28, 96, 22, -11, 2}
+};
+
+// Filters for interpolation (0.875-band) - note this also filters integer pels.
+// One kernel per sub-pel phase: interpolate() indexes the chosen bank with
+// sub_pel. The taps of every row sum to 128 (1 << FILTER_BITS).
+// choose_interp_filter() returns this bank when
+// inlength * 13 <= outlength * 16 < inlength * 16.
+const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
+ {3, -8, 13, 112, 13, -8, 3, 0},
+ {3, -7, 10, 112, 17, -9, 3, -1},
+ {2, -6, 7, 111, 21, -9, 3, -1},
+ {2, -5, 4, 111, 24, -10, 3, -1},
+ {2, -4, 1, 110, 28, -11, 3, -1},
+ {1, -3, -1, 108, 32, -12, 4, -1},
+ {1, -2, -3, 106, 36, -13, 4, -1},
+ {1, -1, -6, 105, 40, -14, 4, -1},
+ {1, -1, -7, 102, 44, -14, 4, -1},
+ {1, 0, -9, 100, 48, -15, 4, -1},
+ {1, 1, -11, 97, 53, -16, 4, -1},
+ {0, 1, -12, 95, 57, -16, 4, -1},
+ {0, 2, -13, 91, 61, -16, 4, -1},
+ {0, 2, -14, 88, 65, -16, 4, -1},
+ {0, 3, -15, 84, 69, -17, 4, 0},
+ {0, 3, -16, 81, 73, -16, 3, 0},
+ {0, 3, -16, 77, 77, -16, 3, 0},
+ {0, 3, -16, 73, 81, -16, 3, 0},
+ {0, 4, -17, 69, 84, -15, 3, 0},
+ {-1, 4, -16, 65, 88, -14, 2, 0},
+ {-1, 4, -16, 61, 91, -13, 2, 0},
+ {-1, 4, -16, 57, 95, -12, 1, 0},
+ {-1, 4, -16, 53, 97, -11, 1, 1},
+ {-1, 4, -15, 48, 100, -9, 0, 1},
+ {-1, 4, -14, 44, 102, -7, -1, 1},
+ {-1, 4, -14, 40, 105, -6, -1, 1},
+ {-1, 4, -13, 36, 106, -3, -2, 1},
+ {-1, 4, -12, 32, 108, -1, -3, 1},
+ {-1, 3, -11, 28, 110, 1, -4, 2},
+ {-1, 3, -10, 24, 111, 4, -5, 2},
+ {-1, 3, -9, 21, 111, 7, -6, 2},
+ {-1, 3, -9, 17, 112, 10, -7, 3}
+};
+
+// Filters for interpolation (full-band) - no filtering for integer pixels
+// One kernel per sub-pel phase: interpolate() indexes the chosen bank with
+// sub_pel. The taps of every row sum to 128 (1 << FILTER_BITS); the first
+// row is the pass-through kernel with a single tap of 128.
+// choose_interp_filter() returns this bank when outlength >= inlength.
+const interp_kernel vp9_filteredinterp_filters1000[(1 << SUBPEL_BITS)] = {
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {0, 1, -3, 128, 3, -1, 0, 0},
+ {-1, 2, -6, 127, 7, -2, 1, 0},
+ {-1, 3, -9, 126, 12, -4, 1, 0},
+ {-1, 4, -12, 125, 16, -5, 1, 0},
+ {-1, 4, -14, 123, 20, -6, 2, 0},
+ {-1, 5, -15, 120, 25, -8, 2, 0},
+ {-1, 5, -17, 118, 30, -9, 3, -1},
+ {-1, 6, -18, 114, 35, -10, 3, -1},
+ {-1, 6, -19, 111, 41, -12, 3, -1},
+ {-1, 6, -20, 107, 46, -13, 4, -1},
+ {-1, 6, -21, 103, 52, -14, 4, -1},
+ {-1, 6, -21, 99, 57, -16, 5, -1},
+ {-1, 6, -21, 94, 63, -17, 5, -1},
+ {-1, 6, -20, 89, 68, -18, 5, -1},
+ {-1, 6, -20, 84, 73, -19, 6, -1},
+ {-1, 6, -20, 79, 79, -20, 6, -1},
+ {-1, 6, -19, 73, 84, -20, 6, -1},
+ {-1, 5, -18, 68, 89, -20, 6, -1},
+ {-1, 5, -17, 63, 94, -21, 6, -1},
+ {-1, 5, -16, 57, 99, -21, 6, -1},
+ {-1, 4, -14, 52, 103, -21, 6, -1},
+ {-1, 4, -13, 46, 107, -20, 6, -1},
+ {-1, 3, -12, 41, 111, -19, 6, -1},
+ {-1, 3, -10, 35, 114, -18, 6, -1},
+ {-1, 3, -9, 30, 118, -17, 5, -1},
+ {0, 2, -8, 25, 120, -15, 5, -1},
+ {0, 2, -6, 20, 123, -14, 4, -1},
+ {0, 1, -5, 16, 125, -12, 4, -1},
+ {0, 1, -4, 12, 126, -9, 3, -1},
+ {0, 1, -2, 7, 127, -6, 2, -1},
+ {0, 0, -1, 3, 128, -3, 1, 0}
+};
+
// Filters for factor of 2 downsampling.
static const int16_t vp9_down2_symeven_half_filter[] = {56, 12, -3, -1};
static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3};
+// Selects the interpolation kernel bank for resampling inlength samples to
+// outlength samples. The outlength/inlength ratio is tested in sixteenths,
+// widest band first: >= 16/16 full-band, >= 13/16 0.875, >= 11/16 0.75,
+// >= 9/16 0.625, anything smaller 0.5.
+static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
+  const int scaled_out = outlength * 16;
+
+  if (scaled_out >= inlength * 16) return vp9_filteredinterp_filters1000;
+  if (scaled_out >= inlength * 13) return vp9_filteredinterp_filters875;
+  if (scaled_out >= inlength * 11) return vp9_filteredinterp_filters750;
+  if (scaled_out >= inlength * 9) return vp9_filteredinterp_filters625;
+  return vp9_filteredinterp_filters500;
+}
+
static void interpolate(const uint8_t *const input, int inlength,
uint8_t *output, int outlength) {
const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) /
@@ -81,6 +238,9 @@
int x, x1, x2, sum, k, int_pel, sub_pel;
int64_t y;
+ const interp_kernel *interp_filters =
+ choose_interp_filter(inlength, outlength);
+
x = 0;
y = offset;
while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
@@ -101,7 +261,7 @@
const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
- filter = vp9_filteredinterp_filters[sub_pel];
+ filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k) {
const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
@@ -116,7 +276,7 @@
const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
- filter = vp9_filteredinterp_filters[sub_pel];
+ filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
@@ -129,7 +289,7 @@
const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
- filter = vp9_filteredinterp_filters[sub_pel];
+ filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
@@ -140,7 +300,7 @@
const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
- filter = vp9_filteredinterp_filters[sub_pel];
+ filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
diff --git a/vp9/encoder/vp9_resize.h b/vp9/encoder/vp9_resize.h
index c67595a..1818cd4 100644
--- a/vp9/encoder/vp9_resize.h
+++ b/vp9/encoder/vp9_resize.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -12,6 +12,7 @@
#define VP9_ENCODER_VP9_RESIZE_H_
#include <stdio.h>
+#include "vpx/vpx_integer.h"
void vp9_resize_plane(const uint8_t *const input,
int height,