Use aom_sse instead of aom_mse to compute SSE

Use aom_sse and aom_highbd_sse instead of aom_mse16x16 and
aom_highbd_8_mse16x16, respectively, to compute SSE for PSNR
calculations. This fixes an issue [1] whereby aom_highbd_8_mse16x16
was being used to calculate SSE for 10- and 12-bit input.

[1] https://aomedia-review.googlesource.com/c/aom/+/174142/comments/9c28b3a2_0b2b9ce0

Change-Id: Id2640f98d6811d53c3c7d5320d6e078f83bf09d1
diff --git a/aom_dsp/psnr.c b/aom_dsp/psnr.c
index 08fb69c..f71590c 100644
--- a/aom_dsp/psnr.c
+++ b/aom_dsp/psnr.c
@@ -44,9 +44,9 @@
 }
 
 #if CONFIG_AV1_HIGHBITDEPTH
-static int64_t encoder_highbd_8_sse(const uint8_t *a8, int a_stride,
-                                    const uint8_t *b8, int b_stride, int w,
-                                    int h) {
+static int64_t encoder_highbd_sse(const uint8_t *a8, int a_stride,
+                                  const uint8_t *b8, int b_stride, int w,
+                                  int h) {
   const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
   const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
   int64_t sse = 0;
@@ -84,10 +84,8 @@
   for (y = 0; y < height / 16; ++y) {
     const uint8_t *pa = a;
     const uint8_t *pb = b;
-    unsigned int sse;
     for (x = 0; x < width / 16; ++x) {
-      aom_mse16x16(pa, a_stride, pb, b_stride, &sse);
-      total_sse += sse;
+      total_sse += aom_sse(pa, a_stride, pb, b_stride, 16, 16);
 
       pa += 16;
       pb += 16;
@@ -128,22 +126,20 @@
   const int dh = height % 16;
 
   if (dw > 0) {
-    total_sse += encoder_highbd_8_sse(&a[width - dw], a_stride, &b[width - dw],
-                                      b_stride, dw, height);
+    total_sse += encoder_highbd_sse(&a[width - dw], a_stride, &b[width - dw],
+                                    b_stride, dw, height);
   }
   if (dh > 0) {
-    total_sse += encoder_highbd_8_sse(&a[(height - dh) * a_stride], a_stride,
-                                      &b[(height - dh) * b_stride], b_stride,
-                                      width - dw, dh);
+    total_sse += encoder_highbd_sse(&a[(height - dh) * a_stride], a_stride,
+                                    &b[(height - dh) * b_stride], b_stride,
+                                    width - dw, dh);
   }
 
   for (y = 0; y < height / 16; ++y) {
     const uint8_t *pa = a;
     const uint8_t *pb = b;
-    unsigned int sse;
     for (x = 0; x < width / 16; ++x) {
-      aom_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);
-      total_sse += sse;
+      total_sse += aom_highbd_sse(pa, a_stride, pb, b_stride, 16, 16);
       pa += 16;
       pb += 16;
     }