Add high bit-depth edge detection

Change-Id: I7277df0d78d2ad68307ebd7aa0cf4da92ecfc552
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 4347f09..673f4a8 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -12583,8 +12583,36 @@
   }
 }
 
-static INLINE uint8_t get_pix(const uint8_t *src, int stride, int i, int j) {
-  return src[i + stride * j];
+/* Use standard 3x3 Sobel matrix. Macro so it can be used for either high or
+   low bit-depth arrays. */
+#define SOBEL_X(src, stride, i, j)                       \
+  ((src)[((i)-1) + (stride) * ((j)-1)] -                 \
+   (src)[((i) + 1) + (stride) * ((j)-1)] +  /* NOLINT */ \
+   2 * (src)[((i)-1) + (stride) * (j)] -    /* NOLINT */ \
+   2 * (src)[((i) + 1) + (stride) * (j)] +  /* NOLINT */ \
+   (src)[((i)-1) + (stride) * ((j) + 1)] -  /* NOLINT */ \
+   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
+#define SOBEL_Y(src, stride, i, j)                       \
+  ((src)[((i)-1) + (stride) * ((j)-1)] +                 \
+   2 * (src)[(i) + (stride) * ((j)-1)] +    /* NOLINT */ \
+   (src)[((i) + 1) + (stride) * ((j)-1)] -  /* NOLINT */ \
+   (src)[((i)-1) + (stride) * ((j) + 1)] -  /* NOLINT */ \
+   2 * (src)[(i) + (stride) * ((j) + 1)] -  /* NOLINT */ \
+   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
+
+sobel_xy sobel(const uint8_t *input, int stride, int i, int j, int bd) {
+  int16_t s_x;
+  int16_t s_y;
+  if (bd <= 8) {
+    s_x = SOBEL_X(input, stride, i, j);
+    s_y = SOBEL_Y(input, stride, i, j);
+  } else {
+    const uint16_t *src = CONVERT_TO_SHORTPTR(input);
+    s_x = SOBEL_X(src, stride, i, j);
+    s_y = SOBEL_Y(src, stride, i, j);
+  }
+  sobel_xy r = { .x = s_x, .y = s_y };
+  return r;
 }
 
 // 8-tap Gaussian convolution filter with sigma = 1.3, sums to 128,
@@ -12593,8 +12621,8 @@
                                                                30, 12, 2,  0 };
 
 void gaussian_blur(const uint8_t *src, int src_stride, int w, int h,
-                   uint8_t *dst) {
-  ConvolveParams conv_params = get_conv_params(0, 0, 0);
+                   uint8_t *dst, int bd) {
+  ConvolveParams conv_params = get_conv_params(0, 0, bd);
   InterpFilterParams filter = { .filter_ptr = gauss_filter,
                                 .taps = 8,
                                 .subpel_shifts = 0,
@@ -12604,29 +12632,17 @@
   assert(w % 8 == 0);
   // Because we use an eight tap filter, the stride should be at least 7 + w.
   assert(src_stride >= w + 7);
-  av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
-                     &conv_params);
+  if (bd <= 8) {
+    av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
+                       &conv_params);
+  } else {
+    av1_highbd_convolve_2d_sr(CONVERT_TO_SHORTPTR(src), src_stride,
+                              CONVERT_TO_SHORTPTR(dst), w, w, h, &filter,
+                              &filter, 0, 0, &conv_params, bd);
+  }
 }
 
-/* Use standard 3x3 Sobel matrix. */
-sobel_xy sobel(const uint8_t *input, int stride, int i, int j) {
-  const int16_t s_x = get_pix(input, stride, i - 1, j - 1) -
-                      get_pix(input, stride, i + 1, j - 1) +
-                      2 * get_pix(input, stride, i - 1, j) -
-                      2 * get_pix(input, stride, i + 1, j) +
-                      get_pix(input, stride, i - 1, j + 1) -
-                      get_pix(input, stride, i + 1, j + 1);
-  const int16_t s_y = get_pix(input, stride, i - 1, j - 1) +
-                      2 * get_pix(input, stride, i, j - 1) +
-                      get_pix(input, stride, i + 1, j - 1) -
-                      get_pix(input, stride, i - 1, j + 1) -
-                      2 * get_pix(input, stride, i, j + 1) -
-                      get_pix(input, stride, i + 1, j + 1);
-  sobel_xy r = { .x = s_x, .y = s_y };
-  return r;
-}
-
-static uint16_t edge_probability(const uint8_t *input, int w, int h) {
+static uint16_t edge_probability(const uint8_t *input, int w, int h, int bd) {
   // The probability of an edge in the whole image is the same as the highest
   // probability of an edge for any individual pixel. Use Sobel as the metric
   // for finding an edge.
@@ -12634,8 +12650,11 @@
   // Ignore the 1 pixel border around the image for the computation.
   for (int j = 1; j < h - 1; ++j) {
     for (int i = 1; i < w - 1; ++i) {
-      sobel_xy g = sobel(input, w, i, j);
-      uint16_t magnitude = (uint16_t)sqrt(g.x * g.x + g.y * g.y);
+      sobel_xy g = sobel(input, w, i, j, bd);
+      // Scale down to 8-bit to get same output regardless of bit depth.
+      int16_t g_x = g.x >> (bd - 8);
+      int16_t g_y = g.y >> (bd - 8);
+      uint16_t magnitude = (uint16_t)sqrt(g_x * g_x + g_y * g_y);
       highest = AOMMAX(highest, magnitude);
     }
   }
@@ -12645,18 +12664,27 @@
 /* Uses most of the Canny edge detection algorithm to find if there are any
  * edges in the image.
  */
-uint16_t av1_edge_exists(const uint8_t *src, int src_stride, int w, int h) {
+uint16_t av1_edge_exists(const uint8_t *src, int src_stride, int w, int h,
+                         int bd) {
   if (w < 3 || h < 3) {
     return 0;
   }
-  uint8_t *blurred = NULL;
-  blurred = (uint8_t *)aom_memalign(32, sizeof(*blurred) * w * h);
-  gaussian_blur(src, src_stride, w, h, blurred);
+  uint8_t *blurred;
+  if (bd <= 8) {
+    blurred = (uint8_t *)aom_memalign(32, sizeof(uint8_t) * w * h);
+  } else {
+    blurred = CONVERT_TO_BYTEPTR(aom_memalign(32, sizeof(uint16_t) * w * h));
+  }
+  gaussian_blur(src, src_stride, w, h, blurred, bd);
   // Skip the non-maximum suppression step in Canny edge detection. We just
   // want a probability of an edge existing in the buffer, which is determined
   // by the strongest edge in it -- we don't need to eliminate the weaker
   // edges. Use Sobel for the edge detection.
-  uint16_t prob = edge_probability(blurred, w, h);
-  aom_free(blurred);
+  uint16_t prob = edge_probability(blurred, w, h, bd);
+  if (bd <= 8) {
+    aom_free(blurred);
+  } else {
+    aom_free(CONVERT_TO_SHORTPTR(blurred));
+  }
   return prob;
 }
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index f0b0b35..65a6e36 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -129,15 +129,16 @@
 /** Returns an integer indicating the strength of the edge.
  * 0 means no edge found, 556 is the strength of a solid black/white edge,
  * and the number may range higher if the signal is even stronger (e.g., on a
- * corner).
+ * corner). bd is the bit depth.
  */
-uint16_t av1_edge_exists(const uint8_t *src, int src_stride, int w, int h);
+uint16_t av1_edge_exists(const uint8_t *src, int src_stride, int w, int h,
+                         int bd);
 
 /** Applies a Gaussian blur with sigma = 1.3. Used by av1_edge_exists and
  * tests.
  */
 void gaussian_blur(const uint8_t *src, int src_stride, int w, int h,
-                   uint8_t *dst);
+                   uint8_t *dst, int bd);
 
 /* Applies standard 3x3 Sobel matrix. */
 typedef struct {
@@ -145,7 +146,7 @@
   int16_t y;
 } sobel_xy;
 
-sobel_xy sobel(const uint8_t *input, int stride, int i, int j);
+sobel_xy sobel(const uint8_t *input, int stride, int i, int j, int bd);
 
 #if CONFIG_COLLECT_INTER_MODE_RD_STATS
 void av1_inter_mode_data_init(struct TileDataEnc *tile_data);
diff --git a/test/edge_detect_test.cc b/test/edge_detect_test.cc
index 272da62..3c8edf0 100644
--- a/test/edge_detect_test.cc
+++ b/test/edge_detect_test.cc
@@ -11,6 +11,7 @@
 
 #include "aom_mem/aom_mem.h"
 #include "av1/encoder/rdopt.h"
+#include "test/util.h"
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
 namespace {
@@ -18,15 +19,34 @@
 using ::testing::get;
 using ::testing::tuple;
 
-class EdgeDetectBrightnessTest :
-    // Parameters are (brightness, width, height).
-    public ::testing::TestWithParam<tuple<int, int, int> > {};
-
-/** Get the (x, y) value from the input; if i or j is outside of the width
+/** Get the (i, j) value from the input; if i or j is outside of the width
  * or height, the nearest pixel value is returned.
  */
-static uint8_t get_xy(const uint8_t *data, int w, int h, int i, int j) {
-  return data[AOMMAX(AOMMIN(i, w - 1), 0) + w * AOMMAX(AOMMIN(j, h - 1), 0)];
+
+static int get_nearest_pix(const uint8_t *buf, int w, int h, int i, int j,
+                           int bd) {
+  int offset = AOMMAX(AOMMIN(i, w - 1), 0) + w * AOMMAX(AOMMIN(j, h - 1), 0);
+  if (bd <= 8) {
+    return buf[offset];
+  } else {
+    return *CONVERT_TO_SHORTPTR(buf + offset);
+  }
+}
+
+static int get_pix(uint8_t *buf, int i, int bd) {
+  if (bd <= 8) {
+    return buf[i];
+  } else {
+    return *CONVERT_TO_SHORTPTR(buf + i);
+  }
+}
+
+static void set_pix(uint8_t *buf, int i, int v, int bd) {
+  if (bd <= 8) {
+    buf[i] = v;
+  } else {
+    *CONVERT_TO_SHORTPTR(buf + i) = v;
+  }
 }
 
 /** Given the image data, creates a new image with padded values, so an
@@ -34,7 +54,7 @@
  * value in the image. Returns a pointer to the start of the image in the
  * padded data. Must be freed with free_pad_8tap.
  */
-uint8_t *pad_8tap_convolve(const uint8_t *data, int w, int h) {
+static uint8_t *pad_8tap_convolve(const uint8_t *data, int w, int h, int bd) {
   // SIMD optimizations require the width to be a multiple of 8 and the height
   // to be multiples of 4.
   assert(w % 8 == 0);
@@ -43,112 +63,284 @@
   // and 4 lines on the right and bottom, for 7 extra lines.
   const int pad_w = w + 7;
   const int pad_h = h + 7;
-  uint8_t *dst = (uint8_t *)aom_memalign(32, pad_w * pad_h);
-  // Fill in the data from the original.
+
+  uint8_t *dst;
+  if (bd <= 8) {
+    dst = (uint8_t *)aom_memalign(32, sizeof(uint8_t) * pad_w * pad_h);
+  } else {
+    dst =
+        CONVERT_TO_BYTEPTR(aom_memalign(32, sizeof(uint16_t) * pad_w * pad_h));
+  }
+
   for (int j = 0; j < pad_h; ++j) {
     for (int i = 0; i < pad_w; ++i) {
-      dst[i + j * pad_w] = get_xy(data, w, h, i - 3, j - 3);
+      const int v = get_nearest_pix(data, w, h, i - 3, j - 3, bd);
+      if (bd <= 8) {
+        dst[i + j * pad_w] = v;
+      } else {
+        *CONVERT_TO_SHORTPTR(dst + i + j * pad_w) = v;
+      }
     }
   }
   return dst + (w + 7) * 3 + 3;
 }
 
-static void free_pad_8tap(uint8_t *padded, int width) {
-  aom_free(padded - (width + 7) * 3 - 3);
+static int stride_8tap(int width) { return width + 7; }
+
+static void free_pad_8tap(uint8_t *padded, int width, int bd) {
+  if (bd <= 8) {
+    aom_free(padded - (width + 7) * 3 - 3);
+  } else {
+    aom_free(CONVERT_TO_SHORTPTR(padded - (width + 7) * 3 - 3));
+  }
 }
 
-static int stride_8tap(int width) { return width + 7; }
+static uint8_t *malloc_bd(int num_entries, int bd) {
+  const int bytes_per_entry = bd <= 8 ? sizeof(uint8_t) : sizeof(uint16_t);
+
+  uint8_t *buf = (uint8_t *)aom_memalign(32, bytes_per_entry * num_entries);
+  if (bd <= 8) {
+    return buf;
+  } else {
+    return CONVERT_TO_BYTEPTR(buf);
+  }
+}
+
+static void free_bd(uint8_t *p, int bd) {
+  if (bd <= 8) {
+    aom_free(p);
+  } else {
+    aom_free(CONVERT_TO_SHORTPTR(p));
+  }
+}
+
+class EdgeDetectBrightnessTest :
+    // Parameters are (brightness, width, height, bit depth).
+    public ::testing::TestWithParam<tuple<int, int, int, int> > {
+ protected:
+  void SetUp() override {
+    // Allocate a (width by height) array of luma values in orig_.
+    // padded_ is filled in by the pad() call, which adds a border around
+    // orig_. output_ holds width * height entries for the computed result.
+    const int width = GET_PARAM(1);
+    const int height = GET_PARAM(2);
+    const int bd = GET_PARAM(3);
+    orig_ = malloc_bd(width * height, bd);
+    padded_ = nullptr;
+    output_ = malloc_bd(width * height, bd);
+  }
+
+  void TearDown() override {
+    const int bd = GET_PARAM(3);
+    if (orig_ != nullptr) {
+      free_bd(orig_, bd);
+    }
+    if (padded_ != nullptr) {
+      const int width = GET_PARAM(1);
+      free_pad_8tap(padded_, width, bd);
+    }
+    free_bd(output_, bd);
+  }
+
+  void pad() {
+    const int width = GET_PARAM(1);
+    const int height = GET_PARAM(2);
+    const int bd = GET_PARAM(3);
+    padded_ = pad_8tap_convolve(orig_, width, height, bd);
+    // Get rid of the original buffer, it should not be used further.
+    free_bd(orig_, bd);
+    orig_ = nullptr;
+  }
+
+  uint8_t *orig_;
+  uint8_t *padded_;
+  uint8_t *output_;
+};
 
 TEST_P(EdgeDetectBrightnessTest, BlurUniformBrightness) {
   // For varying levels of brightness, the algorithm should
   // produce the same output.
-  const int brightness = get<0>(GetParam());
-  const int width = get<1>(GetParam());
-  const int height = get<2>(GetParam());
-  uint8_t *orig = (uint8_t *)malloc(width * height);
-  for (int i = 0; i < width * height; ++i) {
-    orig[i] = brightness;
+  const int brightness = GET_PARAM(0);
+  const int width = GET_PARAM(1);
+  const int height = GET_PARAM(2);
+  const int bd = GET_PARAM(3);
+  // Skip combinations where brightness does not fit in bd bits;
+  // ::testing::Combine generates the full cross product of the parameters,
+  // so it cannot exclude these invalid (brightness, bit depth) pairs itself.
+  if (brightness >= (1 << bd)) {
+    return;
   }
-  uint8_t *padded = pad_8tap_convolve(orig, width, height);
-  free(orig);
-  uint8_t *output = (uint8_t *)aom_memalign(32, width * height);
-  gaussian_blur(padded, stride_8tap(width), width, height, output);
   for (int i = 0; i < width * height; ++i) {
-    ASSERT_EQ(brightness, output[i]);
+    set_pix(orig_, i, brightness, bd);
   }
-  free_pad_8tap(padded, width);
-  aom_free(output);
+  pad();
+  gaussian_blur(padded_, stride_8tap(width), width, height, output_, bd);
+  for (int i = 0; i < width * height; ++i) {
+    ASSERT_EQ(brightness, get_pix(output_, i, bd));
+  }
 }
 
 // No edges on a uniformly bright image.
 TEST_P(EdgeDetectBrightnessTest, DetectUniformBrightness) {
-  const int brightness = get<0>(GetParam());
-  const int width = get<1>(GetParam());
-  const int height = get<2>(GetParam());
-  uint8_t *orig = (uint8_t *)malloc(width * height);
-  for (int i = 0; i < width * height; ++i) {
-    orig[i] = brightness;
+  const int brightness = GET_PARAM(0);
+  const int width = GET_PARAM(1);
+  const int height = GET_PARAM(2);
+  const int bd = GET_PARAM(3);
+  // Skip combinations where brightness does not fit in bd bits;
+  // ::testing::Combine generates the full cross product of the parameters,
+  // so it cannot exclude these invalid (brightness, bit depth) pairs itself.
+  if (brightness >= (1 << bd)) {
+    return;
   }
-  uint8_t *padded = pad_8tap_convolve(orig, width, height);
-  free(orig);
-  ASSERT_EQ(0, av1_edge_exists(padded, stride_8tap(width), width, height));
-  free_pad_8tap(padded, width);
+  for (int i = 0; i < width * height; ++i) {
+    set_pix(orig_, i, brightness, bd);
+  }
+  pad();
+  ASSERT_EQ(0, av1_edge_exists(padded_, stride_8tap(width), width, height, bd));
 }
 
 INSTANTIATE_TEST_CASE_P(ImageBrightnessTests, EdgeDetectBrightnessTest,
                         ::testing::Combine(
                             // Brightness
-                            ::testing::Values(0, 1, 2, 127, 128, 129, 254, 255),
+                            ::testing::Values(0, 1, 2, 127, 128, 129, 254, 255,
+                                              256, 511, 512, 1023, 1024, 2048,
+                                              4095),
                             // Width
                             ::testing::Values(8, 16, 32),
                             // Height
-                            ::testing::Values(4, 8, 12, 32)));
+                            ::testing::Values(4, 8, 12, 32),
+                            // Bit depth
+                            ::testing::Values(8, 10, 12)));
 
 class EdgeDetectImageTest :
-    // Parameters are (width, height).
-    public ::testing::TestWithParam<tuple<int, int> > {};
+    // Parameters are (width, height, bit depth).
+    public ::testing::TestWithParam<tuple<int, int, int> > {};
 
 // Generate images with black on one side and white on the other.
 TEST_P(EdgeDetectImageTest, BlackWhite) {
-  const int width = get<0>(GetParam());
-  const int height = get<1>(GetParam());
-  uint8_t *orig = (uint8_t *)malloc(width * height);
+  const int width = GET_PARAM(0);
+  const int height = GET_PARAM(1);
+  const int bd = GET_PARAM(2);
+  const int white = (1 << bd) - 1;
+  uint8_t *orig = malloc_bd(width * height, bd);
   for (int j = 0; j < height; ++j) {
     for (int i = 0; i < width; ++i) {
       if (i < width / 2) {
-        orig[i + j * width] = 0;
+        set_pix(orig, i + j * width, 0, bd);
       } else {
-        orig[i + j * width] = 255;
+        set_pix(orig, i + j * width, white, bd);
       }
     }
   }
-  uint8_t *padded = pad_8tap_convolve(orig, width, height);
-  free(orig);
-  ASSERT_LE(556, av1_edge_exists(padded, stride_8tap(width), width, height));
-  free_pad_8tap(padded, width);
+  uint8_t *padded = pad_8tap_convolve(orig, width, height, bd);
+  free_bd(orig, bd);
+  // Value should be between 556 and 560.
+  ASSERT_LE(556,
+            av1_edge_exists(padded, stride_8tap(width), width, height, bd));
+  ASSERT_GE(560,
+            av1_edge_exists(padded, stride_8tap(width), width, height, bd));
+
+  free_pad_8tap(padded, width, bd);
 }
 
+// Hardcoded blur tests.
+static const uint8_t luma[32] = { 241, 147, 7,   90,  184, 103, 28,  186,
+                                  2,   248, 49,  242, 114, 146, 127, 22,
+                                  121, 228, 167, 108, 158, 174, 41,  168,
+                                  214, 99,  184, 109, 114, 247, 117, 119 };
+static const uint8_t expected[] = { 161, 138, 119, 118, 123, 118, 113, 122,
+                                    143, 140, 134, 133, 134, 126, 116, 114,
+                                    147, 149, 145, 142, 143, 138, 126, 118,
+                                    164, 156, 148, 144, 148, 148, 138, 126 };
+
 TEST(EdgeDetectImageTest, HardcodedBlurTest) {
-  // Randomly generated 8x4.
-  const uint8_t luma[32] = { 241, 147, 7,   90,  184, 103, 28,  186,
-                             2,   248, 49,  242, 114, 146, 127, 22,
-                             121, 228, 167, 108, 158, 174, 41,  168,
-                             214, 99,  184, 109, 114, 247, 117, 119 };
-  uint8_t expected[] = { 161, 138, 119, 118, 123, 118, 113, 122, 143, 140, 134,
-                         133, 134, 126, 116, 114, 147, 149, 145, 142, 143, 138,
-                         126, 118, 164, 156, 148, 144, 148, 148, 138, 126 };
   const int w = 8;
   const int h = 4;
-  uint8_t *padded = pad_8tap_convolve(luma, w, h);
-  uint8_t *output = (uint8_t *)aom_memalign(32, w * h);
-  gaussian_blur(padded, stride_8tap(w), w, h, output);
-
+  int bd = 8;
+  uint8_t *output = malloc_bd(w * h, bd);
+  uint8_t *padded = pad_8tap_convolve(luma, w, h, bd);
+  gaussian_blur(padded, stride_8tap(w), w, h, output, bd);
   for (int i = 0; i < w * h; ++i) {
-    ASSERT_EQ(expected[i], output[i]);
+    ASSERT_EQ(expected[i], get_pix(output, i, bd));
   }
+  free_pad_8tap(padded, w, bd);
+  free_bd(output, bd);
 
-  free_pad_8tap(padded, w);
-  aom_free(output);
+  // High bit-depth tests.
+  for (bd = 10; bd <= 12; bd += 2) {
+    uint16_t luma16[32];
+    for (int i = 0; i < 32; ++i) {
+      luma16[i] = luma[i];
+    }
+    uint8_t *output = malloc_bd(w * h, bd);
+    uint8_t *padded = pad_8tap_convolve(CONVERT_TO_BYTEPTR(luma16), w, h, bd);
+    gaussian_blur(padded, stride_8tap(w), w, h, output, bd);
+    for (int i = 0; i < w * h; ++i) {
+      ASSERT_EQ(expected[i], get_pix(output, i, bd));
+    }
+    free_pad_8tap(padded, w, bd);
+    free_bd(output, bd);
+  }
+  // If we multiply the inputs by a constant factor c, each output should be
+  // within c / 2 (integer division) of the 8-bit expected value times c.
+  for (bd = 10; bd <= 12; bd += 2) {
+    for (int c = 2; c < (1 << (bd - 8)); ++c) {
+      uint16_t luma16[32];
+      for (int i = 0; i < 32; ++i) {
+        luma16[i] = luma[i] * c;
+      }
+      uint8_t *output = malloc_bd(w * h, bd);
+      uint8_t *padded = pad_8tap_convolve(CONVERT_TO_BYTEPTR(luma16), w, h, bd);
+      gaussian_blur(padded, stride_8tap(w), w, h, output, bd);
+      for (int i = 0; i < w * h; ++i) {
+        ASSERT_GE(c / 2, abs(expected[i] * c - get_pix(output, i, bd)));
+      }
+      free_pad_8tap(padded, w, bd);
+      free_bd(output, bd);
+    }
+  }
+}
+
+TEST(EdgeDetectImageTest, HardcodedHighBdBlurTest) {
+  // Randomly generated 8x4.
+  const uint16_t luma[32] = { 241, 147, 7,   90,  184, 103, 28,  186,
+                              2,   248, 49,  242, 114, 146, 127, 22,
+                              121, 228, 167, 108, 158, 174, 41,  168,
+                              214, 99,  184, 109, 114, 247, 117, 119 };
+  uint16_t expected[] = { 161, 138, 119, 118, 123, 118, 113, 122, 143, 140, 134,
+                          133, 134, 126, 116, 114, 147, 149, 145, 142, 143, 138,
+                          126, 118, 164, 156, 148, 144, 148, 148, 138, 126 };
+  const int w = 8;
+  const int h = 4;
+  for (int bd = 10; bd <= 12; bd += 2) {
+    uint8_t *padded = pad_8tap_convolve(CONVERT_TO_BYTEPTR(luma), w, h, bd);
+    uint8_t *output = malloc_bd(w * h, bd);
+    gaussian_blur(padded, stride_8tap(w), w, h, output, bd);
+
+    for (int i = 0; i < w * h; ++i) {
+      ASSERT_EQ(expected[i], get_pix(output, i, bd));
+    }
+    free_pad_8tap(padded, w, bd);
+    free_bd(output, bd);
+  }
+}
+
+TEST(EdgeDetectImageTest, SobelTest) {
+  // Randomly generated 3x3. Compute Sobel for middle value.
+  const uint8_t buf[9] = { 241, 147, 7, 90, 184, 103, 28, 186, 2 };
+  const int stride = 3;
+  int bd = 8;
+  sobel_xy result = sobel(buf, stride, 1, 1, bd);
+  ASSERT_EQ(234, result.x);
+  ASSERT_EQ(140, result.y);
+
+  // Verify it works for high bit-depth values as well.
+  const uint16_t buf16[9] = { 241, 147, 7, 90, 184, 2003, 1028, 186, 2 };
+  for (bd = 10; bd <= 12; bd += 2) {
+    result = sobel(CONVERT_TO_BYTEPTR(buf16), stride, 1, 1, bd);
+    ASSERT_EQ(-2566, result.x);
+    ASSERT_EQ(-860, result.y);
+  }
 }
 
 INSTANTIATE_TEST_CASE_P(EdgeDetectImages, EdgeDetectImageTest,
@@ -156,6 +348,7 @@
                             // Width
                             ::testing::Values(8, 16, 32),
                             // Height
-                            ::testing::Values(4, 8, 12, 32)));
-
+                            ::testing::Values(4, 8, 12, 32),
+                            // Bit depth
+                            ::testing::Values(8, 10, 12)));
 }  // namespace