Make subtraction support 2x2-level (sub-4x4) block sizes

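Use the C versions of the subtract functions (aom_subtract_block_c and
aom_highbd_subtract_block_c) when the block width or height is less than 4.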

Change-Id: Ie0fabc184b6e728f6f3bc7c0bfada2f9d1465a18
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 0319db6..0e857d9 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -34,6 +34,9 @@
 #include "av1/encoder/pvq_encoder.h"
 #endif
 
+// Returns nonzero when the C version of subtraction must be used (w or h < 4).
+static int check_subtract_block_size(int w, int h) { return w < 4 || h < 4; }
+
 void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
@@ -41,6 +44,21 @@
   const int bw = block_size_wide[plane_bsize];
   const int bh = block_size_high[plane_bsize];
 
+  if (check_subtract_block_size(bw, bh)) {
+#if CONFIG_AOM_HIGHBITDEPTH
+    if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      aom_highbd_subtract_block_c(bh, bw, p->src_diff, bw, p->src.buf,
+                                  p->src.stride, pd->dst.buf, pd->dst.stride,
+                                  x->e_mbd.bd);
+      return;
+    }
+#endif  // CONFIG_AOM_HIGHBITDEPTH
+    aom_subtract_block_c(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
+                         pd->dst.buf, pd->dst.stride);
+
+    return;
+  }
+
 #if CONFIG_AOM_HIGHBITDEPTH
   if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     aom_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
@@ -940,18 +958,35 @@
   mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
   av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
                           dst_stride, dst, dst_stride, blk_col, blk_row, plane);
+
+  if (check_subtract_block_size(tx1d_width, tx1d_height)) {
 #if CONFIG_AOM_HIGHBITDEPTH
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    aom_highbd_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride,
-                              src, src_stride, dst, dst_stride, xd->bd);
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      aom_highbd_subtract_block_c(tx1d_height, tx1d_width, src_diff,
+                                  diff_stride, src, src_stride, dst, dst_stride,
+                                  xd->bd);
+    } else {
+      aom_subtract_block_c(tx1d_height, tx1d_width, src_diff, diff_stride, src,
+                           src_stride, dst, dst_stride);
+    }
+#else
+    aom_subtract_block_c(tx1d_height, tx1d_width, src_diff, diff_stride, src,
+                         src_stride, dst, dst_stride);
+#endif  // CONFIG_AOM_HIGHBITDEPTH
   } else {
+#if CONFIG_AOM_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      aom_highbd_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride,
+                                src, src_stride, dst, dst_stride, xd->bd);
+    } else {
+      aom_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
+                         src_stride, dst, dst_stride);
+    }
+#else
     aom_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
                        src_stride, dst, dst_stride);
-  }
-#else
-  aom_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
-                     src_stride, dst, dst_stride);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
+  }
 
   a = &args->ta[blk_col];
   l = &args->tl[blk_row];