Make ext-inter use new rectangular intra predictor

Now that https://aomedia-review.googlesource.com/#/c/6729/
has been merged, build_intra_predictors_for_interintra() is
redundant, so replace it with a direct call to
av1_predict_intra_block() and remove the old function.

Reset USE_RECT_INTERINTRA back to 1.

To make the direct call possible, the intra predictor needs
to take a BLOCK_SIZE instead of a TX_SIZE. This is because we
need to be able to predict 32x64 and 64x32 blocks, but there
is no TX_32X64 or TX_64X32. Existing callers that have a
TX_SIZE convert it with txsize_to_bsize[].

No effect on output or performance.

Change-Id: I8c185a211c97a85012cc54ec293c785a693608ed
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 29dcd50..f066201 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -40,7 +40,7 @@
 
 #if CONFIG_EXT_INTER
 // Should we try rectangular interintra predictions?
-#define USE_RECT_INTERINTRA 0
+#define USE_RECT_INTERINTRA 1
 
 #if CONFIG_COMPOUND_SEGMENT
 
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index a352ce5..c0c21ce 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -2588,113 +2588,18 @@
 }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-// TODO(urvang/davidbarker): Refactor with av1_predict_intra_block().
-static void build_intra_predictors_for_interintra(MACROBLOCKD *xd, uint8_t *ref,
-                                                  int ref_stride, uint8_t *dst,
-                                                  int dst_stride,
-                                                  PREDICTION_MODE mode,
-                                                  BLOCK_SIZE bsize, int plane) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
-  const int bwl = block_size_wide[plane_bsize];
-  const int bhl = block_size_high[plane_bsize];
-  TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
-#if USE_RECT_INTERINTRA
-  const int pxbw = block_size_wide[plane_bsize];
-  const int pxbh = block_size_high[plane_bsize];
-#if CONFIG_AOM_HIGHBITDEPTH
-  uint16_t tmp16[MAX_SB_SIZE];
-#endif
-  uint8_t tmp[MAX_SB_SIZE];
-#endif
-
-  if (bwl == bhl) {
-    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, ref,
-                            ref_stride, dst, dst_stride, 0, 0, plane);
-#if !USE_RECT_INTERINTRA
-  } else {
-    assert(0);
-  }
-#else
-  } else if (bwl < bhl) {
-    uint8_t *src_2 = ref + pxbw * ref_stride;
-    uint8_t *dst_2 = dst + pxbw * dst_stride;
-    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, ref,
-                            ref_stride, dst, dst_stride, 0, 0, plane);
-#if CONFIG_AOM_HIGHBITDEPTH
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
-      uint16_t *dst_216 = CONVERT_TO_SHORTPTR(dst_2);
-      memcpy(tmp16, src_216 - ref_stride, sizeof(*src_216) * pxbw);
-      memcpy(src_216 - ref_stride, dst_216 - dst_stride,
-             sizeof(*src_216) * pxbw);
-    } else {
-#endif  // CONFIG_AOM_HIGHBITDEPTH
-      memcpy(tmp, src_2 - ref_stride, sizeof(*src_2) * pxbw);
-      memcpy(src_2 - ref_stride, dst_2 - dst_stride, sizeof(*src_2) * pxbw);
-#if CONFIG_AOM_HIGHBITDEPTH
-    }
-#endif
-    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, src_2,
-                            ref_stride, dst_2, dst_stride, 0,
-                            mi_size_wide[plane_bsize], plane);
-#if CONFIG_AOM_HIGHBITDEPTH
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
-      memcpy(src_216 - ref_stride, tmp16, sizeof(*src_216) * pxbw);
-    } else {
-#endif  // CONFIG_AOM_HIGHBITDEPTH
-      memcpy(src_2 - ref_stride, tmp, sizeof(*src_2) * pxbw);
-#if CONFIG_AOM_HIGHBITDEPTH
-    }
-#endif
-  } else {  // bwl > bhl
-    int i;
-    uint8_t *src_2 = ref + pxbh;
-    uint8_t *dst_2 = dst + pxbh;
-    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, ref,
-                            ref_stride, dst, dst_stride, 0, 0, plane);
-#if CONFIG_AOM_HIGHBITDEPTH
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
-      uint16_t *dst_216 = CONVERT_TO_SHORTPTR(dst_2);
-      for (i = 0; i < pxbh; ++i) {
-        tmp16[i] = src_216[i * ref_stride - 1];
-        src_216[i * ref_stride - 1] = dst_216[i * dst_stride - 1];
-      }
-    } else {
-#endif  // CONFIG_AOM_HIGHBITDEPTH
-      for (i = 0; i < pxbh; ++i) {
-        tmp[i] = src_2[i * ref_stride - 1];
-        src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1];
-      }
-#if CONFIG_AOM_HIGHBITDEPTH
-    }
-#endif
-    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, src_2,
-                            ref_stride, dst_2, dst_stride,
-                            mi_size_high[plane_bsize], 0, plane);
-#if CONFIG_AOM_HIGHBITDEPTH
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
-      for (i = 0; i < pxbh; ++i) src_216[i * ref_stride - 1] = tmp16[i];
-    } else {
-#endif  // CONFIG_AOM_HIGHBITDEPTH
-      for (i = 0; i < pxbh; ++i) src_2[i * ref_stride - 1] = tmp[i];
-#if CONFIG_AOM_HIGHBITDEPTH
-    }
-#endif
-  }
-#endif
-}
-
 void av1_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
                                                BLOCK_SIZE bsize, int plane,
                                                BUFFER_SET *ctx, uint8_t *dst,
                                                int dst_stride) {
-  build_intra_predictors_for_interintra(
-      xd, ctx->plane[plane], ctx->stride[plane], dst, dst_stride,
-      interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode], bsize, plane);
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
+  PREDICTION_MODE mode =
+      interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode];
+
+  av1_predict_intra_block(xd, pd->width, pd->height, plane_bsize, mode,
+                          ctx->plane[plane], ctx->stride[plane], dst,
+                          dst_stride, 0, 0, plane);
 }
 
 void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 4411639..5545c18 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -1977,40 +1977,41 @@
 }
 
 void av1_predict_intra_block(const MACROBLOCKD *xd, int wpx, int hpx,
-                             TX_SIZE tx_size, PREDICTION_MODE mode,
+                             BLOCK_SIZE bsize, PREDICTION_MODE mode,
                              const uint8_t *ref, int ref_stride, uint8_t *dst,
                              int dst_stride, int col_off, int row_off,
                              int plane) {
-  const int tx_width = tx_size_wide[tx_size];
-  const int tx_height = tx_size_high[tx_size];
-  if (tx_width == tx_height) {
+  const int block_width = block_size_wide[bsize];
+  const int block_height = block_size_high[bsize];
+  TX_SIZE tx_size = max_txsize_lookup[bsize];
+  assert(tx_size < TX_SIZES);
+  if (block_width == block_height) {
     predict_square_intra_block(xd, wpx, hpx, tx_size, mode, ref, ref_stride,
                                dst, dst_stride, col_off, row_off, plane);
   } else {
-#if (CONFIG_VAR_TX || CONFIG_RECT_TX)
+#if (CONFIG_VAR_TX || CONFIG_RECT_TX) || \
+    (CONFIG_EXT_INTER && USE_RECT_INTERINTRA)
 #if CONFIG_AOM_HIGHBITDEPTH
     uint16_t tmp16[MAX_SB_SIZE];
 #endif
     uint8_t tmp[MAX_SB_SIZE];
-    const TX_SIZE sub_tx_size = txsize_sqr_map[tx_size];
-    assert(sub_tx_size < TX_SIZES);
-    assert((tx_width == wpx && tx_height == hpx) ||
-           (tx_width == (wpx >> 1) && tx_height == hpx) ||
-           (tx_width == wpx && tx_height == (hpx >> 1)));
+    assert((block_width == wpx && block_height == hpx) ||
+           (block_width == (wpx >> 1) && block_height == hpx) ||
+           (block_width == wpx && block_height == (hpx >> 1)));
 
-    if (tx_width < tx_height) {
-      assert(tx_height == (tx_width << 1));
+    if (block_width < block_height) {
+      assert(block_height == (block_width << 1));
       // Predict the top square sub-block.
-      predict_square_intra_block(xd, wpx, hpx, sub_tx_size, mode, ref,
-                                 ref_stride, dst, dst_stride, col_off, row_off,
-                                 plane);
+      predict_square_intra_block(xd, wpx, hpx, tx_size, mode, ref, ref_stride,
+                                 dst, dst_stride, col_off, row_off, plane);
       {
-        const int half_tx_height = tx_height >> 1;
-        const int half_txh_unit = tx_size_high_unit[tx_size] >> 1;
+        const int half_block_height = block_height >> 1;
+        const int half_block_height_unit =
+            half_block_height >> tx_size_wide_log2[0];
         // Cast away const to modify 'ref' temporarily; will be restored later.
-        uint8_t *src_2 = (uint8_t *)ref + half_tx_height * ref_stride;
-        uint8_t *dst_2 = dst + half_tx_height * dst_stride;
-        const int row_off_2 = row_off + half_txh_unit;
+        uint8_t *src_2 = (uint8_t *)ref + half_block_height * ref_stride;
+        uint8_t *dst_2 = dst + half_block_height * dst_stride;
+        const int row_off_2 = row_off + half_block_height_unit;
         // Save the last row of top square sub-block as 'above' row for bottom
         // square sub-block.
         if (src_2 != dst_2 || ref_stride != dst_stride) {
@@ -2018,20 +2019,21 @@
           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
             uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
             uint16_t *dst_2_16 = CONVERT_TO_SHORTPTR(dst_2);
-            memcpy(tmp16, src_2_16 - ref_stride, tx_width * sizeof(*src_2_16));
+            memcpy(tmp16, src_2_16 - ref_stride,
+                   block_width * sizeof(*src_2_16));
             memcpy(src_2_16 - ref_stride, dst_2_16 - dst_stride,
-                   tx_width * sizeof(*src_2_16));
+                   block_width * sizeof(*src_2_16));
           } else {
 #endif  // CONFIG_AOM_HIGHBITDEPTH
-            memcpy(tmp, src_2 - ref_stride, tx_width * sizeof(*src_2));
+            memcpy(tmp, src_2 - ref_stride, block_width * sizeof(*src_2));
             memcpy(src_2 - ref_stride, dst_2 - dst_stride,
-                   tx_width * sizeof(*src_2));
+                   block_width * sizeof(*src_2));
 #if CONFIG_AOM_HIGHBITDEPTH
           }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
         }
         // Predict the bottom square sub-block.
-        predict_square_intra_block(xd, wpx, hpx, sub_tx_size, mode, src_2,
+        predict_square_intra_block(xd, wpx, hpx, tx_size, mode, src_2,
                                    ref_stride, dst_2, dst_stride, col_off,
                                    row_off_2, plane);
         // Restore the last row of top square sub-block.
@@ -2039,29 +2041,30 @@
 #if CONFIG_AOM_HIGHBITDEPTH
           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
             uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
-            memcpy(src_2_16 - ref_stride, tmp16, tx_width * sizeof(*src_2_16));
+            memcpy(src_2_16 - ref_stride, tmp16,
+                   block_width * sizeof(*src_2_16));
           } else {
 #endif  // CONFIG_AOM_HIGHBITDEPTH
-            memcpy(src_2 - ref_stride, tmp, tx_width * sizeof(*src_2));
+            memcpy(src_2 - ref_stride, tmp, block_width * sizeof(*src_2));
 #if CONFIG_AOM_HIGHBITDEPTH
           }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
         }
       }
-    } else {  // tx_width > tx_height
-      assert(tx_width == (tx_height << 1));
+    } else {  // block_width > block_height
+      assert(block_width == (block_height << 1));
       // Predict the left square sub-block
-      predict_square_intra_block(xd, wpx, hpx, sub_tx_size, mode, ref,
-                                 ref_stride, dst, dst_stride, col_off, row_off,
-                                 plane);
+      predict_square_intra_block(xd, wpx, hpx, tx_size, mode, ref, ref_stride,
+                                 dst, dst_stride, col_off, row_off, plane);
       {
         int i;
-        const int half_tx_width = tx_width >> 1;
-        const int half_txw_unit = tx_size_wide_unit[tx_size] >> 1;
+        const int half_block_width = block_width >> 1;
+        const int half_block_width_unit =
+            half_block_width >> tx_size_wide_log2[0];
         // Cast away const to modify 'ref' temporarily; will be restored later.
-        uint8_t *src_2 = (uint8_t *)ref + half_tx_width;
-        uint8_t *dst_2 = dst + half_tx_width;
-        const int col_off_2 = col_off + half_txw_unit;
+        uint8_t *src_2 = (uint8_t *)ref + half_block_width;
+        uint8_t *dst_2 = dst + half_block_width;
+        const int col_off_2 = col_off + half_block_width_unit;
         // Save the last column of left square sub-block as 'left' column for
         // right square sub-block.
         if (src_2 != dst_2 || ref_stride != dst_stride) {
@@ -2069,13 +2072,13 @@
           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
             uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
             uint16_t *dst_2_16 = CONVERT_TO_SHORTPTR(dst_2);
-            for (i = 0; i < tx_height; ++i) {
+            for (i = 0; i < block_height; ++i) {
               tmp16[i] = src_2_16[i * ref_stride - 1];
               src_2_16[i * ref_stride - 1] = dst_2_16[i * dst_stride - 1];
             }
           } else {
 #endif  // CONFIG_AOM_HIGHBITDEPTH
-            for (i = 0; i < tx_height; ++i) {
+            for (i = 0; i < block_height; ++i) {
               tmp[i] = src_2[i * ref_stride - 1];
               src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1];
             }
@@ -2084,7 +2087,7 @@
 #endif  // CONFIG_AOM_HIGHBITDEPTH
         }
         // Predict the right square sub-block.
-        predict_square_intra_block(xd, wpx, hpx, sub_tx_size, mode, src_2,
+        predict_square_intra_block(xd, wpx, hpx, tx_size, mode, src_2,
                                    ref_stride, dst_2, dst_stride, col_off_2,
                                    row_off, plane);
         // Restore the last column of left square sub-block.
@@ -2092,12 +2095,12 @@
 #if CONFIG_AOM_HIGHBITDEPTH
           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
             uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
-            for (i = 0; i < tx_height; ++i) {
+            for (i = 0; i < block_height; ++i) {
               src_2_16[i * ref_stride - 1] = tmp16[i];
             }
           } else {
 #endif  // CONFIG_AOM_HIGHBITDEPTH
-            for (i = 0; i < tx_height; ++i) {
+            for (i = 0; i < block_height; ++i) {
               src_2[i * ref_stride - 1] = tmp[i];
             }
 #if CONFIG_AOM_HIGHBITDEPTH
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
index d7d8d0e..e1ef27d 100644
--- a/av1/common/reconintra.h
+++ b/av1/common/reconintra.h
@@ -22,7 +22,7 @@
 void av1_init_intra_predictors(void);
 
 void av1_predict_intra_block(const MACROBLOCKD *xd, int bw, int bh,
-                             TX_SIZE tx_size, PREDICTION_MODE mode,
+                             BLOCK_SIZE bsize, PREDICTION_MODE mode,
                              const uint8_t *ref, int ref_stride, uint8_t *dst,
                              int dst_stride, int aoff, int loff, int plane);
 
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 34fe6c3..250fa7a 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -495,8 +495,9 @@
     if (plane == 0) mode = xd->mi[0]->bmi[block_idx].as_mode;
 #endif
 
-  av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
-                          pd->dst.stride, dst, pd->dst.stride, col, row, plane);
+  av1_predict_intra_block(xd, pd->width, pd->height, txsize_to_bsize[tx_size],
+                          mode, dst, pd->dst.stride, dst, pd->dst.stride, col,
+                          row, plane);
 
   if (!mbmi->skip) {
     TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 07ab2ee..982d75e 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -973,8 +973,9 @@
   src_diff =
       &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
   mode = (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
-  av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
-                          dst_stride, dst, dst_stride, blk_col, blk_row, plane);
+  av1_predict_intra_block(xd, pd->width, pd->height, txsize_to_bsize[tx_size],
+                          mode, dst, dst_stride, dst, dst_stride, blk_col,
+                          blk_row, plane);
 
   if (check_subtract_block_size(tx1d_width, tx1d_height)) {
 #if CONFIG_AOM_HIGHBITDEPTH
diff --git a/av1/encoder/mbgraph.c b/av1/encoder/mbgraph.c
index 52974e2..cfef9c0 100644
--- a/av1/encoder/mbgraph.c
+++ b/av1/encoder/mbgraph.c
@@ -149,7 +149,7 @@
     unsigned int err;
 
     xd->mi[0]->mbmi.mode = mode;
-    av1_predict_intra_block(xd, 16, 16, TX_16X16, mode, x->plane[0].src.buf,
+    av1_predict_intra_block(xd, 16, 16, BLOCK_16X16, mode, x->plane[0].src.buf,
                             x->plane[0].src.stride, xd->plane[0].dst.buf,
                             xd->plane[0].dst.stride, 0, 0, 0);
     err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 3d90b88..15feb9e 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2237,9 +2237,9 @@
       struct macroblockd_plane *const pd = &xd->plane[0];
       uint8_t *dst =
           &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
-      av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mbmi->mode,
-                              dst, pd->dst.stride, dst, pd->dst.stride, col,
-                              row, 0);
+      av1_predict_intra_block(xd, pd->width, pd->height,
+                              txsize_to_bsize[tx_size], mbmi->mode, dst,
+                              pd->dst.stride, dst, pd->dst.stride, col, row, 0);
     }
   }
   // RD estimation.
@@ -2558,9 +2558,9 @@
           assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
                          block == 0 || block == 2));
           xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
-          av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
-                                  dst_stride, dst, dst_stride, col + idx,
-                                  row + idy, 0);
+          av1_predict_intra_block(
+              xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode, dst,
+              dst_stride, dst, dst_stride, col + idx, row + idy, 0);
           aom_highbd_subtract_block(tx_height, tx_width, src_diff, 8, src,
                                     src_stride, dst, dst_stride, xd->bd);
           if (is_lossless) {
@@ -2717,8 +2717,9 @@
         assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
                        block == 0 || block == 2));
         xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
-        av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
-                                dst_stride, dst, dst_stride,
+        av1_predict_intra_block(xd, pd->width, pd->height,
+                                txsize_to_bsize[tx_size], mode, dst, dst_stride,
+                                dst, dst_stride,
 #if CONFIG_CB4X4
                                 2 * (col + idx), 2 * (row + idy),
 #else