Improve rdopt decisions for ext-inter

BDRATE change relative to the previous ext-inter:
lowres: -0.177%
     or -0.029% (with USE_RECT_INTERINTRA = 0)

* When predicting interintra modes, the previous code did not provide
  the intra predictor with the correct context during rdopt. Add an
  explicit 'ctx' parameter to the relevant functions to provide this
  context.
  This fixes a nondeterminism bug that was causing test failures in
  *EncoderThreadTest*.

* For rectangular blocks, build_intra_predictors_for_interintra needs
  to overwrite part of the context buffer in order to set up the
  correct context for intra prediction. We now restore the original
  contents afterwards.

* Add a flag (USE_RECT_INTERINTRA) to enable/disable rectangular
  interintra prediction; disabling it improves encoding speed but
  reduces the BDRATE improvement.

Change-Id: I7458c036c7f94df9ab1ba0c7efa79aeaa7e17118
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 62a196f..db0b697 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -333,16 +333,17 @@
                                       int ic, int mi_row, int mi_col);
 
 void av1_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
-                                    BLOCK_SIZE bsize);
+                                    BUFFER_SET *ctx, BLOCK_SIZE bsize);
 
 void av1_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col,
-                                    BLOCK_SIZE bsize, int plane);
+                                    BUFFER_SET *ctx, BLOCK_SIZE bsize,
+                                    int plane);
 
 void av1_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
-                                     BLOCK_SIZE bsize);
+                                     BUFFER_SET *ctx, BLOCK_SIZE bsize);
 
 void av1_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
-                                   BLOCK_SIZE bsize);
+                                   BUFFER_SET *ctx, BLOCK_SIZE bsize);
 
 #if CONFIG_SUPERTX
 void av1_build_inter_predictors_sb_sub8x8_extend(MACROBLOCKD *xd,
@@ -529,28 +530,26 @@
 void av1_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
                                      uint8_t *upred, uint8_t *vpred,
                                      int ystride, int ustride, int vstride,
-                                     BLOCK_SIZE bsize);
+                                     BUFFER_SET *ctx, BLOCK_SIZE bsize);
 void av1_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
-                                         int ystride, BLOCK_SIZE bsize);
-void av1_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
-                                         int ustride, int plane,
+                                         int ystride, BUFFER_SET *ctx,
                                          BLOCK_SIZE bsize);
+void av1_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
+                                         int ustride, BUFFER_SET *ctx,
+                                         int plane, BLOCK_SIZE bsize);
 void av1_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
                                           uint8_t *vpred, int ustride,
-                                          int vstride, BLOCK_SIZE bsize);
+                                          int vstride, BUFFER_SET *ctx,
+                                          BLOCK_SIZE bsize);
 
 void av1_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
                                                BLOCK_SIZE bsize, int plane,
+                                               BUFFER_SET *ctx,
                                                uint8_t *intra_pred,
                                                int intra_stride);
 void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                             const uint8_t *inter_pred, int inter_stride,
                             const uint8_t *intra_pred, int intra_stride);
-void av1_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
-                                          uint8_t *vpred, int ustride,
-                                          int vstride, BLOCK_SIZE bsize);
-void av1_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
-                                         int ystride, BLOCK_SIZE bsize);
 
 // Encoder only
 void av1_build_inter_predictors_for_planes_single_buf(