Apply skip_ref_frame_mask to all sub blocks

Currently skip_ref_frame_mask is only applied to rectangular
blocks. This CL applies it to all sub blocks, including the
square blocks in PARTITION_HORZ_A/B and
PARTITION_VERT_A/B.

The encoder is about 0.6% faster when encoding
20 frames of BasketballDrill_832x480_50.y4m at 800kbps
at speed 1. The coding performance loss is about 0.01%
on average.

STATS_CHANGED expected

Change-Id: I097245bbc2f954c5a2a4b60c4ea05af8581c1a82
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 85a62e8..fc37a79 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -10312,7 +10312,8 @@
           (cpi->ref_frame_flags & ref_frame_flag_list[rf[1]]))) {
       continue;
     }
-    if (block_size_wide[bsize] != block_size_high[bsize]) {
+    if (mbmi->partition != PARTITION_NONE &&
+        mbmi->partition != PARTITION_SPLIT) {
       if ((skip_ref_frame_mask & (1 << rf[0])) ||
           (skip_ref_frame_mask & (1 << rf[1]))) {
         continue;
@@ -10640,8 +10641,7 @@
   const unsigned char segment_id = mbmi->segment_id;
   const MV_REFERENCE_FRAME *ref_frame = av1_mode_order[mode_index].ref_frame;
   const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
-
-  if (block_size_wide[bsize] != block_size_high[bsize]) {
+  if (mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) {
     if (ctx->skip_ref_frame_mask & (1 << ref_frame[0])) return 1;
     if (ref_frame[1] > 0 && (ctx->skip_ref_frame_mask & (1 << ref_frame[1])))
       return 1;