Merge "Use the aom_writer type rather than the tag in calling code." into nextgenv2
diff --git a/aom_dsp/psnr.c b/aom_dsp/psnr.c
index e69ffb4..db789a3 100644
--- a/aom_dsp/psnr.c
+++ b/aom_dsp/psnr.c
@@ -177,6 +177,14 @@
 }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
+int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
+                           const YV12_BUFFER_CONFIG *b,
+                           int hstart, int width, int vstart, int height) {
+  return get_sse(a->y_buffer + vstart * a->y_stride + hstart, a->y_stride,
+                 b->y_buffer + vstart * b->y_stride + hstart, b->y_stride,
+                 width, height);
+}
+
 int64_t aom_get_y_sse(const YV12_BUFFER_CONFIG *a,
                       const YV12_BUFFER_CONFIG *b) {
   assert(a->y_crop_width == b->y_crop_width);
@@ -205,6 +213,16 @@
 }
 
 #if CONFIG_AOM_HIGHBITDEPTH
+int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
+                                  const YV12_BUFFER_CONFIG *b,
+                                  int hstart, int width,
+                                  int vstart, int height) {
+  return highbd_get_sse(
+      a->y_buffer + vstart * a->y_stride + hstart, a->y_stride,
+      b->y_buffer + vstart * b->y_stride + hstart, b->y_stride,
+      width, height);
+}
+
 int64_t aom_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
                              const YV12_BUFFER_CONFIG *b) {
   assert(a->y_crop_width == b->y_crop_width);
diff --git a/aom_dsp/psnr.h b/aom_dsp/psnr.h
index 29ccb5f..303573b 100644
--- a/aom_dsp/psnr.h
+++ b/aom_dsp/psnr.h
@@ -35,10 +35,17 @@
 * \param[in]    sse           Sum of squared errors
 */
 double aom_sse_to_psnr(double samples, double peak, double sse);
+int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
+                           const YV12_BUFFER_CONFIG *b,
+                           int hstart, int width, int vstart, int height);
 int64_t aom_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
 int64_t aom_get_u_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
 int64_t aom_get_v_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
 #if CONFIG_AOM_HIGHBITDEPTH
+int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
+                                  const YV12_BUFFER_CONFIG *b,
+                                  int hstart, int width,
+                                  int vstart, int height);
 int64_t aom_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
                              const YV12_BUFFER_CONFIG *b);
 int64_t aom_highbd_get_u_sse(const YV12_BUFFER_CONFIG *a,
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index dab0116..db4fbf7 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -83,6 +83,8 @@
 
 #if CONFIG_LOOP_RESTORATION
 void av1_free_restoration_buffers(AV1_COMMON *cm) {
+  aom_free(cm->rst_info.restoration_type);
+  cm->rst_info.restoration_type = NULL;
   aom_free(cm->rst_info.bilateral_level);
   cm->rst_info.bilateral_level = NULL;
   aom_free(cm->rst_info.vfilter);
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 62bf0a4..61a2c31 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -865,6 +865,17 @@
                                             },
                                           };
 
+#if CONFIG_LOOP_RESTORATION
+const aom_tree_index
+    av1_switchable_restore_tree[TREE_SIZE(RESTORE_SWITCHABLE_TYPES)] = {
+      -RESTORE_NONE, 2,
+      -RESTORE_BILATERAL, -RESTORE_WIENER,
+    };
+
+static const aom_prob
+    default_switchable_restore_prob[RESTORE_SWITCHABLE_TYPES - 1] = {32, 128};
+#endif  // CONFIG_LOOP_RESTORATION
+
 #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_VAR_TX
 // the probability of (0) using recursive square tx partition vs.
 // (1) biggest rect tx for 4X8-8X4/8X16-16X8/16X32-32X16 blocks
@@ -1361,6 +1372,9 @@
 #endif  // CONFIG_EXT_INTRA
   av1_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);
   av1_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
+#if CONFIG_LOOP_RESTORATION
+  av1_copy(fc->switchable_restore_prob, default_switchable_restore_prob);
+#endif  // CONFIG_LOOP_RESTORATION
 }
 
 #if CONFIG_EXT_INTERP
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 16030c9..3120f90 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -126,6 +126,9 @@
 #if CONFIG_GLOBAL_MOTION
   aom_prob global_motion_types_prob[GLOBAL_MOTION_TYPES - 1];
 #endif  // CONFIG_GLOBAL_MOTION
+#if CONFIG_LOOP_RESTORATION
+  aom_prob switchable_restore_prob[RESTORE_SWITCHABLE_TYPES - 1];
+#endif  // CONFIG_LOOP_RESTORATION
 } FRAME_CONTEXT;
 
 typedef struct FRAME_COUNTS {
@@ -263,6 +266,13 @@
 extern const aom_tree_index av1_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)];
 #endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
 
+#if CONFIG_LOOP_RESTORATION
+#define RESTORE_NONE_BILATERAL_PROB 16
+#define RESTORE_NONE_WIENER_PROB 64
+extern const aom_tree_index
+    av1_switchable_restore_tree[TREE_SIZE(RESTORE_SWITCHABLE_TYPES)];
+#endif  // CONFIG_LOOP_RESTORATION
+
 void av1_setup_past_independence(struct AV1Common *cm);
 
 void av1_adapt_intra_frame_probs(struct AV1Common *cm);
diff --git a/av1/common/enums.h b/av1/common/enums.h
index b1ac2a0..c9d3211 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -433,6 +433,16 @@
 #define MAX_SUPERTX_BLOCK_SIZE BLOCK_32X32
 #endif  // CONFIG_SUPERTX
 
+#if CONFIG_LOOP_RESTORATION
+typedef enum {
+  RESTORE_NONE,
+  RESTORE_BILATERAL,
+  RESTORE_WIENER,
+  RESTORE_SWITCHABLE,
+  RESTORE_SWITCHABLE_TYPES = RESTORE_SWITCHABLE,
+  RESTORE_TYPES,
+} RestorationType;
+#endif  // CONFIG_LOOP_RESTORATION
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
index 2147bb8..f45f3db 100644
--- a/av1/common/loopfilter.c
+++ b/av1/common/loopfilter.c
@@ -16,7 +16,6 @@
 #include "av1/common/loopfilter.h"
 #include "av1/common/onyxc_int.h"
 #include "av1/common/reconinter.h"
-#include "av1/common/restoration.h"
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_mem/aom_mem.h"
 #include "aom_ports/mem.h"
diff --git a/av1/common/loopfilter.h b/av1/common/loopfilter.h
index ae0ef8a..975cbdf 100644
--- a/av1/common/loopfilter.h
+++ b/av1/common/loopfilter.h
@@ -16,7 +16,6 @@
 #include "./aom_config.h"
 
 #include "av1/common/blockd.h"
-#include "av1/common/restoration.h"
 #include "av1/common/seg_common.h"
 
 #ifdef __cplusplus
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index a14b34f..6cd6cbe 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -25,7 +25,9 @@
 #include "av1/common/frame_buffers.h"
 #include "av1/common/quant_common.h"
 #include "av1/common/tile_common.h"
+#if CONFIG_LOOP_RESTORATION
 #include "av1/common/restoration.h"
+#endif  // CONFIG_LOOP_RESTORATION
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 65e3d09..5c59ddc 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -70,36 +70,6 @@
             : bilateral_level_to_params_arr[index];
 }
 
-typedef struct TileParams {
-  int width;
-  int height;
-} TileParams;
-
-static TileParams restoration_tile_sizes[RESTORATION_TILESIZES] = {
-  { 64, 64 }, { 128, 128 }, { 256, 256 }
-};
-
-void av1_get_restoration_tile_size(int tilesize, int width, int height,
-                                   int *tile_width, int *tile_height,
-                                   int *nhtiles, int *nvtiles) {
-  *tile_width = (tilesize < 0)
-                    ? width
-                    : AOMMIN(restoration_tile_sizes[tilesize].width, width);
-  *tile_height = (tilesize < 0)
-                     ? height
-                     : AOMMIN(restoration_tile_sizes[tilesize].height, height);
-  *nhtiles = (width + (*tile_width >> 1)) / *tile_width;
-  *nvtiles = (height + (*tile_height >> 1)) / *tile_height;
-}
-
-int av1_get_restoration_ntiles(int tilesize, int width, int height) {
-  int nhtiles, nvtiles;
-  int tile_width, tile_height;
-  av1_get_restoration_tile_size(tilesize, width, height, &tile_width,
-                                &tile_height, &nhtiles, &nvtiles);
-  return (nhtiles * nvtiles);
-}
-
 void av1_loop_restoration_precal() {
   int i;
   for (i = 0; i < BILATERAL_LEVELS_KF; i++) {
@@ -172,90 +142,75 @@
 void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
                                int kf, int width, int height) {
   int i, tile_idx;
-  rst->restoration_type = rsi->restoration_type;
+  rst->rsi = rsi;
+  rst->keyframe = kf;
   rst->subsampling_x = 0;
   rst->subsampling_y = 0;
-  if (rsi->restoration_type == RESTORE_BILATERAL) {
-    rst->tilesize_index = BILATERAL_TILESIZE;
-    rst->ntiles =
-        av1_get_restoration_ntiles(rst->tilesize_index, width, height);
-    av1_get_restoration_tile_size(rst->tilesize_index, width, height,
-                                  &rst->tile_width, &rst->tile_height,
-                                  &rst->nhtiles, &rst->nvtiles);
-    rst->bilateral_level = rsi->bilateral_level;
-    rst->wr_lut = (uint8_t **)malloc(sizeof(*rst->wr_lut) * rst->ntiles);
-    assert(rst->wr_lut != NULL);
-    rst->wx_lut = (uint8_t(**)[RESTORATION_WIN])malloc(sizeof(*rst->wx_lut) *
-                                                       rst->ntiles);
-    assert(rst->wx_lut != NULL);
+  rst->ntiles =
+      av1_get_rest_ntiles(width, height, &rst->tile_width,
+                          &rst->tile_height, &rst->nhtiles, &rst->nvtiles);
+  if (rsi->frame_restoration_type == RESTORE_WIENER) {
     for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
-      const int level = rsi->bilateral_level[tile_idx];
-      if (level >= 0) {
-        rst->wr_lut[tile_idx] = kf ? bilateral_filter_coeffs_r_kf[level]
-                                   : bilateral_filter_coeffs_r[level];
-        rst->wx_lut[tile_idx] = kf ? bilateral_filter_coeffs_s_kf[level]
-                                   : bilateral_filter_coeffs_s[level];
+      rsi->vfilter[tile_idx][RESTORATION_HALFWIN] =
+          rsi->hfilter[tile_idx][RESTORATION_HALFWIN] = RESTORATION_FILT_STEP;
+      for (i = 0; i < RESTORATION_HALFWIN; ++i) {
+        rsi->vfilter[tile_idx][RESTORATION_WIN - 1 - i] =
+            rsi->vfilter[tile_idx][i];
+        rsi->hfilter[tile_idx][RESTORATION_WIN - 1 - i] =
+            rsi->hfilter[tile_idx][i];
+        rsi->vfilter[tile_idx][RESTORATION_HALFWIN] -=
+            2 * rsi->vfilter[tile_idx][i];
+        rsi->hfilter[tile_idx][RESTORATION_HALFWIN] -=
+            2 * rsi->hfilter[tile_idx][i];
       }
     }
-  } else if (rsi->restoration_type == RESTORE_WIENER) {
-    rst->tilesize_index = WIENER_TILESIZE;
-    rst->ntiles =
-        av1_get_restoration_ntiles(rst->tilesize_index, width, height);
-    av1_get_restoration_tile_size(rst->tilesize_index, width, height,
-                                  &rst->tile_width, &rst->tile_height,
-                                  &rst->nhtiles, &rst->nvtiles);
-    rst->wiener_level = rsi->wiener_level;
-    rst->vfilter =
-        (int(*)[RESTORATION_WIN])malloc(sizeof(*rst->vfilter) * rst->ntiles);
-    assert(rst->vfilter != NULL);
-    rst->hfilter =
-        (int(*)[RESTORATION_WIN])malloc(sizeof(*rst->hfilter) * rst->ntiles);
-    assert(rst->hfilter != NULL);
+  } else if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
     for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
-      rst->vfilter[tile_idx][RESTORATION_HALFWIN] =
-          rst->hfilter[tile_idx][RESTORATION_HALFWIN] = RESTORATION_FILT_STEP;
-      for (i = 0; i < RESTORATION_HALFWIN; ++i) {
-        rst->vfilter[tile_idx][i] =
-            rst->vfilter[tile_idx][RESTORATION_WIN - 1 - i] =
-                rsi->vfilter[tile_idx][i];
-        rst->hfilter[tile_idx][i] =
-            rst->hfilter[tile_idx][RESTORATION_WIN - 1 - i] =
-                rsi->hfilter[tile_idx][i];
-        rst->vfilter[tile_idx][RESTORATION_HALFWIN] -=
-            2 * rsi->vfilter[tile_idx][i];
-        rst->hfilter[tile_idx][RESTORATION_HALFWIN] -=
-            2 * rsi->hfilter[tile_idx][i];
+      if (rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
+        rsi->vfilter[tile_idx][RESTORATION_HALFWIN] =
+            rsi->hfilter[tile_idx][RESTORATION_HALFWIN] = RESTORATION_FILT_STEP;
+        for (i = 0; i < RESTORATION_HALFWIN; ++i) {
+          rsi->vfilter[tile_idx][RESTORATION_WIN - 1 - i] =
+              rsi->vfilter[tile_idx][i];
+          rsi->hfilter[tile_idx][RESTORATION_WIN - 1 - i] =
+              rsi->hfilter[tile_idx][i];
+          rsi->vfilter[tile_idx][RESTORATION_HALFWIN] -=
+              2 * rsi->vfilter[tile_idx][i];
+          rsi->hfilter[tile_idx][RESTORATION_HALFWIN] -=
+              2 * rsi->hfilter[tile_idx][i];
+        }
       }
     }
   }
 }
 
-static void loop_bilateral_filter(uint8_t *data, int width, int height,
-                                  int stride, RestorationInternal *rst,
-                                  uint8_t *tmpdata, int tmpstride) {
-  int i, j, tile_idx, htile_idx, vtile_idx;
+static void loop_bilateral_filter_tile(uint8_t *data, int tile_idx, int width,
+                                       int height, int stride,
+                                       RestorationInternal *rst,
+                                       uint8_t *tmpdata, int tmpstride) {
+  int i, j, subtile_idx;
   int h_start, h_end, v_start, v_end;
-  int tile_width, tile_height;
+  const int tile_width = rst->tile_width >> rst->subsampling_x;
+  const int tile_height = rst->tile_height >> rst->subsampling_y;
 
-  tile_width = rst->tile_width >> rst->subsampling_x;
-  tile_height = rst->tile_height >> rst->subsampling_y;
-
-  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+  for (subtile_idx = 0; subtile_idx < BILATERAL_SUBTILES; ++subtile_idx) {
     uint8_t *data_p, *tmpdata_p;
-    const uint8_t *wr_lut_ = rst->wr_lut[tile_idx] + BILATERAL_AMP_RANGE;
+    const int level =
+        rst->rsi->bilateral_level[tile_idx * BILATERAL_SUBTILES + subtile_idx];
+    uint8_t(*wx_lut)[RESTORATION_WIN];
+    uint8_t *wr_lut_;
 
-    if (rst->bilateral_level[tile_idx] < 0) continue;
+    if (level < 0) continue;
+    wr_lut_ = (rst->keyframe ? bilateral_filter_coeffs_r_kf[level]
+                             : bilateral_filter_coeffs_r[level]) +
+              BILATERAL_AMP_RANGE;
+    wx_lut = rst->keyframe ? bilateral_filter_coeffs_s_kf[level]
+                           : bilateral_filter_coeffs_s[level];
 
-    htile_idx = tile_idx % rst->nhtiles;
-    vtile_idx = tile_idx / rst->nhtiles;
-    h_start =
-        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
-                                           : (width - RESTORATION_HALFWIN);
-    v_start =
-        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
-                                           : (height - RESTORATION_HALFWIN);
+    av1_get_rest_tile_limits(tile_idx, subtile_idx, BILATERAL_SUBTILE_BITS,
+                             rst->nhtiles, rst->nvtiles, tile_width,
+                             tile_height, width, height, 1, 1, &h_start, &h_end,
+                             &v_start, &v_end);
 
     data_p = data + h_start + v_start * stride;
     tmpdata_p = tmpdata + h_start + v_start * tmpstride;
@@ -267,8 +222,7 @@
         uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
         for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
           for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
-            wt = (int)rst->wx_lut[tile_idx][y + RESTORATION_HALFWIN]
-                                 [x + RESTORATION_HALFWIN] *
+            wt = (int)wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] *
                  (int)wr_lut_[data_p2[x] - data_p[j]];
             wtsum += wt;
             flsum += wt * data_p2[x];
@@ -290,6 +244,16 @@
   }
 }
 
+static void loop_bilateral_filter(uint8_t *data, int width, int height,
+                                  int stride, RestorationInternal *rst,
+                                  uint8_t *tmpdata, int tmpstride) {
+  int tile_idx;
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    loop_bilateral_filter_tile(data, tile_idx, width, height, stride, rst,
+                               tmpdata, tmpstride);
+  }
+}
+
 uint8_t hor_sym_filter(uint8_t *d, int *hfilter) {
   int32_t s =
       (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * hfilter[RESTORATION_HALFWIN];
@@ -308,17 +272,52 @@
   return clip_pixel(s >> RESTORATION_FILT_BITS);
 }
 
+static void loop_wiener_filter_tile(uint8_t *data, int tile_idx, int width,
+                                    int height, int stride,
+                                    RestorationInternal *rst, uint8_t *tmpdata,
+                                    int tmpstride) {
+  const int tile_width = rst->tile_width >> rst->subsampling_x;
+  const int tile_height = rst->tile_height >> rst->subsampling_y;
+  int i, j;
+  int h_start, h_end, v_start, v_end;
+  uint8_t *data_p, *tmpdata_p;
+
+  if (rst->rsi->wiener_level[tile_idx] == 0) return;
+  // Horizontal pass for tile_idx: filter rows of data into tmpdata
+  av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
+                           tile_width, tile_height, width, height, 1, 0,
+                           &h_start, &h_end, &v_start, &v_end);
+  data_p = data + h_start + v_start * stride;
+  tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+  for (i = 0; i < (v_end - v_start); ++i) {
+    for (j = 0; j < (h_end - h_start); ++j) {
+      *tmpdata_p++ = hor_sym_filter(data_p++, rst->rsi->hfilter[tile_idx]);
+    }
+    data_p += stride - (h_end - h_start);
+    tmpdata_p += tmpstride - (h_end - h_start);
+  }
+  // Vertical pass for tile_idx: filter columns of tmpdata back into data
+  av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
+                           tile_width, tile_height, width, height, 0, 1,
+                           &h_start, &h_end, &v_start, &v_end);
+  data_p = data + h_start + v_start * stride;
+  tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+  for (i = 0; i < (v_end - v_start); ++i) {
+    for (j = 0; j < (h_end - h_start); ++j) {
+      *data_p++ =
+          ver_sym_filter(tmpdata_p++, tmpstride, rst->rsi->vfilter[tile_idx]);
+    }
+    data_p += stride - (h_end - h_start);
+    tmpdata_p += tmpstride - (h_end - h_start);
+  }
+}
+
 static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
                                RestorationInternal *rst, uint8_t *tmpdata,
                                int tmpstride) {
-  int i, j, tile_idx, htile_idx, vtile_idx;
-  int h_start, h_end, v_start, v_end;
-  int tile_width, tile_height;
+  int i, tile_idx;
   uint8_t *data_p, *tmpdata_p;
 
-  tile_width = rst->tile_width >> rst->subsampling_x;
-  tile_height = rst->tile_height >> rst->subsampling_y;
-
   // Initialize tmp buffer
   data_p = data;
   tmpdata_p = tmpdata;
@@ -327,88 +326,65 @@
     data_p += stride;
     tmpdata_p += tmpstride;
   }
-
-  // Filter row-wise tile-by-tile
   for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
-    if (rst->wiener_level[tile_idx] == 0) continue;
-    htile_idx = tile_idx % rst->nhtiles;
-    vtile_idx = tile_idx / rst->nhtiles;
-    h_start =
-        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
-                                           : (width - RESTORATION_HALFWIN);
-    v_start = vtile_idx * tile_height;
-    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
-                                           : height;
-    data_p = data + h_start + v_start * stride;
-    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
-    for (i = 0; i < (v_end - v_start); ++i) {
-      for (j = 0; j < (h_end - h_start); ++j) {
-        *tmpdata_p++ = hor_sym_filter(data_p++, rst->hfilter[tile_idx]);
-      }
-      data_p += stride - (h_end - h_start);
-      tmpdata_p += tmpstride - (h_end - h_start);
-    }
+    loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, tmpdata,
+                            tmpstride);
   }
+}
 
-  // Filter column-wise tile-by-tile (bands of thickness RESTORATION_HALFWIN
-  // at top and bottom of tiles allow filtering overlap, and are not optimally
-  // filtered)
+static void loop_switchable_filter(uint8_t *data, int width, int height,
+                                   int stride, RestorationInternal *rst,
+                                   uint8_t *tmpdata, int tmpstride) {
+  int i, tile_idx;
+  uint8_t *data_p, *tmpdata_p;
+
+  // Initialize tmp buffer
+  data_p = data;
+  tmpdata_p = tmpdata;
+  for (i = 0; i < height; ++i) {
+    memcpy(tmpdata_p, data_p, sizeof(*data_p) * width);
+    data_p += stride;
+    tmpdata_p += tmpstride;
+  }
   for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
-    if (rst->wiener_level[tile_idx] == 0) continue;
-    htile_idx = tile_idx % rst->nhtiles;
-    vtile_idx = tile_idx / rst->nhtiles;
-    h_start = htile_idx * tile_width;
-    h_end =
-        (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width) : width;
-    v_start =
-        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
-                                           : (height - RESTORATION_HALFWIN);
-    data_p = data + h_start + v_start * stride;
-    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
-    for (i = 0; i < (v_end - v_start); ++i) {
-      for (j = 0; j < (h_end - h_start); ++j) {
-        *data_p++ =
-            ver_sym_filter(tmpdata_p++, tmpstride, rst->vfilter[tile_idx]);
-      }
-      data_p += stride - (h_end - h_start);
-      tmpdata_p += tmpstride - (h_end - h_start);
+    if (rst->rsi->restoration_type[tile_idx] == RESTORE_BILATERAL) {
+      loop_bilateral_filter_tile(data, tile_idx, width, height, stride, rst,
+                                 tmpdata, tmpstride);
+    } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
+      loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst,
+                              tmpdata, tmpstride);
     }
   }
 }
 
 #if CONFIG_AOM_HIGHBITDEPTH
-static void loop_bilateral_filter_highbd(uint8_t *data8, int width, int height,
-                                         int stride, RestorationInternal *rst,
-                                         uint8_t *tmpdata8, int tmpstride,
-                                         int bit_depth) {
-  int i, j, tile_idx, htile_idx, vtile_idx;
+static void loop_bilateral_filter_tile_highbd(uint16_t *data, int tile_idx,
+                                              int width, int height, int stride,
+                                              RestorationInternal *rst,
+                                              uint16_t *tmpdata, int tmpstride,
+                                              int bit_depth) {
+  const int tile_width = rst->tile_width >> rst->subsampling_x;
+  const int tile_height = rst->tile_height >> rst->subsampling_y;
+  int i, j, subtile_idx;
   int h_start, h_end, v_start, v_end;
-  int tile_width, tile_height;
 
-  uint16_t *data = CONVERT_TO_SHORTPTR(data8);
-  uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
-
-  tile_width = rst->tile_width >> rst->subsampling_x;
-  tile_height = rst->tile_height >> rst->subsampling_y;
-
-  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+  for (subtile_idx = 0; subtile_idx < BILATERAL_SUBTILES; ++subtile_idx) {
     uint16_t *data_p, *tmpdata_p;
-    const uint8_t *wr_lut_ = rst->wr_lut[tile_idx] + BILATERAL_AMP_RANGE;
+    const int level =
+        rst->rsi->bilateral_level[tile_idx * BILATERAL_SUBTILES + subtile_idx];
+    uint8_t(*wx_lut)[RESTORATION_WIN];
+    uint8_t *wr_lut_;
 
-    if (rst->bilateral_level[tile_idx] < 0) continue;
-
-    htile_idx = tile_idx % rst->nhtiles;
-    vtile_idx = tile_idx / rst->nhtiles;
-    h_start =
-        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
-                                           : (width - RESTORATION_HALFWIN);
-    v_start =
-        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
-                                           : (height - RESTORATION_HALFWIN);
+    if (level < 0) continue;
+    wr_lut_ = (rst->keyframe ? bilateral_filter_coeffs_r_kf[level]
+                             : bilateral_filter_coeffs_r[level]) +
+              BILATERAL_AMP_RANGE;
+    wx_lut = rst->keyframe ? bilateral_filter_coeffs_s_kf[level]
+                           : bilateral_filter_coeffs_s[level];
+    av1_get_rest_tile_limits(tile_idx, subtile_idx, BILATERAL_SUBTILE_BITS,
+                             rst->nhtiles, rst->nvtiles, tile_width,
+                             tile_height, width, height, 1, 1, &h_start, &h_end,
+                             &v_start, &v_end);
 
     data_p = data + h_start + v_start * stride;
     tmpdata_p = tmpdata + h_start + v_start * tmpstride;
@@ -420,8 +396,7 @@
         uint16_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
         for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
           for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
-            wt = (int)rst->wx_lut[tile_idx][y + RESTORATION_HALFWIN]
-                                 [x + RESTORATION_HALFWIN] *
+            wt = (int)wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] *
                  (int)wr_lut_[data_p2[x] - data_p[j]];
             wtsum += wt;
             flsum += wt * data_p2[x];
@@ -444,6 +419,20 @@
   }
 }
 
+static void loop_bilateral_filter_highbd(uint8_t *data8, int width, int height,
+                                         int stride, RestorationInternal *rst,
+                                         uint8_t *tmpdata8, int tmpstride,
+                                         int bit_depth) {
+  int tile_idx;
+  uint16_t *data = CONVERT_TO_SHORTPTR(data8);
+  uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
+
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    loop_bilateral_filter_tile_highbd(data, tile_idx, width, height, stride,
+                                      rst, tmpdata, tmpstride, bit_depth);
+  }
+}
+
 uint16_t hor_sym_filter_highbd(uint16_t *d, int *hfilter, int bd) {
   int32_t s =
       (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * hfilter[RESTORATION_HALFWIN];
@@ -462,20 +451,57 @@
   return clip_pixel_highbd(s >> RESTORATION_FILT_BITS, bd);
 }
 
+static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx,
+                                           int width, int height, int stride,
+                                           RestorationInternal *rst,
+                                           uint16_t *tmpdata, int tmpstride,
+                                           int bit_depth) {
+  const int tile_width = rst->tile_width >> rst->subsampling_x;
+  const int tile_height = rst->tile_height >> rst->subsampling_y;
+  int h_start, h_end, v_start, v_end;
+  int i, j;
+  uint16_t *data_p, *tmpdata_p;
+
+  if (rst->rsi->wiener_level[tile_idx] == 0) return;
+  // Horizontal pass for tile_idx: filter rows of data into tmpdata
+  av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
+                           tile_width, tile_height, width, height, 1, 0,
+                           &h_start, &h_end, &v_start, &v_end);
+  data_p = data + h_start + v_start * stride;
+  tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+  for (i = 0; i < (v_end - v_start); ++i) {
+    for (j = 0; j < (h_end - h_start); ++j) {
+      *tmpdata_p++ = hor_sym_filter_highbd(
+          data_p++, rst->rsi->hfilter[tile_idx], bit_depth);
+    }
+    data_p += stride - (h_end - h_start);
+    tmpdata_p += tmpstride - (h_end - h_start);
+  }
+  // Vertical pass for tile_idx: filter columns of tmpdata back into data
+  av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
+                           tile_width, tile_height, width, height, 0, 1,
+                           &h_start, &h_end, &v_start, &v_end);
+  data_p = data + h_start + v_start * stride;
+  tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+  for (i = 0; i < (v_end - v_start); ++i) {
+    for (j = 0; j < (h_end - h_start); ++j) {
+      *data_p++ = ver_sym_filter_highbd(tmpdata_p++, tmpstride,
+                                        rst->rsi->vfilter[tile_idx], bit_depth);
+    }
+    data_p += stride - (h_end - h_start);
+    tmpdata_p += tmpstride - (h_end - h_start);
+  }
+}
+
 static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
                                       int stride, RestorationInternal *rst,
                                       uint8_t *tmpdata8, int tmpstride,
                                       int bit_depth) {
   uint16_t *data = CONVERT_TO_SHORTPTR(data8);
   uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
-  int i, j, tile_idx, htile_idx, vtile_idx;
-  int h_start, h_end, v_start, v_end;
-  int tile_width, tile_height;
+  int i, tile_idx;
   uint16_t *data_p, *tmpdata_p;
 
-  tile_width = rst->tile_width >> rst->subsampling_x;
-  tile_height = rst->tile_height >> rst->subsampling_y;
-
   // Initialize tmp buffer
   data_p = data;
   tmpdata_p = tmpdata;
@@ -484,54 +510,36 @@
     data_p += stride;
     tmpdata_p += tmpstride;
   }
-
-  // Filter row-wise tile-by-tile
   for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
-    if (rst->wiener_level[tile_idx] == 0) continue;
-    htile_idx = tile_idx % rst->nhtiles;
-    vtile_idx = tile_idx / rst->nhtiles;
-    h_start =
-        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
-                                           : (width - RESTORATION_HALFWIN);
-    v_start = vtile_idx * tile_height;
-    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
-                                           : height;
-    data_p = data + h_start + v_start * stride;
-    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
-    for (i = 0; i < (v_end - v_start); ++i) {
-      for (j = 0; j < (h_end - h_start); ++j) {
-        *tmpdata_p++ =
-            hor_sym_filter_highbd(data_p++, rst->hfilter[tile_idx], bit_depth);
-      }
-      data_p += stride - (h_end - h_start);
-      tmpdata_p += tmpstride - (h_end - h_start);
-    }
+    loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
+                                   tmpdata, tmpstride, bit_depth);
   }
+}
 
-  // Filter column-wise tile-by-tile (bands of thickness RESTORATION_HALFWIN
-  // at top and bottom of tiles allow filtering overlap, and are not optimally
-  // filtered)
+static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
+                                          int stride, RestorationInternal *rst,
+                                          uint8_t *tmpdata8, int tmpstride,
+                                          int bit_depth) {
+  uint16_t *data = CONVERT_TO_SHORTPTR(data8);
+  uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
+  int i, tile_idx;
+  uint16_t *data_p, *tmpdata_p;
+
+  // Initialize tmp buffer
+  data_p = data;
+  tmpdata_p = tmpdata;
+  for (i = 0; i < height; ++i) {
+    memcpy(tmpdata_p, data_p, sizeof(*data_p) * width);
+    data_p += stride;
+    tmpdata_p += tmpstride;
+  }
   for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
-    if (rst->wiener_level[tile_idx] == 0) continue;
-    htile_idx = tile_idx % rst->nhtiles;
-    vtile_idx = tile_idx / rst->nhtiles;
-    h_start = htile_idx * tile_width;
-    h_end =
-        (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width) : width;
-    v_start =
-        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
-                                           : (height - RESTORATION_HALFWIN);
-    data_p = data + h_start + v_start * stride;
-    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
-    for (i = 0; i < (v_end - v_start); ++i) {
-      for (j = 0; j < (h_end - h_start); ++j) {
-        *data_p++ = ver_sym_filter_highbd(tmpdata_p++, tmpstride,
-                                          rst->vfilter[tile_idx], bit_depth);
-      }
-      data_p += stride - (h_end - h_start);
-      tmpdata_p += tmpstride - (h_end - h_start);
+    if (rst->rsi->restoration_type[tile_idx] == RESTORE_BILATERAL) {
+      loop_bilateral_filter_tile_highbd(data, tile_idx, width, height, stride,
+                                        rst, tmpdata, tmpstride, bit_depth);
+    } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
+      loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
+                                     tmpdata, tmpstride, bit_depth);
     }
   }
 }
@@ -548,16 +556,23 @@
   int yend = end_mi_row << MI_SIZE_LOG2;
   int uvend = yend >> cm->subsampling_y;
   restore_func_type restore_func =
-      cm->rst_internal.restoration_type == RESTORE_BILATERAL
+      cm->rst_internal.rsi->frame_restoration_type == RESTORE_BILATERAL
           ? loop_bilateral_filter
-          : loop_wiener_filter;
+          : (cm->rst_internal.rsi->frame_restoration_type == RESTORE_WIENER
+                 ? loop_wiener_filter
+                 : loop_switchable_filter);
 #if CONFIG_AOM_HIGHBITDEPTH
   restore_func_highbd_type restore_func_highbd =
-      cm->rst_internal.restoration_type == RESTORE_BILATERAL
+      cm->rst_internal.rsi->frame_restoration_type == RESTORE_BILATERAL
           ? loop_bilateral_filter_highbd
-          : loop_wiener_filter_highbd;
+          : (cm->rst_internal.rsi->frame_restoration_type == RESTORE_WIENER
+                 ? loop_wiener_filter_highbd
+                 : loop_switchable_filter_highbd);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
   YV12_BUFFER_CONFIG tmp_buf;
+
+  if (cm->rst_internal.rsi->frame_restoration_type == RESTORE_NONE) return;
+
   memset(&tmp_buf, 0, sizeof(YV12_BUFFER_CONFIG));
 
   yend = AOMMIN(yend, cm->height);
@@ -612,25 +627,13 @@
 #endif  // CONFIG_AOM_HIGHBITDEPTH
   }
   aom_free_frame_buffer(&tmp_buf);
-  if (cm->rst_internal.restoration_type == RESTORE_BILATERAL) {
-    free(cm->rst_internal.wr_lut);
-    cm->rst_internal.wr_lut = NULL;
-    free(cm->rst_internal.wx_lut);
-    cm->rst_internal.wx_lut = NULL;
-  }
-  if (cm->rst_internal.restoration_type == RESTORE_WIENER) {
-    free(cm->rst_internal.vfilter);
-    cm->rst_internal.vfilter = NULL;
-    free(cm->rst_internal.hfilter);
-    cm->rst_internal.hfilter = NULL;
-  }
 }
 
 void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                                 RestorationInfo *rsi, int y_only,
                                 int partial_frame) {
   int start_mi_row, end_mi_row, mi_rows_to_filter;
-  if (rsi->restoration_type != RESTORE_NONE) {
+  if (rsi->frame_restoration_type != RESTORE_NONE) {
     start_mi_row = 0;
     mi_rows_to_filter = cm->mi_rows;
     if (partial_frame && cm->mi_rows > 8) {
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index d8a312d..3d4802f 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -26,9 +26,10 @@
 #define BILATERAL_LEVELS (1 << BILATERAL_LEVEL_BITS)
 // #define DEF_BILATERAL_LEVEL     2
 
-#define RESTORATION_TILESIZES 3
-#define BILATERAL_TILESIZE 1
-#define WIENER_TILESIZE 2
+#define RESTORATION_TILESIZE_SML 128  // tile size for frames <= 352*288 pixels
+#define RESTORATION_TILESIZE_BIG 256  // tile size for all larger frames
+#define BILATERAL_SUBTILE_BITS 1      // log2 of bilateral subtiles per side
+#define BILATERAL_SUBTILES (1 << (2 * BILATERAL_SUBTILE_BITS))  // per tile
 
 #define RESTORATION_HALFWIN 3
 #define RESTORATION_HALFWIN1 (RESTORATION_HALFWIN + 1)
@@ -56,43 +57,84 @@
 #define WIENER_FILT_TAP2_MAXV \
   (WIENER_FILT_TAP2_MINV - 1 + (1 << WIENER_FILT_TAP2_BITS))
 
-typedef enum {
-  RESTORE_NONE,
-  RESTORE_BILATERAL,
-  RESTORE_WIENER,
-} RestorationType;
-
 typedef struct {
-  RestorationType restoration_type;
+  RestorationType frame_restoration_type;  // type selected for the whole frame
+  RestorationType *restoration_type;       // type of each tile (array)
   // Bilateral filter
   int *bilateral_level;
   // Wiener filter
   int *wiener_level;
-  int (*vfilter)[RESTORATION_HALFWIN], (*hfilter)[RESTORATION_HALFWIN];
-} RestorationInfo;
-
-typedef struct {
-  RestorationType restoration_type;
-  int subsampling_x;
-  int subsampling_y;
-  int tilesize_index;
-  int ntiles;
-  int tile_width, tile_height;
-  int nhtiles, nvtiles;
-  // Bilateral filter
-  int *bilateral_level;
-  uint8_t (**wx_lut)[RESTORATION_WIN];
-  uint8_t **wr_lut;
-  // Wiener filter
-  int *wiener_level;
   int (*vfilter)[RESTORATION_WIN], (*hfilter)[RESTORATION_WIN];
+} RestorationInfo;
+
+typedef struct {
+  RestorationInfo *rsi;  // restoration parameters the filters read from
+  int keyframe;
+  int subsampling_x;
+  int subsampling_y;
+  int ntiles;  // total number of restoration tiles
+  int tile_width, tile_height;
+  int nhtiles, nvtiles;  // tiles per row / per column
 } RestorationInternal;
 
+static INLINE int get_rest_tilesize(int width, int height) {
+  // Frames of at most 352x288 pixels get the small tile size. The area is
+  // computed in 64 bits so that very large dimensions cannot overflow int.
+  return ((int64_t)width * height <= 352 * 288) ? RESTORATION_TILESIZE_SML
+                                                : RESTORATION_TILESIZE_BIG;
+}
+
+// Computes the restoration tile grid of a frame; non-NULL outputs are filled.
+static INLINE int av1_get_rest_ntiles(int width, int height, int *tile_width,
+                                      int *tile_height, int *nhtiles,
+                                      int *nvtiles) {
+  const int size = get_rest_tilesize(width, height);
+  const int tw = (size < 0) ? width : AOMMIN(size, width);
+  const int th = (size < 0) ? height : AOMMIN(size, height);
+  // Round to the nearest tile count; the function returns cols * rows.
+  const int cols = (width + (tw >> 1)) / tw;
+  const int rows = (height + (th >> 1)) / th;
+  if (tile_width != NULL) *tile_width = tw;
+  if (tile_height != NULL) *tile_height = th;
+  if (nhtiles != NULL) *nhtiles = cols;
+  if (nvtiles != NULL) *nvtiles = rows;
+  return cols * rows;
+}
+
+// Computes the bounds of tile tile_idx (start inclusive, end exclusive), or of
+// its subtile subtile_idx when subtile_bits is non-zero, optionally clamped.
+static INLINE void av1_get_rest_tile_limits(
+    int tile_idx, int subtile_idx, int subtile_bits, int nhtiles, int nvtiles,
+    int tile_width, int tile_height, int im_width, int im_height, int clamp_h,
+    int clamp_v, int *h_start, int *h_end, int *v_start, int *v_end) {
+  // Tiles are numbered in raster order; edge tiles extend to the image border.
+  const int col = tile_idx % nhtiles, row = tile_idx / nhtiles;
+  int x0 = col * tile_width, y0 = row * tile_height;
+  int x1 = (col < nhtiles - 1) ? x0 + tile_width : im_width;
+  int y1 = (row < nvtiles - 1) ? y0 + tile_height : im_height;
+  if (subtile_bits) {
+    // Narrow to one subtile; the last subtile per axis keeps the tile's end.
+    const int last = (1 << subtile_bits) - 1;
+    const int sub_w = (x1 - x0) >> subtile_bits;
+    const int sub_h = (y1 - y0) >> subtile_bits;
+    const int sub_col = subtile_idx & last;
+    const int sub_row = subtile_idx >> subtile_bits;
+    x0 += sub_col * sub_w;
+    y0 += sub_row * sub_h;
+    if (sub_col != last) x1 = x0 + sub_w;
+    if (sub_row != last) y1 = y0 + sub_h;
+  }
+  if (clamp_h) x0 = AOMMAX(x0, RESTORATION_HALFWIN);
+  if (clamp_h) x1 = AOMMIN(x1, im_width - RESTORATION_HALFWIN);
+  if (clamp_v) y0 = AOMMAX(y0, RESTORATION_HALFWIN);
+  if (clamp_v) y1 = AOMMIN(y1, im_height - RESTORATION_HALFWIN);
+  *h_start = x0;
+  *h_end = x1;
+  *v_start = y0;
+  *v_end = y1;
+}
+
 int av1_bilateral_level_bits(const struct AV1Common *const cm);
-int av1_get_restoration_ntiles(int tilesize, int width, int height);
-void av1_get_restoration_tile_size(int tilesize, int width, int height,
-                                   int *tile_width, int *tile_height,
-                                   int *nhtiles, int *nvtiles);
 void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
                                int kf, int width, int height);
 void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 2e550cb..167cb66 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -16,20 +16,20 @@
 
 #include "av1/common/warped_motion.h"
 
-static ProjectPointsType get_project_points_type(TransformationType type) {
+static ProjectPointsFunc get_project_points_type(TransformationType type) {
   switch (type) {
-    case HOMOGRAPHY: return projectPointsHomography;
-    case AFFINE: return projectPointsAffine;
-    case ROTZOOM: return projectPointsRotZoom;
-    case TRANSLATION: return projectPointsTranslation;
+    case HOMOGRAPHY: return project_points_homography;
+    case AFFINE: return project_points_affine;
+    case ROTZOOM: return project_points_rotzoom;
+    case TRANSLATION: return project_points_translation;
     default: assert(0); return NULL;
   }
 }
 
-void projectPointsTranslation(int16_t *mat, int *points, int *proj, const int n,
-                              const int stride_points, const int stride_proj,
-                              const int subsampling_x,
-                              const int subsampling_y) {
+void project_points_translation(int16_t *mat, int *points, int *proj,
+                                const int n, const int stride_points,
+                                const int stride_proj, const int subsampling_x,
+                                const int subsampling_y) {
   int i;
   for (i = 0; i < n; ++i) {
     const int x = *(points++), y = *(points++);
@@ -52,9 +52,9 @@
   }
 }
 
-void projectPointsRotZoom(int16_t *mat, int *points, int *proj, const int n,
-                          const int stride_points, const int stride_proj,
-                          const int subsampling_x, const int subsampling_y) {
+void project_points_rotzoom(int16_t *mat, int *points, int *proj, const int n,
+                            const int stride_points, const int stride_proj,
+                            const int subsampling_x, const int subsampling_y) {
   int i;
   for (i = 0; i < n; ++i) {
     const int x = *(points++), y = *(points++);
@@ -79,9 +79,9 @@
   }
 }
 
-void projectPointsAffine(int16_t *mat, int *points, int *proj, const int n,
-                         const int stride_points, const int stride_proj,
-                         const int subsampling_x, const int subsampling_y) {
+void project_points_affine(int16_t *mat, int *points, int *proj, const int n,
+                           const int stride_points, const int stride_proj,
+                           const int subsampling_x, const int subsampling_y) {
   int i;
   for (i = 0; i < n; ++i) {
     const int x = *(points++), y = *(points++);
@@ -106,9 +106,10 @@
   }
 }
 
-void projectPointsHomography(int16_t *mat, int *points, int *proj, const int n,
-                             const int stride_points, const int stride_proj,
-                             const int subsampling_x, const int subsampling_y) {
+void project_points_homography(int16_t *mat, int *points, int *proj,
+                               const int n, const int stride_points,
+                               const int stride_proj, const int subsampling_x,
+                               const int subsampling_y) {
   int i;
   int64_t x, y, Z;
   int64_t xp, yp;
@@ -226,24 +227,6 @@
   }
 }
 
-/*
-static int32_t do_linear_filter(int32_t *p, int x) {
-  int32_t sum = 0;
-  sum = p[0] * (WARPEDPIXEL_PREC_SHIFTS - x) + p[1] * x;
-  sum <<= (WARPEDPIXEL_FILTER_BITS - WARPEDPIXEL_PREC_BITS);
-  return sum;
-}
-
-static int32_t do_4tap_filter(int32_t *p, int x) {
-  int i;
-  int32_t sum = 0;
-  for (i = 0; i < 4; ++i) {
-    sum += p[i - 1] * filter_4tap[x][i];
-  }
-  return sum;
-}
-*/
-
 static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col,
                                  int stride, int x, int y_start) {
   int i;
@@ -473,7 +456,7 @@
                                    int subsampling_x, int subsampling_y,
                                    int x_scale, int y_scale, int bd) {
   int i, j;
-  ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
   int gm_err = 0, no_gm_err = 0;
@@ -506,7 +489,7 @@
                               int subsampling_y, int x_scale, int y_scale,
                               int bd) {
   int i, j;
-  ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
   uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
   if (projectpoints == NULL) return;
@@ -534,7 +517,7 @@
   int gm_err = 0, no_gm_err = 0;
   int gm_sumerr = 0, no_gm_sumerr = 0;
   int i, j;
-  ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
   for (i = p_row; i < p_row + p_height; ++i) {
     for (j = p_col; j < p_col + p_width; ++j) {
       int in[2], out[2];
@@ -561,7 +544,7 @@
                        int subsampling_x, int subsampling_y, int x_scale,
                        int y_scale) {
   int i, j;
-  ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
   if (projectpoints == NULL) return;
   for (i = p_row; i < p_row + p_height; ++i) {
     for (j = p_col; j < p_col + p_width; ++j) {
@@ -591,11 +574,10 @@
     return highbd_warp_erroradv(
         wm, ref, width, height, stride, dst, p_col, p_row, p_width, p_height,
         p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd);
-  else
 #endif  // CONFIG_AOM_HIGHBITDEPTH
-    return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
-                         p_width, p_height, p_stride, subsampling_x,
-                         subsampling_y, x_scale, y_scale);
+  return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
+                       p_width, p_height, p_stride, subsampling_x,
+                       subsampling_y, x_scale, y_scale);
 }
 
 void av1_warp_plane(WarpedMotionParams *wm,
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index d7e8a22..53f06dd 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef AV1_COMMON_WARPED_MOTION_H
-#define AV1_COMMON_WARPED_MOTION_H
+#ifndef AV1_COMMON_WARPED_MOTION_H_
+#define AV1_COMMON_WARPED_MOTION_H_
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -22,43 +22,29 @@
 #include "aom_dsp/aom_dsp_common.h"
 #include "av1/common/mv.h"
 
-// Bits of precision used for the model
-#define WARPEDMODEL_PREC_BITS 8
-#define WARPEDMODEL_ROW3HOMO_PREC_BITS 12
-
-// Bits of subpel precision for warped interpolation
-#define WARPEDPIXEL_PREC_BITS 6
-#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS)
-
-// Taps for ntap filter
-#define WARPEDPIXEL_FILTER_TAPS 6
-
-// Precision of filter taps
-#define WARPEDPIXEL_FILTER_BITS 7
-
-#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS)
-
-typedef void (*ProjectPointsType)(int16_t *mat, int *points, int *proj,
+typedef void (*ProjectPointsFunc)(int16_t *mat, int *points, int *proj,
                                   const int n, const int stride_points,
                                   const int stride_proj,
                                   const int subsampling_x,
                                   const int subsampling_y);
 
-void projectPointsHomography(int16_t *mat, int *points, int *proj, const int n,
-                             const int stride_points, const int stride_proj,
-                             const int subsampling_x, const int subsampling_y);
+void project_points_translation(int16_t *mat, int *points, int *proj,
+                                const int n, const int stride_points,
+                                const int stride_proj, const int subsampling_x,
+                                const int subsampling_y);
 
-void projectPointsAffine(int16_t *mat, int *points, int *proj, const int n,
-                         const int stride_points, const int stride_proj,
-                         const int subsampling_x, const int subsampling_y);
+void project_points_rotzoom(int16_t *mat, int *points, int *proj, const int n,
+                            const int stride_points, const int stride_proj,
+                            const int subsampling_x, const int subsampling_y);
 
-void projectPointsRotZoom(int16_t *mat, int *points, int *proj, const int n,
-                          const int stride_points, const int stride_proj,
-                          const int subsampling_x, const int subsampling_y);
+void project_points_affine(int16_t *mat, int *points, int *proj, const int n,
+                           const int stride_points, const int stride_proj,
+                           const int subsampling_x, const int subsampling_y);
 
-void projectPointsTranslation(int16_t *mat, int *points, int *proj, const int n,
-                              const int stride_points, const int stride_proj,
-                              const int subsampling_x, const int subsampling_y);
+void project_points_homography(int16_t *mat, int *points, int *proj,
+                               const int n, const int stride_points,
+                               const int stride_proj, const int subsampling_x,
+                               const int subsampling_y);
 
 double av1_warp_erroradv(WarpedMotionParams *wm,
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -81,4 +67,4 @@
 // Integerize model into the WarpedMotionParams structure
 void av1_integerize_model(const double *model, TransformationType wmtype,
                           WarpedMotionParams *wm);
-#endif  // AV1_COMMON_WARPED_MOTION_H
+#endif  // AV1_COMMON_WARPED_MOTION_H_
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index c3a0a83..b4c3525 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1899,62 +1899,134 @@
 }
 
 #if CONFIG_LOOP_RESTORATION
-static void setup_restoration(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+// Reads the frame-level restoration type, coded as two bits:
+// 00 = none, 01 = switchable, 10 = bilateral, 11 = wiener.
+static void decode_restoration_mode(AV1_COMMON *cm,
+                                    struct aom_read_bit_buffer *rb) {
+  static const RestorationType kFrameTypes[2][2] = {
+    { RESTORE_NONE, RESTORE_SWITCHABLE }, { RESTORE_BILATERAL, RESTORE_WIENER }
+  };
+  const int first_bit = aom_rb_read_bit(rb) != 0;
+  const int second_bit = aom_rb_read_bit(rb) != 0;
+  cm->rst_info.frame_restoration_type = kFrameTypes[first_bit][second_bit];
+}
+
+// Reads the per-tile restoration parameters that follow from the frame-level
+// type already stored in cm->rst_info, growing the per-tile arrays to hold
+// one entry per restoration tile (or per bilateral subtile).
+static void decode_restoration(AV1_COMMON *cm, aom_reader *rb) {
   int i;
   RestorationInfo *rsi = &cm->rst_info;
-  int ntiles;
-  if (aom_rb_read_bit(rb)) {
-    if (aom_rb_read_bit(rb)) {
-      rsi->restoration_type = RESTORE_BILATERAL;
-      ntiles =
-          av1_get_restoration_ntiles(BILATERAL_TILESIZE, cm->width, cm->height);
+  const int ntiles = av1_get_rest_ntiles(cm->width, cm->height,
+                                         NULL, NULL, NULL, NULL);
+  if (rsi->frame_restoration_type != RESTORE_NONE) {
+    rsi->restoration_type = (RestorationType *)aom_realloc(
+        rsi->restoration_type, sizeof(*rsi->restoration_type) * ntiles);
+    assert(rsi->restoration_type != NULL);
+    if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
       rsi->bilateral_level = (int *)aom_realloc(
-          rsi->bilateral_level, sizeof(*rsi->bilateral_level) * ntiles);
+          rsi->bilateral_level,
+          sizeof(*rsi->bilateral_level) * ntiles * BILATERAL_SUBTILES);
       assert(rsi->bilateral_level != NULL);
-      for (i = 0; i < ntiles; ++i) {
-        if (aom_rb_read_bit(rb)) {
-          rsi->bilateral_level[i] =
-              aom_rb_read_literal(rb, av1_bilateral_level_bits(cm));
-        } else {
-          rsi->bilateral_level[i] = -1;
-        }
-      }
-    } else {
-      rsi->restoration_type = RESTORE_WIENER;
-      ntiles =
-          av1_get_restoration_ntiles(WIENER_TILESIZE, cm->width, cm->height);
       rsi->wiener_level = (int *)aom_realloc(
           rsi->wiener_level, sizeof(*rsi->wiener_level) * ntiles);
       assert(rsi->wiener_level != NULL);
-      rsi->vfilter = (int(*)[RESTORATION_HALFWIN])aom_realloc(
+      rsi->vfilter = (int(*)[RESTORATION_WIN])aom_realloc(
           rsi->vfilter, sizeof(*rsi->vfilter) * ntiles);
       assert(rsi->vfilter != NULL);
-      rsi->hfilter = (int(*)[RESTORATION_HALFWIN])aom_realloc(
+      rsi->hfilter = (int(*)[RESTORATION_WIN])aom_realloc(
           rsi->hfilter, sizeof(*rsi->hfilter) * ntiles);
       assert(rsi->hfilter != NULL);
       for (i = 0; i < ntiles; ++i) {
-        rsi->wiener_level[i] = aom_rb_read_bit(rb);
-        if (rsi->wiener_level[i]) {
-          rsi->vfilter[i][0] = aom_rb_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+        // Each tile codes its own type, then that type's parameters.
+        rsi->restoration_type[i] = aom_read_tree(
+            rb, av1_switchable_restore_tree, cm->fc->switchable_restore_prob);
+        if (rsi->restoration_type[i] == RESTORE_WIENER) {
+          rsi->wiener_level[i] = 1;
+          rsi->vfilter[i][0] = aom_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+                               WIENER_FILT_TAP0_MINV;
+          rsi->vfilter[i][1] = aom_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+                               WIENER_FILT_TAP1_MINV;
+          rsi->vfilter[i][2] = aom_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+                               WIENER_FILT_TAP2_MINV;
+          rsi->hfilter[i][0] = aom_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+                               WIENER_FILT_TAP0_MINV;
+          rsi->hfilter[i][1] = aom_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+                               WIENER_FILT_TAP1_MINV;
+          rsi->hfilter[i][2] = aom_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+                               WIENER_FILT_TAP2_MINV;
+        } else if (rsi->restoration_type[i] == RESTORE_BILATERAL) {
+          int s;
+          for (s = 0; s < BILATERAL_SUBTILES; ++s) {
+            const int j = i * BILATERAL_SUBTILES + s;
+            // With one subtile per tile the tile type already signals whether
+            // the filter is used, so the per-subtile on/off flag is skipped.
+#if BILATERAL_SUBTILE_BITS == 0
+            rsi->bilateral_level[j] =
+                aom_read_literal(rb, av1_bilateral_level_bits(cm));
+#else
+            if (aom_read(rb, RESTORE_NONE_BILATERAL_PROB)) {
+              rsi->bilateral_level[j] =
+                  aom_read_literal(rb, av1_bilateral_level_bits(cm));
+            } else {
+              rsi->bilateral_level[j] = -1;
+            }
+#endif
+          }
+        }
+      }
+    } else if (rsi->frame_restoration_type == RESTORE_WIENER) {
+      rsi->wiener_level = (int *)aom_realloc(
+          rsi->wiener_level, sizeof(*rsi->wiener_level) * ntiles);
+      assert(rsi->wiener_level != NULL);
+      rsi->vfilter = (int(*)[RESTORATION_WIN])aom_realloc(
+          rsi->vfilter, sizeof(*rsi->vfilter) * ntiles);
+      assert(rsi->vfilter != NULL);
+      rsi->hfilter = (int(*)[RESTORATION_WIN])aom_realloc(
+          rsi->hfilter, sizeof(*rsi->hfilter) * ntiles);
+      assert(rsi->hfilter != NULL);
+      for (i = 0; i < ntiles; ++i) {
+        if (aom_read(rb, RESTORE_NONE_WIENER_PROB)) {
+          rsi->wiener_level[i] = 1;
+          rsi->restoration_type[i] = RESTORE_WIENER;
+          rsi->vfilter[i][0] = aom_read_literal(rb, WIENER_FILT_TAP0_BITS) +
                                WIENER_FILT_TAP0_MINV;
-          rsi->vfilter[i][1] = aom_rb_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+          rsi->vfilter[i][1] = aom_read_literal(rb, WIENER_FILT_TAP1_BITS) +
                                WIENER_FILT_TAP1_MINV;
-          rsi->vfilter[i][2] = aom_rb_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+          rsi->vfilter[i][2] = aom_read_literal(rb, WIENER_FILT_TAP2_BITS) +
                                WIENER_FILT_TAP2_MINV;
-          rsi->hfilter[i][0] = aom_rb_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+          rsi->hfilter[i][0] = aom_read_literal(rb, WIENER_FILT_TAP0_BITS) +
                                WIENER_FILT_TAP0_MINV;
-          rsi->hfilter[i][1] = aom_rb_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+          rsi->hfilter[i][1] = aom_read_literal(rb, WIENER_FILT_TAP1_BITS) +
                                WIENER_FILT_TAP1_MINV;
-          rsi->hfilter[i][2] = aom_rb_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+          rsi->hfilter[i][2] = aom_read_literal(rb, WIENER_FILT_TAP2_BITS) +
                                WIENER_FILT_TAP2_MINV;
         } else {
-          rsi->vfilter[i][0] = rsi->vfilter[i][1] = rsi->vfilter[i][2] = 0;
-          rsi->hfilter[i][0] = rsi->hfilter[i][1] = rsi->hfilter[i][2] = 0;
+          rsi->wiener_level[i] = 0;
+          rsi->restoration_type[i] = RESTORE_NONE;
+        }
+      }
+    } else {
+      rsi->bilateral_level = (int *)aom_realloc(
+          rsi->bilateral_level,
+          sizeof(*rsi->bilateral_level) * ntiles * BILATERAL_SUBTILES);
+      assert(rsi->bilateral_level != NULL);
+      for (i = 0; i < ntiles; ++i) {
+        int s;
+        rsi->restoration_type[i] = RESTORE_BILATERAL;
+        for (s = 0; s < BILATERAL_SUBTILES; ++s) {
+          const int j = i * BILATERAL_SUBTILES + s;
+          if (aom_read(rb, RESTORE_NONE_BILATERAL_PROB)) {
+            rsi->bilateral_level[j] =
+                aom_read_literal(rb, av1_bilateral_level_bits(cm));
+          } else {
+            rsi->bilateral_level[j] = -1;
+          }
         }
       }
     }
   } else {
-    rsi->restoration_type = RESTORE_NONE;
+    rsi->frame_restoration_type = RESTORE_NONE;
   }
 }
 #endif  // CONFIG_LOOP_RESTORATION
@@ -3286,7 +3358,7 @@
   setup_dering(cm, rb);
 #endif
 #if CONFIG_LOOP_RESTORATION
-  setup_restoration(cm, rb);
+  decode_restoration_mode(cm, rb);
 #endif  // CONFIG_LOOP_RESTORATION
   setup_quantization(cm, rb);
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -3468,6 +3540,10 @@
                        "Failed to allocate compressed header ANS decoder");
 #endif  // !CONFIG_ANS
 
+#if CONFIG_LOOP_RESTORATION
+  decode_restoration(cm, &r);
+#endif
+
   if (cm->tx_mode == TX_MODE_SELECT) {
     for (i = 0; i < TX_SIZES - 1; ++i)
       for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 0f2daa1..9c01ca8 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -150,6 +150,9 @@
 #if CONFIG_OBMC || CONFIG_WARPED_MOTION
 static struct av1_token motvar_encodings[MOTION_VARIATIONS];
 #endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+#if CONFIG_LOOP_RESTORATION
+static struct av1_token switchable_restore_encodings[RESTORE_SWITCHABLE_TYPES];
+#endif  // CONFIG_LOOP_RESTORATION
 
 void av1_encode_token_init(void) {
 #if CONFIG_EXT_TX
@@ -176,6 +179,10 @@
   av1_tokens_from_tree(global_motion_types_encodings,
                        av1_global_motion_types_tree);
 #endif  // CONFIG_GLOBAL_MOTION
+#if CONFIG_LOOP_RESTORATION
+  av1_tokens_from_tree(switchable_restore_encodings,
+                       av1_switchable_restore_tree);
+#endif  // CONFIG_LOOP_RESTORATION
 }
 
 static void write_intra_mode(aom_writer *w, PREDICTION_MODE mode,
@@ -2422,42 +2429,102 @@
 }
 
 #if CONFIG_LOOP_RESTORATION
-static void encode_restoration(AV1_COMMON *cm,
-                               struct aom_write_bit_buffer *wb) {
+// Writes the frame-level restoration type as two bits:
+// 00 = none, 01 = switchable, 10 = bilateral, 11 = wiener.
+static void encode_restoration_mode(AV1_COMMON *cm,
+                                    struct aom_write_bit_buffer *wb) {
+  const RestorationType type = cm->rst_info.frame_restoration_type;
+  int first_bit = 0;
+  int second_bit = 0;
+  if (type == RESTORE_SWITCHABLE) {
+    second_bit = 1;
+  } else if (type == RESTORE_BILATERAL) {
+    first_bit = 1;
+  } else if (type == RESTORE_WIENER) {
+    first_bit = 1;
+    second_bit = 1;
+  } else if (type != RESTORE_NONE) {
+    // Not a known frame-level type: flag it and write nothing.
+    assert(0);
+    return;
+  }
+  // RESTORE_NONE falls through with both bits still zero.
+  aom_wb_write_bit(wb, first_bit);
+  aom_wb_write_bit(wb, second_bit);
+}
+
+// Writes the per-tile restoration parameters for cm->rst_info's frame type.
+static void encode_restoration(AV1_COMMON *cm, aom_writer *wb) {
   int i;
   RestorationInfo *rst = &cm->rst_info;
-  aom_wb_write_bit(wb, rst->restoration_type != RESTORE_NONE);
-  if (rst->restoration_type != RESTORE_NONE) {
-    if (rst->restoration_type == RESTORE_BILATERAL) {
-      aom_wb_write_bit(wb, 1);
+  if (rst->frame_restoration_type != RESTORE_NONE) {
+    if (rst->frame_restoration_type == RESTORE_SWITCHABLE) {
+      // Each tile codes its own type token, then that type's parameters.
       for (i = 0; i < cm->rst_internal.ntiles; ++i) {
-        if (rst->bilateral_level[i] >= 0) {
-          aom_wb_write_bit(wb, 1);
-          aom_wb_write_literal(wb, rst->bilateral_level[i],
-                               av1_bilateral_level_bits(cm));
+        av1_write_token(
+            wb, av1_switchable_restore_tree, cm->fc->switchable_restore_prob,
+            &switchable_restore_encodings[rst->restoration_type[i]]);
+        if (rst->restoration_type[i] == RESTORE_NONE) {  // no parameters
+        } else if (rst->restoration_type[i] == RESTORE_BILATERAL) {
+          int s;
+          for (s = 0; s < BILATERAL_SUBTILES; ++s) {
+            const int j = i * BILATERAL_SUBTILES + s;
+#if BILATERAL_SUBTILE_BITS == 0  // single subtile: the token is the on/off flag
+            aom_write_literal(wb, rst->bilateral_level[j],
+                              av1_bilateral_level_bits(cm));
+#else
+            aom_write(wb, rst->bilateral_level[j] >= 0,
+                      RESTORE_NONE_BILATERAL_PROB);
+            if (rst->bilateral_level[j] >= 0) {
+              aom_write_literal(wb, rst->bilateral_level[j],
+                                av1_bilateral_level_bits(cm));
+            }
+#endif
+          }
         } else {
-          aom_wb_write_bit(wb, 0);
+          aom_write_literal(wb, rst->vfilter[i][0] - WIENER_FILT_TAP0_MINV,
+                            WIENER_FILT_TAP0_BITS);
+          aom_write_literal(wb, rst->vfilter[i][1] - WIENER_FILT_TAP1_MINV,
+                            WIENER_FILT_TAP1_BITS);
+          aom_write_literal(wb, rst->vfilter[i][2] - WIENER_FILT_TAP2_MINV,
+                            WIENER_FILT_TAP2_BITS);
+          aom_write_literal(wb, rst->hfilter[i][0] - WIENER_FILT_TAP0_MINV,
+                            WIENER_FILT_TAP0_BITS);
+          aom_write_literal(wb, rst->hfilter[i][1] - WIENER_FILT_TAP1_MINV,
+                            WIENER_FILT_TAP1_BITS);
+          aom_write_literal(wb, rst->hfilter[i][2] - WIENER_FILT_TAP2_MINV,
+                            WIENER_FILT_TAP2_BITS);
         }
       }
-    } else {
-      aom_wb_write_bit(wb, 0);
+    } else if (rst->frame_restoration_type == RESTORE_BILATERAL) {
       for (i = 0; i < cm->rst_internal.ntiles; ++i) {
+        int s;
+        for (s = 0; s < BILATERAL_SUBTILES; ++s) {
+          const int j = i * BILATERAL_SUBTILES + s;
+          aom_write(wb, rst->bilateral_level[j] >= 0,
+                    RESTORE_NONE_BILATERAL_PROB);
+          if (rst->bilateral_level[j] >= 0) {
+            aom_write_literal(wb, rst->bilateral_level[j],
+                              av1_bilateral_level_bits(cm));
+          }
+        }
+      }
+    } else if (rst->frame_restoration_type == RESTORE_WIENER) {
+      for (i = 0; i < cm->rst_internal.ntiles; ++i) {
+        aom_write(wb, rst->wiener_level[i] != 0, RESTORE_NONE_WIENER_PROB);
         if (rst->wiener_level[i]) {
-          aom_wb_write_bit(wb, 1);
-          aom_wb_write_literal(wb, rst->vfilter[i][0] - WIENER_FILT_TAP0_MINV,
-                               WIENER_FILT_TAP0_BITS);
-          aom_wb_write_literal(wb, rst->vfilter[i][1] - WIENER_FILT_TAP1_MINV,
-                               WIENER_FILT_TAP1_BITS);
-          aom_wb_write_literal(wb, rst->vfilter[i][2] - WIENER_FILT_TAP2_MINV,
-                               WIENER_FILT_TAP2_BITS);
-          aom_wb_write_literal(wb, rst->hfilter[i][0] - WIENER_FILT_TAP0_MINV,
-                               WIENER_FILT_TAP0_BITS);
-          aom_wb_write_literal(wb, rst->hfilter[i][1] - WIENER_FILT_TAP1_MINV,
-                               WIENER_FILT_TAP1_BITS);
-          aom_wb_write_literal(wb, rst->hfilter[i][2] - WIENER_FILT_TAP2_MINV,
-                               WIENER_FILT_TAP2_BITS);
-        } else {
-          aom_wb_write_bit(wb, 0);
+          aom_write_literal(wb, rst->vfilter[i][0] - WIENER_FILT_TAP0_MINV,
+                            WIENER_FILT_TAP0_BITS);
+          aom_write_literal(wb, rst->vfilter[i][1] - WIENER_FILT_TAP1_MINV,
+                            WIENER_FILT_TAP1_BITS);
+          aom_write_literal(wb, rst->vfilter[i][2] - WIENER_FILT_TAP2_MINV,
+                            WIENER_FILT_TAP2_BITS);
+          aom_write_literal(wb, rst->hfilter[i][0] - WIENER_FILT_TAP0_MINV,
+                            WIENER_FILT_TAP0_BITS);
+          aom_write_literal(wb, rst->hfilter[i][1] - WIENER_FILT_TAP1_MINV,
+                            WIENER_FILT_TAP1_BITS);
+          aom_write_literal(wb, rst->hfilter[i][2] - WIENER_FILT_TAP2_MINV,
+                            WIENER_FILT_TAP2_BITS);
         }
       }
     }
@@ -3185,7 +3252,7 @@
   encode_dering(cm->dering_level, wb);
 #endif  // CONFIG_DERING
 #if CONFIG_LOOP_RESTORATION
-  encode_restoration(cm, wb);
+  encode_restoration_mode(cm, wb);
 #endif  // CONFIG_LOOP_RESTORATION
   encode_quantization(cm, wb);
   encode_segmentation(cm, xd, wb);
@@ -3284,6 +3351,11 @@
   header_bc = &real_header_bc;
   aom_start_encode(header_bc, data);
 #endif
+
+#if CONFIG_LOOP_RESTORATION
+  encode_restoration(cm, header_bc);
+#endif  // CONFIG_LOOP_RESTORATION
+
   update_txfm_probs(cm, header_bc, counts);
   update_coef_probs(cpi, header_bc);
 
diff --git a/av1/encoder/corner_detect.c b/av1/encoder/corner_detect.c
index a0500e3..44747d3 100644
--- a/av1/encoder/corner_detect.c
+++ b/av1/encoder/corner_detect.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -14,22 +14,23 @@
 #include <math.h>
 #include <assert.h>
 
-#include "av1/encoder/corner_detect.h"
 #include "third_party/fastfeat/fast.h"
 
+#include "av1/encoder/corner_detect.h"
+
 // Fast_9 wrapper
 #define FAST_BARRIER 40
-int FastCornerDetect(unsigned char *buf, int width, int height, int stride,
-                     int *points, int max_points) {
+int fast_corner_detect(unsigned char *buf, int width, int height, int stride,
+                       int *points, int max_points) {
   int num_points;
-  xy *frm_corners_xy = fast9_detect_nonmax(buf, width, height, stride,
-                                           FAST_BARRIER, &num_points);
+  xy *const frm_corners_xy = fast9_detect_nonmax(buf, width, height, stride,
+                                                 FAST_BARRIER, &num_points);
   num_points = (num_points <= max_points ? num_points : max_points);
   if (num_points > 0 && frm_corners_xy) {
-    memcpy(points, frm_corners_xy, sizeof(xy) * num_points);
+    memcpy(points, frm_corners_xy, sizeof(*frm_corners_xy) * num_points);
     free(frm_corners_xy);
     return num_points;
-  } else {
-    return 0;
   }
+  free(frm_corners_xy);  // detector output is released on this path as well
+  return 0;  // nothing was copied into points
 }
diff --git a/av1/encoder/corner_detect.h b/av1/encoder/corner_detect.h
index f658a6b..257f1db 100644
--- a/av1/encoder/corner_detect.h
+++ b/av1/encoder/corner_detect.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -15,7 +15,7 @@
 #include <stdlib.h>
 #include <memory.h>
 
-int FastCornerDetect(unsigned char *buf, int width, int height, int stride,
-                     int *points, int max_points);
+int fast_corner_detect(unsigned char *buf, int width, int height, int stride,
+                       int *points, int max_points);
 
-#endif  // AV1_ENCODER_CORNER_DETECT_H
+#endif  // AV1_ENCODER_CORNER_DETECT_H_
diff --git a/av1/encoder/corner_match.c b/av1/encoder/corner_match.c
index 02e8212..2cb282b 100644
--- a/av1/encoder/corner_match.c
+++ b/av1/encoder/corner_match.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -76,7 +76,7 @@
 static void improve_correspondence(unsigned char *frm, unsigned char *ref,
                                    int width, int height, int frm_stride,
                                    int ref_stride,
-                                   correspondence *correspondences,
+                                   Correspondence *correspondences,
                                    int num_correspondences) {
   int i;
   for (i = 0; i < num_correspondences; ++i) {
@@ -161,7 +161,7 @@
                              double *correspondence_pts) {
   // TODO(sarahparker) Improve this to include 2-way match
   int i, j;
-  correspondence *correspondences = (correspondence *)correspondence_pts;
+  Correspondence *correspondences = (Correspondence *)correspondence_pts;
   int num_correspondences = 0;
   for (i = 0; i < num_frm_corners; ++i) {
     double best_match_ncc = 0.0;
diff --git a/av1/encoder/corner_match.h b/av1/encoder/corner_match.h
index 01c0ea4..521c756 100644
--- a/av1/encoder/corner_match.h
+++ b/av1/encoder/corner_match.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -18,7 +18,7 @@
 typedef struct {
   double x, y;
   double rx, ry;
-} correspondence;
+} Correspondence;
 
 int determine_correspondence(unsigned char *frm, int *frm_corners,
                              int num_frm_corners, unsigned char *ref,
@@ -26,4 +26,4 @@
                              int height, int frm_stride, int ref_stride,
                              double *correspondence_pts);
 
-#endif  // AV1_ENCODER_CORNER_MATCH_H
+#endif  // AV1_ENCODER_CORNER_MATCH_H_
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 7de74d1..cfc4718 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4481,13 +4481,13 @@
   }
 }
 
-static void convert_to_params(double *H, TransformationType type,
+static void convert_to_params(const double *params, TransformationType type,
                               int16_t *model) {
   int i, diag_value;
   int alpha_present = 0;
   int n_params = n_trans_model_params[type];
-  model[0] = (int16_t)floor(H[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
-  model[1] = (int16_t)floor(H[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
+  model[0] = (int16_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
+  model[1] = (int16_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
   model[0] = (int16_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) *
              GM_TRANS_DECODE_FACTOR;
   model[1] = (int16_t)clamp(model[1], GM_TRANS_MIN, GM_TRANS_MAX) *
@@ -4495,7 +4495,7 @@
 
   for (i = 2; i < n_params; ++i) {
     diag_value = ((i && 1) ? (1 << GM_ALPHA_PREC_BITS) : 0);
-    model[i] = (int16_t)floor(H[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
+    model[i] = (int16_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
     model[i] =
         (int16_t)(clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX) +
                   diag_value) *
@@ -4511,11 +4511,12 @@
   }
 }
 
-static void convert_model_to_params(double *H, TransformationType type,
+static void convert_model_to_params(const double *params,
+                                    TransformationType type,
                                     Global_Motion_Params *model) {
   // TODO(sarahparker) implement for homography
   if (type > HOMOGRAPHY)
-    convert_to_params(H, type, (int16_t *)model->motion_params.wmmat);
+    convert_to_params(params, type, (int16_t *)model->motion_params.wmmat);
   model->gmtype = get_gmtype(model);
   model->motion_params.wmtype = gm_to_trans_type(model->gmtype);
 }
@@ -4547,13 +4548,14 @@
   if (cpi->common.frame_type == INTER_FRAME && cpi->Source) {
     YV12_BUFFER_CONFIG *ref_buf;
     int frame;
-    double H[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    double erroradvantage = 0;
+    double params[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
     for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
       ref_buf = get_ref_frame_buffer(cpi, frame);
       if (ref_buf) {
         if (compute_global_motion_feature_based(GLOBAL_MOTION_MODEL,
-                                                cpi->Source, ref_buf, H)) {
-          convert_model_to_params(H, GLOBAL_MOTION_MODEL,
+                                                cpi->Source, ref_buf, params)) {
+          convert_model_to_params(params, GLOBAL_MOTION_MODEL,
                                   &cm->global_motion[frame]);
           if (get_gmtype(&cm->global_motion[frame]) > GLOBAL_ZERO) {
             refine_integerized_param(
@@ -4565,7 +4567,7 @@
                 ref_buf->y_stride, cpi->Source->y_buffer, cpi->Source->y_width,
                 cpi->Source->y_height, cpi->Source->y_stride, 3);
             // compute the advantage of using gm parameters over 0 motion
-            double erroradvantage = av1_warp_erroradv(
+            erroradvantage = av1_warp_erroradv(
                 &cm->global_motion[frame].motion_params,
 #if CONFIG_AOM_HIGHBITDEPTH
                 xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 86503d7..1d1f28b 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -572,6 +572,9 @@
 #if CONFIG_EXT_INTRA
   int intra_filter_cost[INTRA_FILTERS + 1][INTRA_FILTERS];
 #endif  // CONFIG_EXT_INTRA
+#if CONFIG_LOOP_RESTORATION
+  int switchable_restore_cost[RESTORE_SWITCHABLE_TYPES];
+#endif  // CONFIG_LOOP_RESTORATION
 
   int multi_arf_allowed;
   int multi_arf_enabled;
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
index cda6aaf..5d88dbf 100644
--- a/av1/encoder/global_motion.c
+++ b/av1/encoder/global_motion.c
@@ -25,12 +25,12 @@
 #define MAX_CORNERS 4096
 #define MIN_INLIER_PROB 0.1
 
-INLINE RansacType get_ransac_type(TransformationType type) {
+INLINE RansacFunc get_ransac_type(TransformationType type) {
   switch (type) {
-    case HOMOGRAPHY: return ransacHomography;
-    case AFFINE: return ransacAffine;
-    case ROTZOOM: return ransacRotZoom;
-    case TRANSLATION: return ransacTranslation;
+    case HOMOGRAPHY: return ransac_homography;
+    case AFFINE: return ransac_affine;
+    case ROTZOOM: return ransac_rotzoom;
+    case TRANSLATION: return ransac_translation;
     default: assert(0); return NULL;
   }
 }
@@ -42,7 +42,7 @@
                                         int *inlier_map) {
   int result;
   int num_inliers = 0;
-  RansacType ransac = get_ransac_type(type);
+  RansacFunc ransac = get_ransac_type(type);
   if (ransac == NULL) return 0;
 
   result = ransac(correspondences, num_correspondences, &num_inliers,
@@ -66,10 +66,12 @@
   int *inlier_map = NULL;
 
   // compute interest points in images using FAST features
-  num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width, frm->y_height,
-                                     frm->y_stride, frm_corners, MAX_CORNERS);
-  num_ref_corners = FastCornerDetect(ref->y_buffer, ref->y_width, ref->y_height,
-                                     ref->y_stride, ref_corners, MAX_CORNERS);
+  num_frm_corners =
+      fast_corner_detect(frm->y_buffer, frm->y_width, frm->y_height,
+                         frm->y_stride, frm_corners, MAX_CORNERS);
+  num_ref_corners =
+      fast_corner_detect(ref->y_buffer, ref->y_width, ref->y_height,
+                         ref->y_stride, ref_corners, MAX_CORNERS);
 
   // find correspondences between the two images
   correspondences =
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 22bd019..00e46a6 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -28,7 +28,53 @@
 #include "av1/encoder/pickrst.h"
 #include "av1/encoder/quantize.h"
 
-static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *sd,
+const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };
+
+static int64_t sse_restoration_tile(const YV12_BUFFER_CONFIG *src,
+                                    AV1_COMMON *const cm, int h_start,
+                                    int width, int v_start, int height) {
+  int64_t filt_err;  // Y SSE of src vs. cm->frame_to_show over the window
+#if CONFIG_AOM_HIGHBITDEPTH
+  if (cm->use_highbitdepth) {  // select the highbd SSE variant when flagged
+    filt_err = aom_highbd_get_y_sse_part(src, cm->frame_to_show, h_start, width,
+                                         v_start, height);
+  } else {
+    filt_err = aom_get_y_sse_part(src, cm->frame_to_show, h_start, width,
+                                  v_start, height);
+  }
+#else  // !CONFIG_AOM_HIGHBITDEPTH
+  filt_err = aom_get_y_sse_part(src, cm->frame_to_show, h_start, width, v_start,
+                                height);
+#endif  // CONFIG_AOM_HIGHBITDEPTH
+  return filt_err;
+}
+
+static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src,
+                                    AV1_COMP *const cpi, RestorationInfo *rsi,
+                                    int partial_frame, int tile_idx,
+                                    int subtile_idx, int subtile_bits) {
+  AV1_COMMON *const cm = &cpi->common;
+  int64_t filt_err;  // SSE measured over the selected (sub)tile only
+  int tile_width, tile_height, nhtiles, nvtiles;
+  int h_start, h_end, v_start, v_end;
+  const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width,
+                                         &tile_height, &nhtiles, &nvtiles);
+  (void)ntiles;  // only the tile-geometry out-params are used below
+
+  av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, 1, partial_frame);
+  av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, nhtiles,
+                           nvtiles, tile_width, tile_height, cm->width,
+                           cm->height, 0, 0, &h_start, &h_end, &v_start,
+                           &v_end);
+  filt_err = sse_restoration_tile(src, cm, h_start, h_end - h_start, v_start,
+                                  v_end - v_start);
+
+  // Undo the filtering: copy cpi->last_frame_db back into frame_to_show
+  aom_yv12_copy_y(&cpi->last_frame_db, cm->frame_to_show);
+  return filt_err;
+}
+
+static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src,
                                      AV1_COMP *const cpi, RestorationInfo *rsi,
                                      int partial_frame) {
   AV1_COMMON *const cm = &cpi->common;
@@ -36,12 +82,12 @@
   av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, 1, partial_frame);
 #if CONFIG_AOM_HIGHBITDEPTH
   if (cm->use_highbitdepth) {
-    filt_err = aom_highbd_get_y_sse(sd, cm->frame_to_show);
+    filt_err = aom_highbd_get_y_sse(src, cm->frame_to_show);
   } else {
-    filt_err = aom_get_y_sse(sd, cm->frame_to_show);
+    filt_err = aom_get_y_sse(src, cm->frame_to_show);
   }
 #else
-  filt_err = aom_get_y_sse(sd, cm->frame_to_show);
+  filt_err = aom_get_y_sse(src, cm->frame_to_show);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
   // Re-instate the unfiltered frame
@@ -49,20 +95,24 @@
   return filt_err;
 }
 
-static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
+static int search_bilateral_level(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
                                   int filter_level, int partial_frame,
-                                  int *bilateral_level, double *best_cost_ret) {
+                                  int *bilateral_level, double *best_cost_ret,
+                                  double *best_tile_cost) {
   AV1_COMMON *const cm = &cpi->common;
   int i, j, tile_idx;
   int64_t err;
   int bits;
-  double cost, best_cost, cost_norestore, cost_bilateral;
+  double cost, best_cost, cost_norestore, cost_bilateral,
+      cost_norestore_subtile;
   const int bilateral_level_bits = av1_bilateral_level_bits(&cpi->common);
   const int bilateral_levels = 1 << bilateral_level_bits;
   MACROBLOCK *x = &cpi->td.mb;
   RestorationInfo rsi;
-  const int ntiles =
-      av1_get_restoration_ntiles(BILATERAL_TILESIZE, cm->width, cm->height);
+  int tile_width, tile_height, nhtiles, nvtiles;
+  int h_start, h_end, v_start, v_end;
+  const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width,
+                                         &tile_height, &nhtiles, &nvtiles);
 
   //  Make a copy of the unfiltered / processed recon buffer
   aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
@@ -71,53 +121,94 @@
   aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);
 
   // RD cost associated with no restoration
-  rsi.restoration_type = RESTORE_NONE;
-  err = try_restoration_frame(sd, cpi, &rsi, partial_frame);
-  bits = 0;
-  cost_norestore =
-      RDCOST_DBL(x->rdmult, x->rddiv, (bits << (AV1_PROB_COST_SHIFT - 4)), err);
-  best_cost = cost_norestore;
+  rsi.frame_restoration_type = RESTORE_NONE;
+  err = try_restoration_frame(src, cpi, &rsi, partial_frame);
+  // err = sse_restoration_tile(src, cm, 0, cm->width, 0, cm->height);
+  bits = frame_level_restore_bits[rsi.frame_restoration_type]
+         << AV1_PROB_COST_SHIFT;
+  cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
 
   // RD cost associated with bilateral filtering
-  rsi.restoration_type = RESTORE_BILATERAL;
-  rsi.bilateral_level =
-      (int *)aom_malloc(sizeof(*rsi.bilateral_level) * ntiles);
+  rsi.frame_restoration_type = RESTORE_BILATERAL;
+  rsi.bilateral_level = (int *)aom_malloc(sizeof(*rsi.bilateral_level) *
+                                          ntiles * BILATERAL_SUBTILES);
   assert(rsi.bilateral_level != NULL);
 
-  for (j = 0; j < ntiles; ++j) bilateral_level[j] = -1;
+  for (j = 0; j < ntiles * BILATERAL_SUBTILES; ++j) bilateral_level[j] = -1;
 
+  // TODO(debargha): This is a pretty inefficient way to find the best
+  // parameters per tile. Needs fixing.
   // Find best filter for each tile
   for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
-    for (j = 0; j < ntiles; ++j) rsi.bilateral_level[j] = -1;
-    best_cost = cost_norestore;
-    for (i = 0; i < bilateral_levels; ++i) {
-      rsi.bilateral_level[tile_idx] = i;
-      err = try_restoration_frame(sd, cpi, &rsi, partial_frame);
-      bits = bilateral_level_bits + 1;
-      // Normally the rate is rate in bits * 256 and dist is sum sq err * 64
-      // when RDCOST is used.  However below we just scale both in the correct
-      // ratios appropriately but not exactly by these values.
-      cost = RDCOST_DBL(x->rdmult, x->rddiv,
-                        (bits << (AV1_PROB_COST_SHIFT - 4)), err);
-      if (cost < best_cost) {
-        bilateral_level[tile_idx] = i;
-        best_cost = cost;
+    int subtile_idx;
+    for (subtile_idx = 0; subtile_idx < BILATERAL_SUBTILES; ++subtile_idx) {
+      const int fulltile_idx = tile_idx * BILATERAL_SUBTILES + subtile_idx;
+      av1_get_rest_tile_limits(tile_idx, subtile_idx, BILATERAL_SUBTILE_BITS,
+                               nhtiles, nvtiles, tile_width, tile_height,
+                               cm->width, cm->height, 0, 0, &h_start, &h_end,
+                               &v_start, &v_end);
+      err = sse_restoration_tile(src, cm, h_start, h_end - h_start, v_start,
+                                 v_end - v_start);
+#if BILATERAL_SUBTILES
+      // #bits when a subtile is not restored
+      bits = av1_cost_bit(RESTORE_NONE_BILATERAL_PROB, 0);
+#else
+      bits = 0;
+#endif
+      cost_norestore_subtile =
+          RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+      best_cost = cost_norestore_subtile;
+      for (j = 0; j < ntiles * BILATERAL_SUBTILES; ++j)
+        rsi.bilateral_level[j] = -1;
+
+      for (i = 0; i < bilateral_levels; ++i) {
+        rsi.bilateral_level[fulltile_idx] = i;
+        err = try_restoration_tile(src, cpi, &rsi, partial_frame, tile_idx,
+                                   subtile_idx, BILATERAL_SUBTILE_BITS);
+        bits = bilateral_level_bits << AV1_PROB_COST_SHIFT;
+        bits += av1_cost_bit(RESTORE_NONE_BILATERAL_PROB, 1);
+        cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+        if (cost < best_cost) {
+          bilateral_level[fulltile_idx] = i;
+          best_cost = cost;
+        }
       }
     }
+    if (best_tile_cost) {
+      bits = 0;
+      for (j = 0; j < ntiles * BILATERAL_SUBTILES; ++j)
+        rsi.bilateral_level[j] = -1;
+      for (subtile_idx = 0; subtile_idx < BILATERAL_SUBTILES; ++subtile_idx) {
+        const int fulltile_idx = tile_idx * BILATERAL_SUBTILES + subtile_idx;
+        rsi.bilateral_level[fulltile_idx] = bilateral_level[fulltile_idx];
+        if (rsi.bilateral_level[fulltile_idx] >= 0)
+          bits += bilateral_level_bits << AV1_PROB_COST_SHIFT;
+#if BILATERAL_SUBTILES
+        bits += av1_cost_bit(RESTORE_NONE_BILATERAL_PROB,
+                             rsi.bilateral_level[fulltile_idx] >= 0);
+#endif
+      }
+      err = try_restoration_tile(src, cpi, &rsi, partial_frame, tile_idx, 0, 0);
+      best_tile_cost[tile_idx] = RDCOST_DBL(
+          x->rdmult, x->rddiv,
+          (bits + cpi->switchable_restore_cost[RESTORE_BILATERAL]) >> 4, err);
+    }
   }
   // Find cost for combined configuration
-  bits = 0;
-  for (j = 0; j < ntiles; ++j) {
+  bits = frame_level_restore_bits[rsi.frame_restoration_type]
+         << AV1_PROB_COST_SHIFT;
+  for (j = 0; j < ntiles * BILATERAL_SUBTILES; ++j) {
     rsi.bilateral_level[j] = bilateral_level[j];
     if (rsi.bilateral_level[j] >= 0) {
-      bits += (bilateral_level_bits + 1);
-    } else {
-      bits += 1;
+      bits += bilateral_level_bits << AV1_PROB_COST_SHIFT;
     }
+#if BILATERAL_SUBTILES
+    bits +=
+        av1_cost_bit(RESTORE_NONE_BILATERAL_PROB, rsi.bilateral_level[j] >= 0);
+#endif
   }
-  err = try_restoration_frame(sd, cpi, &rsi, partial_frame);
-  cost_bilateral =
-      RDCOST_DBL(x->rdmult, x->rddiv, (bits << (AV1_PROB_COST_SHIFT - 4)), err);
+  err = try_restoration_frame(src, cpi, &rsi, partial_frame);
+  cost_bilateral = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
 
   aom_free(rsi.bilateral_level);
 
@@ -131,10 +222,11 @@
   }
 }
 
-static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
+static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *src,
                                          AV1_COMP *cpi, int partial_frame,
                                          int *filter_best, int *bilateral_level,
-                                         double *best_cost_ret) {
+                                         double *best_cost_ret,
+                                         double *best_tile_cost) {
   const AV1_COMMON *const cm = &cpi->common;
   const struct loopfilter *const lf = &cm->lf;
   const int min_filter_level = 0;
@@ -147,7 +239,8 @@
   int bilateral_success[MAX_LOOP_FILTER + 1];
 
   const int ntiles =
-      av1_get_restoration_ntiles(BILATERAL_TILESIZE, cm->width, cm->height);
+      av1_get_rest_ntiles(cm->width, cm->height, NULL, NULL, NULL, NULL);
+  double *tile_cost = (double *)aom_malloc(sizeof(*tile_cost) * ntiles);
 
   // Start the search at the previous frame filter level unless it is now out of
   // range.
@@ -157,13 +250,14 @@
   // Set each entry to -1
   for (i = 0; i <= MAX_LOOP_FILTER; ++i) ss_err[i] = -1.0;
 
-  tmp_level = (int *)aom_malloc(sizeof(*tmp_level) * ntiles);
+  tmp_level =
+      (int *)aom_malloc(sizeof(*tmp_level) * ntiles * BILATERAL_SUBTILES);
 
   bilateral_success[filt_mid] = search_bilateral_level(
-      sd, cpi, filt_mid, partial_frame, tmp_level, &best_err);
+      src, cpi, filt_mid, partial_frame, tmp_level, &best_err, best_tile_cost);
   filt_best = filt_mid;
   ss_err[filt_mid] = best_err;
-  for (j = 0; j < ntiles; ++j) {
+  for (j = 0; j < ntiles * BILATERAL_SUBTILES; ++j) {
     bilateral_level[j] = tmp_level[j];
   }
 
@@ -183,8 +277,9 @@
     if (filt_direction <= 0 && filt_low != filt_mid) {
       // Get Low filter error score
       if (ss_err[filt_low] < 0) {
-        bilateral_success[filt_low] = search_bilateral_level(
-            sd, cpi, filt_low, partial_frame, tmp_level, &ss_err[filt_low]);
+        bilateral_success[filt_low] =
+            search_bilateral_level(src, cpi, filt_low, partial_frame, tmp_level,
+                                   &ss_err[filt_low], tile_cost);
       }
       // If value is close to the best so far then bias towards a lower loop
       // filter value.
@@ -194,26 +289,29 @@
           best_err = ss_err[filt_low];
         }
         filt_best = filt_low;
-        for (j = 0; j < ntiles; ++j) {
+        for (j = 0; j < ntiles * BILATERAL_SUBTILES; ++j) {
           bilateral_level[j] = tmp_level[j];
         }
+        memcpy(best_tile_cost, tile_cost, sizeof(*tile_cost) * ntiles);
       }
     }
 
     // Now look at filt_high
     if (filt_direction >= 0 && filt_high != filt_mid) {
       if (ss_err[filt_high] < 0) {
-        bilateral_success[filt_high] = search_bilateral_level(
-            sd, cpi, filt_high, partial_frame, tmp_level, &ss_err[filt_high]);
+        bilateral_success[filt_high] =
+            search_bilateral_level(src, cpi, filt_high, partial_frame,
+                                   tmp_level, &ss_err[filt_high], tile_cost);
       }
       // If value is significantly better than previous best, bias added against
       // raising filter value
       if (ss_err[filt_high] < (best_err - bias)) {
         best_err = ss_err[filt_high];
         filt_best = filt_high;
-        for (j = 0; j < ntiles; ++j) {
+        for (j = 0; j < ntiles * BILATERAL_SUBTILES; ++j) {
           bilateral_level[j] = tmp_level[j];
         }
+        memcpy(best_tile_cost, tile_cost, sizeof(*tile_cost) * ntiles);
       }
     }
 
@@ -226,12 +324,11 @@
       filt_mid = filt_best;
     }
   }
-
   aom_free(tmp_level);
+  aom_free(tile_cost);
 
   // Update best error
   best_err = ss_err[filt_best];
-
   if (best_cost_ret) *best_cost_ret = best_err;
   if (filter_best) *filter_best = filt_best;
 
@@ -546,14 +643,15 @@
 
 static int search_wiener_filter(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
                                 int filter_level, int partial_frame,
-                                int (*vfilter)[RESTORATION_HALFWIN],
-                                int (*hfilter)[RESTORATION_HALFWIN],
-                                int *process_tile, double *best_cost_ret) {
+                                int (*vfilter)[RESTORATION_WIN],
+                                int (*hfilter)[RESTORATION_WIN],
+                                int *wiener_level, double *best_cost_ret,
+                                double *best_tile_cost) {
   AV1_COMMON *const cm = &cpi->common;
   RestorationInfo rsi;
   int64_t err;
   int bits;
-  double cost_wiener, cost_norestore;
+  double cost_wiener, cost_norestore, cost_norestore_tile;
   MACROBLOCK *x = &cpi->td.mb;
   double M[RESTORATION_WIN2];
   double H[RESTORATION_WIN2 * RESTORATION_WIN2];
@@ -564,56 +662,55 @@
   const int src_stride = src->y_stride;
   const int dgd_stride = dgd->y_stride;
   double score;
-  int tile_idx, htile_idx, vtile_idx, tile_width, tile_height, nhtiles, nvtiles;
+  int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
   int h_start, h_end, v_start, v_end;
   int i, j;
 
-  const int tilesize = WIENER_TILESIZE;
-  const int ntiles = av1_get_restoration_ntiles(tilesize, width, height);
-
+  const int ntiles = av1_get_rest_ntiles(width, height, &tile_width,
+                                         &tile_height, &nhtiles, &nvtiles);
   assert(width == dgd->y_crop_width);
   assert(height == dgd->y_crop_height);
   assert(width == src->y_crop_width);
   assert(height == src->y_crop_height);
 
-  av1_get_restoration_tile_size(tilesize, width, height, &tile_width,
-                                &tile_height, &nhtiles, &nvtiles);
-
   //  Make a copy of the unfiltered / processed recon buffer
   aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
   av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level,
                         1, partial_frame);
   aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);
 
-  rsi.restoration_type = RESTORE_NONE;
-  err = try_restoration_frame(src, cpi, &rsi, partial_frame);
-  bits = 0;
-  cost_norestore =
-      RDCOST_DBL(x->rdmult, x->rddiv, (bits << (AV1_PROB_COST_SHIFT - 4)), err);
+  rsi.frame_restoration_type = RESTORE_NONE;
+  err = sse_restoration_tile(src, cm, 0, cm->width, 0, cm->height);
+  bits = frame_level_restore_bits[rsi.frame_restoration_type]
+         << AV1_PROB_COST_SHIFT;
+  cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
 
-  rsi.restoration_type = RESTORE_WIENER;
+  rsi.frame_restoration_type = RESTORE_WIENER;
   rsi.vfilter =
-      (int(*)[RESTORATION_HALFWIN])aom_malloc(sizeof(*rsi.vfilter) * ntiles);
+      (int(*)[RESTORATION_WIN])aom_malloc(sizeof(*rsi.vfilter) * ntiles);
   assert(rsi.vfilter != NULL);
   rsi.hfilter =
-      (int(*)[RESTORATION_HALFWIN])aom_malloc(sizeof(*rsi.hfilter) * ntiles);
+      (int(*)[RESTORATION_WIN])aom_malloc(sizeof(*rsi.hfilter) * ntiles);
   assert(rsi.hfilter != NULL);
   rsi.wiener_level = (int *)aom_malloc(sizeof(*rsi.wiener_level) * ntiles);
   assert(rsi.wiener_level != NULL);
 
   // Compute best Wiener filters for each tile
   for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
-    htile_idx = tile_idx % nhtiles;
-    vtile_idx = tile_idx / nhtiles;
-    h_start =
-        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    h_end = (htile_idx < nhtiles - 1) ? ((htile_idx + 1) * tile_width)
-                                      : (width - RESTORATION_HALFWIN);
-    v_start =
-        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
-    v_end = (vtile_idx < nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
-                                      : (height - RESTORATION_HALFWIN);
+    wiener_level[tile_idx] = 0;
+    av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
+                             tile_height, width, height, 0, 0, &h_start, &h_end,
+                             &v_start, &v_end);
+    err = sse_restoration_tile(src, cm, h_start, h_end - h_start, v_start,
+                               v_end - v_start);
+    // #bits when a tile is not restored
+    bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0);
+    cost_norestore_tile = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+    if (best_tile_cost) best_tile_cost[tile_idx] = cost_norestore_tile;
 
+    av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
+                             tile_height, width, height, 1, 1, &h_start, &h_end,
+                             &v_start, &v_end);
 #if CONFIG_AOM_HIGHBITDEPTH
     if (cm->use_highbitdepth)
       compute_stats_highbd(dgd->y_buffer, src->y_buffer, h_start, h_end,
@@ -626,12 +723,12 @@
     if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) {
       for (i = 0; i < RESTORATION_HALFWIN; ++i)
         rsi.vfilter[tile_idx][i] = rsi.hfilter[tile_idx][i] = 0;
-      process_tile[tile_idx] = 0;
+      wiener_level[tile_idx] = 0;
       continue;
     }
     quantize_sym_filter(vfilterd, rsi.vfilter[tile_idx]);
     quantize_sym_filter(hfilterd, rsi.hfilter[tile_idx]);
-    process_tile[tile_idx] = 1;
+    wiener_level[tile_idx] = 1;
 
     // Filter score computes the value of the function x'*A*x - x'*b for the
     // learned filter and compares it against identity filer. If there is no
@@ -640,31 +737,41 @@
     if (score > 0.0) {
       for (i = 0; i < RESTORATION_HALFWIN; ++i)
         rsi.vfilter[tile_idx][i] = rsi.hfilter[tile_idx][i] = 0;
-      process_tile[tile_idx] = 0;
+      wiener_level[tile_idx] = 0;
       continue;
     }
 
     for (j = 0; j < ntiles; ++j) rsi.wiener_level[j] = 0;
     rsi.wiener_level[tile_idx] = 1;
 
-    err = try_restoration_frame(src, cpi, &rsi, partial_frame);
-    bits = 1 + WIENER_FILT_BITS;
-    cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv,
-                             (bits << (AV1_PROB_COST_SHIFT - 4)), err);
-    if (cost_wiener >= cost_norestore) process_tile[tile_idx] = 0;
+    err = try_restoration_tile(src, cpi, &rsi, partial_frame, tile_idx, 0, 0);
+    bits = WIENER_FILT_BITS << AV1_PROB_COST_SHIFT;
+    bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1);
+    cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+    if (cost_wiener >= cost_norestore_tile) wiener_level[tile_idx] = 0;
+    if (best_tile_cost) {
+      bits = WIENER_FILT_BITS << AV1_PROB_COST_SHIFT;
+      best_tile_cost[tile_idx] = RDCOST_DBL(
+          x->rdmult, x->rddiv,
+          (bits + cpi->switchable_restore_cost[RESTORE_WIENER]) >> 4, err);
+    }
   }
   // Cost for Wiener filtering
-  bits = 0;
+  bits = frame_level_restore_bits[rsi.frame_restoration_type]
+         << AV1_PROB_COST_SHIFT;
   for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
-    bits += (process_tile[tile_idx] ? (WIENER_FILT_BITS + 1) : 1);
-    rsi.wiener_level[tile_idx] = process_tile[tile_idx];
+    bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, wiener_level[tile_idx]);
+    if (wiener_level[tile_idx])
+      bits += (WIENER_FILT_BITS << AV1_PROB_COST_SHIFT);
+    rsi.wiener_level[tile_idx] = wiener_level[tile_idx];
   }
+  // TODO(debargha): This is a pretty inefficient way to find the error
+  // for the whole frame. Specialize for a specific tile.
   err = try_restoration_frame(src, cpi, &rsi, partial_frame);
-  cost_wiener =
-      RDCOST_DBL(x->rdmult, x->rddiv, (bits << (AV1_PROB_COST_SHIFT - 4)), err);
+  cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
 
   for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
-    if (process_tile[tile_idx] == 0) continue;
+    if (wiener_level[tile_idx] == 0) continue;
     for (i = 0; i < RESTORATION_HALFWIN; ++i) {
       vfilter[tile_idx][i] = rsi.vfilter[tile_idx][i];
       hfilter[tile_idx][i] = rsi.hfilter[tile_idx][i];
@@ -685,40 +792,125 @@
   }
 }
 
-void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
+static int search_switchable_restoration(
+    const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int filter_level,
+    int partial_frame, RestorationInfo *rsi, double *tile_cost_bilateral,
+    double *tile_cost_wiener, double *best_cost_ret) {
+  AV1_COMMON *const cm = &cpi->common;
+  const int bilateral_level_bits = av1_bilateral_level_bits(&cpi->common);
+  MACROBLOCK *x = &cpi->td.mb;
+  double err, cost_norestore, cost_norestore_tile, cost_switchable;
+  int bits, tile_idx;
+  int tile_width, tile_height, nhtiles, nvtiles;
+  int h_start, h_end, v_start, v_end;
+  const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width,
+                                         &tile_height, &nhtiles, &nvtiles);
+
+  //  Make a copy of the unfiltered / processed recon buffer
+  aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+  av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level,
+                        1, partial_frame);
+  aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);
+
+  // RD cost associated with no restoration
+  rsi->frame_restoration_type = RESTORE_NONE;
+  err = sse_restoration_tile(src, cm, 0, cm->width, 0, cm->height);
+  bits = frame_level_restore_bits[rsi->frame_restoration_type]
+         << AV1_PROB_COST_SHIFT;
+  cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+
+  rsi->frame_restoration_type = RESTORE_SWITCHABLE;
+  bits = frame_level_restore_bits[rsi->frame_restoration_type]
+         << AV1_PROB_COST_SHIFT;
+  for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
+    av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
+                             tile_height, cm->width, cm->height, 0, 0, &h_start,
+                             &h_end, &v_start, &v_end);
+    err = sse_restoration_tile(src, cm, h_start, h_end - h_start, v_start,
+                               v_end - v_start);
+    cost_norestore_tile =
+        RDCOST_DBL(x->rdmult, x->rddiv,
+                   (cpi->switchable_restore_cost[RESTORE_NONE] >> 4), err);
+    if (tile_cost_wiener[tile_idx] > cost_norestore_tile &&
+        tile_cost_bilateral[tile_idx] > cost_norestore_tile) {
+      rsi->restoration_type[tile_idx] = RESTORE_NONE;
+    } else {
+      rsi->restoration_type[tile_idx] =
+          tile_cost_wiener[tile_idx] < tile_cost_bilateral[tile_idx]
+              ? RESTORE_WIENER
+              : RESTORE_BILATERAL;
+      if (rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
+        if (rsi->wiener_level[tile_idx]) {
+          bits += (WIENER_FILT_BITS << AV1_PROB_COST_SHIFT);
+        } else {
+          rsi->restoration_type[tile_idx] = RESTORE_NONE;
+        }
+      } else {
+        int s;
+        for (s = 0; s < BILATERAL_SUBTILES; ++s) {
+#if BILATERAL_SUBTILES
+          bits += av1_cost_bit(
+              RESTORE_NONE_BILATERAL_PROB,
+              rsi->bilateral_level[tile_idx * BILATERAL_SUBTILES + s] >= 0);
+#endif
+          if (rsi->bilateral_level[tile_idx * BILATERAL_SUBTILES + s] >= 0)
+            bits += bilateral_level_bits << AV1_PROB_COST_SHIFT;
+        }
+      }
+    }
+    bits += cpi->switchable_restore_cost[rsi->restoration_type[tile_idx]];
+  }
+  err = try_restoration_frame(src, cpi, rsi, partial_frame);
+  cost_switchable = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+  aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+  if (cost_switchable < cost_norestore) {
+    *best_cost_ret = cost_switchable;
+    return 1;
+  } else {
+    *best_cost_ret = cost_norestore;
+    return 0;
+  }
+}
+
+void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
                                  LPF_PICK_METHOD method) {
   AV1_COMMON *const cm = &cpi->common;
   struct loopfilter *const lf = &cm->lf;
   int wiener_success = 0;
   int bilateral_success = 0;
+  int switchable_success = 0;
   double cost_bilateral = DBL_MAX;
   double cost_wiener = DBL_MAX;
-  double cost_norestore = DBL_MAX;
-  int ntiles;
-
-  ntiles =
-      av1_get_restoration_ntiles(BILATERAL_TILESIZE, cm->width, cm->height);
-  cm->rst_info.bilateral_level =
-      (int *)aom_realloc(cm->rst_info.bilateral_level,
-                         sizeof(*cm->rst_info.bilateral_level) * ntiles);
+  // double cost_norestore = DBL_MAX;
+  double cost_switchable = DBL_MAX;
+  double *tile_cost_bilateral, *tile_cost_wiener;
+  const int ntiles =
+      av1_get_rest_ntiles(cm->width, cm->height, NULL, NULL, NULL, NULL);
+  cm->rst_info.restoration_type = (RestorationType *)aom_realloc(
+      cm->rst_info.restoration_type,
+      sizeof(*cm->rst_info.restoration_type) * ntiles);
+  cm->rst_info.bilateral_level = (int *)aom_realloc(
+      cm->rst_info.bilateral_level,
+      sizeof(*cm->rst_info.bilateral_level) * ntiles * BILATERAL_SUBTILES);
   assert(cm->rst_info.bilateral_level != NULL);
 
-  ntiles = av1_get_restoration_ntiles(WIENER_TILESIZE, cm->width, cm->height);
   cm->rst_info.wiener_level = (int *)aom_realloc(
       cm->rst_info.wiener_level, sizeof(*cm->rst_info.wiener_level) * ntiles);
   assert(cm->rst_info.wiener_level != NULL);
-  cm->rst_info.vfilter = (int(*)[RESTORATION_HALFWIN])aom_realloc(
+  cm->rst_info.vfilter = (int(*)[RESTORATION_WIN])aom_realloc(
       cm->rst_info.vfilter, sizeof(*cm->rst_info.vfilter) * ntiles);
   assert(cm->rst_info.vfilter != NULL);
-  cm->rst_info.hfilter = (int(*)[RESTORATION_HALFWIN])aom_realloc(
+  cm->rst_info.hfilter = (int(*)[RESTORATION_WIN])aom_realloc(
       cm->rst_info.hfilter, sizeof(*cm->rst_info.hfilter) * ntiles);
   assert(cm->rst_info.hfilter != NULL);
+  tile_cost_wiener = (double *)aom_malloc(sizeof(cost_wiener) * ntiles);
+  tile_cost_bilateral = (double *)aom_malloc(sizeof(cost_bilateral) * ntiles);
 
   lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
 
   if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
     lf->filter_level = 0;
-    cm->rst_info.restoration_type = RESTORE_NONE;
+    cm->rst_info.frame_restoration_type = RESTORE_NONE;
   } else if (method >= LPF_PICK_FROM_Q) {
     const int min_filter_level = 0;
     const int max_filter_level = av1_get_max_filter_level(cpi);
@@ -749,60 +941,51 @@
     if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
     lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
     bilateral_success = search_bilateral_level(
-        sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
-        cm->rst_info.bilateral_level, &cost_bilateral);
+        src, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+        cm->rst_info.bilateral_level, &cost_bilateral, tile_cost_bilateral);
     wiener_success = search_wiener_filter(
-        sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+        src, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
         cm->rst_info.vfilter, cm->rst_info.hfilter, cm->rst_info.wiener_level,
-        &cost_wiener);
-    if (cost_bilateral < cost_wiener) {
-      if (bilateral_success)
-        cm->rst_info.restoration_type = RESTORE_BILATERAL;
-      else
-        cm->rst_info.restoration_type = RESTORE_NONE;
-    } else {
-      if (wiener_success)
-        cm->rst_info.restoration_type = RESTORE_WIENER;
-      else
-        cm->rst_info.restoration_type = RESTORE_NONE;
-    }
+        &cost_wiener, tile_cost_wiener);
+    switchable_success = search_switchable_restoration(
+        src, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+        &cm->rst_info, tile_cost_bilateral, tile_cost_wiener, &cost_switchable);
   } else {
-    int blf_filter_level = -1;
+    // lf->filter_level = av1_search_filter_level(
+    //     src, cpi, method == LPF_PICK_FROM_SUBIMAGE, &cost_norestore);
+    // bilateral_success = search_bilateral_level(
+    //     src, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+    //     cm->rst_info.bilateral_level, &cost_bilateral, tile_cost_bilateral);
     bilateral_success = search_filter_bilateral_level(
-        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, &blf_filter_level,
-        cm->rst_info.bilateral_level, &cost_bilateral);
-    lf->filter_level = av1_search_filter_level(
-        sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, &cost_norestore);
+        src, cpi, method == LPF_PICK_FROM_SUBIMAGE, &lf->filter_level,
+        cm->rst_info.bilateral_level, &cost_bilateral, tile_cost_bilateral);
     wiener_success = search_wiener_filter(
-        sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+        src, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
         cm->rst_info.vfilter, cm->rst_info.hfilter, cm->rst_info.wiener_level,
-        &cost_wiener);
-    if (cost_bilateral < cost_wiener) {
-      lf->filter_level = blf_filter_level;
-      if (bilateral_success)
-        cm->rst_info.restoration_type = RESTORE_BILATERAL;
-      else
-        cm->rst_info.restoration_type = RESTORE_NONE;
-    } else {
-      if (wiener_success)
-        cm->rst_info.restoration_type = RESTORE_WIENER;
-      else
-        cm->rst_info.restoration_type = RESTORE_NONE;
-    }
-    // printf("[%d] Costs %g %g (%d) %g (%d)\n", cm->rst_info.restoration_type,
-    //        cost_norestore, cost_bilateral, lf->filter_level, cost_wiener,
-    //        wiener_success);
+        &cost_wiener, tile_cost_wiener);
+    switchable_success = search_switchable_restoration(
+        src, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+        &cm->rst_info, tile_cost_bilateral, tile_cost_wiener, &cost_switchable);
   }
-  if (cm->rst_info.restoration_type != RESTORE_BILATERAL) {
-    aom_free(cm->rst_info.bilateral_level);
-    cm->rst_info.bilateral_level = NULL;
+  if (cost_bilateral < AOMMIN(cost_wiener, cost_switchable)) {
+    if (bilateral_success)
+      cm->rst_info.frame_restoration_type = RESTORE_BILATERAL;
+    else
+      cm->rst_info.frame_restoration_type = RESTORE_NONE;
+  } else if (cost_wiener < AOMMIN(cost_bilateral, cost_switchable)) {
+    if (wiener_success)
+      cm->rst_info.frame_restoration_type = RESTORE_WIENER;
+    else
+      cm->rst_info.frame_restoration_type = RESTORE_NONE;
+  } else {
+    if (switchable_success)
+      cm->rst_info.frame_restoration_type = RESTORE_SWITCHABLE;
+    else
+      cm->rst_info.frame_restoration_type = RESTORE_NONE;
   }
-  if (cm->rst_info.restoration_type != RESTORE_WIENER) {
-    aom_free(cm->rst_info.vfilter);
-    cm->rst_info.vfilter = NULL;
-    aom_free(cm->rst_info.hfilter);
-    cm->rst_info.hfilter = NULL;
-    aom_free(cm->rst_info.wiener_level);
-    cm->rst_info.wiener_level = NULL;
-  }
+  // printf("Frame %d frame_restore_type %d [%d]: %f %f %f\n",
+  //        cm->current_video_frame, cm->rst_info.frame_restoration_type,
+  //        ntiles, cost_bilateral, cost_wiener, cost_switchable);
+  aom_free(tile_cost_bilateral);
+  aom_free(tile_cost_wiener);
 }
diff --git a/av1/encoder/ransac.c b/av1/encoder/ransac.c
index 81c1c21..a6533ca 100644
--- a/av1/encoder/ransac.c
+++ b/av1/encoder/ransac.c
@@ -28,14 +28,14 @@
 
 static const double TINY_NEAR_ZERO = 1.0E-12;
 
-static inline double SIGN(double a, double b) {
+static INLINE double sign(double a, double b) {
   return ((b) >= 0 ? fabs(a) : -fabs(a));
 }
 
-static inline double PYTHAG(double a, double b) {
-  double absa, absb, ct;
-  absa = fabs(a);
-  absb = fabs(b);
+static INLINE double pythag(double a, double b) {
+  double ct;
+  const double absa = fabs(a);
+  const double absb = fabs(b);
 
   if (absa > absb) {
     ct = absb / absa;
@@ -46,33 +46,30 @@
   }
 }
 
-int IMIN(int a, int b) { return (((a) < (b)) ? (a) : (b)); }
-
-int IMAX(int a, int b) { return (((a) < (b)) ? (b) : (a)); }
-
-void MultiplyMat(double *m1, double *m2, double *res, const int M1,
-                 const int N1, const int N2) {
-  int timesInner = N1;
-  int timesRows = M1;
-  int timesCols = N2;
+// Computes res = m1 * m2 for row-major matrices: m1 is m1_rows x inner_dim,
+// m2 is inner_dim x m2_cols, and res receives m1_rows x m2_cols entries.
+static void multiply_mat(const double *m1, const double *m2, double *res,
+                         const int m1_rows, const int inner_dim,
+                         const int m2_cols) {
   double sum;
 
   int row, col, inner;
-  for (row = 0; row < timesRows; ++row) {
-    for (col = 0; col < timesCols; ++col) {
+  for (row = 0; row < m1_rows; ++row) {
+    for (col = 0; col < m2_cols; ++col) {
       sum = 0;
-      for (inner = 0; inner < timesInner; ++inner)
-        sum += m1[row * N1 + inner] * m2[inner * N2 + col];
+      // Each row of m1 holds inner_dim entries, so that is its row stride.
+      for (inner = 0; inner < inner_dim; ++inner)
+        sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
       *(res++) = sum;
     }
   }
 }
 
-static int svdcmp_(double **u, int m, int n, double w[], double **v) {
+static int svdcmp(double **u, int m, int n, double w[], double **v) {
   const int max_its = 30;
   int flag, i, its, j, jj, k, l, nm;
   double anorm, c, f, g, h, s, scale, x, y, z;
-  double *rv1 = (double *)malloc(sizeof(*rv1) * (n + 1));
+  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
   g = scale = anorm = 0.0;
   for (i = 0; i < n; i++) {
     l = i + 1;
@@ -86,7 +80,7 @@
           s += u[k][i] * u[k][i];
         }
         f = u[i][i];
-        g = -SIGN(sqrt(s), f);
+        g = -sign(sqrt(s), f);
         h = f * g - s;
         u[i][i] = f - g;
         for (j = l; j < n; j++) {
@@ -107,7 +101,7 @@
           s += u[i][k] * u[i][k];
         }
         f = u[i][l];
-        g = -SIGN(sqrt(s), f);
+        g = -sign(sqrt(s), f);
         h = f * g - s;
         u[i][l] = f - g;
         for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
@@ -136,8 +130,7 @@
     g = rv1[i];
     l = i;
   }
-
-  for (i = IMIN(m, n) - 1; i >= 0; i--) {
+  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
     l = i + 1;
     g = w[i];
     for (j = l; j < n; j++) u[i][j] = 0.0;
@@ -173,7 +166,7 @@
           rv1[i] = c * rv1[i];
           if ((double)(fabs(f) + anorm) == anorm) break;
           g = w[i];
-          h = PYTHAG(f, g);
+          h = pythag(f, g);
           w[i] = h;
           h = 1.0 / h;
           c = g * h;
@@ -204,8 +197,8 @@
       g = rv1[nm];
       h = rv1[k];
       f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
-      g = PYTHAG(f, 1.0);
-      f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x;
+      g = pythag(f, 1.0);
+      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
       c = s = 1.0;
       for (j = l; j <= nm; j++) {
         i = j + 1;
@@ -213,7 +206,7 @@
         y = w[i];
         h = s * g;
         g = c * g;
-        z = PYTHAG(f, h);
+        z = pythag(f, h);
         rv1[j] = z;
         c = f / z;
         s = h / z;
@@ -227,7 +220,7 @@
           v[jj][j] = x * c + z * s;
           v[jj][i] = z * c - x * s;
         }
-        z = PYTHAG(f, h);
+        z = pythag(f, h);
         w[j] = z;
         if (z) {
           z = 1.0 / z;
@@ -248,28 +241,27 @@
       w[k] = x;
     }
   }
-  free(rv1);
+  aom_free(rv1);
   return 0;
 }
 
 static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
   // Assumes allocation for U is MxN
-  double **nrU, **nrV;
+  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
+  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
   int problem, i;
 
-  nrU = (double **)malloc((M) * sizeof(*nrU));
-  nrV = (double **)malloc((N) * sizeof(*nrV));
   problem = !(nrU && nrV);
   if (!problem) {
-    problem = 0;
     for (i = 0; i < M; i++) {
       nrU[i] = &U[i * N];
     }
     for (i = 0; i < N; i++) {
       nrV[i] = &V[i * N];
     }
-  }
-  if (problem) {
+  } else {
+    if (nrU) aom_free(nrU);
+    if (nrV) aom_free(nrV);
     return 1;
   }
 
@@ -279,23 +271,29 @@
   }
 
   /* HERE IT IS: do SVD */
-  if (svdcmp_(nrU, M, N, W, nrV)) {
+  if (svdcmp(nrU, M, N, W, nrV)) {
+    aom_free(nrU);
+    aom_free(nrV);
     return 1;
   }
 
-  /* free Numerical Recipes arrays */
-  free(nrU);
-  free(nrV);
+  /* aom_free Numerical Recipes arrays */
+  aom_free(nrU);
+  aom_free(nrV);
 
   return 0;
 }
 
-int PseudoInverse(double *inv, double *matx, const int M, const int N) {
-  double *U, *W, *V, ans;
+int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
+  double ans;
   int i, j, k;
-  U = (double *)malloc(M * N * sizeof(*matx));
-  W = (double *)malloc(N * sizeof(*matx));
-  V = (double *)malloc(N * N * sizeof(*matx));
+  double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
+  double *const W = (double *)aom_malloc(N * sizeof(*matx));
+  double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
 
   if (!(U && W && V)) {
+    // Release whichever of the three buffers were successfully allocated.
+    aom_free(U);
+    aom_free(W);
+    aom_free(V);
     return 1;
@@ -318,19 +312,19 @@
       inv[j + M * i] = ans;
     }
   }
-  free(U);
-  free(W);
-  free(V);
+  aom_free(U);
+  aom_free(W);
+  aom_free(V);
   return 0;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // ransac
-typedef int (*isDegenerateType)(double *p);
-typedef void (*normalizeType)(double *p, int np, double *T);
-typedef void (*denormalizeType)(double *H, double *T1, double *T2);
-typedef int (*findTransformationType)(int points, double *points1,
-                                      double *points2, double *H);
+typedef int (*IsDegenerateFunc)(double *p);
+typedef void (*NormalizeFunc)(double *p, int np, double *T);
+typedef void (*DenormalizeFunc)(double *params, double *T1, double *T2);
+typedef int (*FindTransformationFunc)(int points, double *points1,
+                                      double *points2, double *params);
 
 static int get_rand_indices(int npoints, int minpts, int *indices) {
   int i, j;
@@ -354,12 +348,12 @@
   return 1;
 }
 
-int ransac_(double *matched_points, int npoints, int *number_of_inliers,
-            int *best_inlier_mask, double *bestH, const int minpts,
-            const int paramdim, isDegenerateType isDegenerate,
-            normalizeType normalize, denormalizeType denormalize,
-            findTransformationType findTransformation,
-            ProjectPointsType projectpoints, TransformationType type) {
+static int ransac(double *matched_points, int npoints, int *number_of_inliers,
+                  int *best_inlier_mask, double *best_params, const int minpts,
+                  const int paramdim, IsDegenerateFunc is_degenerate,
+                  NormalizeFunc normalize, DenormalizeFunc denormalize,
+                  FindTransformationFunc find_transformation,
+                  ProjectPointsFunc projectpoints, TransformationType type) {
   static const double INLIER_THRESHOLD_NORMALIZED = 0.1;
   static const double INLIER_THRESHOLD_UNNORMALIZED = 1.0;
   static const double PROBABILITY_REQUIRED = 0.9;
@@ -375,7 +369,7 @@
 
   int max_inliers = 0;
   double best_variance = 0.0;
-  double H[MAX_PARAMDIM];
+  double params[MAX_PARAMDIM];
   WarpedMotionParams wm;
   double points1[2 * MAX_MINPTS];
   double points2[2 * MAX_MINPTS];
@@ -405,15 +399,23 @@
   }
 
   memset(&wm, 0, sizeof(wm));
-  best_inlier_set1 = (double *)malloc(sizeof(*best_inlier_set1) * npoints * 2);
-  best_inlier_set2 = (double *)malloc(sizeof(*best_inlier_set2) * npoints * 2);
-  inlier_set1 = (double *)malloc(sizeof(*inlier_set1) * npoints * 2);
-  inlier_set2 = (double *)malloc(sizeof(*inlier_set2) * npoints * 2);
-  corners1 = (double *)malloc(sizeof(*corners1) * npoints * 2);
-  corners1_int = (int *)malloc(sizeof(*corners1_int) * npoints * 2);
-  corners2 = (double *)malloc(sizeof(*corners2) * npoints * 2);
-  image1_coord = (int *)malloc(sizeof(*image1_coord) * npoints * 2);
-  inlier_mask = (int *)malloc(sizeof(*inlier_mask) * npoints);
+  best_inlier_set1 =
+      (double *)aom_malloc(sizeof(*best_inlier_set1) * npoints * 2);
+  best_inlier_set2 =
+      (double *)aom_malloc(sizeof(*best_inlier_set2) * npoints * 2);
+  inlier_set1 = (double *)aom_malloc(sizeof(*inlier_set1) * npoints * 2);
+  inlier_set2 = (double *)aom_malloc(sizeof(*inlier_set2) * npoints * 2);
+  corners1 = (double *)aom_malloc(sizeof(*corners1) * npoints * 2);
+  corners1_int = (int *)aom_malloc(sizeof(*corners1_int) * npoints * 2);
+  corners2 = (double *)aom_malloc(sizeof(*corners2) * npoints * 2);
+  image1_coord = (int *)aom_malloc(sizeof(*image1_coord) * npoints * 2);
+  inlier_mask = (int *)aom_malloc(sizeof(*inlier_mask) * npoints);
+
+  if (!(best_inlier_set1 && best_inlier_set2 && inlier_set1 && inlier_set2 &&
+        corners1 && corners1_int && corners2 && image1_coord && inlier_mask)) {
+    ret_val = 1;
+    goto finish_ransac;
+  }
 
   for (cnp1 = corners1, cnp2 = corners2, i = 0; i < npoints; ++i) {
     *(cnp1++) = *(matched_points++);
@@ -451,14 +453,14 @@
         points2[i * 2 + 1] = corners2[index * 2 + 1];
         i++;
       }
-      degenerate = isDegenerate(points1);
+      degenerate = is_degenerate(points1);
       if (num_degenerate_iter > MAX_DEGENERATE_ITER) {
         ret_val = 1;
         goto finish_ransac;
       }
     }
 
-    if (findTransformation(minpts, points1, points2, H)) {
+    if (find_transformation(minpts, points1, points2, params)) {
       trial_count++;
       continue;
     }
@@ -468,7 +470,7 @@
       corners1_int[2 * i + 1] = (int)corners1[i * 2 + 1];
     }
 
-    av1_integerize_model(H, type, &wm);
+    av1_integerize_model(params, type, &wm);
     projectpoints((int16_t *)wm.wmmat, corners1_int, image1_coord, npoints, 2,
                   2, 0, 0);
 
@@ -500,7 +502,7 @@
           (num_inliers == max_inliers && variance < best_variance)) {
         best_variance = variance;
         max_inliers = num_inliers;
-        memcpy(bestH, H, paramdim * sizeof(*bestH));
+        memcpy(best_params, params, paramdim * sizeof(*best_params));
         memcpy(best_inlier_set1, inlier_set1,
                num_inliers * 2 * sizeof(*best_inlier_set1));
         memcpy(best_inlier_set2, inlier_set2,
@@ -516,33 +518,35 @@
           pNoOutliers = fmin(1 - EPS, pNoOutliers);
           temp = (int)(log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers));
           if (temp > 0 && temp < N) {
-            N = IMAX(temp, MIN_TRIALS);
+            N = AOMMAX(temp, MIN_TRIALS);
           }
         }
       }
     }
     trial_count++;
   }
-  findTransformation(max_inliers, best_inlier_set1, best_inlier_set2, bestH);
+  find_transformation(max_inliers, best_inlier_set1, best_inlier_set2,
+                      best_params);
   if (normalize && denormalize) {
-    denormalize(bestH, T1, T2);
+    denormalize(best_params, T1, T2);
   }
   *number_of_inliers = max_inliers;
 finish_ransac:
-  free(best_inlier_set1);
-  free(best_inlier_set2);
-  free(inlier_set1);
-  free(inlier_set2);
-  free(corners1);
-  free(corners2);
-  free(image1_coord);
-  free(inlier_mask);
+  aom_free(best_inlier_set1);
+  aom_free(best_inlier_set2);
+  aom_free(inlier_set1);
+  aom_free(inlier_set2);
+  aom_free(corners1);
+  aom_free(corners1_int);
+  aom_free(corners2);
+  aom_free(image1_coord);
+  aom_free(inlier_mask);
   return ret_val;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
-static void normalizeHomography(double *pts, int n, double *T) {
+static void normalize_homography(double *pts, int n, double *T) {
   // Assume the points are 2d coordinates with scale = 1
   double *p = pts;
   double mean[2] = { 0, 0 };
@@ -592,63 +595,63 @@
   iT[8] = 1;
 }
 
-static void denormalizeHomography(double *H, double *T1, double *T2) {
+static void denormalize_homography(double *params, double *T1, double *T2) {
   double iT2[9];
-  double H2[9];
+  double params2[9];
   invnormalize_mat(T2, iT2);
-  MultiplyMat(H, T1, H2, 3, 3, 3);
-  MultiplyMat(iT2, H2, H, 3, 3, 3);
+  multiply_mat(params, T1, params2, 3, 3, 3);
+  multiply_mat(iT2, params2, params, 3, 3, 3);
 }
 
-static void denormalizeAffine(double *H, double *T1, double *T2) {
-  double Ha[MAX_PARAMDIM];
-  Ha[0] = H[0];
-  Ha[1] = H[1];
-  Ha[2] = H[4];
-  Ha[3] = H[2];
-  Ha[4] = H[3];
-  Ha[5] = H[5];
-  Ha[6] = Ha[7] = 0;
-  Ha[8] = 1;
-  denormalizeHomography(Ha, T1, T2);
-  H[0] = Ha[5];
-  H[1] = Ha[2];
-  H[2] = Ha[1];
-  H[3] = Ha[0];
-  H[4] = Ha[3];
-  H[5] = Ha[4];
+static void denormalize_affine(double *params, double *T1, double *T2) {
+  double params_denorm[MAX_PARAMDIM];
+  params_denorm[0] = params[0];
+  params_denorm[1] = params[1];
+  params_denorm[2] = params[4];
+  params_denorm[3] = params[2];
+  params_denorm[4] = params[3];
+  params_denorm[5] = params[5];
+  params_denorm[6] = params_denorm[7] = 0;
+  params_denorm[8] = 1;
+  denormalize_homography(params_denorm, T1, T2);
+  params[0] = params_denorm[5];
+  params[1] = params_denorm[2];
+  params[2] = params_denorm[1];
+  params[3] = params_denorm[0];
+  params[4] = params_denorm[3];
+  params[5] = params_denorm[4];
 }
 
-static void denormalizeRotZoom(double *H, double *T1, double *T2) {
-  double Ha[MAX_PARAMDIM];
-  Ha[0] = H[0];
-  Ha[1] = H[1];
-  Ha[2] = H[2];
-  Ha[3] = -H[1];
-  Ha[4] = H[0];
-  Ha[5] = H[3];
-  Ha[6] = Ha[7] = 0;
-  Ha[8] = 1;
-  denormalizeHomography(Ha, T1, T2);
-  H[0] = Ha[5];
-  H[1] = Ha[2];
-  H[2] = Ha[1];
-  H[3] = Ha[0];
+static void denormalize_rotzoom(double *params, double *T1, double *T2) {
+  double params_denorm[MAX_PARAMDIM];
+  params_denorm[0] = params[0];
+  params_denorm[1] = params[1];
+  params_denorm[2] = params[2];
+  params_denorm[3] = -params[1];
+  params_denorm[4] = params[0];
+  params_denorm[5] = params[3];
+  params_denorm[6] = params_denorm[7] = 0;
+  params_denorm[8] = 1;
+  denormalize_homography(params_denorm, T1, T2);
+  params[0] = params_denorm[5];
+  params[1] = params_denorm[2];
+  params[2] = params_denorm[1];
+  params[3] = params_denorm[0];
 }
 
-static void denormalizeTranslation(double *H, double *T1, double *T2) {
-  double Ha[MAX_PARAMDIM];
-  Ha[0] = 1;
-  Ha[1] = 0;
-  Ha[2] = H[0];
-  Ha[3] = 0;
-  Ha[4] = 1;
-  Ha[5] = H[1];
-  Ha[6] = Ha[7] = 0;
-  Ha[8] = 1;
-  denormalizeHomography(Ha, T1, T2);
-  H[0] = Ha[5];
-  H[1] = Ha[2];
+static void denormalize_translation(double *params, double *T1, double *T2) {
+  double params_denorm[MAX_PARAMDIM];
+  params_denorm[0] = 1;
+  params_denorm[1] = 0;
+  params_denorm[2] = params[0];
+  params_denorm[3] = 0;
+  params_denorm[4] = 1;
+  params_denorm[5] = params[1];
+  params_denorm[6] = params_denorm[7] = 0;
+  params_denorm[8] = 1;
+  denormalize_homography(params_denorm, T1, T2);
+  params[0] = params_denorm[5];
+  params[1] = params_denorm[2];
 }
 
 static int is_collinear3(double *p1, double *p2, double *p3) {
@@ -658,27 +661,27 @@
   return fabs(v) < collinear_eps;
 }
 
-static int isDegenerateTranslation(double *p) {
+static int is_degenerate_translation(double *p) {
   return (p[0] - p[2]) * (p[0] - p[2]) + (p[1] - p[3]) * (p[1] - p[3]) <= 2;
 }
 
-static int isDegenerateAffine(double *p) {
+static int is_degenerate_affine(double *p) {
   return is_collinear3(p, p + 2, p + 4);
 }
 
-static int isDegenerateHomography(double *p) {
+static int is_degenerate_homography(double *p) {
   return is_collinear3(p, p + 2, p + 4) || is_collinear3(p, p + 2, p + 6) ||
          is_collinear3(p, p + 4, p + 6) || is_collinear3(p + 2, p + 4, p + 6);
 }
 
-int findTranslation(const int np, double *pts1, double *pts2, double *mat) {
+int find_translation(const int np, double *pts1, double *pts2, double *mat) {
   int i;
   double sx, sy, dx, dy;
   double sumx, sumy;
 
   double T1[9], T2[9];
-  normalizeHomography(pts1, np, T1);
-  normalizeHomography(pts2, np, T2);
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
 
   sumx = 0;
   sumy = 0;
@@ -693,21 +696,21 @@
   }
   mat[0] = sumx / np;
   mat[1] = sumy / np;
-  denormalizeTranslation(mat, T1, T2);
+  denormalize_translation(mat, T1, T2);
   return 0;
 }
 
-int findRotZoom(const int np, double *pts1, double *pts2, double *mat) {
+int find_rotzoom(const int np, double *pts1, double *pts2, double *mat) {
   const int np2 = np * 2;
-  double *a = (double *)malloc(sizeof(*a) * np2 * 9);
+  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9);
   double *b = a + np2 * 4;
   double *temp = b + np2;
   int i;
   double sx, sy, dx, dy;
 
   double T1[9], T2[9];
-  normalizeHomography(pts1, np, T1);
-  normalizeHomography(pts2, np, T2);
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
 
   for (i = 0; i < np; ++i) {
     dx = *(pts2++);
@@ -727,27 +730,27 @@
     b[2 * i] = dx;
     b[2 * i + 1] = dy;
   }
-  if (PseudoInverse(temp, a, np2, 4)) {
-    free(a);
+  if (pseudo_inverse(temp, a, np2, 4)) {
+    aom_free(a);
     return 1;
   }
-  MultiplyMat(temp, b, mat, 4, np2, 1);
-  denormalizeRotZoom(mat, T1, T2);
-  free(a);
+  multiply_mat(temp, b, mat, 4, np2, 1);
+  denormalize_rotzoom(mat, T1, T2);
+  aom_free(a);
   return 0;
 }
 
-int findAffine(const int np, double *pts1, double *pts2, double *mat) {
+int find_affine(const int np, double *pts1, double *pts2, double *mat) {
   const int np2 = np * 2;
-  double *a = (double *)malloc(sizeof(*a) * np2 * 13);
+  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13);
   double *b = a + np2 * 6;
   double *temp = b + np2;
   int i;
   double sx, sy, dx, dy;
 
   double T1[9], T2[9];
-  normalizeHomography(pts1, np, T1);
-  normalizeHomography(pts2, np, T2);
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
 
   for (i = 0; i < np; ++i) {
     dx = *(pts2++);
@@ -771,28 +774,28 @@
     b[2 * i] = dx;
     b[2 * i + 1] = dy;
   }
-  if (PseudoInverse(temp, a, np2, 6)) {
-    free(a);
+  if (pseudo_inverse(temp, a, np2, 6)) {
+    aom_free(a);
     return 1;
   }
-  MultiplyMat(temp, b, mat, 6, np2, 1);
-  denormalizeAffine(mat, T1, T2);
-  free(a);
+  multiply_mat(temp, b, mat, 6, np2, 1);
+  denormalize_affine(mat, T1, T2);
+  aom_free(a);
   return 0;
 }
 
-int findHomography(const int np, double *pts1, double *pts2, double *mat) {
+int find_homography(const int np, double *pts1, double *pts2, double *mat) {
   // Implemented from Peter Kovesi's normalized implementation
   const int np3 = np * 3;
-  double *a = (double *)malloc(sizeof(*a) * np3 * 18);
+  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 18);
   double *U = a + np3 * 9;
   double S[9], V[9 * 9];
   int i, mini;
   double sx, sy, dx, dy;
   double T1[9], T2[9];
 
-  normalizeHomography(pts1, np, T1);
-  normalizeHomography(pts2, np, T2);
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
 
   for (i = 0; i < np; ++i) {
     dx = *(pts2++);
@@ -828,7 +831,7 @@
   }
 
   if (SVD(U, S, V, a, np3, 9)) {
-    free(a);
+    aom_free(a);
     return 1;
   } else {
     double minS = 1e12;
@@ -842,100 +845,57 @@
   }
 
   for (i = 0; i < 9; i++) mat[i] = V[i * 9 + mini];
-  denormalizeHomography(mat, T1, T2);
-  free(a);
+  denormalize_homography(mat, T1, T2);
+  aom_free(a);
   if (mat[8] == 0.0) {
     return 1;
   }
   return 0;
 }
 
-int findHomographyScale1(const int np, double *pts1, double *pts2,
-                         double *mat) {
-  // This implementation assumes h33 = 1, but does not seem to give good results
-  const int np2 = np * 2;
-  double *a = (double *)malloc(sizeof(*a) * np2 * 17);
-  double *b = a + np2 * 8;
-  double *temp = b + np2;
-  int i, j;
-  double sx, sy, dx, dy;
-  double T1[9], T2[9];
-
-  normalizeHomography(pts1, np, T1);
-  normalizeHomography(pts2, np, T2);
-
-  for (i = 0, j = np; i < np; ++i, ++j) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-    a[i * 8 + 0] = a[j * 8 + 3] = sx;
-    a[i * 8 + 1] = a[j * 8 + 4] = sy;
-    a[i * 8 + 2] = a[j * 8 + 5] = 1;
-    a[i * 8 + 3] = a[i * 8 + 4] = a[i * 8 + 5] = a[j * 8 + 0] = a[j * 8 + 1] =
-        a[j * 8 + 2] = 0;
-    a[i * 8 + 6] = -dx * sx;
-    a[i * 8 + 7] = -dx * sy;
-    a[j * 8 + 6] = -dy * sx;
-    a[j * 8 + 7] = -dy * sy;
-    b[i] = dx;
-    b[j] = dy;
-  }
-
-  if (PseudoInverse(temp, a, np2, 8)) {
-    free(a);
-    return 1;
-  }
-  MultiplyMat(temp, b, &*mat, 8, np2, 1);
-  mat[8] = 1;
-
-  denormalizeHomography(mat, T1, T2);
-  free(a);
-  return 0;
+int ransac_translation(double *matched_points, int npoints,
+                       int *number_of_inliers, int *best_inlier_mask,
+                       double *best_params) {
+  return ransac(matched_points, npoints, number_of_inliers, best_inlier_mask,
+                best_params, 3, 2, is_degenerate_translation,
+                NULL,  // normalize_homography,
+                NULL,  // denormalize_translation,
+                find_translation, project_points_translation, TRANSLATION);
 }
 
-int ransacTranslation(double *matched_points, int npoints,
+int ransac_rotzoom(double *matched_points, int npoints, int *number_of_inliers,
+                   int *best_inlier_mask, double *best_params) {
+  return ransac(matched_points, npoints, number_of_inliers, best_inlier_mask,
+                best_params, 3, 4, is_degenerate_affine,
+                NULL,  // normalize_homography,
+                NULL,  // denormalize_rotzoom,
+                find_rotzoom, project_points_rotzoom, ROTZOOM);
+}
+
+int ransac_affine(double *matched_points, int npoints, int *number_of_inliers,
+                  int *best_inlier_mask, double *best_params) {
+  return ransac(matched_points, npoints, number_of_inliers, best_inlier_mask,
+                best_params, 3, 6, is_degenerate_affine,
+                NULL,  // normalize_homography,
+                NULL,  // denormalize_affine,
+                find_affine, project_points_affine, AFFINE);
+}
+
+int ransac_homography(double *matched_points, int npoints,
                       int *number_of_inliers, int *best_inlier_mask,
-                      double *bestH) {
-  return ransac_(matched_points, npoints, number_of_inliers, best_inlier_mask,
-                 bestH, 3, 2, isDegenerateTranslation,
-                 NULL,  // normalizeHomography,
-                 NULL,  // denormalizeRotZoom,
-                 findTranslation, projectPointsTranslation, TRANSLATION);
-}
-
-int ransacRotZoom(double *matched_points, int npoints, int *number_of_inliers,
-                  int *best_inlier_mask, double *bestH) {
-  return ransac_(matched_points, npoints, number_of_inliers, best_inlier_mask,
-                 bestH, 3, 4, isDegenerateAffine,
-                 NULL,  // normalizeHomography,
-                 NULL,  // denormalizeRotZoom,
-                 findRotZoom, projectPointsRotZoom, ROTZOOM);
-}
-
-int ransacAffine(double *matched_points, int npoints, int *number_of_inliers,
-                 int *best_inlier_mask, double *bestH) {
-  return ransac_(matched_points, npoints, number_of_inliers, best_inlier_mask,
-                 bestH, 3, 6, isDegenerateAffine,
-                 NULL,  // normalizeHomography,
-                 NULL,  // denormalizeAffine,
-                 findAffine, projectPointsAffine, AFFINE);
-}
-
-int ransacHomography(double *matched_points, int npoints,
-                     int *number_of_inliers, int *best_inlier_mask,
-                     double *bestH) {
-  int result = ransac_(matched_points, npoints, number_of_inliers,
-                       best_inlier_mask, bestH, 4, 8, isDegenerateHomography,
-                       NULL,  // normalizeHomography,
-                       NULL,  // denormalizeHomography,
-                       findHomography, projectPointsHomography, HOMOGRAPHY);
+                      double *best_params) {
+  const int result =
+      ransac(matched_points, npoints, number_of_inliers, best_inlier_mask,
+             best_params, 4, 8, is_degenerate_homography,
+             NULL,  // normalize_homography,
+             NULL,  // denormalize_homography,
+             find_homography, project_points_homography, HOMOGRAPHY);
   if (!result) {
     // normalize so that H33 = 1
     int i;
-    double m = 1.0 / bestH[8];
-    for (i = 0; i < 8; ++i) bestH[i] *= m;
-    bestH[8] = 1.0;
+    const double m = 1.0 / best_params[8];
+    for (i = 0; i < 8; ++i) best_params[i] *= m;
+    best_params[8] = 1.0;
   }
   return result;
 }
diff --git a/av1/encoder/ransac.h b/av1/encoder/ransac.h
index c8fbdc8..f75d70f 100644
--- a/av1/encoder/ransac.h
+++ b/av1/encoder/ransac.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -18,20 +18,20 @@
 
 #include "av1/common/warped_motion.h"
 
-typedef int (*RansacType)(double *matched_points, int npoints,
+typedef int (*RansacFunc)(double *matched_points, int npoints,
                           int *number_of_inliers, int *best_inlier_mask,
-                          double *bestH);
+                          double *best_params);
 
 /* Each of these functions fits a motion model from a set of
 corresponding points in 2 frames using RANSAC.*/
-int ransacHomography(double *matched_points, int npoints,
-                     int *number_of_inliers, int *best_inlier_indices,
-                     double *bestH);
-int ransacAffine(double *matched_points, int npoints, int *number_of_inliers,
-                 int *best_inlier_indices, double *bestH);
-int ransacRotZoom(double *matched_points, int npoints, int *number_of_inliers,
-                  int *best_inlier_indices, double *bestH);
-int ransacTranslation(double *matched_points, int npoints,
+int ransac_homography(double *matched_points, int npoints,
                       int *number_of_inliers, int *best_inlier_indices,
-                      double *bestH);
-#endif  // AV1_ENCODER_RANSAC_H
+                      double *best_params);
+int ransac_affine(double *matched_points, int npoints, int *number_of_inliers,
+                  int *best_inlier_indices, double *best_params);
+int ransac_rotzoom(double *matched_points, int npoints, int *number_of_inliers,
+                   int *best_inlier_indices, double *best_params);
+int ransac_translation(double *matched_points, int npoints,
+                       int *number_of_inliers, int *best_inlier_indices,
+                       double *best_params);
+#endif  // AV1_ENCODER_RANSAC_H_
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 5660369..b56e3c1 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -146,6 +146,10 @@
     av1_cost_tokens(cpi->intra_filter_cost[i], fc->intra_filter_probs[i],
                     av1_intra_filter_tree);
 #endif  // CONFIG_EXT_INTRA
+#if CONFIG_LOOP_RESTORATION
+  av1_cost_tokens(cpi->switchable_restore_cost, fc->switchable_restore_prob,
+                  av1_switchable_restore_tree);
+#endif  // CONFIG_LOOP_RESTORATION
 }
 
 void av1_fill_token_costs(av1_coeff_cost *c,
diff --git a/third_party/fastfeat/README b/third_party/fastfeat/README.libvpx
similarity index 77%
rename from third_party/fastfeat/README
rename to third_party/fastfeat/README.libvpx
index 0355999..2edd6e7 100644
--- a/third_party/fastfeat/README
+++ b/third_party/fastfeat/README.libvpx
@@ -1,8 +1,10 @@
-This code was taken from http://www.edwardrosten.com/work/fast.html. Fast 10, 11, and 12
-have been deleted.
+URL: https://github.com/edrosten/fast-C-src
+Version: 391d5e939eb1545d24c10533d7de424db8d9c191
+License: BSD
+License File: LICENSE
 
-FAST feature detectors in C Version 2.0
----------------------------------------
+Description:
+Library to compute FAST features with non-maximum suppression.
 
 The files are valid C and C++ code, and have no special requirements for
 compiling, and they do not depend on any libraries. Just compile them along with
@@ -31,4 +33,6 @@
 The detection, scoring and nonmaximal suppression are available as individual
 functions.  To see how to use the individual functions, see fast.c
 
-
+Local Modifications:
+Add lines to turn off clang formatting for these files
+Remove Fast 10, 11 and 12