diff --git a/av1/common/spherical_pred.c b/av1/common/spherical_pred.c
index 3b9a125..9fba493 100644
--- a/av1/common/spherical_pred.c
+++ b/av1/common/spherical_pred.c
@@ -176,7 +176,7 @@
 
       cur_x = x - fo + k;
       cur_x = cur_x % frame_width;
-      cur_x = cur_x > 0 ? cur_x : frame_width + cur_x;
+      cur_x = cur_x >= 0 ? cur_x : frame_width + cur_x;
 
       sum += ref_frame[cur_x + cur_y * ref_frame_stride] * coeff;
     }
@@ -452,8 +452,10 @@
   // Small Diamond Search Pattern on shpere
   SphereMV sdsp_mv[5];
 
-  double search_step_phi = 0.5 * block_height * PI / frame_height;
-  double search_step_theta = 0.5 * block_width * 2 * PI / frame_width;
+  double search_step_phi =
+      AOMMIN(0.2 * block_height, 0.5 * search_range) * PI / frame_height;
+  double search_step_theta =
+      AOMMIN(0.2 * block_width, 0.5 * search_range) * 2 * PI / frame_width;
 
   const uint8_t *cur_block = &cur_frame[block_x + block_y * frame_stride];
   uint8_t pred_block[128 * 128];
@@ -464,8 +466,10 @@
   av1_plane_to_sphere_erp(block_x, block_y, frame_width, frame_height,
                           &start_phi, &start_theta);
 
-  double max_range_phi = start_phi + search_range * PI / frame_height;
-  double min_range_phi = start_phi - search_range * PI / frame_height;
+  double max_range_phi =
+      start_phi + start_mv->phi + search_range * PI / frame_height;
+  double min_range_phi =
+      start_phi + start_mv->phi - search_range * PI / frame_height;
   double max_range_theta = start_theta + search_range * 2 * PI / frame_height;
   double min_range_theta = start_theta - search_range * 2 * PI / frame_height;
 
@@ -480,10 +484,14 @@
                    frame_height, pred_block_stride, pred_block);
   best_sad = get_sad_of_blocks(cur_block, pred_block, block_width, block_height,
                                frame_stride, pred_block_stride);
+  update_sphere_mv_ldsp(ldsp_mv, search_step_phi, search_step_theta);
 
-  do {
-    update_sphere_mv_ldsp(ldsp_mv, search_step_phi, search_step_theta);
+  const double min_scale = 0.125;
 
+  while (start_phi + ldsp_mv[5].phi <= max_range_phi &&
+         start_phi + ldsp_mv[1].phi >= min_range_phi &&
+         start_theta + ldsp_mv[3].theta <= max_range_theta &&
+         start_theta + ldsp_mv[7].theta >= min_range_theta) {
     for (int i = 0; i < 9; i++) {
       av1_get_pred_erp(block_x, block_y, block_width, block_height,
                        ldsp_mv[i].phi, ldsp_mv[i].theta, ref_frame,
@@ -501,11 +509,10 @@
     }  // for
 
     if (best_mv_idx == 0) {
-      if (search_step_phi > PI / frame_height &&
-          search_step_theta > 2 * PI / frame_height) {
+      if (search_step_phi > min_scale * PI / frame_height &&
+          search_step_theta > min_scale * 2 * PI / frame_height) {
         search_step_phi *= 0.5;
         search_step_theta *= 0.5;
-        continue;
       } else {
         break;
       }
@@ -514,31 +521,35 @@
       ldsp_mv[0].theta = ldsp_mv[best_mv_idx].theta;
       best_mv_idx = 0;
     }
-  } while (start_phi + ldsp_mv[5].phi <= max_range_phi &&
-           start_phi + ldsp_mv[1].phi >= min_range_phi &&
-           start_theta + ldsp_mv[3].theta <= max_range_theta &&
-           start_theta + ldsp_mv[7].theta >= min_range_theta);
+    update_sphere_mv_ldsp(ldsp_mv, search_step_phi, search_step_theta);
+  }
 
-  sdsp_mv[0].phi = ldsp_mv[best_mv_idx].phi;
-  sdsp_mv[0].theta = ldsp_mv[best_mv_idx].theta;
+  sdsp_mv[0].phi = ldsp_mv[0].phi;
+  sdsp_mv[0].theta = ldsp_mv[0].theta;
   best_mv_idx = 0;
-  search_step_phi = PI / frame_height;
-  search_step_theta = 2 * PI / frame_width;
-
+  search_step_phi = min_scale * PI / frame_height;
+  search_step_theta = min_scale * 2 * PI / frame_width;
   update_sphere_mv_sdsp(sdsp_mv, search_step_phi, search_step_theta);
-  for (int i = 0; i < 5; i++) {
-    av1_get_pred_erp(block_x, block_y, block_width, block_height,
-                     sdsp_mv[i].phi, sdsp_mv[i].theta, ref_frame, frame_stride,
-                     frame_width, frame_height, pred_block_stride, pred_block);
+  if (start_phi + sdsp_mv[3].phi <= max_range_phi &&
+      start_phi + sdsp_mv[1].phi >= min_range_phi &&
+      start_theta + sdsp_mv[2].theta <= max_range_theta &&
+      start_theta + sdsp_mv[4].theta >= min_range_theta) {
+    for (int i = 0; i < 5; i++) {
+      av1_get_pred_erp(block_x, block_y, block_width, block_height,
+                       sdsp_mv[i].phi, sdsp_mv[i].theta, ref_frame,
+                       frame_stride, frame_width, frame_height,
+                       pred_block_stride, pred_block);
 
-    temp_sad = get_sad_of_blocks(cur_block, pred_block, block_width,
-                                 block_height, frame_stride, pred_block_stride);
+      temp_sad =
+          get_sad_of_blocks(cur_block, pred_block, block_width, block_height,
+                            frame_stride, pred_block_stride);
 
-    if (temp_sad < best_sad) {
-      best_sad = temp_sad;
-      best_mv_idx = i;
-    }
-  }  // for
+      if (temp_sad < best_sad) {
+        best_sad = temp_sad;
+        best_mv_idx = i;
+      }
+    }  // for
+  }
 
   best_mv->phi = sdsp_mv[best_mv_idx].phi;
   best_mv->theta = sdsp_mv[best_mv_idx].theta;
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index a34f62c..8730a44 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -147,14 +147,15 @@
   SphereMV best_sp_mv, start_sp_mv;
   // TODO(bohanli,yaoyaogoogle): start with mbmi->mv[0] may also make sense?
   const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
+  const MV start_mv = mbmi->mv[0].as_mv;
   // assume ref_mv is converted from sphere MV for the top left pixel of the
   // block
   double blk_phi, blk_theta, this_phi, this_theta;
   double this_x, this_y;
   av1_plane_to_sphere_erp(block_x, block_y, frame_width, frame_height, &blk_phi,
                           &blk_theta);
-  av1_plane_to_sphere_erp(block_x + (double)ref_mv.col / 8.0,
-                          block_y + (double)ref_mv.row / 8.0, frame_width,
+  av1_plane_to_sphere_erp(block_x + (double)start_mv.col / 8.0,
+                          block_y + (double)start_mv.row / 8.0, frame_width,
                           frame_height, &this_phi, &this_theta);
   start_sp_mv.phi = this_phi - blk_phi;
   start_sp_mv.theta = this_theta - blk_theta;
@@ -184,6 +185,20 @@
   best_mv->as_mv.col = (int16_t)round((mv_x)*8.0);
   best_mv->as_mv.row = (int16_t)round((mv_y)*8.0);
 
+  SubpelMvLimits mv_limits;
+  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv);
+  clamp_mv(&best_mv->as_mv, &mv_limits);
+
+  if (cpi->common.features.allow_high_precision_mv == 0) {
+    best_mv->as_mv.col = 2 * ((best_mv->as_mv.col + 1) / 2);
+    best_mv->as_mv.row = 2 * ((best_mv->as_mv.row + 1) / 2);
+  }
+
+  if (cpi->common.features.cur_frame_force_integer_mv) {
+    best_mv->as_mv.col = (int16_t)(8 * round(best_mv->as_mv.col / 8.0));
+    best_mv->as_mv.row = (int16_t)(8 * round(best_mv->as_mv.row / 8.0));
+  }
+
   *rate_mv = av1_mv_bit_cost(&best_mv->as_mv, &ref_mv, mv_costs->nmv_joint_cost,
                              mv_costs->mv_cost_stack, MV_COST_WEIGHT);
 }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index a14fbed..2d92cb5 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1376,6 +1376,8 @@
       assert(!is_comp_pred);
 #if CONFIG_SPHERICAL_PRED
       if (have_newmv_in_inter_mode(this_mode)) {
+        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
+                                 &mbmi->mv[0], NULL);
         av1_erp_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, 30, &mbmi->mv[0]);
         tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
       }
