Correct the misleading code in encode_inter_mb_segment()

In encode_inter_mb_segment(), when BLOCK_8X4 or BLOCK_4X8 is
passed, the nested loop inside it always iterates exactly twice.
(For BLOCK_4X4, the loop iterates only once, because
encode_inter_mb_segment() is called once for each 4X4 block.)
Hence the offset added to k in the 1st iteration is always zero, and
the offset added in the 2nd iteration is always (idy * 2 + idx), with
either idy == 1 or idx == 1 depending on the sb_type.

Using "+=" there could mislead readers into expecting that
the loop runs more than two iterations and that k accumulates
across them. A simple assignment is more appropriate here.

Change-Id: I7a11255eca13403bc090ba4f0cd4785db9f0e541
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index e139eab..1bd9dfa 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -4526,7 +4526,6 @@
                                        ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                                        int ir, int ic, int mi_row, int mi_col) {
   const AV1_COMMON *const cm = &cpi->common;
-  int k;
   MACROBLOCKD *xd = &x->e_mbd;
   struct macroblockd_plane *const pd = &xd->plane[0];
   struct macroblock_plane *const p = &x->plane[0];
@@ -4584,14 +4583,14 @@
                      8, src, p->src.stride, dst, pd->dst.stride);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-  k = i;
   for (idy = 0; idy < txb_height; idy += num_4x4_h) {
     for (idx = 0; idx < txb_width; idx += num_4x4_w) {
       int64_t dist, ssz, rd, rd1, rd2;
       int block;
       int coeff_ctx;
+      int k;
 
-      k += (idy * 2 + idx);
+      k = i + (idy * 2 + idx);
       if (tx_size == TX_4X4)
         block = k;
       else