Optimize get_br_ctx

1. Unroll the loop in get_level_mag_with_txclass and inline it into
   get_br_ctx, removing the helper.
2. Remove the redundant AOMMIN applied to each nb_mag.

3. The encoder is about 1.1% faster, as shown by
encoding 20 frames of foreman_cif.y4m
(configured with CONFIG_LOWBITDEPTH=1).

Change-Id: Iaa150b499f18cb9195a0411e73b9abe98511ccc9
diff --git a/av1/common/txb_common.h b/av1/common/txb_common.h
index b466cf3..d64bc00 100644
--- a/av1/common/txb_common.h
+++ b/av1/common/txb_common.h
@@ -193,18 +193,6 @@
   return count;
 }
 
-static INLINE void get_level_mag_with_txclass(const uint8_t *const levels,
-                                              const int stride, const int row,
-                                              const int col, int *const mag,
-                                              const TX_CLASS tx_class) {
-  for (int idx = 0; idx < CONTEXT_MAG_POSITION_NUM; ++idx) {
-    const int ref_row = row + mag_ref_offset_with_txclass[tx_class][idx][0];
-    const int ref_col = col + mag_ref_offset_with_txclass[tx_class][idx][1];
-    const int pos = ref_row * stride + ref_col;
-    mag[idx] = levels[pos];
-  }
-}
-
 static INLINE void get_level_mag(const uint8_t *const levels, const int stride,
                                  const int row, const int col, int *const mag) {
   for (int idx = 0; idx < CONTEXT_MAG_POSITION_NUM; ++idx) {
@@ -380,27 +368,32 @@
   const int row = c >> bwl;
   const int col = c - (row << bwl);
   const int stride = (1 << bwl) + TX_PAD_HOR;
-  int mag = 0;
-  int nb_mag[3] = { 0 };
   const TX_CLASS tx_class = tx_type_to_class[tx_type];
-  get_level_mag_with_txclass(levels, stride, row, col, nb_mag, tx_class);
-
-  mag = AOMMIN(nb_mag[0], MAX_BASE_BR_RANGE) +
-        AOMMIN(nb_mag[1], MAX_BASE_BR_RANGE) +
-        AOMMIN(nb_mag[2], MAX_BASE_BR_RANGE);
-  mag = AOMMIN((mag + 1) >> 1, 6);
-  if (c == 0) return mag;
-  if (tx_class == TX_CLASS_2D) {
-    if ((row < 2) && (col < 2)) return mag + 7;
-  } else {
-    if (tx_class == TX_CLASS_HORIZ) {
+  const int pos = row * stride + col;
+  int mag = levels[pos + 1];
+  mag += levels[pos + stride];
+  switch (tx_class) {
+    case TX_CLASS_2D:
+      mag += levels[pos + stride + 1];
+      mag = AOMMIN((mag + 1) >> 1, 6);
+      if (c == 0) return mag;
+      if ((row < 2) && (col < 2)) return mag + 7;
+      break;
+    case TX_CLASS_HORIZ:
+      mag += levels[pos + 2];
+      mag = AOMMIN((mag + 1) >> 1, 6);
+      if (c == 0) return mag;
       if (col == 0) return mag + 7;
-    } else {
-      if (tx_class == TX_CLASS_VERT) {
-        if (row == 0) return mag + 7;
-      }
-    }
+      break;
+    case TX_CLASS_VERT:
+      mag += levels[pos + (stride << 1)];
+      mag = AOMMIN((mag + 1) >> 1, 6);
+      if (c == 0) return mag;
+      if (row == 0) return mag + 7;
+      break;
+    default: break;
   }
+
   return mag + 14;
 }