Pad extra columns in txb levels and signs

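This allows the horizontal availability check to be removed: each row of
the levels buffer is followed by zero-filled padding columns, so a neighbor
column outside the block reads zero instead of needing a bounds test.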

Change-Id: Ie3b97eea63b4cc79ec78df119c2730a4d7cf539d
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index fa31e03..7549795 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -245,7 +245,8 @@
 static INLINE void update_qcoeff(const int coeff_idx, const tran_low_t qc,
                                  const TxbInfo *const txb_info) {
   txb_info->qcoeff[coeff_idx] = qc;
-  txb_info->levels[coeff_idx] = (uint8_t)clamp(abs(qc), 0, UINT8_MAX);
+  txb_info->levels[get_paded_idx(coeff_idx, txb_info->bwl)] =
+      (uint8_t)clamp(abs(qc), 0, UINT8_MAX);
 }
 
 static INLINE void update_coeff(const int coeff_idx, const tran_low_t qc,
@@ -256,16 +257,23 @@
 }
 
 static INLINE void av1_txb_init_levels(const tran_low_t *const coeff,
-                                       const int width, const int size,
+                                       const int width, const int height,
                                        uint8_t *const levels) {
   const int stride = width + TX_PAD_HOR;
+  uint8_t *ls = levels;
 
   memset(levels - TX_PAD_TOP * stride, 0,
          sizeof(*levels) * TX_PAD_TOP * stride);
-  memset(levels + size, 0, sizeof(*levels) * TX_PAD_BOTTOM * stride);
+  memset(levels + stride * height, 0,
+         sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END));
 
-  for (int i = 0; i < size; i++) {
-    levels[i] = (uint8_t)clamp(abs(coeff[i]), 0, UINT8_MAX);
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, UINT8_MAX);
+    }
+    for (int j = 0; j < TX_PAD_HOR; j++) {
+      *ls++ = 0;
+    }
   }
 }
 
@@ -298,7 +306,7 @@
 
   if (eob == 0) return;
 
-  av1_txb_init_levels(tcoeff, width, seg_eob, levels);
+  av1_txb_init_levels(tcoeff, width, height, levels);
 
 #if CONFIG_TXK_SEL
   av1_write_tx_type(cm, xd, blk_row, blk_col, block, plane,
@@ -500,19 +508,17 @@
                                     int ctx_set[NUM_BASE_LEVELS]) {
   const int row = c >> bwl;
   const int col = c - (row << bwl);
-  const int stride = 1 << bwl;
+  const int width = 1 << bwl;
+  const int stride = width + TX_PAD_HOR;
   int mag_count[NUM_BASE_LEVELS] = { 0 };
   int nb_mag[NUM_BASE_LEVELS][3] = { { 0 } };
   int idx;
   int i;
 
   for (idx = 0; idx < BASE_CONTEXT_POSITION_NUM; ++idx) {
-    int ref_row = row + base_ref_offset[idx][0];
-    int ref_col = col + base_ref_offset[idx][1];
-    int pos = (ref_row << bwl) + ref_col;
-
-    if (ref_col < 0 || ref_col >= stride) continue;
-
+    const int ref_row = row + base_ref_offset[idx][0];
+    const int ref_col = col + base_ref_offset[idx][1];
+    const int pos = ref_row * stride + ref_col;
     const uint8_t abs_coeff = levels[pos];
 
     for (i = 0; i < NUM_BASE_LEVELS; ++i) {
@@ -593,7 +599,7 @@
   }
   cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
 
-  av1_txb_init_levels(qcoeff, width, tx_size_2d[tx_size], levels);
+  av1_txb_init_levels(qcoeff, width, height, levels);
 
 #if CONFIG_TXK_SEL
   cost += av1_tx_type_cost(cm, x, xd, mbmi->sb_type, plane, tx_size, tx_type);
@@ -1106,10 +1112,10 @@
     for (int i = 0; i < ref_num; ++i) {
       const int nb_row = row - ref_offset[i][0];
       const int nb_col = col - ref_offset[i][1];
-      const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+      const int nb_coeff_idx = nb_row * txb_info->width + nb_col;
 
       if (nb_row < 0 || nb_col < 0 || nb_row >= txb_info->height ||
-          nb_col >= txb_info->stride)
+          nb_col >= txb_info->width)
         continue;
 
       const int nb_scan_idx = iscan[nb_coeff_idx];
@@ -1142,10 +1148,10 @@
     for (int i = 0; i < ref_num; ++i) {
       const int nb_row = row - ref_offset[i][0];
       const int nb_col = col - ref_offset[i][1];
-      const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+      const int nb_coeff_idx = nb_row * txb_info->width + nb_col;
 
       if (nb_row < 0 || nb_col < 0 || nb_row >= txb_info->height ||
-          nb_col >= txb_info->stride)
+          nb_col >= txb_info->width)
         continue;
 
       const int nb_scan_idx = iscan[nb_coeff_idx];
@@ -1178,10 +1184,10 @@
     for (int i = 0; i < ref_num; ++i) {
       const int nb_row = row - ref_offset[i][0];
       const int nb_col = col - ref_offset[i][1];
-      const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+      const int nb_coeff_idx = nb_row * txb_info->width + nb_col;
 
       if (nb_row < 0 || nb_col < 0 || nb_row >= txb_info->height ||
-          nb_col >= txb_info->stride)
+          nb_col >= txb_info->width)
         continue;
 
       const int nb_scan_idx = iscan[nb_coeff_idx];
@@ -1302,10 +1308,10 @@
     const int nb_col = col - sig_ref_offset[i][1];
 
     if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
-          nb_col < txb_info->stride))
+          nb_col < txb_info->width))
       continue;
 
-    const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    const int nb_coeff_idx = nb_row * txb_info->width + nb_col;
     const int nb_scan_idx = iscan[nb_coeff_idx];
     if (nb_scan_idx < eob) {
       const int scan_idx = iscan[coeff_idx];
@@ -1336,10 +1342,10 @@
   for (int i = 0; i < BASE_CONTEXT_POSITION_NUM; ++i) {
     const int nb_row = row - base_ref_offset[i][0];
     const int nb_col = col - base_ref_offset[i][1];
-    const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    const int nb_coeff_idx = nb_row * txb_info->width + nb_col;
 
     if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
-          nb_col < txb_info->stride))
+          nb_col < txb_info->width))
       continue;
 
     const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
@@ -1373,10 +1379,10 @@
   for (int i = 0; i < BR_CONTEXT_POSITION_NUM; ++i) {
     const int nb_row = row - br_ref_offset[i][0];
     const int nb_col = col - br_ref_offset[i][1];
-    const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    const int nb_coeff_idx = nb_row * txb_info->width + nb_col;
 
     if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
-          nb_col < txb_info->stride))
+          nb_col < txb_info->width))
       continue;
 
     const int nb_scan_idx = iscan[nb_coeff_idx];
@@ -1483,10 +1489,10 @@
   for (int i = 0; i < ALL_REF_OFFSET_NUM; ++i) {
     int nb_row = row - all_ref_offset[i][0];
     int nb_col = col - all_ref_offset[i][1];
-    int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    int nb_coeff_idx = nb_row * txb_info->width + nb_col;
     int nb_scan_idx = txb_info->scan_order->iscan[nb_coeff_idx];
     if (nb_scan_idx < txb_info->eob && nb_row >= 0 && nb_col >= 0 &&
-        nb_row < txb_info->height && nb_col < txb_info->stride) {
+        nb_row < txb_info->height && nb_col < txb_info->width) {
       tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
       int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_costs);
       if (cost_map)
@@ -1500,10 +1506,10 @@
   for (int i = 0; i < ALL_REF_OFFSET_NUM; ++i) {
     int nb_row = row - all_ref_offset[i][0];
     int nb_col = col - all_ref_offset[i][1];
-    int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    int nb_coeff_idx = nb_row * txb_info->width + nb_col;
     int nb_scan_idx = txb_info->scan_order->iscan[nb_coeff_idx];
     if (nb_scan_idx < txb_info->eob && nb_row >= 0 && nb_col >= 0 &&
-        nb_row < txb_info->height && nb_col < txb_info->stride) {
+        nb_row < txb_info->height && nb_col < txb_info->width) {
       tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
       int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_costs);
       if (cost_map)
@@ -1658,14 +1664,17 @@
   tran_low_t tmp_qcoeff[MAX_TX_SQUARE];
   tran_low_t tmp_dqcoeff[MAX_TX_SQUARE];
   uint8_t tmp_levels_buf[TX_PAD_2D];
-  uint8_t *const tmp_levels = set_levels(tmp_levels_buf, txb_info->stride);
+  uint8_t *const tmp_levels = set_levels(tmp_levels_buf, txb_info->width);
   const int org_eob = txb_info->eob;
   if (dry_run) {
-    const int stride = txb_info->stride + TX_PAD_HOR;
+    const int stride = txb_info->width + TX_PAD_HOR;
+    const int levels_size =
+
+        (stride * (txb_info->height + TX_PAD_VER) + TX_PAD_END);
     memcpy(tmp_qcoeff, org_qcoeff, sizeof(org_qcoeff[0]) * max_eob);
     memcpy(tmp_dqcoeff, org_dqcoeff, sizeof(org_dqcoeff[0]) * max_eob);
     memcpy(tmp_levels, org_levels - TX_PAD_TOP * stride,
-           sizeof(org_levels[0]) * stride * (txb_info->height + TX_PAD_VER));
+           sizeof(org_levels[0]) * levels_size);
     txb_info->qcoeff = tmp_qcoeff;
     txb_info->dqcoeff = tmp_dqcoeff;
     txb_info->levels = tmp_levels;
@@ -1800,14 +1809,17 @@
   tran_low_t tmp_qcoeff[MAX_TX_SQUARE];
   tran_low_t tmp_dqcoeff[MAX_TX_SQUARE];
   uint8_t tmp_levels_buf[TX_PAD_2D];
-  uint8_t *const tmp_levels = set_levels(tmp_levels_buf, txb_info->stride);
+  uint8_t *const tmp_levels = set_levels(tmp_levels_buf, txb_info->width);
   const int org_eob = txb_info->eob;
   if (dry_run) {
-    const int stride = txb_info->stride + TX_PAD_HOR;
+    const int stride = txb_info->width + TX_PAD_HOR;
+    const int levels_size =
+
+        (stride * (txb_info->height + TX_PAD_VER) + TX_PAD_END);
     memcpy(tmp_qcoeff, org_qcoeff, sizeof(org_qcoeff[0]) * max_eob);
     memcpy(tmp_dqcoeff, org_dqcoeff, sizeof(org_dqcoeff[0]) * max_eob);
     memcpy(tmp_levels, org_levels - TX_PAD_TOP * stride,
-           sizeof(org_levels[0]) * stride * (txb_info->height + TX_PAD_VER));
+           sizeof(org_levels[0]) * levels_size);
     txb_info->qcoeff = tmp_qcoeff;
     txb_info->dqcoeff = tmp_dqcoeff;
     txb_info->levels = tmp_levels;
@@ -1910,7 +1922,6 @@
   const int16_t *dequant = p->dequant_QTX;
   const int seg_eob = tx_size_2d[tx_size];
   const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
-  const int stride = 1 << bwl;
   const int width = tx_size_wide[tx_size];
   const int height = tx_size_high[tx_size];
   const int is_inter = is_inter_block(mbmi);
@@ -1923,13 +1934,14 @@
   uint8_t levels_buf[TX_PAD_2D];
   uint8_t *const levels = set_levels(levels_buf, width);
 
+  assert(width == (1 << bwl));
   TxbInfo txb_info = {
     qcoeff,  levels,  dqcoeff,    tcoeff,  dequant, shift,
-    tx_size, txs_ctx, tx_type,    bwl,     stride,  height,
+    tx_size, txs_ctx, tx_type,    bwl,     width,   height,
     eob,     seg_eob, scan_order, txb_ctx, rdmult,  &cm->coeff_ctx_table
   };
 
-  av1_txb_init_levels(qcoeff, width, tx_size_2d[tx_size], levels);
+  av1_txb_init_levels(qcoeff, width, height, levels);
 
   const int update = optimize_txb(&txb_info, &txb_costs, NULL, 0, fast_mode);
 
@@ -2028,7 +2040,7 @@
     return;
   }
 
-  av1_txb_init_levels(tcoeff, width, tx_size_2d[tx_size], levels);
+  av1_txb_init_levels(tcoeff, width, height, levels);
 
 #if CONFIG_TXK_SEL
   av1_update_tx_type_count(cm, xd, blk_row, blk_col, block, plane,