Inter and intra LGTs

Here we have an LGT to replace ADST for intra residual blocks, and
another LGT to replace ADST for inter residual blocks. The changes
are only applied to transform length 4 and 8, and only for the
lowbitdepth path.

lowres: -0.18%

Change-Id: Iadc1e02b53e3756b44f74ca648cfa8b0e8ca7af4
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index b96cae1..9f90714 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -727,33 +727,7 @@
 
 #ifndef AV1_DCT_GTEST
 
-#if CONFIG_LGT
-static void flgt4(const tran_low_t *input, tran_low_t *output) {
-  if (!(input[0] | input[1] | input[2] | input[3])) {
-    output[0] = output[1] = output[2] = output[3] = 0;
-    return;
-  }
-
-  tran_high_t s[4] = { 0 };
-  for (int i = 0; i < 4; ++i)
-    for (int j = 0; j < 4; ++j) s[j] += lgtbasis4[j][i] * input[i];
-
-  for (int i = 0; i < 4; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]);
-}
-
-static void flgt8(const tran_low_t *input, tran_low_t *output) {
-  tran_high_t s[8] = { 0 };
-  for (int i = 0; i < 8; ++i)
-    for (int j = 0; j < 8; ++j) s[j] += lgtbasis8[j][i] * input[i];
-
-  for (int i = 0; i < 8; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]);
-}
-#endif  // CONFIG_LGT
-
 static void fadst4(const tran_low_t *input, tran_low_t *output) {
-#if CONFIG_LGT
-  flgt4(input, output);
-#else
   tran_high_t x0, x1, x2, x3;
   tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
 
@@ -791,13 +765,9 @@
   output[1] = (tran_low_t)fdct_round_shift(s1);
   output[2] = (tran_low_t)fdct_round_shift(s2);
   output[3] = (tran_low_t)fdct_round_shift(s3);
-#endif  // CONFIG_LGT
 }
 
 static void fadst8(const tran_low_t *input, tran_low_t *output) {
-#if CONFIG_LGT
-  flgt8(input, output);
-#else
   tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
 
   tran_high_t x0 = input[7];
@@ -866,7 +836,6 @@
   output[5] = (tran_low_t)-x7;
   output[6] = (tran_low_t)x5;
   output[7] = (tran_low_t)-x1;
-#endif  // CONFIG_LGT
 }
 
 static void fadst16(const tran_low_t *input, tran_low_t *output) {
@@ -1052,6 +1021,56 @@
   // Note overall scaling factor is 4 times orthogonal
 }
 
+#if CONFIG_LGT
+static void flgt4(const tran_low_t *input, tran_low_t *output,
+                  const tran_high_t *lgtmtx) {
+  if (!(input[0] | input[1] | input[2] | input[3])) {
+    output[0] = output[1] = output[2] = output[3] = 0;
+    return;
+  }
+
+  // evaluate s[j] = sum over i=0,...,3 of lgtmtx[j*4+i]*input[i]
+  tran_high_t s[4] = { 0 };
+  for (int i = 0; i < 4; ++i)
+    for (int j = 0; j < 4; ++j) s[j] += lgtmtx[j * 4 + i] * input[i];
+
+  for (int i = 0; i < 4; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]);
+}
+
+static void flgt8(const tran_low_t *input, tran_low_t *output,
+                  const tran_high_t *lgtmtx) {
+  // evaluate s[j] = sum over i=0,...,7 of lgtmtx[j*8+i]*input[i]
+  tran_high_t s[8] = { 0 };
+  for (int i = 0; i < 8; ++i)
+    for (int j = 0; j < 8; ++j) s[j] += lgtmtx[j * 8 + i] * input[i];
+
+  for (int i = 0; i < 8; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]);
+}
+
+// The get_fwd_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
+int get_fwd_lgt4(transform_1d tx_orig, FWD_TXFM_PARAM *fwd_txfm_param,
+                 const tran_high_t *lgtmtx[], int ntx) {
+  // inter/intra split
+  if (tx_orig == &fadst4) {
+    for (int i = 0; i < ntx; ++i)
+      lgtmtx[i] = fwd_txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
+    return 1;
+  }
+  return 0;
+}
+
+int get_fwd_lgt8(transform_1d tx_orig, FWD_TXFM_PARAM *fwd_txfm_param,
+                 const tran_high_t *lgtmtx[], int ntx) {
+  // inter/intra split
+  if (tx_orig == &fadst8) {
+    for (int i = 0; i < ntx; ++i)
+      lgtmtx[i] = fwd_txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
+    return 1;
+  }
+  return 0;
+}
+#endif  // CONFIG_LGT
+
 #if CONFIG_EXT_TX
 // TODO(sarahparker) these functions will be removed once the highbitdepth
 // codepath works properly for rectangular transforms. They have almost
@@ -1198,7 +1217,7 @@
       { fidtx4, fadst4 },  // H_ADST
       { fadst4, fidtx4 },  // V_FLIPADST
       { fidtx4, fadst4 },  // H_FLIPADST
-#endif                     // CONFIG_EXT_TX
+#endif
     };
     const transform_2d ht = FHT[tx_type];
     tran_low_t out[4 * 4];
@@ -1210,18 +1229,37 @@
     maybe_flip_input(&input, &stride, 4, 4, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+    // Choose LGT adaptive to the prediction. We may apply different LGTs for
+    // different rows/columns, indicated by the pointers to 2D arrays
+    const tran_high_t *lgtmtx_col[4];
+    const tran_high_t *lgtmtx_row[4];
+    int use_lgt_col = get_fwd_lgt4(ht.cols, fwd_txfm_param, lgtmtx_col, 4);
+    int use_lgt_row = get_fwd_lgt4(ht.rows, fwd_txfm_param, lgtmtx_row, 4);
+#endif
+
     // Columns
     for (i = 0; i < 4; ++i) {
       for (j = 0; j < 4; ++j) temp_in[j] = input[j * stride + i] * 16;
       if (i == 0 && temp_in[0]) temp_in[0] += 1;
-      ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+      if (use_lgt_col)
+        flgt4(temp_in, temp_out, lgtmtx_col[i]);
+      else
+#endif
+        ht.cols(temp_in, temp_out);
       for (j = 0; j < 4; ++j) out[j * 4 + i] = temp_out[j];
     }
 
     // Rows
     for (i = 0; i < 4; ++i) {
       for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4];
-      ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+      if (use_lgt_row)
+        flgt4(temp_in, temp_out, lgtmtx_row[i]);
+      else
+#endif
+        ht.rows(temp_in, temp_out);
       for (j = 0; j < 4; ++j) output[j + i * 4] = (temp_out[j] + 1) >> 2;
     }
   }
@@ -1261,19 +1299,36 @@
   maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+  const tran_high_t *lgtmtx_col[4];
+  const tran_high_t *lgtmtx_row[8];
+  int use_lgt_col = get_fwd_lgt8(ht.cols, fwd_txfm_param, lgtmtx_col, 4);
+  int use_lgt_row = get_fwd_lgt4(ht.rows, fwd_txfm_param, lgtmtx_row, 8);
+#endif
+
   // Rows
   for (i = 0; i < n2; ++i) {
     for (j = 0; j < n; ++j)
       temp_in[j] =
           (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
-    ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_row)
+      flgt4(temp_in, temp_out, lgtmtx_row[i]);
+    else
+#endif
+      ht.rows(temp_in, temp_out);
     for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
   }
 
   // Columns
   for (i = 0; i < n; ++i) {
     for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
-    ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_col)
+      flgt8(temp_in, temp_out, lgtmtx_col[i]);
+    else
+#endif
+      ht.cols(temp_in, temp_out);
     for (j = 0; j < n2; ++j)
       output[i + j * n] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
   }
@@ -1314,19 +1369,36 @@
   maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+  const tran_high_t *lgtmtx_col[8];
+  const tran_high_t *lgtmtx_row[4];
+  int use_lgt_col = get_fwd_lgt4(ht.cols, fwd_txfm_param, lgtmtx_col, 8);
+  int use_lgt_row = get_fwd_lgt8(ht.rows, fwd_txfm_param, lgtmtx_row, 4);
+#endif
+
   // Columns
   for (i = 0; i < n2; ++i) {
     for (j = 0; j < n; ++j)
       temp_in[j] =
           (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
-    ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_col)
+      flgt4(temp_in, temp_out, lgtmtx_col[i]);
+    else
+#endif
+      ht.cols(temp_in, temp_out);
     for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
   }
 
   // Rows
   for (i = 0; i < n; ++i) {
     for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
-    ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_row)
+      flgt8(temp_in, temp_out, lgtmtx_row[i]);
+    else
+#endif
+      ht.rows(temp_in, temp_out);
     for (j = 0; j < n2; ++j)
       output[j + i * n2] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
   }
@@ -1367,10 +1439,20 @@
   maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+  const tran_high_t *lgtmtx_row[16];
+  int use_lgt_row = get_fwd_lgt4(ht.rows, fwd_txfm_param, lgtmtx_row, 16);
+#endif
+
   // Rows
   for (i = 0; i < n4; ++i) {
     for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
-    ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_row)
+      flgt4(temp_in, temp_out, lgtmtx_row[i]);
+    else
+#endif
+      ht.rows(temp_in, temp_out);
     for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
   }
 
@@ -1418,10 +1500,20 @@
   maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+  const tran_high_t *lgtmtx_col[16];
+  int use_lgt_col = get_fwd_lgt4(ht.cols, fwd_txfm_param, lgtmtx_col, 16);
+#endif
+
   // Columns
   for (i = 0; i < n4; ++i) {
     for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
-    ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_col)
+      flgt4(temp_in, temp_out, lgtmtx_col[i]);
+    else
+#endif
+      ht.cols(temp_in, temp_out);
     for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
   }
 
@@ -1469,12 +1561,22 @@
   maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+  const tran_high_t *lgtmtx_row[16];
+  int use_lgt_row = get_fwd_lgt8(ht.rows, fwd_txfm_param, lgtmtx_row, 16);
+#endif
+
   // Rows
   for (i = 0; i < n2; ++i) {
     for (j = 0; j < n; ++j)
       temp_in[j] =
           (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
-    ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_row)
+      flgt8(temp_in, temp_out, lgtmtx_row[i]);
+    else
+#endif
+      ht.rows(temp_in, temp_out);
     for (j = 0; j < n; ++j)
       out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
   }
@@ -1522,12 +1624,22 @@
   maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+  const tran_high_t *lgtmtx_col[16];
+  int use_lgt_col = get_fwd_lgt8(ht.cols, fwd_txfm_param, lgtmtx_col, 16);
+#endif
+
   // Columns
   for (i = 0; i < n2; ++i) {
     for (j = 0; j < n; ++j)
       temp_in[j] =
           (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
-    ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_col)
+      flgt8(temp_in, temp_out, lgtmtx_col[i]);
+    else
+#endif
+      ht.cols(temp_in, temp_out);
     for (j = 0; j < n; ++j)
       out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
   }
@@ -1575,10 +1687,20 @@
   maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+  const tran_high_t *lgtmtx_row[32];
+  int use_lgt_row = get_fwd_lgt8(ht.rows, fwd_txfm_param, lgtmtx_row, 32);
+#endif
+
   // Rows
   for (i = 0; i < n4; ++i) {
     for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
-    ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_row)
+      flgt8(temp_in, temp_out, lgtmtx_row[i]);
+    else
+#endif
+      ht.rows(temp_in, temp_out);
     for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
   }
 
@@ -1626,10 +1748,20 @@
   maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+  const tran_high_t *lgtmtx_col[32];
+  int use_lgt_col = get_fwd_lgt8(ht.cols, fwd_txfm_param, lgtmtx_col, 32);
+#endif
+
   // Columns
   for (i = 0; i < n4; ++i) {
     for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
-    ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+    if (use_lgt_col)
+      flgt8(temp_in, temp_out, lgtmtx_col[i]);
+    else
+#endif
+      ht.cols(temp_in, temp_out);
     for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
   }
 
@@ -1898,7 +2030,7 @@
       { fidtx8, fadst8 },  // H_ADST
       { fadst8, fidtx8 },  // V_FLIPADST
       { fidtx8, fadst8 },  // H_FLIPADST
-#endif                     // CONFIG_EXT_TX
+#endif
     };
     const transform_2d ht = FHT[tx_type];
     tran_low_t out[64];
@@ -1910,17 +2042,34 @@
     maybe_flip_input(&input, &stride, 8, 8, flipped_input, tx_type);
 #endif
 
+#if CONFIG_LGT
+    const tran_high_t *lgtmtx_col[8];
+    const tran_high_t *lgtmtx_row[8];
+    int use_lgt_col = get_fwd_lgt8(ht.cols, fwd_txfm_param, lgtmtx_col, 8);
+    int use_lgt_row = get_fwd_lgt8(ht.rows, fwd_txfm_param, lgtmtx_row, 8);
+#endif
+
     // Columns
     for (i = 0; i < 8; ++i) {
       for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 4;
-      ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+      if (use_lgt_col)
+        flgt8(temp_in, temp_out, lgtmtx_col[i]);
+      else
+#endif
+        ht.cols(temp_in, temp_out);
       for (j = 0; j < 8; ++j) out[j * 8 + i] = temp_out[j];
     }
 
     // Rows
     for (i = 0; i < 8; ++i) {
       for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8];
-      ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+      if (use_lgt_row)
+        flgt8(temp_in, temp_out, lgtmtx_row[i]);
+      else
+#endif
+        ht.rows(temp_in, temp_out);
       for (j = 0; j < 8; ++j)
         output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
     }
@@ -2004,9 +2153,8 @@
     { fidtx16, fadst16 },  // H_ADST
     { fadst16, fidtx16 },  // V_FLIPADST
     { fidtx16, fadst16 },  // H_FLIPADST
-#endif                     // CONFIG_EXT_TX
+#endif
   };
-
   const transform_2d ht = FHT[tx_type];
   tran_low_t out[256];
   int i, j;
@@ -2158,6 +2306,7 @@
   int16_t flipped_input[64 * 64];
   maybe_flip_input(&input, &stride, 64, 64, flipped_input, tx_type);
 #endif
+
   // Columns
   for (i = 0; i < 64; ++i) {
     for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i];
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index f16a074..ddf8843 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -534,14 +534,16 @@
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ || CONFIG_DAALA_DIST || CONFIG_LGT
   uint8_t *dst;
-  int16_t *pred;
   const int dst_stride = pd->dst.stride;
+#if CONFIG_PVQ || CONFIG_DAALA_DIST
+  int16_t *pred;
   const int txw = tx_size_wide[tx_size];
   const int txh = tx_size_high[tx_size];
   int i, j;
 #endif
+#endif
 
 #if !CONFIG_PVQ
   const int tx2d_size = tx_size_2d[tx_size];
@@ -595,8 +597,9 @@
 #endif  // CONFIG_HIGHBITDEPTH
 #endif
 
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ || CONFIG_DAALA_DIST || CONFIG_LGT
   dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+#if CONFIG_PVQ || CONFIG_DAALA_DIST
   pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
 
 // copy uint8 orig and predicted block to int16 buffer
@@ -615,13 +618,20 @@
 #if CONFIG_HIGHBITDEPTH
   }
 #endif  // CONFIG_HIGHBITDEPTH
-#endif
+#endif  // CONFIG_PVQ || CONFIG_DAALA_DIST
+#endif  // CONFIG_PVQ || CONFIG_DAALA_DIST || CONFIG_LGT
 
   (void)ctx;
 
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
   fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];
+#if CONFIG_LGT
+  fwd_txfm_param.is_inter = is_inter_block(mbmi);
+  fwd_txfm_param.dst = dst;
+  fwd_txfm_param.stride = dst_stride;
+  fwd_txfm_param.mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
+#endif
 
 #if !CONFIG_PVQ
   fwd_txfm_param.bd = xd->bd;
@@ -743,8 +753,14 @@
   if (x->pvq_skip[plane]) return;
 #endif
   TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+#if CONFIG_LGT
+  PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
+  av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, dst,
+                              pd->dst.stride, p->eobs[block]);
+#else
   av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst,
                               pd->dst.stride, p->eobs[block]);
+#endif
 }
 
 #if CONFIG_VAR_TX
@@ -1362,11 +1378,11 @@
   av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
 #endif
 
-#if CONFIG_DPCM_INTRA
-  const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
-  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_DPCM_INTRA || CONFIG_LGT
   const PREDICTION_MODE mode =
-      (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
+      get_prediction_mode(xd->mi[0], plane, tx_size, block);
+#if CONFIG_DPCM_INTRA
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
     av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
                                 plane_bsize, tx_size, tx_type, args->ta,
@@ -1374,6 +1390,7 @@
     return;
   }
 #endif  // CONFIG_DPCM_INTRA
+#endif  // CONFIG_DPCM_INTRA || CONFIG_LGT
 
   av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
 
@@ -1395,8 +1412,11 @@
 
   if (x->pvq_skip[plane]) return;
 #endif  // CONFIG_PVQ
-  av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, dst_stride,
-                              *eob);
+  av1_inverse_transform_block(xd, dqcoeff,
+#if CONFIG_LGT
+                              mode,
+#endif
+                              tx_type, tx_size, dst, dst_stride, *eob);
 #if !CONFIG_PVQ
   if (*eob) *(args->skip) = 0;
 #else
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index f644021..0c74676 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1694,8 +1694,11 @@
         const PLANE_TYPE plane_type = get_plane_type(plane);
         TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
 
-        av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon,
-                                    MAX_TX_SIZE, eob);
+        av1_inverse_transform_block(xd, dqcoeff,
+#if CONFIG_LGT
+                                    xd->mi[0]->mbmi.mode,
+#endif
+                                    tx_type, tx_size, recon, MAX_TX_SIZE, eob);
 
 #if CONFIG_DAALA_DIST
         if (plane == 0 && (bsw < 8 || bsh < 8)) {
@@ -3102,6 +3105,9 @@
             if (!skip)
 #endif
               av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+                                          mode,
+#endif
                                           DCT_DCT, tx_size, dst, dst_stride,
                                           p->eobs[block]);
           } else {
@@ -3151,6 +3157,9 @@
             if (!skip)
 #endif
               av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+                                          mode,
+#endif
                                           tx_type, tx_size, dst, dst_stride,
                                           p->eobs[block]);
             cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
@@ -3334,6 +3343,9 @@
           if (!skip)
 #endif  // CONFIG_PVQ
             av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+                                        mode,
+#endif
                                         tx_type, tx_size, dst, dst_stride,
                                         p->eobs[block]);
           unsigned int tmp;
@@ -3349,6 +3361,9 @@
           if (!skip)
 #endif  // CONFIG_PVQ
             av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+                                        mode,
+#endif
                                         DCT_DCT, tx_size, dst, dst_stride,
                                         p->eobs[block]);
         }
@@ -4254,8 +4269,14 @@
   rd_stats->sse += tmp * 16;
   const int eob = p->eobs[block];
 
+#if CONFIG_LGT
+  PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
+  av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, rec_buffer,
+                              MAX_TX_SIZE, eob);
+#else
   av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer,
                               MAX_TX_SIZE, eob);
+#endif
   if (eob > 0) {
 #if CONFIG_DAALA_DIST
     if (plane == 0 && (bw < 8 && bh < 8)) {