DPCM intra coding experiment

Encode a block line by line, either horizontally or vertically. In the
vertical mode, each row is predicted by the reconstructed row above;
in the horizontal mode, each column is predicted by the reconstructed
column to its left.

The DPCM modes are enabled automatically for blocks that use a
horizontal or vertical prediction mode together with a 1D transform
type (ext-tx).
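
For reference, the vertical mode reconstruction reduces to the loop
below (a simplified low-bit-depth sketch mirroring the new
process_block_dpcm_vert() in the decoder; the variable names are
illustrative only):

  // Reconstruct a tx block row by row. Row 0 keeps the intra predictor
  // already in dst; each later row copies the reconstructed row above
  // as its predictor, then adds the inverse-transformed residual row.
  for (int r = 0; r < tx1d_height; ++r) {
    if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
    inverse_tx(dqcoeff, 1, tx_type_1d, dst);  // 1D inverse txfm + add
    dqcoeff += tx1d_width;
    dst += dst_stride;
  }
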
Change-Id: I133ab6b537fa24a6e314ee1ef1d2fe9bd9d56c13
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 1498adc..c387fcb 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -399,6 +399,13 @@
add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
+if (aom_config("CONFIG_DPCM_INTRA") eq "yes") {
+ @sizes = (4, 8, 16, 32);
+ foreach $size (@sizes) {
+ add_proto "void", "av1_dpcm_ft$size", "const int16_t *input, int stride, TX_TYPE_1D tx_type, tran_low_t *output";
+ }
+}
+
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
#fwd txfm
add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 6cf99c9..d12c3be 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -2917,3 +2917,157 @@
}
}
#endif // CONFIG_HIGHBITDEPTH
+
+#if CONFIG_DPCM_INTRA
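+// DPCM inverse transforms: each function applies a 1D inverse transform of
+// the given length to one row or column of coefficients and adds the result
+// to the predictor already in dest. The stride argument is the distance
+// between successive output pixels, so the same function serves both rows
+// (stride == 1) and columns (stride == dst_stride).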
+void av1_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest) {
+ assert(tx_type < TX_TYPES_1D);
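+ // Indexed by TX_TYPE_1D; FLIPADST_1D reuses the regular iadst kernel.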
+ static const transform_1d IHT[] = { aom_idct4_c, aom_iadst4_c, aom_iadst4_c,
+ iidtx4_c };
+ const transform_1d inv_tx = IHT[tx_type];
+ tran_low_t out[4];
+ inv_tx(input, out);
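+ // Lengths 4 and 16 apply an extra sqrt(2) rescale to balance the scaling
+ // on the forward side (see av1_dpcm_ft4_c / av1_dpcm_ft16_c in dct.c).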
+ for (int i = 0; i < 4; ++i) {
+ out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
+ dest[i * stride] =
+ clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
+ }
+}
+
+void av1_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest) {
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d IHT[] = { aom_idct8_c, aom_iadst8_c, aom_iadst8_c,
+ iidtx8_c };
+ const transform_1d inv_tx = IHT[tx_type];
+ tran_low_t out[8];
+ inv_tx(input, out);
+ for (int i = 0; i < 8; ++i) {
+ dest[i * stride] =
+ clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
+ }
+}
+
+void av1_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest) {
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d IHT[] = { aom_idct16_c, aom_iadst16_c,
+ aom_iadst16_c, iidtx16_c };
+ const transform_1d inv_tx = IHT[tx_type];
+ tran_low_t out[16];
+ inv_tx(input, out);
+ for (int i = 0; i < 16; ++i) {
+ out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
+ dest[i * stride] =
+ clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 5));
+ }
+}
+
+void av1_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest) {
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d IHT[] = { aom_idct32_c, ihalfright32_c,
+ ihalfright32_c, iidtx32_c };
+ const transform_1d inv_tx = IHT[tx_type];
+ tran_low_t out[32];
+ inv_tx(input, out);
+ for (int i = 0; i < 32; ++i) {
+ dest[i * stride] =
+ clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
+ }
+}
+
+dpcm_inv_txfm_add_func av1_get_dpcm_inv_txfm_add_func(int tx_length) {
+ switch (tx_length) {
+ case 4: return av1_dpcm_inv_txfm_add_4_c;
+ case 8: return av1_dpcm_inv_txfm_add_8_c;
+ case 16: return av1_dpcm_inv_txfm_add_16_c;
+ case 32:
+ return av1_dpcm_inv_txfm_add_32_c;
+ // TODO(huisu): add support for TX_64X64.
+ default: assert(0); return NULL;
+ }
+}
+
+#if CONFIG_HIGHBITDEPTH
+void av1_hbd_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd, uint16_t *dest) {
+ assert(tx_type < TX_TYPES_1D);
+ static const highbd_transform_1d IHT[] = { aom_highbd_idct4_c,
+ aom_highbd_iadst4_c,
+ aom_highbd_iadst4_c,
+ highbd_iidtx4_c };
+ const highbd_transform_1d inv_tx = IHT[tx_type];
+ tran_low_t out[4];
+ inv_tx(input, out, bd);
+ for (int i = 0; i < 4; ++i) {
+ out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
+ dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
+ ROUND_POWER_OF_TWO(out[i], 4), bd);
+ }
+}
+
+void av1_hbd_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd, uint16_t *dest) {
+ assert(tx_type < TX_TYPES_1D);
+ static const highbd_transform_1d IHT[] = { aom_highbd_idct8_c,
+ aom_highbd_iadst8_c,
+ aom_highbd_iadst8_c,
+ highbd_iidtx8_c };
+ const highbd_transform_1d inv_tx = IHT[tx_type];
+ tran_low_t out[8];
+ inv_tx(input, out, bd);
+ for (int i = 0; i < 8; ++i) {
+ dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
+ ROUND_POWER_OF_TWO(out[i], 4), bd);
+ }
+}
+
+void av1_hbd_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd,
+ uint16_t *dest) {
+ assert(tx_type < TX_TYPES_1D);
+ static const highbd_transform_1d IHT[] = { aom_highbd_idct16_c,
+ aom_highbd_iadst16_c,
+ aom_highbd_iadst16_c,
+ highbd_iidtx16_c };
+ const highbd_transform_1d inv_tx = IHT[tx_type];
+ tran_low_t out[16];
+ inv_tx(input, out, bd);
+ for (int i = 0; i < 16; ++i) {
+ out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
+ dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
+ ROUND_POWER_OF_TWO(out[i], 5), bd);
+ }
+}
+
+void av1_hbd_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd,
+ uint16_t *dest) {
+ assert(tx_type < TX_TYPES_1D);
+ static const highbd_transform_1d IHT[] = { aom_highbd_idct32_c,
+ highbd_ihalfright32_c,
+ highbd_ihalfright32_c,
+ highbd_iidtx32_c };
+ const highbd_transform_1d inv_tx = IHT[tx_type];
+ tran_low_t out[32];
+ inv_tx(input, out, bd);
+ for (int i = 0; i < 32; ++i) {
+ dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
+ ROUND_POWER_OF_TWO(out[i], 4), bd);
+ }
+}
+
+hbd_dpcm_inv_txfm_add_func av1_get_hbd_dpcm_inv_txfm_add_func(int tx_length) {
+ switch (tx_length) {
+ case 4: return av1_hbd_dpcm_inv_txfm_add_4_c;
+ case 8: return av1_hbd_dpcm_inv_txfm_add_8_c;
+ case 16: return av1_hbd_dpcm_inv_txfm_add_16_c;
+ case 32:
+ return av1_hbd_dpcm_inv_txfm_add_32_c;
+ // TODO(huisu): add support for TX_64X64.
+ default: assert(0); return NULL;
+ }
+}
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_DPCM_INTRA
diff --git a/av1/common/idct.h b/av1/common/idct.h
index 29b51df..cf656dc 100644
--- a/av1/common/idct.h
+++ b/av1/common/idct.h
@@ -84,6 +84,33 @@
void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
INV_TXFM_PARAM *inv_txfm_param);
#endif // CONFIG_HIGHBITDEPTH
+#if CONFIG_DPCM_INTRA
+void av1_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest);
+void av1_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest);
+void av1_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest);
+void av1_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest);
+typedef void (*dpcm_inv_txfm_add_func)(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, uint8_t *dest);
+dpcm_inv_txfm_add_func av1_get_dpcm_inv_txfm_add_func(int tx_length);
+#if CONFIG_HIGHBITDEPTH
+void av1_hbd_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd, uint16_t *dest);
+void av1_hbd_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd, uint16_t *dest);
+void av1_hbd_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd, uint16_t *dest);
+void av1_hbd_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd, uint16_t *dest);
+typedef void (*hbd_dpcm_inv_txfm_add_func)(const tran_low_t *input, int stride,
+ TX_TYPE_1D tx_type, int bd,
+ uint16_t *dest);
+hbd_dpcm_inv_txfm_add_func av1_get_hbd_dpcm_inv_txfm_add_func(int tx_length);
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_DPCM_INTRA
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
index 5c21e99..fbcb7f9 100644
--- a/av1/common/reconintra.h
+++ b/av1/common/reconintra.h
@@ -19,6 +19,20 @@
extern "C" {
#endif
+#if CONFIG_DPCM_INTRA
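+// DPCM applies only to plain vertical/horizontal intra prediction (no angle
+// delta) paired with an identity transform or a 1D DCT along the predicted
+// lines: V_PRED with IDTX or H_DCT, H_PRED with IDTX or V_DCT.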
+static INLINE int av1_use_dpcm_intra(int plane, PREDICTION_MODE mode,
+ TX_TYPE tx_type,
+ const MB_MODE_INFO *const mbmi) {
+ (void)mbmi;
+ (void)plane;
+#if CONFIG_EXT_INTRA
+ if (mbmi->sb_type >= BLOCK_8X8 && mbmi->angle_delta[plane != 0]) return 0;
+#endif // CONFIG_EXT_INTRA
+ return (mode == V_PRED && (tx_type == IDTX || tx_type == H_DCT)) ||
+ (mode == H_PRED && (tx_type == IDTX || tx_type == V_DCT));
+}
+#endif // CONFIG_DPCM_INTRA
+
void av1_init_intra_predictors(void);
void av1_predict_intra_block_facade(MACROBLOCKD *xd, int plane, int block_idx,
int blk_col, int blk_row, TX_SIZE tx_size);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 85b77ad..06a8efd 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -518,6 +518,133 @@
return row * max_blocks_wide + col * txh_unit;
}
+#if CONFIG_DPCM_INTRA
+static void process_block_dpcm_vert(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
+ const tran_low_t *dqcoeff, uint8_t *dst,
+ int dst_stride) {
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int tx1d_height = tx_size_high[tx_size];
+ dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_dpcm_inv_txfm_add_func(tx1d_width);
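+ // Row 0 keeps the intra predictor already in dst; each later row is
+ // predicted by copying the reconstructed row above it.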
+ for (int r = 0; r < tx1d_height; ++r) {
+ if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
+ inverse_tx(dqcoeff, 1, tx_type_1d, dst);
+ dqcoeff += tx1d_width;
+ dst += dst_stride;
+ }
+}
+
+static void process_block_dpcm_horz(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
+ const tran_low_t *dqcoeff, uint8_t *dst,
+ int dst_stride) {
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int tx1d_height = tx_size_high[tx_size];
+ dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_dpcm_inv_txfm_add_func(tx1d_height);
+ tran_low_t tx_buff[64];
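+ // dqcoeff stores the block row by row, so gather each column into the
+ // contiguous tx_buff before applying the 1D inverse transform.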
+ for (int c = 0; c < tx1d_width; ++c, ++dqcoeff, ++dst) {
+ for (int r = 0; r < tx1d_height; ++r) {
+ if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
+ tx_buff[r] = dqcoeff[r * tx1d_width];
+ }
+ inverse_tx(tx_buff, dst_stride, tx_type_1d, dst);
+ }
+}
+
+#if CONFIG_HIGHBITDEPTH
+static void hbd_process_block_dpcm_vert(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
+ int bd, const tran_low_t *dqcoeff,
+ uint8_t *dst8, int dst_stride) {
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int tx1d_height = tx_size_high[tx_size];
+ hbd_dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_width);
+ for (int r = 0; r < tx1d_height; ++r) {
+ if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
+ inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst);
+ dqcoeff += tx1d_width;
+ dst += dst_stride;
+ }
+}
+
+static void hbd_process_block_dpcm_horz(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
+ int bd, const tran_low_t *dqcoeff,
+ uint8_t *dst8, int dst_stride) {
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int tx1d_height = tx_size_high[tx_size];
+ hbd_dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_height);
+ tran_low_t tx_buff[64];
+ for (int c = 0; c < tx1d_width; ++c, ++dqcoeff, ++dst) {
+ for (int r = 0; r < tx1d_height; ++r) {
+ if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
+ tx_buff[r] = dqcoeff[r * tx1d_width];
+ }
+ inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst);
+ }
+}
+#endif // CONFIG_HIGHBITDEPTH
+
+static void inverse_transform_block_dpcm(MACROBLOCKD *xd, int plane,
+ PREDICTION_MODE mode, TX_SIZE tx_size,
+ TX_TYPE tx_type, uint8_t *dst,
+ int dst_stride, int16_t scan_line) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ TX_TYPE_1D tx_type_1d = DCT_1D;
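+ // Map the 2D tx_type to the 1D transform applied along each predicted
+ // line; the orthogonal direction is handled by the DPCM prediction.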
+ switch (tx_type) {
+ case IDTX: tx_type_1d = IDTX_1D; break;
+ case V_DCT:
+ assert(mode == H_PRED);
+ tx_type_1d = DCT_1D;
+ break;
+ case H_DCT:
+ assert(mode == V_PRED);
+ tx_type_1d = DCT_1D;
+ break;
+ default: assert(0);
+ }
+ switch (mode) {
+ case V_PRED:
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ hbd_process_block_dpcm_vert(tx_size, tx_type_1d, xd->bd, dqcoeff, dst,
+ dst_stride);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ process_block_dpcm_vert(tx_size, tx_type_1d, dqcoeff, dst, dst_stride);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ break;
+ case H_PRED:
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ hbd_process_block_dpcm_horz(tx_size, tx_type_1d, xd->bd, dqcoeff, dst,
+ dst_stride);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ process_block_dpcm_horz(tx_size, tx_type_1d, dqcoeff, dst, dst_stride);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ break;
+ default: assert(0);
+ }
+ memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
+}
+#endif // CONFIG_DPCM_INTRA
+
static void predict_and_reconstruct_intra_block(
AV1_COMMON *cm, MACROBLOCKD *const xd, aom_reader *const r,
MB_MODE_INFO *const mbmi, int plane, int row, int col, TX_SIZE tx_size) {
@@ -549,8 +676,22 @@
if (eob) {
uint8_t *dst =
&pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
- inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
- max_scan_line, eob);
+#if CONFIG_DPCM_INTRA
+ const int block_raster_idx =
+ av1_block_index_to_raster_order(tx_size, block_idx);
+ const PREDICTION_MODE mode = (plane == 0)
+ ? get_y_mode(xd->mi[0], block_raster_idx)
+ : mbmi->uv_mode;
+ if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
+ inverse_transform_block_dpcm(xd, plane, mode, tx_size, tx_type, dst,
+ pd->dst.stride, max_scan_line);
+ } else {
+#endif // CONFIG_DPCM_INTRA
+ inverse_transform_block(xd, plane, tx_type, tx_size, dst,
+ pd->dst.stride, max_scan_line, eob);
+#if CONFIG_DPCM_INTRA
+ }
+#endif // CONFIG_DPCM_INTRA
}
#else
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 049cc2b..f6b64f0 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -2227,4 +2227,49 @@
}
#endif // CONFIG_TX64X64
#endif // CONFIG_HIGHBITDEPTH
+
+#if CONFIG_DPCM_INTRA
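+// Forward 1D transforms for DPCM: each call transforms a single row or
+// column of residuals. The per-length input scaling below is balanced by
+// the rescaling in the matching inverse functions in av1/common/idct.c.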
+void av1_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output) {
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[4];
+ for (int i = 0; i < 4; ++i)
+ temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2);
+ ft(temp_in, output);
+}
+
+void av1_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output) {
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[8];
+ for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4;
+ ft(temp_in, output);
+}
+
+void av1_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output) {
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[16];
+ for (int i = 0; i < 16; ++i)
+ temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2);
+ ft(temp_in, output);
+}
+
+void av1_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output) {
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32,
+ fidtx32 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[32];
+ for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride];
+ ft(temp_in, output);
+}
+#endif // CONFIG_DPCM_INTRA
#endif // !AV1_DCT_GTEST
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 71d761d..3aa9822 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1419,6 +1419,301 @@
#endif
}
+#if CONFIG_DPCM_INTRA
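+// Returns the number of coefficients up to and including the last nonzero
+// one in scan order (0 if the block is all zero).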
+static int get_eob(const tran_low_t *qcoeff, intptr_t n_coeffs,
+ const int16_t *scan) {
+ int eob = -1;
+ for (int i = (int)n_coeffs - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ if (qcoeff[rc]) {
+ eob = i;
+ break;
+ }
+ }
+ return eob + 1;
+}
+
+static void quantize_scaler(int coeff, int16_t zbin, int16_t round_value,
+ int16_t quant, int16_t quant_shift, int16_t dequant,
+ int log_scale, tran_low_t *const qcoeff,
+ tran_low_t *const dqcoeff) {
+ zbin = ROUND_POWER_OF_TWO(zbin, log_scale);
+ round_value = ROUND_POWER_OF_TWO(round_value, log_scale);
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
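+ // Coefficients below the zero-bin are left untouched; callers pre-zero
+ // the qcoeff/dqcoeff buffers, so such positions stay zero.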
+ if (abs_coeff >= zbin) {
+ int tmp = clamp(abs_coeff + round_value, INT16_MIN, INT16_MAX);
+ tmp = ((((tmp * quant) >> 16) + tmp) * quant_shift) >> (16 - log_scale);
+ *qcoeff = (tmp ^ coeff_sign) - coeff_sign;
+ *dqcoeff = (*qcoeff * dequant) / (1 << log_scale);
+ }
+}
+
+typedef void (*dpcm_fwd_tx_func)(const int16_t *input, int stride,
+ TX_TYPE_1D tx_type, tran_low_t *output);
+
+static dpcm_fwd_tx_func get_dpcm_fwd_tx_func(int tx_length) {
+ switch (tx_length) {
+ case 4: return av1_dpcm_ft4_c;
+ case 8: return av1_dpcm_ft8_c;
+ case 16: return av1_dpcm_ft16_c;
+ case 32:
+ return av1_dpcm_ft32_c;
+ // TODO(huisu): add support for TX_64X64.
+ default: assert(0); return NULL;
+ }
+}
+
+static void process_block_dpcm_vert(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
+ struct macroblockd_plane *const pd,
+ struct macroblock_plane *const p,
+ uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int16_t *src_diff,
+ int diff_stride, tran_low_t *coeff,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff) {
+ const int tx1d_width = tx_size_wide[tx_size];
+ dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width);
+ dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_dpcm_inv_txfm_add_func(tx1d_width);
+ const int tx1d_height = tx_size_high[tx_size];
+ const int log_scale = av1_get_tx_scale(tx_size);
+ int q_idx = 0;
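+ // Only the very first coefficient of the block uses the DC quantizer
+ // (index 0); all later coefficients use the AC quantizer (index 1).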
+ for (int r = 0; r < tx1d_height; ++r) {
+ // Update prediction.
+ if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
+ // Subtraction.
+ for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c];
+ // Forward transform.
+ forward_tx(src_diff, 1, tx_type_1d, coeff);
+ // Quantization.
+ for (int c = 0; c < tx1d_width; ++c) {
+ quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx],
+ p->quant[q_idx], p->quant_shift[q_idx],
+ pd->dequant[q_idx], log_scale, &qcoeff[c], &dqcoeff[c]);
+ q_idx = 1;
+ }
+ // Inverse transform.
+ inverse_tx(dqcoeff, 1, tx_type_1d, dst);
+ // Move to the next row.
+ coeff += tx1d_width;
+ qcoeff += tx1d_width;
+ dqcoeff += tx1d_width;
+ src_diff += diff_stride;
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
+
+static void process_block_dpcm_horz(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
+ struct macroblockd_plane *const pd,
+ struct macroblock_plane *const p,
+ uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int16_t *src_diff,
+ int diff_stride, tran_low_t *coeff,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff) {
+ const int tx1d_height = tx_size_high[tx_size];
+ dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height);
+ dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_dpcm_inv_txfm_add_func(tx1d_height);
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int log_scale = av1_get_tx_scale(tx_size);
+ int q_idx = 0;
+ for (int c = 0; c < tx1d_width; ++c) {
+ for (int r = 0; r < tx1d_height; ++r) {
+ // Update prediction.
+ if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
+ // Subtraction.
+ src_diff[r * diff_stride] = src[r * src_stride] - dst[r * dst_stride];
+ }
+ // Forward transform.
+ tran_low_t tx_buff[64];
+ forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff);
+ for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r];
+ // Quantization.
+ for (int r = 0; r < tx1d_height; ++r) {
+ quantize_scaler(coeff[r * tx1d_width], p->zbin[q_idx], p->round[q_idx],
+ p->quant[q_idx], p->quant_shift[q_idx],
+ pd->dequant[q_idx], log_scale, &qcoeff[r * tx1d_width],
+ &dqcoeff[r * tx1d_width]);
+ q_idx = 1;
+ }
+ // Inverse transform.
+ for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width];
+ inverse_tx(tx_buff, dst_stride, tx_type_1d, dst);
+ // Move to the next column.
+ ++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src;
+ }
+}
+
+#if CONFIG_HIGHBITDEPTH
+static void hbd_process_block_dpcm_vert(
+ TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, int bd,
+ struct macroblockd_plane *const pd, struct macroblock_plane *const p,
+ uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride,
+ int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff,
+ tran_low_t *dqcoeff) {
+ const int tx1d_width = tx_size_wide[tx_size];
+ dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width);
+ hbd_dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_width);
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int tx1d_height = tx_size_high[tx_size];
+ const int log_scale = av1_get_tx_scale(tx_size);
+ int q_idx = 0;
+ for (int r = 0; r < tx1d_height; ++r) {
+ // Update prediction.
+ if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
+ // Subtraction.
+ for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c];
+ // Forward transform.
+ forward_tx(src_diff, 1, tx_type_1d, coeff);
+ // Quantization.
+ for (int c = 0; c < tx1d_width; ++c) {
+ quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx],
+ p->quant[q_idx], p->quant_shift[q_idx],
+ pd->dequant[q_idx], log_scale, &qcoeff[c], &dqcoeff[c]);
+ q_idx = 1;
+ }
+ // Inverse transform.
+ inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst);
+ // Move to the next row.
+ coeff += tx1d_width;
+ qcoeff += tx1d_width;
+ dqcoeff += tx1d_width;
+ src_diff += diff_stride;
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
+
+static void hbd_process_block_dpcm_horz(
+ TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, int bd,
+ struct macroblockd_plane *const pd, struct macroblock_plane *const p,
+ uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride,
+ int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff,
+ tran_low_t *dqcoeff) {
+ const int tx1d_height = tx_size_high[tx_size];
+ dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height);
+ hbd_dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_height);
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int log_scale = av1_get_tx_scale(tx_size);
+ int q_idx = 0;
+ for (int c = 0; c < tx1d_width; ++c) {
+ for (int r = 0; r < tx1d_height; ++r) {
+ // Update prediction.
+ if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
+ // Subtraction.
+ src_diff[r * diff_stride] = src[r * src_stride] - dst[r * dst_stride];
+ }
+ // Forward transform.
+ tran_low_t tx_buff[64];
+ forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff);
+ for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r];
+ // Quantization.
+ for (int r = 0; r < tx1d_height; ++r) {
+ quantize_scaler(coeff[r * tx1d_width], p->zbin[q_idx], p->round[q_idx],
+ p->quant[q_idx], p->quant_shift[q_idx],
+ pd->dequant[q_idx], log_scale, &qcoeff[r * tx1d_width],
+ &dqcoeff[r * tx1d_width]);
+ q_idx = 1;
+ }
+ // Inverse transform.
+ for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width];
+ inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst);
+ // Move to the next column.
+ ++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src;
+ }
+}
+#endif // CONFIG_HIGHBITDEPTH
+
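+// Encodes one intra tx block with DPCM: each line is predicted, transformed,
+// quantized, and reconstructed in turn, so every line is predicted from
+// fully reconstructed pixels.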
+void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
+ PREDICTION_MODE mode, int plane, int block,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ TX_TYPE tx_type, ENTROPY_CONTEXT *ta,
+ ENTROPY_CONTEXT *tl, int8_t *skip) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ const int diff_stride = block_size_wide[plane_bsize];
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int tx1d_height = tx_size_high[tx_size];
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, 0);
+ tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
+ tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ uint8_t *dst =
+ &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+ uint8_t *src =
+ &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
+ int16_t *src_diff =
+ &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
+ uint16_t *eob = &p->eobs[block];
+ *eob = 0;
+ memset(qcoeff, 0, tx1d_height * tx1d_width * sizeof(*qcoeff));
+ memset(dqcoeff, 0, tx1d_height * tx1d_width * sizeof(*dqcoeff));
+
+ if (LIKELY(!x->skip_block)) {
+ TX_TYPE_1D tx_type_1d = DCT_1D;
+ switch (tx_type) {
+ case IDTX: tx_type_1d = IDTX_1D; break;
+ case V_DCT:
+ assert(mode == H_PRED);
+ tx_type_1d = DCT_1D;
+ break;
+ case H_DCT:
+ assert(mode == V_PRED);
+ tx_type_1d = DCT_1D;
+ break;
+ default: assert(0);
+ }
+ switch (mode) {
+ case V_PRED:
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ hbd_process_block_dpcm_vert(tx_size, tx_type_1d, xd->bd, pd, p, src,
+ src_stride, dst, dst_stride, src_diff,
+ diff_stride, coeff, qcoeff, dqcoeff);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ process_block_dpcm_vert(tx_size, tx_type_1d, pd, p, src, src_stride,
+ dst, dst_stride, src_diff, diff_stride, coeff,
+ qcoeff, dqcoeff);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ break;
+ case H_PRED:
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ hbd_process_block_dpcm_horz(tx_size, tx_type_1d, xd->bd, pd, p, src,
+ src_stride, dst, dst_stride, src_diff,
+ diff_stride, coeff, qcoeff, dqcoeff);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ process_block_dpcm_horz(tx_size, tx_type_1d, pd, p, src, src_stride,
+ dst, dst_stride, src_diff, diff_stride, coeff,
+ qcoeff, dqcoeff);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ break;
+ default: assert(0);
+ }
+ *eob = get_eob(qcoeff, tx1d_height * tx1d_width, scan_order->scan);
+ }
+
+ ta[blk_col] = tl[blk_row] = *eob > 0;
+ if (*eob) *skip = 0;
+}
+#endif // CONFIG_DPCM_INTRA
+
void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
@@ -1448,6 +1743,20 @@
#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif
+
+#if CONFIG_DPCM_INTRA
+ const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const PREDICTION_MODE mode =
+ (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
+ if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
+ av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, tx_type, args->ta,
+ args->tl, args->skip);
+ return;
+ }
+#endif // CONFIG_DPCM_INTRA
+
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
const ENTROPY_CONTEXT *a = &args->ta[blk_col];
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index df7afb6..8270d7d 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -93,6 +93,15 @@
BLOCK_SIZE plane_bsize);
#endif
+#if CONFIG_DPCM_INTRA
+void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
+ PREDICTION_MODE mode, int plane, int block,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ TX_TYPE tx_type, ENTROPY_CONTEXT *ta,
+ ENTROPY_CONTEXT *tl, int8_t *skip);
+#endif // CONFIG_DPCM_INTRA
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 307bd13..9ceb52c 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1525,6 +1525,23 @@
#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif
+#if CONFIG_DPCM_INTRA
+ const int block_raster_idx =
+ av1_block_index_to_raster_order(tx_size, block);
+ const PREDICTION_MODE mode =
+ (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
+ TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
+ xd, block, tx_size);
+ if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
+ int8_t skip;
+ av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, tx_type, a, l, &skip);
+ av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
+ tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
+ OUTPUT_HAS_DECODED_PIXELS);
+ goto CALCULATE_RD;
+ }
+#endif // CONFIG_DPCM_INTRA
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
}
@@ -1556,6 +1573,9 @@
cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
}
#endif
+#if CONFIG_DPCM_INTRA
+CALCULATE_RD : {}
+#endif // CONFIG_DPCM_INTRA
rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
diff --git a/configure b/configure
index 617e059..42199e4 100755
--- a/configure
+++ b/configure
@@ -250,6 +250,7 @@
convolve_round
compound_round
ext_tx
+ dpcm_intra
tx64x64
ext_intra
intra_interp
@@ -516,6 +517,7 @@
enabled smooth_hv && soft_enable alt_intra
enabled intra_edge && enable_feature ext_intra
enabled chroma_2x2 && disable_feature chroma_sub8x8
+ enabled dpcm_intra && enable_feature ext_tx
if ! enabled daala_ec && ! enabled ans && enabled cfl; then
log_echo "cfl requires daala_ec or ans, so disabling cfl"