Enable the activity masking code of PVQ

Turned off by default.

TODO: Daala's distortion function still needs to be added
for activity masking to work completely.

Note that the PVQ QM matrix (i.e. the per-band scaler for each
transform block) is calculated on the decoder side in exactly the
same way as in the encoder. In Daala, by contrast, this matrix is
written to the bitstream and the decoder does not generate it.
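
For reference, a condensed sketch of that shared derivation (the full
loops are in daala_dec_init() and the encoder tile init in this patch;
state, use_masking, and cm stand for the surrounding codec state): each
plane's per-band matrix is interpolated from the OD_DEFAULT_QMS table
at the frame's base qindex.

    int q = av1_get_qindex(&cm->seg, 0 /* segment_id */, cm->base_qindex);
    for (pli = 0; pli < MAX_MB_PLANE; pli++) {
      int i = 0;
      /* Walk OD_DEFAULT_QMS to the pair of entries bracketing q, then
         interpolate between them into the per-plane, per-band matrix. */
      while (OD_DEFAULT_QMS[use_masking][i + 1][pli].qm_q4 != NULL &&
             q > OD_DEFAULT_QMS[use_masking][i + 1][pli].interp_q << OD_COEFF_SHIFT)
        i++;
      od_interp_qm(&state.pvq_qm_q4[pli][0], q,
                   &OD_DEFAULT_QMS[use_masking][i][pli],
                   &OD_DEFAULT_QMS[use_masking][i + 1][pli]);
    }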

Activity masking can be turned on by setting the flag below to 1:
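
(From av1/common/pvq.h in this patch; it is defined as (0), i.e.
disabled, by default.)

    #define AV1_PVQ_ENABLE_ACTIVITY_MASKING (1)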

Change-Id: I44bfb905cb4e0cad6aa830a4c355cd760a993ffe
diff --git a/av1/common/pvq.c b/av1/common/pvq.c
index 32d9e8b..8f6512d 100644
--- a/av1/common/pvq.c
+++ b/av1/common/pvq.c
@@ -88,6 +88,81 @@
 };
 #endif
 
+/* Imported from encode.c in daala */
+/* These are the PVQ equivalent of quantization matrices, except that
+   the values are per-band. */
+#define OD_MASKING_DISABLED 0
+#define OD_MASKING_ENABLED 1
+
+const unsigned char OD_LUMA_QM_Q4[2][OD_QM_SIZE] = {
+/* Flat quantization for PSNR. The DC component isn't 16 because the DC
+   magnitude compensation is done here for inter (Haar DC doesn't need it).
+   Masking disabled: */
+ {
+  16, 16,
+  16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16
+ },
+/* The non-flat AC coefficients compensate for the non-linear scaling caused
+   by activity masking. The values are currently hand-tuned so that the rate
+   of each band remains roughly constant when enabling activity masking
+   on intra.
+   Masking enabled: */
+ {
+  16, 16,
+  16, 18, 28, 32,
+  16, 14, 20, 20, 28, 32,
+  16, 11, 14, 14, 17, 17, 22, 28
+ }
+};
+
+const unsigned char OD_CHROMA_QM_Q4[2][OD_QM_SIZE] = {
+/* Chroma quantization is different because of the reduced lapping.
+   FIXME: Use the same matrix as luma for 4:4:4.
+   Masking disabled: */
+ {
+  16, 16,
+  16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16
+ },
+/* The AC part is flat for chroma because it has no activity masking.
+   Masking enabled: */
+ {
+  16, 16,
+  16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16
+ }
+};
+
+/* No interpolation, always use od_flat_qm_q4, but use a different scale for
+   each plane.
+   FIXME: Add interpolation and properly tune chroma. */
+const od_qm_entry OD_DEFAULT_QMS[2][3][OD_NPLANES_MAX] = {
+ /* Masking disabled */
+ {{{4, 256, OD_LUMA_QM_Q4[OD_MASKING_DISABLED]},
+   {4, 448, OD_CHROMA_QM_Q4[OD_MASKING_DISABLED]},
+   {4, 320, OD_CHROMA_QM_Q4[OD_MASKING_DISABLED]}},
+  {{318, 256, OD_LUMA_QM_Q4[OD_MASKING_DISABLED]},
+   {318, 140, OD_CHROMA_QM_Q4[OD_MASKING_DISABLED]},
+   {318, 100, OD_CHROMA_QM_Q4[OD_MASKING_DISABLED]}},
+  {{0, 0, NULL},
+   {0, 0, NULL},
+   {0, 0, NULL}}},
+ /* Masking enabled */
+ {{{4, 256, OD_LUMA_QM_Q4[OD_MASKING_ENABLED]},
+   {4, 448, OD_CHROMA_QM_Q4[OD_MASKING_ENABLED]},
+   {4, 320, OD_CHROMA_QM_Q4[OD_MASKING_ENABLED]}},
+  {{318, 256, OD_LUMA_QM_Q4[OD_MASKING_ENABLED]},
+   {318, 140, OD_CHROMA_QM_Q4[OD_MASKING_ENABLED]},
+   {318, 100, OD_CHROMA_QM_Q4[OD_MASKING_ENABLED]}},
+  {{0, 0, NULL},
+   {0, 0, NULL},
+   {0, 0, NULL}}}
+};
+
 /* Constants for the beta parameter, which controls how activity masking is
    used.
    beta = 1 / (1 - alpha), so when beta is 1, alpha is 0 and activity
@@ -102,9 +177,6 @@
 static const od_val16 OD_PVQ_BETA32_LUMA[10] = {OD_BETA(1.), OD_BETA(1.),
  OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
  OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA64_LUMA[13] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
 
 static const od_val16 OD_PVQ_BETA4_LUMA_MASKING[1] = {OD_BETA(1.)};
 static const od_val16 OD_PVQ_BETA8_LUMA_MASKING[4] = {OD_BETA(1.5),
@@ -115,10 +187,6 @@
 static const od_val16 OD_PVQ_BETA32_LUMA_MASKING[10] = {OD_BETA(1.5),
  OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
  OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
-static const od_val16 OD_PVQ_BETA64_LUMA_MASKING[13] = {OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5)};
 
 static const od_val16 OD_PVQ_BETA4_CHROMA[1] = {OD_BETA(1.)};
 static const od_val16 OD_PVQ_BETA8_CHROMA[4] = {OD_BETA(1.), OD_BETA(1.),
@@ -128,9 +196,6 @@
 static const od_val16 OD_PVQ_BETA32_CHROMA[10] = {OD_BETA(1.), OD_BETA(1.),
  OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
  OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA64_CHROMA[13] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
 
 const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1] = {
  {{OD_PVQ_BETA4_LUMA, OD_PVQ_BETA8_LUMA,
@@ -147,6 +212,46 @@
    OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}}
 };
 
+
+void od_interp_qm(unsigned char *out, int q, const od_qm_entry *entry1,
+  const od_qm_entry *entry2) {
+  int i;
+  if (entry2 == NULL || entry2->qm_q4 == NULL
+   || q < entry1->interp_q << OD_COEFF_SHIFT) {
+    /* Use entry1. */
+    for (i = 0; i < OD_QM_SIZE; i++) {
+      out[i] = OD_MINI(255, entry1->qm_q4[i]*entry1->scale_q8 >> 8);
+    }
+  }
+  else if (entry1 == NULL || entry1->qm_q4 == NULL
+   || q > entry2->interp_q << OD_COEFF_SHIFT) {
+    /* Use entry2. */
+    for (i = 0; i < OD_QM_SIZE; i++) {
+      out[i] = OD_MINI(255, entry2->qm_q4[i]*entry2->scale_q8 >> 8);
+    }
+  }
+  else {
+    /* Interpolate between entry1 and entry2. The interpolation is linear
+       in terms of log(q) vs log(m*scale). Considering that we're ultimately
+       multiplying the result it makes sense, but we haven't tried other
+       interpolation methods. */
+    double x;
+    const unsigned char *m1;
+    const unsigned char *m2;
+    int q1;
+    int q2;
+    m1 = entry1->qm_q4;
+    m2 = entry2->qm_q4;
+    q1 = entry1->interp_q << OD_COEFF_SHIFT;
+    q2 = entry2->interp_q << OD_COEFF_SHIFT;
+    x = (log(q)-log(q1))/(log(q2)-log(q1));
+    for (i = 0; i < OD_QM_SIZE; i++) {
+      out[i] = OD_MINI(255, (int)floor(.5 + (1./256)*exp(
+       x*log(m2[i]*entry2->scale_q8) + (1 - x)*log(m1[i]*entry1->scale_q8))));
+    }
+  }
+}
+
 void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe) {
   od_pvq_codeword_ctx *ctx;
   int i;
@@ -195,9 +300,9 @@
 #endif
 
 /* Initialize the quantization matrix. */
-// Note: When varying scan orders for hybrid transform is used by PVQ,
-// since AOM does not use magnitude compensation (i.e. simplay x16 for all coeffs),
-// we don't need seperate qm and qm_inv for each transform type.
+// Note: When the hybrid transform and its corresponding scan order are used by PVQ,
+// we don't need separate qm and qm_inv for each transform type,
+// because AOM does not do magnitude compensation (i.e. simply x16 for all coeffs).
 void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm) {
   int i;
   int j;
diff --git a/av1/common/pvq.h b/av1/common/pvq.h
index 5a49a84..2836263 100644
--- a/av1/common/pvq.h
+++ b/av1/common/pvq.h
@@ -22,6 +22,8 @@
 extern const uint16_t EXP_CDF_TABLE[][16];
 extern const uint16_t LAPLACE_OFFSET[];
 
+#define AV1_PVQ_ENABLE_ACTIVITY_MASKING (0)
+
 # define PVQ_MAX_PARTITIONS (1 + 3*(OD_TXSIZES-1))
 
 # define OD_NOREF_ADAPT_SPEED (4)
@@ -129,6 +131,14 @@
   uint16_t        pvq_skip_dir_cdf[2*(OD_TXSIZES-1)][7];
 };
 
+typedef struct od_qm_entry {
+  int interp_q;
+  int scale_q8;
+  const unsigned char *qm_q4;
+} od_qm_entry;
+
+extern const od_qm_entry OD_DEFAULT_QMS[2][3][OD_NPLANES_MAX];
+
 void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe);
 int od_pvq_size_ctx(int n);
 int od_pvq_k1_ctx(int n, int orig_size);
@@ -139,6 +149,9 @@
 int od_vector_log_mag(const od_coeff *x, int n);
 #endif
 
+void od_interp_qm(unsigned char *out, int q, const od_qm_entry *entry1,
+                  const od_qm_entry *entry2);
+
 int od_qm_get_index(int bs, int band);
 
 extern const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1];
diff --git a/av1/common/pvq_state.h b/av1/common/pvq_state.h
index 0519451..dc8c0fa 100644
--- a/av1/common/pvq_state.h
+++ b/av1/common/pvq_state.h
@@ -40,10 +40,7 @@
 
 struct od_state {
   od_adapt_ctx adapt;
-  /* TODO(yushin): Enable this for activity masking,
-     when pvq_qm_q4 is available in AOM. */
-  /* unsigned char pvq_qm_q4[OD_NPLANES_MAX][OD_QM_SIZE]; */
-
+  unsigned char pvq_qm_q4[OD_NPLANES_MAX][OD_QM_SIZE];
   /* Quantization matrices and their inverses. */
   int16_t qm[OD_QM_BUFFER_SIZE];
   int16_t qm_inv[OD_QM_BUFFER_SIZE];
diff --git a/av1/decoder/decint.h b/av1/decoder/decint.h
index 99dbc43..fb756d4 100644
--- a/av1/decoder/decint.h
+++ b/av1/decoder/decint.h
@@ -26,6 +26,7 @@
   od_state state;
   /* Daala entropy decoder. */
   od_ec_dec *ec;
+  int use_activity_masking;
   /* Mode of quantization matrice : FLAT (0) or HVS (1) */
   int qm;
 };
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index de5f3fa..a64ba14 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -63,6 +63,7 @@
 
 #if CONFIG_PVQ
 #include "av1/decoder/pvq_decoder.h"
+#include "av1/common/pvq.h"
 #include "av1/encoder/encodemb.h"
 
 #include "aom_dsp/entdec.h"
@@ -336,10 +337,7 @@
   const int blk_size = tx_size_wide[bs];
   int eob = 0;
   int i;
-  // TODO(yushin) : To enable activity masking,
-  // int use_activity_masking = dec->use_activity_masking;
-  int use_activity_masking = 0;
-
+  int use_activity_masking = dec->use_activity_masking;
   DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
   DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
 
@@ -352,11 +350,13 @@
   if (lossless)
     pvq_dc_quant = 1;
   else {
-    // TODO(yushin): Enable this for activity masking,
-    // when pvq_qm_q4 is available in AOM.
-    // pvq_dc_quant = OD_MAXI(1, quant*
-    // dec->state.pvq_qm_q4[pli][od_qm_get_index(bs, 0)] >> 4);
-    pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+    if (use_activity_masking)
+      pvq_dc_quant = OD_MAXI(
+          1, (quant[0] >> quant_shift) *
+                     dec->state.pvq_qm_q4[pli][od_qm_get_index(bs, 0)] >>
+                 4);
+    else
+      pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
   }
 
   off = od_qm_offset(bs, xdec);
@@ -3070,16 +3070,51 @@
 #endif  // CONFIG_EXT_TILE
 
 #if CONFIG_PVQ
-static void daala_dec_init(daala_dec_ctx *daala_dec, od_ec_dec *ec) {
+static void daala_dec_init(AV1_COMMON *const cm, daala_dec_ctx *daala_dec,
+                           od_ec_dec *ec) {
   daala_dec->ec = ec;
   od_adapt_ctx_reset(&daala_dec->state.adapt, 0);
 
-  daala_dec->qm = OD_FLAT_QM;
+  // TODO(yushin): Activity masking info needs to be signaled in the bitstream
+  daala_dec->use_activity_masking = AV1_PVQ_ENABLE_ACTIVITY_MASKING;
+
+  if (daala_dec->use_activity_masking)
+    daala_dec->qm = OD_HVS_QM;
+  else
+    daala_dec->qm = OD_FLAT_QM;
 
   od_init_qm(daala_dec->state.qm, daala_dec->state.qm_inv,
              daala_dec->qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+
+  if (daala_dec->use_activity_masking) {
+    int pli;
+    int use_masking = daala_dec->use_activity_masking;
+    int segment_id = 0;
+    int qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+
+    for (pli = 0; pli < MAX_MB_PLANE; pli++) {
+      int i;
+      int q;
+
+      q = qindex;
+      if (q <= OD_DEFAULT_QMS[use_masking][0][pli].interp_q << OD_COEFF_SHIFT) {
+        od_interp_qm(&daala_dec->state.pvq_qm_q4[pli][0], q,
+                     &OD_DEFAULT_QMS[use_masking][0][pli], NULL);
+      } else {
+        i = 0;
+        while (OD_DEFAULT_QMS[use_masking][i + 1][pli].qm_q4 != NULL &&
+               q > OD_DEFAULT_QMS[use_masking][i + 1][pli].interp_q
+                       << OD_COEFF_SHIFT) {
+          i++;
+        }
+        od_interp_qm(&daala_dec->state.pvq_qm_q4[pli][0], q,
+                     &OD_DEFAULT_QMS[use_masking][i][pli],
+                     &OD_DEFAULT_QMS[use_masking][i + 1][pli]);
+      }
+    }
+  }
 }
-#endif
+#endif  // #if CONFIG_PVQ
 
 static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
                                    const uint8_t *data_end) {
@@ -3182,7 +3217,7 @@
 #endif
                            td->dqcoeff);
 #if CONFIG_PVQ
-      daala_dec_init(&td->xd.daala_dec, &td->bit_reader.ec);
+      daala_dec_init(cm, &td->xd.daala_dec, &td->bit_reader.ec);
 #endif
 #if CONFIG_PALETTE
       td->xd.plane[0].color_index_map = td->color_index_map[0];
@@ -3510,7 +3545,7 @@
 #endif
                              twd->dqcoeff);
 #if CONFIG_PVQ
-        daala_dec_init(&twd->xd.daala_dec, &twd->bit_reader.ec);
+        daala_dec_init(cm, &twd->xd.daala_dec, &twd->bit_reader.ec);
 #endif
 #if CONFIG_PALETTE
         twd->xd.plane[0].color_index_map = twd->color_index_map[0];
diff --git a/av1/decoder/pvq_decoder.c b/av1/decoder/pvq_decoder.c
index 1cc75f8..76fbed1 100644
--- a/av1/decoder/pvq_decoder.c
+++ b/av1/decoder/pvq_decoder.c
@@ -314,15 +314,22 @@
   generic_encoder *model;
   int skip_rest[3] = {0};
   cfl_ctx cfl;
-  /* const unsigned char *pvq_qm; */
+  const unsigned char *pvq_qm;
+  int use_masking;
+
   /*Default to skip=1 and noref=0 for all bands.*/
   for (i = 0; i < PVQ_MAX_PARTITIONS; i++) {
     noref[i] = 0;
     skip[i] = 1;
   }
-  /* TODO(yushin): Enable this for activity masking,
-     when pvq_qm_q4 is available in AOM. */
-  /*pvq_qm = &dec->state.pvq_qm_q4[pli][0];*/
+
+  use_masking = dec->use_activity_masking;
+
+  if (use_masking)
+    pvq_qm = &dec->state.pvq_qm_q4[pli][0];
+  else
+    pvq_qm = 0;
+
   exg = &dec->state.adapt.pvq.pvq_exg[pli][bs][0];
   ext = dec->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS;
   model = dec->state.adapt.pvq.pvq_param_model;
@@ -341,10 +348,12 @@
     cfl.allow_flip = pli != 0 && is_keyframe;
     for (i = 0; i < nb_bands; i++) {
       int q;
-      /* TODO(yushin): Enable this for activity masking,
-         when pvq_qm_q4 is available in AOM. */
-      /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/
-      q = OD_MAXI(1, q0);
+
+      if (use_masking)
+        q = OD_MAXI(1, q0 * pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);
+      else
+        q = OD_MAXI(1, q0);
+
       pvq_decode_partition(dec->ec, q, size[i],
        model, &dec->state.adapt, exg + i, ext + i, ref + off[i], out + off[i],
        &noref[i], beta[i], robust, is_keyframe, pli,
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e9b8aef..d1d5c53 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -56,6 +56,7 @@
 #include "av1/encoder/segmentation.h"
 #include "av1/encoder/tokenize.h"
 #if CONFIG_PVQ
+#include "av1/common/pvq.h"
 #include "av1/encoder/pvq_encoder.h"
 #endif
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -4607,9 +4608,14 @@
 #if CONFIG_PVQ
   td->mb.pvq_q = &this_tile->pvq_q;
 
-  // TODO(yushin)
-  // If activity masking is enabled, change below to OD_HVS_QM
-  td->mb.daala_enc.qm = OD_FLAT_QM;  // Hard coded. Enc/dec required to sync.
+  // TODO(yushin): Activity masking info needs to be signaled in the bitstream
+  td->mb.daala_enc.use_activity_masking = AV1_PVQ_ENABLE_ACTIVITY_MASKING;
+
+  if (td->mb.daala_enc.use_activity_masking)
+    td->mb.daala_enc.qm = OD_HVS_QM;  // Hard coded. Enc/dec required to sync.
+  else
+    td->mb.daala_enc.qm = OD_FLAT_QM;  // Hard coded. Enc/dec required to sync.
+
   {
     // FIXME: Multiple segments support
     int segment_id = 0;
@@ -4626,12 +4632,41 @@
   }
   od_init_qm(td->mb.daala_enc.state.qm, td->mb.daala_enc.state.qm_inv,
              td->mb.daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+
+  if (td->mb.daala_enc.use_activity_masking) {
+    int pli;
+    int use_masking = td->mb.daala_enc.use_activity_masking;
+    int segment_id = 0;
+    int qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+
+    for (pli = 0; pli < MAX_MB_PLANE; pli++) {
+      int i;
+      int q;
+
+      q = qindex;
+      if (q <= OD_DEFAULT_QMS[use_masking][0][pli].interp_q << OD_COEFF_SHIFT) {
+        od_interp_qm(&td->mb.daala_enc.state.pvq_qm_q4[pli][0], q,
+                     &OD_DEFAULT_QMS[use_masking][0][pli], NULL);
+      } else {
+        i = 0;
+        while (OD_DEFAULT_QMS[use_masking][i + 1][pli].qm_q4 != NULL &&
+               q > OD_DEFAULT_QMS[use_masking][i + 1][pli].interp_q
+                       << OD_COEFF_SHIFT) {
+          i++;
+        }
+        od_interp_qm(&td->mb.daala_enc.state.pvq_qm_q4[pli][0], q,
+                     &OD_DEFAULT_QMS[use_masking][i][pli],
+                     &OD_DEFAULT_QMS[use_masking][i + 1][pli]);
+      }
+    }
+  }
+
   od_ec_enc_init(&td->mb.daala_enc.ec, 65025);
 
   adapt = &td->mb.daala_enc.state.adapt;
   od_ec_enc_reset(&td->mb.daala_enc.ec);
   od_adapt_ctx_reset(adapt, 0);
-#endif
+#endif  // #if CONFIG_PVQ
 
   for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
        mi_row += cm->mib_size) {
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index de2b477..e2c6144 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -1116,13 +1116,9 @@
                           PVQ_INFO *pvq_info) {
   const int tx_blk_size = tx_size_wide[tx_size];
   int skip;
-  // TODO(yushin): Enable this later, when pvq_qm_q4 is available in AOM.
-  // int pvq_dc_quant = OD_MAXI(1,
-  //  quant * daala_enc->state.pvq_qm_q4[plane][od_qm_get_index(tx_size, 0)] >>
-  //  4);
   int quant_shift = get_tx_scale(tx_size);
-  // DC quantizer for PVQ
-  int pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+  int pvq_dc_quant;
+  int use_activity_masking = daala_enc->use_activity_masking;
   int tell;
   int has_dc_skip = 1;
   int i;
@@ -1130,6 +1126,7 @@
 #if PVQ_CHROMA_RD
   double save_pvq_lambda;
 #endif
+
   DECLARE_ALIGNED(16, int16_t, coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
   DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
   DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
@@ -1138,6 +1135,16 @@
   DECLARE_ALIGNED(16, int32_t, ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
   DECLARE_ALIGNED(16, int32_t, out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
 
+  // DC quantizer for PVQ
+  if (use_activity_masking)
+    pvq_dc_quant = OD_MAXI(
+        1, (quant[0] >> quant_shift) *
+                   daala_enc->state.pvq_qm_q4[plane]
+                                             [od_qm_get_index(tx_size, 0)] >>
+               4);
+  else
+    pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+
   *eob = 0;
 
   tell = od_ec_enc_tell_frac(&daala_enc->ec);
@@ -1166,18 +1173,17 @@
     out_int32[0] = OD_DIV_R0(in_int32[0] - ref_int32[0], pvq_dc_quant);
   }
 
-  skip = od_pvq_encode(
-      daala_enc, ref_int32, in_int32, out_int32,
-      (int)quant[0] >> quant_shift,  // scale/quantizer
-      (int)quant[1] >> quant_shift,  // scale/quantizer
-      // TODO(yushin): Instead of 0,
-      //   use daala_enc->use_activity_masking for activity masking.
-      plane, tx_size, OD_PVQ_BETA[0][plane][tx_size], OD_ROBUST_STREAM,
-      0,        // is_keyframe,
-      0, 0, 0,  // q_scaling, bx, by,
-      daala_enc->state.qm + off, daala_enc->state.qm_inv + off,
-      speed,  // speed
-      pvq_info);
+  skip = od_pvq_encode(daala_enc, ref_int32, in_int32, out_int32,
+                       (int)quant[0] >> quant_shift,  // scale/quantizer
+                       (int)quant[1] >> quant_shift,  // scale/quantizer
+                       plane, tx_size,
+                       OD_PVQ_BETA[use_activity_masking][plane][tx_size],
+                       OD_ROBUST_STREAM,
+                       0,        // is_keyframe,
+                       0, 0, 0,  // q_scaling, bx, by,
+                       daala_enc->state.qm + off, daala_enc->state.qm_inv + off,
+                       speed,  // speed
+                       pvq_info);
 
   if (skip && pvq_info) assert(pvq_info->ac_dc_coded == 0);
 
diff --git a/av1/encoder/pvq_encoder.c b/av1/encoder/pvq_encoder.c
index ef41277..5f8884f 100644
--- a/av1/encoder/pvq_encoder.c
+++ b/av1/encoder/pvq_encoder.c
@@ -800,24 +800,34 @@
   int skip_rest;
   int skip_dir;
   int skip_theta_value;
-  /* const unsigned char *pvq_qm; */
+  const unsigned char *pvq_qm;
   double dc_rate;
+  int use_masking;
 #if !OD_SIGNAL_Q_SCALING
   OD_UNUSED(q_scaling);
   OD_UNUSED(bx);
   OD_UNUSED(by);
 #endif
-  /* TODO(yushin): Enable this for activity masking,
-     when pvq_qm_q4 is available in AOM. */
-  /* pvq_qm = &enc->state.pvq_qm_q4[pli][0]; */
+
+  use_masking = enc->use_activity_masking;
+
+  if (use_masking)
+    pvq_qm = &enc->state.pvq_qm_q4[pli][0];
+  else
+    pvq_qm = 0;
+
   exg = &enc->state.adapt.pvq.pvq_exg[pli][bs][0];
   ext = enc->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS;
   skip_cdf = enc->state.adapt.skip_cdf[2*bs + (pli != 0)];
   model = enc->state.adapt.pvq.pvq_param_model;
   nb_bands = OD_BAND_OFFSETS[bs][0];
   off = &OD_BAND_OFFSETS[bs][1];
-  /*dc_quant = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, 0)] >> 4);*/
-  dc_quant = OD_MAXI(1, q_dc);
+
+  if (use_masking)
+    dc_quant = OD_MAXI(1, q_dc * pvq_qm[od_qm_get_index(bs, 0)] >> 4);
+  else
+    dc_quant = OD_MAXI(1, q_dc);
+
   tell = 0;
   for (i = 0; i < nb_bands; i++) size[i] = off[i+1] - off[i];
   skip_diff = 0;
@@ -848,10 +858,12 @@
   }
   for (i = 0; i < nb_bands; i++) {
     int q;
-    /* TODO(yushin): Enable this for activity masking,
-       when pvq_qm_q4 is available in AOM. */
-    /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/
-    q = OD_MAXI(1, q_ac);
+
+    if (use_masking)
+      q = OD_MAXI(1, q_ac * pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);
+    else
+      q = OD_MAXI(1, q_ac);
+
     qg[i] = pvq_theta(out + off[i], in + off[i], ref + off[i], size[i],
      q, y + off[i], &theta[i], &max_theta[i],
      &k[i], beta[i], &skip_diff, robust, is_keyframe, pli, &enc->state.adapt,