Add the q_segmentation experiment

This experiment implements low-cost delta q signalling on a per-block basis
for all non-inter frame types, allowing more efficient AQ that bases its
decisions on temporal information.

Based on an Intel proposal from March.

Change-Id: I18e73d8b12f4caa0b165df12c58ab506271bec03
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 12bdc80..22247a2 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -410,7 +410,11 @@
     int mi_col = sb_col_index * cm->mib_size;
     int qindex_thresh =
         cpi->oxcf.content == AOM_CONTENT_SCREEN
+#if CONFIG_Q_SEGMENTATION
+            ? av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, 0, cm->base_qindex)
+#else
             ? av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
+#endif
             : 0;
     assert(mi_row >= 0 && mi_row < cm->mi_rows);
     assert(mi_col >= 0 && mi_col < cm->mi_cols);
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index 4b886f2..701c1cb 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -1619,7 +1619,11 @@
 }
 
 void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
+#if CONFIG_Q_SEGMENTATION
+                               int segment_id, int q_segment_id) {
+#else
                                int segment_id) {
+#endif
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   const QUANTS *const quants = &cpi->quants;
@@ -1636,7 +1640,12 @@
                 cm->delta_q_present_flag ? cm->base_qindex + xd->delta_qindex
                                          : cm->base_qindex));
 #endif
+#if CONFIG_Q_SEGMENTATION
+  const int qindex =
+      av1_get_qindex(&cm->seg, segment_id, q_segment_id, current_q_index);
+#else
   const int qindex = av1_get_qindex(&cm->seg, segment_id, current_q_index);
+#endif
   const int rdmult = av1_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
   int i;
 #if CONFIG_AOM_QM
@@ -1711,7 +1720,12 @@
 void av1_frame_init_quantizer(AV1_COMP *cpi) {
   MACROBLOCK *const x = &cpi->td.mb;
   MACROBLOCKD *const xd = &x->e_mbd;
+#if CONFIG_Q_SEGMENTATION
+  av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id,
+                            xd->mi[0]->mbmi.q_segment_id);
+#else
   av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id);
+#endif
 }
 
 void av1_set_quantizer(AV1_COMMON *cm, int q) {
diff --git a/av1/encoder/av1_quantize.h b/av1/encoder/av1_quantize.h
index 4635f66..25cbf6d 100644
--- a/av1/encoder/av1_quantize.h
+++ b/av1/encoder/av1_quantize.h
@@ -99,7 +99,11 @@
 void av1_frame_init_quantizer(struct AV1_COMP *cpi);
 
 void av1_init_plane_quantizers(const struct AV1_COMP *cpi, MACROBLOCK *x,
+#if CONFIG_Q_SEGMENTATION
+                               int segment_id, int q_segment_id);
+#else
                                int segment_id);
+#endif
 
 void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
                          int uv_dc_delta_q, int uv_ac_delta_q,
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index f964d3b..1340cef 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -744,6 +744,79 @@
 }
 #endif  // CONFIG_LV_MAP
 
+#if CONFIG_Q_SEGMENTATION
+// Maps segment id |x| to the symbol that is coded, given its prediction |ref|
+// and the number of levels |max|. Ids close to |ref| get the smallest symbols:
+// inside a window around |ref| the signed difference is interleaved as
+// 0, +1, -1, +2, -2, ...; ids outside the window take the remaining symbols.
+static int neg_interleave(int x, int ref, int max) {
+  const int delta = x - ref;
+
+  // A zero prediction leaves the id unchanged.
+  if (ref == 0) return x;
+  // Prediction at (or past) the top level: code the distance below it.
+  if (ref >= max - 1) return -delta;
+
+  // The window is bounded by whichever end of [0, max) is nearer to |ref|.
+  const int dist = abs(delta);
+  const int ref_in_lower_half = 2 * ref < max;
+  const int in_window = ref_in_lower_half ? dist <= ref : dist < max - ref;
+
+  // Interleave: positive differences -> odd symbols, the rest -> even ones.
+  if (in_window) return delta > 0 ? (delta << 1) - 1 : (-delta) << 1;
+  // Outside the window (x in [0, max)): high ids map to x, low ids mirror.
+  return ref_in_lower_half ? x : (max - x) - 1;
+}
+
+// Writes the q segment id of the block at (mi_row, mi_col) and stores it in
+// cm->q_seg_map via set_q_segment_id(). Returns at once when seg->q_lvls is 0.
+// For skipped blocks no symbol is coded and the map takes the predicted id.
+static void write_q_segment_id(const AV1_COMMON *cm, int skip,
+                               const MB_MODE_INFO *const mbmi, aom_writer *w,
+                               const struct segmentation *seg,
+                               struct segmentation_probs *segp,
+                               BLOCK_SIZE bsize, int mi_row, int mi_col) {
+  int prev_ul = 0; /* Top left segment_id */
+  int prev_l = 0;  /* Current left segment_id */
+  int prev_u = 0;  /* Current top segment_id */
+  if (!seg->q_lvls) return;
+
+  // Neighbours across a tile or frame edge are unavailable and stay at 0.
+  MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride + mi_col;
+  int tinfo = mi->mbmi.boundary_info;
+  int above = (!(tinfo & TILE_ABOVE_BOUNDARY)) && ((mi_row - 1) >= 0);
+  int left = (!(tinfo & TILE_LEFT_BOUNDARY)) && ((mi_col - 1) >= 0);
+  if (above && left)
+    prev_ul =
+        get_segment_id(cm, cm->q_seg_map, BLOCK_4X4, mi_row - 1, mi_col - 1);
+  if (above)
+    prev_u = get_segment_id(cm, cm->q_seg_map, BLOCK_4X4, mi_row - 1, mi_col);
+  if (left)
+    prev_l = get_segment_id(cm, cm->q_seg_map, BLOCK_4X4, mi_row, mi_col - 1);
+
+  int cdf_num = pick_q_seg_cdf(prev_ul, prev_u, prev_l);
+  int pred = pick_q_seg_pred(prev_ul, prev_u, prev_l);
+
+  if (skip) {
+    set_q_segment_id(cm, cm->q_seg_map, mbmi->sb_type, mi_row, mi_col, pred);
+    return;
+  }
+
+  // The id is coded relative to the prediction from the neighbours.
+  int coded_id = neg_interleave(mbmi->q_segment_id, pred, seg->q_lvls);
+#if CONFIG_NEW_MULTISYMBOL
+  aom_cdf_prob *pred_cdf = segp->q_seg_cdf[cdf_num];
+  aom_write_symbol(w, coded_id, pred_cdf, 8);
+#else
+  // NOTE(review): confirm a single aom_write() can carry a multi-level id.
+  aom_prob pred_prob = segp->q_seg_cdf[cdf_num];
+  aom_write(w, coded_id, pred_prob);
+#endif
+  set_q_segment_id(cm, cm->q_seg_map, bsize, mi_row, mi_col,
+                   mbmi->q_segment_id);
+}
+#endif
+
 static void write_segment_id(aom_writer *w, const struct segmentation *seg,
                              struct segmentation_probs *segp, int segment_id) {
   if (seg->enabled && seg->update_map) {
@@ -1319,6 +1392,9 @@
   }
 
   skip = write_skip(cm, xd, segment_id, mi, w);
+#if CONFIG_Q_SEGMENTATION
+  write_q_segment_id(cm, skip, mbmi, w, seg, segp, bsize, mi_row, mi_col);
+#endif
   if (cm->delta_q_present_flag) {
     int super_block_upper_left = ((mi_row & (cm->mib_size - 1)) == 0) &&
                                  ((mi_col & (cm->mib_size - 1)) == 0);
@@ -1644,6 +1720,9 @@
   if (seg->update_map) write_segment_id(w, seg, segp, mbmi->segment_id);
 
   const int skip = write_skip(cm, xd, mbmi->segment_id, mi, w);
+#if CONFIG_Q_SEGMENTATION
+  write_q_segment_id(cm, skip, mbmi, w, seg, segp, bsize, mi_row, mi_col);
+#endif
   if (cm->delta_q_present_flag) {
     int super_block_upper_left = ((mi_row & (cm->mib_size - 1)) == 0) &&
                                  ((mi_col & (cm->mib_size - 1)) == 0);
@@ -2747,6 +2826,32 @@
   }
 }
 
+#if CONFIG_Q_SEGMENTATION
+// Writes the frame-level q segmentation syntax: an enable bit, the number of
+// levels, and one sign-magnitude q delta per level. When any segment already
+// uses SEG_LVL_ALT_Q, q segmentation is switched off and nothing is written.
+static void encode_q_segmentation(AV1_COMMON *cm,
+                                  struct aom_write_bit_buffer *wb) {
+  struct segmentation *const seg = &cm->seg;
+
+  for (int seg_id = 0; seg_id < MAX_SEGMENTS; ++seg_id) {
+    if (!segfeature_active(seg, seg_id, SEG_LVL_ALT_Q)) continue;
+    seg->q_lvls = 0;
+    return;
+  }
+
+  const int enabled = seg->q_lvls != 0;
+  aom_wb_write_bit(wb, enabled);
+  if (!enabled) return;
+  encode_unsigned_max(wb, seg->q_lvls, MAX_SEGMENTS);
+
+  for (int lvl = 0; lvl < seg->q_lvls; ++lvl) {
+    const int delta = seg->q_delta[lvl];
+    encode_unsigned_max(wb, abs(delta), MAXQ);  // magnitude first,
+    aom_wb_write_bit(wb, delta < 0);            // then the sign bit
+  }
+}
+#endif
+
 static void write_tx_mode(AV1_COMMON *cm, TX_MODE *mode,
                           struct aom_write_bit_buffer *wb) {
   if (cm->all_lossless) {
@@ -3916,6 +4021,9 @@
   encode_loopfilter(cm, wb);
   encode_quantization(cm, wb);
   encode_segmentation(cm, xd, wb);
+#if CONFIG_Q_SEGMENTATION
+  encode_q_segmentation(cm, wb);
+#endif
   {
     int delta_q_allowed = 1;
 #if !CONFIG_EXT_DELTA_Q
@@ -4270,6 +4378,9 @@
   encode_loopfilter(cm, wb);
   encode_quantization(cm, wb);
   encode_segmentation(cm, xd, wb);
+#if CONFIG_Q_SEGMENTATION
+  encode_q_segmentation(cm, wb);
+#endif
   {
     int delta_q_allowed = 1;
 #if !CONFIG_EXT_DELTA_Q
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 99d0d6d..b646d29 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -288,16 +288,30 @@
   xd->cfl->mi_col = mi_col;
 #endif
 
-  // Setup segment ID.
+  mbmi->segment_id = 0;
+#if CONFIG_Q_SEGMENTATION
+  mbmi->q_segment_id = 0;
+#endif
+
+// Setup segment ID.
+#if CONFIG_Q_SEGMENTATION
+  if (seg->enabled || seg->q_lvls) {
+#else
   if (seg->enabled) {
-    if (!cpi->vaq_refresh) {
+#endif
+    if (seg->enabled && !cpi->vaq_refresh) {
       const uint8_t *const map =
           seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
       mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
     }
+#if CONFIG_Q_SEGMENTATION
+    if (seg->q_lvls)
+      mbmi->q_segment_id =
+          get_segment_id(cm, cpi->q_seg_encoding_map, bsize, mi_row, mi_col);
+    av1_init_plane_quantizers(cpi, x, mbmi->segment_id, mbmi->q_segment_id);
+#else
     av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
-  } else {
-    mbmi->segment_id = 0;
+#endif
   }
 }
 
@@ -497,11 +511,21 @@
 
 #if !CONFIG_EXT_DELTA_Q
   if (cpi->oxcf.aq_mode > NO_AQ && cpi->oxcf.aq_mode < DELTA_AQ)
+#if CONFIG_Q_SEGMENTATION
+    av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id,
+                              xd->mi[0]->mbmi.q_segment_id);
+#else
     av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id);
+#endif
 #else
   if (cpi->oxcf.aq_mode)
+#if CONFIG_Q_SEGMENTATION
+    av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id,
+                              xd->mi[0]->mbmi.q_segment_id);
+#else
     av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id);
 #endif
+#endif
 
   x->skip = ctx->skip;
 
@@ -728,12 +752,25 @@
 }
 
 static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
+#if CONFIG_Q_SEGMENTATION
+                              int8_t segment_id, int8_t q_segment_id) {
+#else
                               int8_t segment_id) {
+#endif
   int segment_qindex;
   const AV1_COMMON *const cm = &cpi->common;
+#if CONFIG_Q_SEGMENTATION
+  av1_init_plane_quantizers(cpi, x, segment_id, q_segment_id);
+#else
   av1_init_plane_quantizers(cpi, x, segment_id);
+#endif
   aom_clear_system_state();
+#if CONFIG_Q_SEGMENTATION
+  segment_qindex =
+      av1_get_qindex(&cm->seg, segment_id, q_segment_id, cm->base_qindex);
+#else
   segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+#endif
   return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
 }
 
@@ -847,12 +884,24 @@
       const int energy =
           bsize <= BLOCK_16X16 ? x->mb_energy : av1_block_energy(cpi, x, bsize);
       mbmi->segment_id = av1_vaq_segment_id(energy);
-      // Re-initialise quantiser
+// Re-initialise quantiser
+#if CONFIG_Q_SEGMENTATION
+      av1_init_plane_quantizers(cpi, x, mbmi->segment_id, mbmi->q_segment_id);
+#else
       av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
+#endif
     }
+#if CONFIG_Q_SEGMENTATION
+    x->rdmult =
+        set_segment_rdmult(cpi, x, mbmi->segment_id, mbmi->q_segment_id);
+  } else if (aq_mode == COMPLEXITY_AQ) {
+    x->rdmult =
+        set_segment_rdmult(cpi, x, mbmi->segment_id, mbmi->q_segment_id);
+#else
     x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
   } else if (aq_mode == COMPLEXITY_AQ) {
     x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
+#endif
   } else if (aq_mode == CYCLIC_REFRESH_AQ) {
     // If segment is boosted, use rdmult for that segment.
     if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
@@ -2161,24 +2210,66 @@
 
 #if CONFIG_FP_MB_STATS
 const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {
-  0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120,
+  0,
+  10,
+  10,
+  30,
+  40,
+  40,
+  60,
+  80,
+  80,
+  90,
+  100,
+  100,
+  120,
 #if CONFIG_EXT_PARTITION
   // TODO(debargha): What are the correct numbers here?
-  130, 130, 150
+  130,
+  130,
+  150
 #endif  // CONFIG_EXT_PARTITION
 };
 const int qindex_split_threshold_lookup[BLOCK_SIZES] = {
-  0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120,
+  0,
+  3,
+  3,
+  7,
+  15,
+  15,
+  30,
+  40,
+  40,
+  60,
+  80,
+  80,
+  120,
 #if CONFIG_EXT_PARTITION
   // TODO(debargha): What are the correct numbers here?
-  160, 160, 240
+  160,
+  160,
+  240
 #endif  // CONFIG_EXT_PARTITION
 };
 const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6,
+  1,
+  1,
+  1,
+  1,
+  1,
+  1,
+  1,
+  1,
+  1,
+  1,
+  4,
+  4,
+  6,
 #if CONFIG_EXT_PARTITION
   // TODO(debargha): What are the correct numbers here?
-  8, 8, 10
+  8,
+  8,
+  10
 #endif  // CONFIG_EXT_PARTITION
 };
 
@@ -3301,8 +3392,16 @@
       xd->mi[0]->mbmi.current_q_index = current_qindex;
 #if !CONFIG_EXT_DELTA_Q
       xd->mi[0]->mbmi.segment_id = 0;
+#if CONFIG_Q_SEGMENTATION
+      xd->mi[0]->mbmi.q_segment_id = 0;
+#endif
 #endif  // CONFIG_EXT_DELTA_Q
+#if CONFIG_Q_SEGMENTATION
+      av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id,
+                                xd->mi[0]->mbmi.q_segment_id);
+#else
       av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id);
+#endif
 #if CONFIG_EXT_DELTA_Q
       if (cpi->oxcf.deltaq_mode == DELTA_Q_LF) {
         int j, k;
@@ -4014,7 +4113,11 @@
 
   for (i = 0; i < MAX_SEGMENTS; ++i) {
     const int qindex = cm->seg.enabled
+#if CONFIG_Q_SEGMENTATION
+                           ? av1_get_qindex(&cm->seg, i, i, cm->base_qindex)
+#else
                            ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
+#endif
                            : cm->base_qindex;
     xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
                       cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 421622c..453be1b 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -486,6 +486,9 @@
 #endif  // CONFIG_FARME_MARKER
 
   uint8_t *segmentation_map;
+#if CONFIG_Q_SEGMENTATION
+  uint8_t *q_seg_encoding_map;  // Must be allocated and set by AQs
+#endif
 
   CYCLIC_REFRESH *cyclic_refresh;
   ActiveMap active_map;
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 3b687fe..c0c8438 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -505,9 +505,14 @@
 
   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
     const int qindex =
+#if CONFIG_Q_SEGMENTATION
+        clamp(
+            av1_get_qindex(&cm->seg, segment_id, segment_id, cm->base_qindex) +
+#else
         clamp(av1_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
-                  cm->y_dc_delta_q,
-              0, MAXQ);
+#endif
+                cm->y_dc_delta_q,
+            0, MAXQ);
     const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
 
     for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {