Step size and arithmetic coding for delta quantization.

Example performance: 1.8% bit-rate savings using
the AQ test mode (aq-mode=4):
./aomenc --codec=av1 --ivf --tile-columns=1 --tile-rows=1 \
                 --kf-max-dist=1000 --kf-min-dist=1000 --cpu-used=0 \
                 --passes=1 --threads=1 --lag-in-frames=0 \
                 --end-usage=q --limit=600 --cq-level=42 \
                 --aq-mode=4 --error-resilient=1 out.bits FourPeople_1280x720_60.y4m

Change-Id: Iba01cf2732a57f3c27481ac2a3c8fc37bb9e5533
diff --git a/aomenc.c b/aomenc.c
index 3b34ed8..373b69d 100644
--- a/aomenc.c
+++ b/aomenc.c
@@ -392,16 +392,13 @@
     ARG_DEF(NULL, "frame-parallel", 1,
             "Enable frame parallel decodability features "
             "(0: false (default), 1: true)");
+static const arg_def_t aq_mode = ARG_DEF(
+    NULL, "aq-mode", 1,
+    "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, "
 #if CONFIG_DELTA_Q
-static const arg_def_t aq_mode = ARG_DEF(
-    NULL, "aq-mode", 1,
-    "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, "
-    "3: cyclic refresh)");
+    "3: cyclic refresh, 4: delta quant)");
 #else
-static const arg_def_t aq_mode = ARG_DEF(
-    NULL, "aq-mode", 1,
-    "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, "
-    "3: cyclic refresh, 4: equator360)");
+    "3: cyclic refresh)");
 #endif
 static const arg_def_t frame_periodic_boost =
     ARG_DEF(NULL, "frame-boost", 1,
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 23e0409..3f6f0da 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -364,6 +364,11 @@
     };
 #endif  // CONFIG_MOTION_VAR || !CONFIG_WARPED_MOTION
 
+#if CONFIG_DELTA_Q
+static const aom_prob default_delta_q_probs[DELTA_Q_CONTEXTS] = { 220, 220,
+                                                                  220 };
+#endif
+
 /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
 const aom_tree_index av1_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
   -DC_PRED,   2,          /* 0 = DC_NODE */
@@ -1399,6 +1404,9 @@
                      PARTITION_CONTEXTS);
   av1_tree_to_cdf(av1_segment_tree, fc->seg.tree_probs, fc->seg.tree_cdf);
 #endif
+#if CONFIG_DELTA_Q
+  av1_copy(fc->delta_q_prob, default_delta_q_probs);
+#endif
 }
 
 #if CONFIG_DAALA_EC
@@ -1542,6 +1550,12 @@
           av1_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
           counts->switchable_interp[i], fc->switchable_interp_prob[i]);
   }
+
+#if CONFIG_DELTA_Q
+  for (i = 0; i < DELTA_Q_CONTEXTS; ++i)
+    fc->delta_q_prob[i] =
+        mode_mv_merge_probs(pre_fc->delta_q_prob[i], counts->delta_q[i]);
+#endif
 }
 
 void av1_adapt_intra_frame_probs(AV1_COMMON *cm) {
@@ -1644,6 +1658,11 @@
   }
 #endif  // CONFIG_EXT_PARTITION_TYPES
 
+#if CONFIG_DELTA_Q
+  for (i = 0; i < DELTA_Q_CONTEXTS; ++i)
+    fc->delta_q_prob[i] =
+        mode_mv_merge_probs(pre_fc->delta_q_prob[i], counts->delta_q[i]);
+#endif
 #if CONFIG_EXT_INTRA
   for (i = 0; i < PLANE_TYPES; ++i) {
     fc->ext_intra_probs[i] = av1_mode_mv_merge_probs(pre_fc->ext_intra_probs[i],
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 03fe2bf..11e76a9 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -138,6 +138,9 @@
   aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
   aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SIZES][TX_TYPES];
 #endif
+#if CONFIG_DELTA_Q
+  aom_prob delta_q_prob[DELTA_Q_CONTEXTS];
+#endif
 } FRAME_CONTEXT;
 
 typedef struct FRAME_COUNTS {
@@ -203,6 +206,9 @@
 #else
   nmv_context_counts mv;
 #endif
+#if CONFIG_DELTA_Q
+  unsigned int delta_q[DELTA_Q_CONTEXTS][2];
+#endif
 #if CONFIG_EXT_TX
 #if CONFIG_RECT_TX
   unsigned int tx_size_implied[TX_SIZES][TX_SIZES];
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 79c0f18..ce6032b 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -371,6 +371,11 @@
 #endif
 
 #define INTER_MODE_CONTEXTS 7
+#if CONFIG_DELTA_Q
+#define DELTA_Q_SMALL 3
+#define DELTA_Q_CONTEXTS (DELTA_Q_SMALL)
+#define DEFAULT_DELTA_Q_RES 4
+#endif
 
 /* Segment Feature Masks */
 #define MAX_MV_REF_CANDIDATES 2
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 525680f..bbcedc4 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -397,6 +397,8 @@
 #endif
 #if CONFIG_DELTA_Q
   int delta_q_present_flag;
+  // Resolution of delta quant
+  int delta_q_res;
 #endif
 } AV1_COMMON;
 
@@ -501,7 +503,6 @@
     }
     xd->fc = cm->fc;
   }
-
   xd->above_seg_context = cm->above_seg_context;
 #if CONFIG_VAR_TX
   xd->above_txfm_context = cm->above_txfm_context;
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
index 849450d..eeaeb21 100644
--- a/av1/common/thread_common.c
+++ b/av1/common/thread_common.c
@@ -341,4 +341,9 @@
   unsigned int i;
 
   for (i = 0; i < n_counts; i++) acc[i] += cnt[i];
+  // NOTE(review): confirm `j` is declared; only `i` is visible in this hunk.
+#if CONFIG_DELTA_Q  // NOTE(review): may repeat the acc[]/cnt[] sum above
+  for (i = 0; i < DELTA_Q_CONTEXTS; i++)
+    for (j = 0; j < 2; ++j) cm->counts.delta_q[i][j] += counts->delta_q[i][j];
+#endif
 }
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index e3c7698..790a008 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -3515,6 +3515,7 @@
       }
     }
 
+    cm->delta_q_res = 1;
     if (segment_quantizer_active == 0) {
       cm->delta_q_present_flag = aom_rb_read_bit(rb);
     } else {
@@ -3522,6 +3523,7 @@
     }
     if (cm->delta_q_present_flag) {
       xd->prev_qindex = cm->base_qindex;
+      cm->delta_q_res = 1 << aom_rb_read_literal(rb, 2);
     }
   }
 #endif
@@ -3724,6 +3726,11 @@
   for (k = 0; k < SKIP_CONTEXTS; ++k)
     av1_diff_update_prob(&r, &fc->skip_probs[k], ACCT_STR);
 
+#if CONFIG_DELTA_Q
+  for (k = 0; k < DELTA_Q_CONTEXTS; ++k)
+    av1_diff_update_prob(&r, &fc->delta_q_prob[k], ACCT_STR);
+#endif
+
   if (cm->seg.enabled && cm->seg.update_map) {
     if (cm->seg.temporal_update) {
       for (k = 0; k < PREDICTION_PROBS; k++)
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 37bc0f4..ed2fc4c 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -46,6 +46,42 @@
   return (PREDICTION_MODE)aom_read_tree(r, av1_intra_mode_tree, p, ACCT_STR);
 }
 
+#if CONFIG_DELTA_Q
+static int read_delta_qindex(AV1_COMMON *cm, MACROBLOCKD *xd, aom_reader *r,
+                             MB_MODE_INFO *const mbmi, int mi_col, int mi_row) {
+  FRAME_COUNTS *counts = xd->counts;
+  int sign, abs, reduced_delta_qindex = 0;
+  BLOCK_SIZE bsize = mbmi->sb_type;
+  const int b_col = mi_col & MAX_MIB_MASK;
+  const int b_row = mi_row & MAX_MIB_MASK;
+  const int read_delta_q_flag = (b_col == 0 && b_row == 0);
+  int rem_bits, thr, bit = 1;
+  // Returns the signed delta in units of delta_q_res; 0 when nothing is read.
+  if ((bsize != BLOCK_64X64 || mbmi->skip == 0) && read_delta_q_flag) {
+    abs = 0;  // unary prefix: count 1-bits, one probability per position
+    while (abs < DELTA_Q_SMALL && bit) {
+      bit = aom_read(r, cm->fc->delta_q_prob[abs], ACCT_STR);
+      if (counts) counts->delta_q[abs][bit]++;
+      abs += bit;
+    }
+    if (abs == DELTA_Q_SMALL) {  // escape: 3-bit length, then the remainder
+      rem_bits = aom_read_literal(r, 3, ACCT_STR);
+      thr = (1 << rem_bits) + 1;
+      abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr;
+    }
+    // The sign bit is present only for a non-zero magnitude.
+    if (abs) {
+      sign = aom_read_bit(r, ACCT_STR);
+    } else {
+      sign = 1;
+    }
+    // sign == 1 selects the negative value.
+    reduced_delta_qindex = sign ? -abs : abs;
+  }
+  return reduced_delta_qindex;
+}
+#endif
+
 static PREDICTION_MODE read_intra_mode_y(AV1_COMMON *cm, MACROBLOCKD *xd,
                                          aom_reader *r, int size_group) {
   const PREDICTION_MODE y_mode =
@@ -601,32 +637,10 @@
 
 #if CONFIG_DELTA_Q
   if (cm->delta_q_present_flag) {
-    int b_col = mi_col & 7;
-    int b_row = mi_row & 7;
-    int read_delta_q_flag = (b_col == 0 && b_row == 0);
-    if ((bsize != BLOCK_64X64 || mbmi->skip == 0) && read_delta_q_flag) {
-      int sign, abs, tmp, delta_qindex;
-
-      abs = 0;
-      tmp = aom_read_bit(r, ACCT_STR);
-      while (tmp == 0 && abs < 2) {
-        tmp = aom_read_bit(r, ACCT_STR);
-        abs++;
-      }
-      if (tmp == 0) {
-        abs = aom_read_literal(r, 6, ACCT_STR);
-      }
-
-      if (abs) {
-        sign = aom_read_bit(r, ACCT_STR);
-      } else {
-        sign = 1;
-      }
-
-      delta_qindex = sign ? -abs : abs;
-      xd->current_qindex = xd->prev_qindex + delta_qindex;
-      xd->prev_qindex = xd->current_qindex;
-    }
+    xd->current_qindex =
+        xd->prev_qindex +
+        read_delta_qindex(cm, xd, r, mbmi, mi_col, mi_row) * cm->delta_q_res;
+    xd->prev_qindex = xd->current_qindex;
   }
 #endif
 
@@ -1689,31 +1703,10 @@
     mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
 #if CONFIG_DELTA_Q
     if (cm->delta_q_present_flag) {
-      BLOCK_SIZE bsize = mbmi->sb_type;
-      int b_col = mi_col & 7;
-      int b_row = mi_row & 7;
-      int read_delta_q_flag = (b_col == 0 && b_row == 0);
-      if ((bsize != BLOCK_64X64 || mbmi->skip == 0) && read_delta_q_flag) {
-        int sign, abs, tmp, delta_qindex;
-
-        abs = 0;
-        tmp = aom_read_bit(r, ACCT_STR);
-        while (tmp == 0 && abs < 2) {
-          tmp = aom_read_bit(r, ACCT_STR);
-          abs++;
-        }
-        if (tmp == 0) {
-          abs = aom_read_literal(r, 6, ACCT_STR);
-        }
-        if (abs) {
-          sign = aom_read_bit(r, ACCT_STR);
-        } else {
-          sign = 1;
-        }
-        delta_qindex = sign ? -abs : abs;
-        xd->current_qindex = xd->prev_qindex + delta_qindex;
-        xd->prev_qindex = xd->current_qindex;
-      }
+      xd->current_qindex =
+          xd->prev_qindex +
+          read_delta_qindex(cm, xd, r, mbmi, mi_col, mi_row) * cm->delta_q_res;
+      xd->prev_qindex = xd->current_qindex;
     }
 #endif
     inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index fe6ffe7..1de40fc 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -33,6 +33,7 @@
 #include "av1/common/entropymode.h"
 #include "av1/common/entropymv.h"
 #include "av1/common/mvref_common.h"
+#include "av1/common/odintrin.h"
 #include "av1/common/pred_common.h"
 #include "av1/common/reconinter.h"
 #include "av1/common/seg_common.h"
@@ -489,19 +490,38 @@
 }
 
 #if CONFIG_DELTA_Q
-static void write_delta_qindex(int delta_qindex, aom_writer *w) {
+static void write_delta_qindex(const AV1_COMMON *cm, int delta_qindex,
+                               aom_writer *w) {
   int sign = delta_qindex < 0;
   int abs = sign ? -delta_qindex : delta_qindex;
-  if (abs < 3) {
-    aom_write_literal(w, 1, abs + 1);
-  } else {
-    aom_write_literal(w, 0, 3);
-    aom_write_literal(w, abs, 6);
+  int rem_bits, thr, i = 0;
+  int smallval = abs < DELTA_Q_SMALL ? 1 : 0;
+  // Unary prefix of min(abs, DELTA_Q_SMALL) ones, zero-terminated if small.
+  while (i < DELTA_Q_SMALL && i <= abs) {
+    int bit = (i < abs);
+    aom_write(w, bit, cm->fc->delta_q_prob[i]);
+    i++;
+  }
+  // Escape for larger values: 3-bit rem_bits, then (abs - thr) in rem_bits.
+  if (!smallval) {
+    rem_bits = OD_ILOG_NZ(abs - 1) - 1;
+    thr = (1 << rem_bits) + 1;
+    aom_write_literal(w, rem_bits, 3);
+    aom_write_literal(w, abs - thr, rem_bits);
   }
   if (abs > 0) {
     aom_write_bit(w, sign);
   }
 }
+
+static void update_delta_q_probs(AV1_COMMON *cm, aom_writer *w,
+                                 FRAME_COUNTS *counts) {
+  int k;
+  // Pass each context's probability and counts to av1_cond_prob_diff_update.
+  for (k = 0; k < DELTA_Q_CONTEXTS; ++k) {
+    av1_cond_prob_diff_update(w, &cm->fc->delta_q_prob[k], counts->delta_q[k]);
+  }
+}
 #endif
 
 static void update_skip_probs(AV1_COMMON *cm, aom_writer *w,
@@ -1126,12 +1146,14 @@
 #endif  // CONFIG_SUPERTX
 #if CONFIG_DELTA_Q
   if (cm->delta_q_present_flag) {
-    int mi_row = (-xd->mb_to_top_edge) >> 6;
-    int mi_col = (-xd->mb_to_left_edge) >> 6;
-    int super_block_upper_left = ((mi_row & 7) == 0) && ((mi_col & 7) == 0);
+    int mi_row = (-xd->mb_to_top_edge) >> (MI_SIZE_LOG2 + 3);
+    int mi_col = (-xd->mb_to_left_edge) >> (MI_SIZE_LOG2 + 3);
+    int super_block_upper_left =
+        ((mi_row & MAX_MIB_MASK) == 0) && ((mi_col & MAX_MIB_MASK) == 0);
     if ((bsize != BLOCK_64X64 || skip == 0) && super_block_upper_left) {
-      int delta_qindex = mbmi->current_q_index - xd->prev_qindex;
-      write_delta_qindex(delta_qindex, w);
+      int reduced_delta_qindex =
+          (mbmi->current_q_index - xd->prev_qindex) / cm->delta_q_res;
+      write_delta_qindex(cm, reduced_delta_qindex, w);
       xd->prev_qindex = mbmi->current_q_index;
     }
   }
@@ -1562,8 +1584,9 @@
     int mi_col = (-xd->mb_to_left_edge) >> 6;
     int super_block_upper_left = ((mi_row & 7) == 0) && ((mi_col & 7) == 0);
     if ((bsize != BLOCK_64X64 || skip == 0) && super_block_upper_left) {
-      int delta_qindex = mbmi->current_q_index - xd->prev_qindex;
-      write_delta_qindex(delta_qindex, w);
+      int reduced_delta_qindex =
+          (mbmi->current_q_index - xd->prev_qindex) / cm->delta_q_res;
+      write_delta_qindex(cm, reduced_delta_qindex, w);
       xd->prev_qindex = mbmi->current_q_index;
     }
   }
@@ -3409,6 +3432,7 @@
       cm->delta_q_present_flag = cpi->oxcf.aq_mode == DELTA_AQ;
       aom_wb_write_bit(wb, cm->delta_q_present_flag);
       if (cm->delta_q_present_flag) {
+        aom_wb_write_literal(wb, OD_ILOG_NZ(cm->delta_q_res) - 1, 2);
         xd->prev_qindex = cm->base_qindex;
       }
     }
@@ -3530,6 +3554,9 @@
 #endif
 
   update_skip_probs(cm, header_bc, counts);
+#if CONFIG_DELTA_Q
+  update_delta_q_probs(cm, header_bc, counts);
+#endif
   update_seg_probs(cpi, header_bc);
 
   for (i = 0; i < INTRA_MODES; ++i)
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index dce26dd..66de9c6 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1804,13 +1804,34 @@
                          int supertx_enabled
 #endif
                          ) {
+#if CONFIG_DELTA_Q
+  MACROBLOCK *x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+#else
   const MACROBLOCK *x = &td->mb;
   const MACROBLOCKD *const xd = &x->e_mbd;
+#endif
   const MODE_INFO *const mi = xd->mi[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
   const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   const BLOCK_SIZE bsize = mbmi->sb_type;
 
+#if CONFIG_DELTA_Q
+  // delta quant applies to both intra and inter
+  const int super_block_upper_left = ((mi_row & 7) == 0) && ((mi_col & 7) == 0);
+
+  if (cm->delta_q_present_flag && (bsize != BLOCK_64X64 || !mbmi->skip) &&
+      super_block_upper_left) {
+    const int dq = (mbmi->current_q_index - xd->prev_qindex) / cm->delta_q_res;
+    const int absdq = abs(dq);
+    int i;
+    // Cap at DELTA_Q_SMALL: delta_q[] has no bins beyond it (as in decoder).
+    for (i = 0; i < absdq && i < DELTA_Q_SMALL; ++i)
+      td->counts->delta_q[i][1]++;
+    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
+    xd->prev_qindex = mbmi->current_q_index;
+  }
+#endif
   if (!frame_is_intra_only(cm)) {
     FRAME_COUNTS *const counts = td->counts;
     const int inter_block = is_inter_block(mbmi);
@@ -4154,6 +4175,12 @@
   // Initialize the left context for the new SB row
   av1_zero_left_context(xd);
 
+#if CONFIG_DELTA_Q
+  // Reset delta for every tile
+  if (cm->delta_q_present_flag)
+    if (mi_row == tile_info->mi_row_start) xd->prev_qindex = cm->base_qindex;
+#endif
+
   // Code each SB in the row
   for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
        mi_col += cm->mib_size) {
@@ -4195,12 +4222,21 @@
 
 #if CONFIG_DELTA_Q
     if (cpi->oxcf.aq_mode == DELTA_AQ) {
+      // Test mode for delta quantization
       int sb_row = mi_row >> 3;
       int sb_col = mi_col >> 3;
       int sb_stride = (cm->width + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2;
       int index = ((sb_row * sb_stride + sb_col + 8) & 31) - 16;
-      int offset_qindex = index < 0 ? -index - 8 : index - 8;
-      int current_qindex = clamp(cm->base_qindex + offset_qindex, 1, 255);
+
+      // Ensure divisibility of delta_qindex by delta_q_res
+      int offset_qindex = (index < 0 ? -index - 8 : index - 8);
+      int qmask = ~(cm->delta_q_res - 1);
+      int current_qindex = clamp(cm->base_qindex + offset_qindex,
+                                 cm->delta_q_res, 256 - cm->delta_q_res);
+      current_qindex =
+          ((current_qindex - cm->base_qindex + cm->delta_q_res / 2) & qmask) +
+          cm->base_qindex;
+
       xd->delta_qindex = current_qindex - cm->base_qindex;
       set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
       xd->mi[0]->mbmi.current_q_index = current_qindex;
@@ -4653,6 +4689,12 @@
   cm->use_prev_frame_mvs =
       !cm->error_resilient_mode && cm->width == cm->last_width &&
       cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;
+
+#if CONFIG_DELTA_Q
+  // Fix delta q resolution for the moment
+  cm->delta_q_res = DEFAULT_DELTA_Q_RES;
+#endif
+
 #if CONFIG_EXT_REFS
   // NOTE(zoeliu): As cm->prev_frame can take neither a frame of
   //               show_exisiting_frame=1, nor can it take a frame not used as