NEW_MULTISYMBOL: Code extra_bits using multi-symbols.

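Code the extra bits using up to 5 non-adaptive multi-symbols of at most
4 bits each, starting from the LSB. For CATEGORY6 tokens the number of
coded bits is rounded up to a multiple of 4 (capped at 18), which
reduces the number of skipped bits to fit the multi-symbol groups.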

Change-Id: I147b5b0c3815bd39c338ee9a4779638ae75d7ab5
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index bef2a71..d47c678 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -35,6 +35,54 @@
 };
 /* clang-format on */
 
+#if CONFIG_NEW_MULTISYMBOL
+/* Extra bits coded from LSB to MSB */
+const aom_cdf_prob av1_cat1_cdf0[CDF_SIZE(2)] = { 20352, 32768, 0 };
+const aom_cdf_prob *av1_cat1_cdf[] = { av1_cat1_cdf0 };
+
+const aom_cdf_prob av1_cat2_cdf0[CDF_SIZE(4)] = { 11963, 21121, 27719, 32768,
+                                                  0 };
+const aom_cdf_prob *av1_cat2_cdf[] = { av1_cat2_cdf0 };
+const aom_cdf_prob av1_cat3_cdf0[CDF_SIZE(8)] = {
+  7001, 12802, 17911, 22144, 25503, 28286, 30737, 32768, 0
+};
+const aom_cdf_prob *av1_cat3_cdf[] = { av1_cat3_cdf0 };
+
+const aom_cdf_prob av1_cat4_cdf0[CDF_SIZE(16)] = {
+  3934,  7460,  10719, 13640, 16203, 18500, 20624, 22528, 24316,
+  25919, 27401, 28729, 29894, 30938, 31903, 32768, 0
+};
+const aom_cdf_prob *av1_cat4_cdf[] = { av1_cat4_cdf0 };
+
+const aom_cdf_prob av1_cat5_cdf0[CDF_SIZE(16)] = {
+  2942,  5794,  8473,  11069, 13469, 15795, 17980, 20097, 21952,
+  23750, 25439, 27076, 28589, 30056, 31434, 32768, 0
+};
+const aom_cdf_prob av1_cat5_cdf1[CDF_SIZE(2)] = { 23040, 32768, 0 };
+const aom_cdf_prob *av1_cat5_cdf[] = { av1_cat5_cdf0, av1_cat5_cdf1 };
+
+const aom_cdf_prob av1_cat6_cdf0[CDF_SIZE(16)] = { 2382,  4727,  7036,  9309,
+                                                   11512, 13681, 15816, 17918,
+                                                   19892, 21835, 23748, 25632,
+                                                   27458, 29255, 31024, 32768 };
+const aom_cdf_prob av1_cat6_cdf1[CDF_SIZE(16)] = { 9314,  15584, 19741, 22540,
+                                                   25391, 27310, 28583, 29440,
+                                                   30493, 31202, 31672, 31988,
+                                                   32310, 32527, 32671, 32768 };
+const aom_cdf_prob av1_cat6_cdf2[CDF_SIZE(16)] = { 29548, 31129, 31960, 32004,
+                                                   32473, 32498, 32511, 32512,
+                                                   32745, 32757, 32764, 32764,
+                                                   32768, 32768, 32768, 32768 };
+const aom_cdf_prob av1_cat6_cdf3[CDF_SIZE(16)] = { 32006, 32258, 32510, 32512,
+                                                   32638, 32639, 32640, 32640,
+                                                   32766, 32767, 32768, 32768,
+                                                   32768, 32768, 32768, 32768 };
+const aom_cdf_prob av1_cat6_cdf4[CDF_SIZE(4)] = { 32513, 32641, 32767, 32768 };
+const aom_cdf_prob *av1_cat6_cdf[] = {
+  av1_cat6_cdf0, av1_cat6_cdf1, av1_cat6_cdf2, av1_cat6_cdf3, av1_cat6_cdf4
+};
+#endif
+/* Extra bits coded from MSB to LSB */
 const aom_prob av1_cat1_prob[] = { 159 };
 const aom_prob av1_cat2_prob[] = { 165, 145 };
 const aom_prob av1_cat3_prob[] = { 173, 148, 140 };
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index 33f287a..fee9a83 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -99,11 +99,23 @@
 DECLARE_ALIGNED(16, extern const uint8_t, av1_cat4_prob[4]);
 DECLARE_ALIGNED(16, extern const uint8_t, av1_cat5_prob[5]);
 DECLARE_ALIGNED(16, extern const uint8_t, av1_cat6_prob[18]);
+#if CONFIG_NEW_MULTISYMBOL
+extern const aom_cdf_prob *av1_cat1_cdf[];
+extern const aom_cdf_prob *av1_cat2_cdf[];
+extern const aom_cdf_prob *av1_cat3_cdf[];
+extern const aom_cdf_prob *av1_cat4_cdf[];
+extern const aom_cdf_prob *av1_cat5_cdf[];
+extern const aom_cdf_prob *av1_cat6_cdf[];
+#endif
 
 #define EOB_MODEL_TOKEN 3
 
 typedef struct {
+#if CONFIG_NEW_MULTISYMBOL
+  const aom_cdf_prob **cdf;
+#else
   const aom_prob *prob;
+#endif
   int len;
   int base_val;
   const int16_t *cost;
@@ -125,6 +137,10 @@
   int tx_offset = (int)(tx_size - TX_4X4);
 #endif
   int bits = (int)bit_depth + 3 + tx_offset;
+#if CONFIG_NEW_MULTISYMBOL
+  // Round up to a multiple of 4, capped at sizeof(av1_cat6_prob).
+  bits = AOMMIN((int)sizeof(av1_cat6_prob), ((bits + 3) & ~3));
+#endif
   assert(bits <= (int)sizeof(av1_cat6_prob));
   return bits;
 }
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 9a1510b..8977a74 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -42,12 +42,30 @@
     if (counts) ++coef_counts[band][ctx][token]; \
   } while (0)
 
+#if CONFIG_NEW_MULTISYMBOL
+#define READ_COEFF(prob_name, cdf_name, num, r) read_coeff(cdf_name, num, r)
+static INLINE int read_coeff(const aom_cdf_prob *const *cdf, int n,
+                             aom_reader *r) {
+  int val = 0;
+  int i = 0;
+  int count = 0;
+  while (count < n) {
+    const int size = AOMMIN(n - count, 4);
+    val |= aom_read_cdf(r, cdf[i++], 1 << size, ACCT_STR) << count;
+    count += size;
+  }
+  return val;
+}
+#else
+#define READ_COEFF(prob_name, cdf_name, num, r) read_coeff(prob_name, num, r)
 static INLINE int read_coeff(const aom_prob *probs, int n, aom_reader *r) {
   int i, val = 0;
   for (i = 0; i < n; ++i) val = (val << 1) | aom_read(r, probs[i], ACCT_STR);
   return val;
 }
 
+#endif
+
 static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
                         TX_SIZE tx_size, TX_TYPE tx_type, const int16_t *dq,
 #if CONFIG_NEW_QUANT
@@ -79,6 +97,7 @@
   aom_cdf_prob(*cdf_head)[CDF_SIZE(ENTROPY_TOKENS)];
   aom_cdf_prob(*cdf_tail)[CDF_SIZE(ENTROPY_TOKENS)];
   int val = 0;
+
 #if !CONFIG_EC_ADAPT
   unsigned int *blockz_count;
 #endif
@@ -171,19 +190,19 @@
       case THREE_TOKEN:
       case FOUR_TOKEN: val = token; break;
       case CATEGORY1_TOKEN:
-        val = CAT1_MIN_VAL + read_coeff(av1_cat1_prob, 1, r);
+        val = CAT1_MIN_VAL + READ_COEFF(av1_cat1_prob, av1_cat1_cdf, 1, r);
         break;
       case CATEGORY2_TOKEN:
-        val = CAT2_MIN_VAL + read_coeff(av1_cat2_prob, 2, r);
+        val = CAT2_MIN_VAL + READ_COEFF(av1_cat2_prob, av1_cat2_cdf, 2, r);
         break;
       case CATEGORY3_TOKEN:
-        val = CAT3_MIN_VAL + read_coeff(av1_cat3_prob, 3, r);
+        val = CAT3_MIN_VAL + READ_COEFF(av1_cat3_prob, av1_cat3_cdf, 3, r);
         break;
       case CATEGORY4_TOKEN:
-        val = CAT4_MIN_VAL + read_coeff(av1_cat4_prob, 4, r);
+        val = CAT4_MIN_VAL + READ_COEFF(av1_cat4_prob, av1_cat4_cdf, 4, r);
         break;
       case CATEGORY5_TOKEN:
-        val = CAT5_MIN_VAL + read_coeff(av1_cat5_prob, 5, r);
+        val = CAT5_MIN_VAL + READ_COEFF(av1_cat5_prob, av1_cat5_cdf, 5, r);
         break;
       case CATEGORY6_TOKEN: {
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -193,8 +212,8 @@
         const int skip_bits = (int)sizeof(av1_cat6_prob) -
                               av1_get_cat6_extrabits_size(tx_size, 8);
 #endif
-        val = CAT6_MIN_VAL +
-              read_coeff(av1_cat6_prob + skip_bits, 18 - skip_bits, r);
+        val = CAT6_MIN_VAL + READ_COEFF(av1_cat6_prob + skip_bits, av1_cat6_cdf,
+                                        18 - skip_bits, r);
       } break;
     }
 
@@ -269,19 +288,19 @@
       case THREE_TOKEN:
       case FOUR_TOKEN: val = token; break;
       case CATEGORY1_TOKEN:
-        val = CAT1_MIN_VAL + read_coeff(av1_cat1_prob, 1, r);
+        val = CAT1_MIN_VAL + READ_COEFF(av1_cat1_prob, av1_cat1_cdf, 1, r);
         break;
       case CATEGORY2_TOKEN:
-        val = CAT2_MIN_VAL + read_coeff(av1_cat2_prob, 2, r);
+        val = CAT2_MIN_VAL + READ_COEFF(av1_cat2_prob, av1_cat2_cdf, 2, r);
         break;
       case CATEGORY3_TOKEN:
-        val = CAT3_MIN_VAL + read_coeff(av1_cat3_prob, 3, r);
+        val = CAT3_MIN_VAL + READ_COEFF(av1_cat3_prob, av1_cat3_cdf, 3, r);
         break;
       case CATEGORY4_TOKEN:
-        val = CAT4_MIN_VAL + read_coeff(av1_cat4_prob, 4, r);
+        val = CAT4_MIN_VAL + READ_COEFF(av1_cat4_prob, av1_cat4_cdf, 4, r);
         break;
       case CATEGORY5_TOKEN:
-        val = CAT5_MIN_VAL + read_coeff(av1_cat5_prob, 5, r);
+        val = CAT5_MIN_VAL + READ_COEFF(av1_cat5_prob, av1_cat5_cdf, 5, r);
         break;
       case CATEGORY6_TOKEN: {
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -291,8 +310,8 @@
         const int skip_bits = (int)sizeof(av1_cat6_prob) -
                               av1_get_cat6_extrabits_size(tx_size, 8);
 #endif
-        val = CAT6_MIN_VAL +
-              read_coeff(av1_cat6_prob + skip_bits, 18 - skip_bits, r);
+        val = CAT6_MIN_VAL + READ_COEFF(av1_cat6_prob + skip_bits, av1_cat6_cdf,
+                                        18 - skip_bits, r);
       } break;
     }
 #else  // CONFIG_EC_MULTISYMBOL
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index adab304..2fe1f9b 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -818,6 +818,34 @@
 }
 #endif  // CONFIG_SUPERTX
 
+#if CONFIG_NEW_MULTISYMBOL
+static INLINE void write_coeff_extra(const aom_cdf_prob *const *cdf, int val,
+                                     int n, aom_writer *w) {
+  // Code the extra bits from LSB to MSB in groups of 4
+  int i = 0;
+  int count = 0;
+  while (count < n) {
+    const int size = AOMMIN(n - count, 4);
+    const int mask = (1 << size) - 1;
+    aom_write_cdf(w, val & mask, cdf[i++], 1 << size);
+    val >>= size;
+    count += size;
+  }
+}
+#else
+static INLINE void write_coeff_extra(const aom_prob *pb, int value,
+                                     int num_bits, int skip_bits, aom_writer *w,
+                                     TOKEN_STATS *token_stats) {
+  // Code the extra bits from MSB to LSB 1 bit at a time
+  int index;
+  for (index = skip_bits; index < num_bits; ++index) {
+    const int shift = num_bits - index - 1;
+    const int bb = (value >> shift) & 1;
+    aom_write_record(w, bb, pb[index], token_stats);
+  }
+}
+#endif
+
 #if CONFIG_NEW_TOKENSET
 static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp,
                            const TOKENEXTRA *const stop,
@@ -831,7 +859,6 @@
 
   while (p < stop && p->token != EOSB_TOKEN) {
     const int token = p->token;
-    aom_tree_index index = 0;
     const av1_extra_bit *const extra_bits = &av1_extra_bits[token];
 
     if (token == BLOCK_Z_TOKEN) {
@@ -853,30 +880,23 @@
     if (extra_bits->base_val) {
       const int bit_string = p->extra;
       const int bit_string_length = extra_bits->len;  // Length of extra bits to
+      const int is_cat6 = (extra_bits->base_val == CAT6_MIN_VAL);
       // be written excluding
       // the sign bit.
-      int skip_bits = (extra_bits->base_val == CAT6_MIN_VAL)
+      int skip_bits = is_cat6
                           ? (int)sizeof(av1_cat6_prob) -
                                 av1_get_cat6_extrabits_size(tx_size, bit_depth)
                           : 0;
 
-      if (bit_string_length > 0) {
-        const unsigned char *pb = extra_bits->prob;
-        const int value = bit_string >> 1;
-        const int num_bits = bit_string_length;  // number of bits in value
-        assert(num_bits > 0);
-
-        for (index = 0; index < num_bits; ++index) {
-          const int shift = num_bits - index - 1;
-          const int bb = (value >> shift) & 1;
-          if (skip_bits) {
-            --skip_bits;
-            assert(!bb);
-          } else {
-            aom_write_record(w, bb, pb[index], token_stats);
-          }
-        }
-      }
+      assert(!(bit_string >> (bit_string_length - skip_bits + 1)));
+      if (bit_string_length > 0)
+#if CONFIG_NEW_MULTISYMBOL
+        write_coeff_extra(extra_bits->cdf, bit_string >> 1,
+                          bit_string_length - skip_bits, w);
+#else
+        write_coeff_extra(extra_bits->prob, bit_string >> 1, bit_string_length,
+                          skip_bits, w, token_stats);
+#endif
 
       aom_write_bit_record(w, bit_string & 1, token_stats);
     }
@@ -904,7 +924,6 @@
 
   while (p < stop && p->token != EOSB_TOKEN) {
     const int token = p->token;
-    aom_tree_index index = 0;
 #if !CONFIG_EC_MULTISYMBOL
     const struct av1_token *const coef_encoding = &av1_coef_encodings[token];
     int coef_value = coef_encoding->value;
@@ -951,30 +970,24 @@
     if (extra_bits->base_val) {
       const int bit_string = p->extra;
       const int bit_string_length = extra_bits->len;  // Length of extra bits to
-                                                      // be written excluding
-                                                      // the sign bit.
+      // be written excluding
+      // the sign bit.
       int skip_bits = (extra_bits->base_val == CAT6_MIN_VAL)
                           ? (int)sizeof(av1_cat6_prob) -
                                 av1_get_cat6_extrabits_size(tx_size, bit_depth)
                           : 0;
+
+      assert(!(bit_string >> (bit_string_length - skip_bits + 1)));
       if (bit_string_length > 0) {
-        const unsigned char *pb = extra_bits->prob;
-        const int value = bit_string >> 1;
-        const int num_bits = bit_string_length;  // number of bits in value
-        assert(num_bits > 0);
-
-        for (index = 0; index < num_bits; ++index) {
-          const int shift = num_bits - index - 1;
-          const int bb = (value >> shift) & 1;
-          if (skip_bits) {
-            --skip_bits;
-            assert(!bb);
-          } else {
-            aom_write_record(w, bb, pb[index], token_stats);
-          }
-        }
+#if CONFIG_NEW_MULTISYMBOL
+        // No extra rounding: the cat6 size is already a multiple of 4 or 18.
+        write_coeff_extra(extra_bits->cdf, bit_string >> 1,
+                          bit_string_length - skip_bits, w);
+#else
+        write_coeff_extra(extra_bits->prob, bit_string >> 1, bit_string_length,
+                          skip_bits, w, token_stats);
+#endif
       }
-
       aom_write_bit_record(w, bit_string & 1, token_stats);
     }
     ++p;
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 6a9a755..44b09e8 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -229,6 +229,22 @@
   0, 3, 6, 9, 12, 18, 24, 30
 };
 
+#if CONFIG_NEW_MULTISYMBOL
+const av1_extra_bit av1_extra_bits[ENTROPY_TOKENS] = {
+  { 0, 0, 0, zero_cost },                        // ZERO_TOKEN
+  { 0, 0, 1, sign_cost },                        // ONE_TOKEN
+  { 0, 0, 2, sign_cost },                        // TWO_TOKEN
+  { 0, 0, 3, sign_cost },                        // THREE_TOKEN
+  { 0, 0, 4, sign_cost },                        // FOUR_TOKEN
+  { av1_cat1_cdf, 1, CAT1_MIN_VAL, cat1_cost },  // CATEGORY1_TOKEN
+  { av1_cat2_cdf, 2, CAT2_MIN_VAL, cat2_cost },  // CATEGORY2_TOKEN
+  { av1_cat3_cdf, 3, CAT3_MIN_VAL, cat3_cost },  // CATEGORY3_TOKEN
+  { av1_cat4_cdf, 4, CAT4_MIN_VAL, cat4_cost },  // CATEGORY4_TOKEN
+  { av1_cat5_cdf, 5, CAT5_MIN_VAL, cat5_cost },  // CATEGORY5_TOKEN
+  { av1_cat6_cdf, 18, CAT6_MIN_VAL, 0 },         // CATEGORY6_TOKEN
+  { 0, 0, 0, zero_cost }                         // EOB_TOKEN
+};
+#else
 const av1_extra_bit av1_extra_bits[ENTROPY_TOKENS] = {
   { 0, 0, 0, zero_cost },                         // ZERO_TOKEN
   { 0, 0, 1, sign_cost },                         // ONE_TOKEN
@@ -243,6 +259,7 @@
   { av1_cat6_prob, 18, CAT6_MIN_VAL, 0 },         // CATEGORY6_TOKEN
   { 0, 0, 0, zero_cost }                          // EOB_TOKEN
 };
+#endif
 
 #if !CONFIG_EC_MULTISYMBOL
 const struct av1_token av1_coef_encodings[ENTROPY_TOKENS] = {