WIP: lv_map_multi: make br multi symbol

Replace the br_cdf and lps_cdf with a new 4-state symbol br_cdf.
The br symbol indicates whether the level is k, k+1, k+2 or >k+2.
In the last case, another br symbol is read. Up to 4 br symbols are
read, which reach level 14 at most. Levels greater than 14 are
Golomb coded.

The adapted symbol count is reduced further by this commit.
E.g. for the I-frame of ducks_take_off at cq=12, the number of adapted symbols
is reduced from 4.27M to 3.85M. About 10% reduction.

Gains seem about neutral on a limited subset.

Change-Id: I294234dbd63fb0fa26aef297a371cba80bd67383
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index dd2177a..ff6f0aa 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -1618,18 +1618,16 @@
 
 #if CONFIG_LV_MAP
   AVERAGE_TILE_CDFS(txb_skip_cdf)
-#if CONFIG_LV_MAP_MULTI
-  AVERAGE_TILE_CDFS(coeff_base_cdf)
-#else
+#if !CONFIG_LV_MAP_MULTI
   AVERAGE_TILE_CDFS(nz_map_cdf)
 #endif
   AVERAGE_TILE_CDFS(eob_flag_cdf)
   AVERAGE_TILE_CDFS(eob_extra_cdf)
   AVERAGE_TILE_CDFS(dc_sign_cdf)
-#if !CONFIG_LV_MAP_MULTI
   AVERAGE_TILE_CDFS(coeff_base_cdf)
-#endif
+#if !CONFIG_LV_MAP_MULTI
   AVERAGE_TILE_CDFS(coeff_lps_cdf)
+#endif
   AVERAGE_TILE_CDFS(coeff_br_cdf)
 #if CONFIG_CTX1D
   AVERAGE_TILE_CDFS(eob_mode_cdf)
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index 33ec743..c3433b7 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -99,8 +99,14 @@
 #define LEVEL_CONTEXTS (BR_TMP_OFFSET * BR_REF_CAT)
 
 #define NUM_BASE_LEVELS 2
+
+#if CONFIG_LV_MAP_MULTI
+#define BR_CDF_SIZE (4)
+#define COEFF_BASE_RANGE (4 * (BR_CDF_SIZE - 1))
+#else
 #define COEFF_BASE_RANGE (16 - NUM_BASE_LEVELS)
 #define BASE_RANGE_SETS 3
+#endif
 
 #define COEFF_CONTEXT_BITS 6
 #define COEFF_CONTEXT_MASK ((1 << COEFF_CONTEXT_BITS) - 1)
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 33f351e..be1ec75 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -498,9 +498,10 @@
     { 133, 128, 129, 144, 128, 116, 135, 128, 43,  101, 100, 128,
       140, 163, 158, 173, 205, 128, 165, 171, 128, 128, 210, 163,
       172, 184, 192, 176, 201, 183, 177, 190, 128, 192, 199, 144,
-      192, 192, 1,   196, 192, 255, 171, 178, 255, 128, 171, 179 } }
+      192, 192, 128, 196, 192, 255, 171, 178, 255, 128, 171, 179 } }
 };
 
+#if !CONFIG_LV_MAP_MULTI
 const aom_prob
     default_coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS] = {
       { { { 62,  128, 54,  116, 128, 51,  97,  128, 59,  68,  107, 128,
@@ -600,6 +601,7 @@
             26,  27,  128, 126, 128, 255, 63, 142, 128, 128, 1,   1,
             125, 159, 128, 173, 212, 128, 85, 189, 128, 128, 255, 171 } } }
     };
+#endif
 #if CONFIG_CTX1D
 static const aom_prob default_eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES] = {
   { { 128, 176, 157 }, { 128, 222, 198 } },
@@ -3230,7 +3232,9 @@
   av1_copy(fc->dc_sign, default_dc_sign);
   av1_copy(fc->coeff_base, default_coeff_base);
   av1_copy(fc->coeff_lps, default_coeff_lps);
+#if !CONFIG_LV_MAP_MULTI
   av1_copy(fc->coeff_br, default_coeff_br);
+#endif
 #if CONFIG_CTX1D
   av1_copy(fc->eob_mode, default_eob_mode);
   av1_copy(fc->empty_line, default_empty_line);
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 7e86fa6..d2cb009 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -144,7 +144,9 @@
   aom_prob coeff_base[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
                      [COEFF_BASE_CONTEXTS];
   aom_prob coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS];
+#if !CONFIG_LV_MAP_MULTI
   aom_prob coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS];
+#endif
 #if CONFIG_CTX1D
   aom_prob eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES];
   aom_prob empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES][EMPTY_LINE_CONTEXTS];
@@ -152,10 +154,7 @@
 #endif  // CONFIG_CTX1D
 
   aom_cdf_prob txb_skip_cdf[TX_SIZES][TXB_SKIP_CONTEXTS][CDF_SIZE(2)];
-#if CONFIG_LV_MAP_MULTI
-  aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
-                             [CDF_SIZE(4)];
-#else
+#if !CONFIG_LV_MAP_MULTI
   aom_cdf_prob nz_map_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
                          [CDF_SIZE(2)];
 #endif
@@ -164,14 +163,19 @@
   aom_cdf_prob eob_extra_cdf[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]
                             [CDF_SIZE(2)];
   aom_cdf_prob dc_sign_cdf[PLANE_TYPES][DC_SIGN_CONTEXTS][CDF_SIZE(2)];
-#if !CONFIG_LV_MAP_MULTI
+#if CONFIG_LV_MAP_MULTI
+  aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
+                             [CDF_SIZE(4)];
+  aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
+                           [CDF_SIZE(BR_CDF_SIZE)];
+#else
   aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
                              [COEFF_BASE_CONTEXTS][CDF_SIZE(2)];
-#endif
   aom_cdf_prob coeff_lps_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
                             [CDF_SIZE(2)];
   aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS]
                            [LEVEL_CONTEXTS][CDF_SIZE(2)];
+#endif
 #if CONFIG_CTX1D
   aom_cdf_prob eob_mode_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES][CDF_SIZE(2)];
   aom_cdf_prob empty_line_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES]
@@ -409,8 +413,10 @@
   unsigned int coeff_base[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
                          [COEFF_BASE_CONTEXTS][2];
   unsigned int coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS][2];
+#if !CONFIG_LV_MAP_MULTI
   unsigned int coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS]
                        [2];
+#endif
 #if CONFIG_CTX1D
   unsigned int eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES][2];
   unsigned int empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES]
diff --git a/av1/common/txb_common.c b/av1/common/txb_common.c
index 836d44d..38ac984 100644
--- a/av1/common/txb_common.c
+++ b/av1/common/txb_common.c
@@ -119,7 +119,22 @@
     }
   }
 
-#if !CONFIG_LV_MAP_MULTI
+#if CONFIG_LV_MAP_MULTI
+  for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+    for (plane = 0; plane < PLANE_TYPES; ++plane) {
+      for (ctx = 0; ctx < COEFF_BASE_CONTEXTS; ++ctx) {
+        int p = fc->nz_map[tx_size][plane][ctx] * 128;
+        fc->coeff_base_cdf[tx_size][plane][ctx][0] = AOM_ICDF(p);
+        p += ((32768 - p) * fc->coeff_base[tx_size][plane][0][ctx]) >> 8;
+        fc->coeff_base_cdf[tx_size][plane][ctx][1] = AOM_ICDF(p);
+        p += ((32768 - p) * fc->coeff_base[tx_size][plane][1][ctx]) >> 8;
+        fc->coeff_base_cdf[tx_size][plane][ctx][2] = AOM_ICDF(p);
+        fc->coeff_base_cdf[tx_size][plane][ctx][3] = AOM_ICDF(32768);
+        fc->coeff_base_cdf[tx_size][plane][ctx][4] = 0;
+      }
+    }
+  }
+#else
   // Update probability models for non-zero coefficient map and eob flag.
   for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
     for (plane = 0; plane < PLANE_TYPES; ++plane) {
@@ -138,16 +153,7 @@
   for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
     for (plane = 0; plane < PLANE_TYPES; ++plane) {
       for (ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
-#if CONFIG_LV_MAP_MULTI
-        int p = fc->nz_map[tx_size][plane][ctx] * 128;
-        fc->coeff_base_cdf[tx_size][plane][ctx][0] = AOM_ICDF(p);
-        p += ((32768 - p) * fc->coeff_base[tx_size][plane][0][ctx]) >> 8;
-        fc->coeff_base_cdf[tx_size][plane][ctx][1] = AOM_ICDF(p);
-        p += ((32768 - p) * fc->coeff_base[tx_size][plane][1][ctx]) >> 8;
-        fc->coeff_base_cdf[tx_size][plane][ctx][2] = AOM_ICDF(p);
-        fc->coeff_base_cdf[tx_size][plane][ctx][3] = AOM_ICDF(32768);
-        fc->coeff_base_cdf[tx_size][plane][ctx][4] = 0;
-#else
+#if !CONFIG_LV_MAP_MULTI
         fc->nz_map_cdf[tx_size][plane][ctx][0] =
             AOM_ICDF(128 * (aom_cdf_prob)fc->nz_map[tx_size][plane][ctx]);
         fc->nz_map_cdf[tx_size][plane][ctx][1] = AOM_ICDF(32768);
@@ -172,6 +178,23 @@
 
   for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
     for (plane = 0; plane < PLANE_TYPES; ++plane) {
+#if CONFIG_LV_MAP_MULTI
+      for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
+        int p = 32768 - fc->coeff_lps[tx_size][plane][ctx] * 128;
+        int sum = p;
+        fc->coeff_br_cdf[tx_size][plane][ctx][0] = AOM_ICDF(sum);
+        sum += ((32768 - sum) * p) >> 15;
+        fc->coeff_br_cdf[tx_size][plane][ctx][1] = AOM_ICDF(sum);
+        sum += ((32768 - sum) * p) >> 15;
+        fc->coeff_br_cdf[tx_size][plane][ctx][2] = AOM_ICDF(sum);
+        fc->coeff_br_cdf[tx_size][plane][ctx][3] = AOM_ICDF(32768);
+        fc->coeff_br_cdf[tx_size][plane][ctx][4] = AOM_ICDF(32768);
+        // printf("br_cdf: %d %d %2d : %3d %3d %3d\n", tx_size, plane, ctx,
+        //        fc->coeff_br_cdf[tx_size][plane][ctx][0] >> 7,
+        //        fc->coeff_br_cdf[tx_size][plane][ctx][1] >> 7,
+        //        fc->coeff_br_cdf[tx_size][plane][ctx][2] >> 7);
+      }
+#else
       for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
         fc->coeff_lps_cdf[tx_size][plane][ctx][0] =
             AOM_ICDF(128 * (aom_cdf_prob)fc->coeff_lps[tx_size][plane][ctx]);
@@ -187,6 +210,7 @@
           fc->coeff_br_cdf[tx_size][plane][br][ctx][2] = 0;
         }
       }
+#endif
     }
   }
 #if CONFIG_CTX1D
diff --git a/av1/common/txb_common.h b/av1/common/txb_common.h
index 2043be6..c609f29 100644
--- a/av1/common/txb_common.h
+++ b/av1/common/txb_common.h
@@ -209,6 +209,7 @@
   0, 0, 1, 1, 2, 2, 3, 3, 3,
 };
 
+#if !CONFIG_LV_MAP_MULTI
 static const int coeff_to_br_index[COEFF_BASE_RANGE] = {
   0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
 };
@@ -220,6 +221,7 @@
 static const int br_extra_bits[BASE_RANGE_SETS] = {
   1, 2, 3,
 };
+#endif
 
 #define BR_MAG_OFFSET 1
 // TODO(angiebird): optimize this function by using a table to map from
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c
index a89e39c..edcb0aa 100644
--- a/av1/decoder/decodetxb.c
+++ b/av1/decoder/decodetxb.c
@@ -286,6 +286,19 @@
 
       ctx = get_br_ctx(levels, scan[c], bwl, level_counts[scan[c]]);
 
+#if CONFIG_LV_MAP_MULTI
+      for (idx = 0; idx < COEFF_BASE_RANGE / (BR_CDF_SIZE - 1); ++idx) {
+        int k = av1_read_record_symbol4(
+            counts, r, ec_ctx->coeff_br_cdf[txs_ctx][plane_type][ctx],
+            BR_CDF_SIZE, ACCT_STR);
+        *level += k;
+        if (k < BR_CDF_SIZE - 1) break;
+      }
+      if (*level <= NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
+        cul_level += *level;
+        continue;
+      }
+#else
       for (idx = 0; idx < BASE_RANGE_SETS; ++idx) {
         // printf("br: %d %d %d %d\n", txs_ctx, plane_type, idx, ctx);
         if (av1_read_record_bin(
@@ -318,7 +331,7 @@
       }
 
       if (idx < BASE_RANGE_SETS) continue;
-
+#endif
       // decode 0-th order Golomb code
       *level = COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
       // Save golomb in tcoeffs because adding it to level may incur overflow
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 9c90299..24987f8 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -75,7 +75,9 @@
   int base_cost[NUM_BASE_LEVELS][COEFF_BASE_CONTEXTS][2];
 #endif
   int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1];
+#if !CONFIG_LV_MAP_MULTI
   int br_cost[BASE_RANGE_SETS][LEVEL_CONTEXTS][2];
+#endif
 #if CONFIG_CTX1D
   int eob_mode_cost[TX_CLASSES][2];
   int empty_line_cost[TX_CLASSES][EMPTY_LINE_CONTEXTS][2];
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 37df7b2..e2cce4a 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -469,6 +469,15 @@
       ctx = get_br_ctx(levels, scan[c], bwl, level_counts[scan[c]]);
 
       int base_range = level - 1 - NUM_BASE_LEVELS;
+#if CONFIG_LV_MAP_MULTI
+      for (idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
+        int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
+        aom_write_cdf4(w, k, ec_ctx->coeff_br_cdf[txs_ctx][plane_type][ctx],
+                       BR_CDF_SIZE);
+        if (k < BR_CDF_SIZE - 1) break;
+      }
+      if (base_range < COEFF_BASE_RANGE) continue;
+#else
       int br_set_idx = 0;
       int br_base = 0;
       int br_offset = 0;
@@ -501,7 +510,7 @@
       }
 
       if (br_set_idx < BASE_RANGE_SETS) continue;
-
+#endif
       // use 0-th order Golomb code to handle the residual level.
       write_golomb(
           w, abs(tcoeff[scan[c]]) - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
@@ -2271,6 +2280,16 @@
       ctx = get_br_ctx(levels, scan[c], bwl, level_counts[scan[c]]);
 
       int base_range = level - 1 - NUM_BASE_LEVELS;
+#if CONFIG_LV_MAP_MULTI
+      for (idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
+        int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
+        // printf("br_update: %d %d %2d : %2d %d\n", txsize_ctx, plane, ctx,
+        // base_range, k);
+        update_cdf(ec_ctx->coeff_br_cdf[txsize_ctx][plane_type][ctx], k,
+                   BR_CDF_SIZE);
+        if (k < BR_CDF_SIZE - 1) break;
+      }
+#else
       int br_set_idx = base_range < COEFF_BASE_RANGE
                            ? coeff_to_br_index[base_range]
                            : BASE_RANGE_SETS;
@@ -2304,6 +2323,7 @@
           update_bin(ec_ctx->coeff_br_cdf[txsize_ctx][plane_type][idx][ctx], 0,
                      2);
       }
+#endif
       // use 0-th order Golomb code to handle the residual level.
     }
   }
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 4b4de0d..4173020 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -569,14 +569,36 @@
           av1_cost_tokens_from_cdf(
               pcost->base_cost[layer][ctx],
               fc->coeff_base_cdf[tx_size][plane][layer][ctx], NULL);
-#endif
       for (int br = 0; br < BASE_RANGE_SETS; ++br)
         for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx)
           av1_cost_tokens_from_cdf(pcost->br_cost[br][ctx],
                                    fc->coeff_br_cdf[tx_size][plane][br][ctx],
                                    NULL);
+#endif
 
       for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
+#if CONFIG_LV_MAP_MULTI
+        int br_rate[BR_CDF_SIZE];
+        int prev_cost = 0;
+        int i, j;
+        av1_cost_tokens_from_cdf(br_rate, fc->coeff_br_cdf[tx_size][plane][ctx],
+                                 NULL);
+        // printf("br_rate: ");
+        // for(j = 0; j < BR_CDF_SIZE; j++)
+        //  printf("%4d ", br_rate[j]);
+        // printf("\n");
+        for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
+          for (j = 0; j < BR_CDF_SIZE - 1; j++) {
+            pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
+          }
+          prev_cost += br_rate[j];
+        }
+        pcost->lps_cost[ctx][i] = prev_cost;
+// printf("lps_cost: %d %d %2d : ", tx_size, plane, ctx);
+// for (i = 0; i <= COEFF_BASE_RANGE; i++)
+//  printf("%5d ", pcost->lps_cost[ctx][i]);
+// printf("\n");
+#else
         int lps_rate[2];
         av1_cost_tokens_from_cdf(lps_rate,
                                  fc->coeff_lps_cdf[tx_size][plane][ctx], NULL);
@@ -611,6 +633,7 @@
           }
           // load the base range cost
         }
+#endif
       }
 #if CONFIG_CTX1D
       for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)