[CFL] Uniform Q3 alpha grid with extent [-2, 2]

Expand the range of alpha to [-2, 2] in Q3.
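Signal the signs of alpha_u and alpha_v jointly as a single symbol,
where each sign may be zero, negative or positive. Use the joint sign
as the context for coding the magnitudes in each quadrant and
half-axis. The (0, 0) point is excluded.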
Symmetry about the line alpha_u == alpha_v (swapping the planes)
reduces these to 6 contexts.

Results on Subset1 (Compared to 9136ab7d with CFL enabled)

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0792 | -0.7535 | -0.7574 |  -0.0639 | -0.0843 | -0.0665 |    -0.3324

Change-Id: I250369692e92a91d9c8d174a203d441217d15063
Signed-off-by: David Michael Barr <b@rr-dav.id.au>
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index b565d5f..317db9e 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -471,8 +471,8 @@
 #if CONFIG_CFL
   // Index of the alpha Cb and alpha Cr combination
   int cfl_alpha_idx;
-  // Signs of alpha Cb and alpha Cr
-  CFL_SIGN_TYPE cfl_alpha_signs[CFL_PRED_PLANES];
+  // Joint sign of alpha Cb and alpha Cr
+  int cfl_alpha_signs;
 #endif
 
   BOUNDARY_TYPE boundary_info;
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 7c88dd0..aff6b7d 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -227,17 +227,14 @@
   assert(a <= MAX_NUM_TXB);
 }
 
-static INLINE int cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
+static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
                                    CFL_PRED_TYPE pred_type) {
-  const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type];
-  const int abs_alpha_q3 = cfl_alpha_mags_q3[mag_idx];
-  if (alpha_sign == CFL_SIGN_POS) {
-    return abs_alpha_q3;
-  } else {
-    assert(abs_alpha_q3 != 0);
-    assert(cfl_alpha_mags_q3[mag_idx + 1] == -abs_alpha_q3);
-    return -abs_alpha_q3;
-  }
+  const int alpha_sign = (pred_type == CFL_PRED_U) ? CFL_SIGN_U(joint_sign)
+                                                   : CFL_SIGN_V(joint_sign);
+  if (alpha_sign == CFL_SIGN_ZERO) return 0;
+  const int abs_alpha_q3 =
+      (pred_type == CFL_PRED_U) ? CFL_IDX_U(alpha_idx) : CFL_IDX_V(alpha_idx);
+  return (alpha_sign == CFL_SIGN_POS) ? abs_alpha_q3 + 1 : -abs_alpha_q3 - 1;
 }
 
 // Predict the current transform block using CfL.
@@ -255,8 +252,8 @@
   const uint8_t *y_pix = cfl->y_down_pix;
 
   const int dc_pred = cfl->dc_pred[plane - 1];
-  const int alpha_q3 = cfl_idx_to_alpha(
-      mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
+  const int alpha_q3 =
+      cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
 
   const int avg_row =
       (row << tx_size_wide_log2[0]) >> tx_size_wide_log2[tx_size];
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 7e031fc..79afaff 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -60,20 +60,11 @@
   int dc_pred[CFL_PRED_PLANES];
 
   // The rate associated with each alpha codeword
-  int costs[CFL_ALPHABET_SIZE];
+  int costs[CFL_JOINT_SIGNS][CFL_PRED_PLANES][CFL_ALPHABET_SIZE];
 
   int mi_row, mi_col;
 } CFL_CTX;
 
-static const int cfl_alpha_mags_q3[CFL_MAGS_SIZE] = { 0, 1, -1, 2, -2, 4, -4 };
-
-static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
-  // barrbrain's simple 1D quant ordered by subset 3 likelihood
-  { 1, 1 }, { 3, 0 }, { 3, 3 }, { 1, 0 }, { 3, 1 },
-  { 5, 5 }, { 0, 1 }, { 5, 3 }, { 5, 0 }, { 3, 5 },
-  { 1, 3 }, { 0, 3 }, { 5, 1 }, { 1, 5 }, { 0, 5 }
-};
-
 static INLINE int get_scaled_luma_q0(int alpha_q3, int y_pix, int avg_q3) {
   return (alpha_q3 * ((y_pix << 3) - avg_q3) + 32) >> 6;
 }
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index d00c5c8..d6ca6a0 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -5668,6 +5668,7 @@
   AVERAGE_TILE_CDFS(uv_mode_cdf)
 
 #if CONFIG_CFL
+  AVERAGE_TILE_CDFS(cfl_sign_cdf)
   AVERAGE_TILE_CDFS(cfl_alpha_cdf)
 #endif
 
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 5410383..7790136 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -3567,12 +3567,39 @@
 #endif  // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
 
 #if CONFIG_CFL
-static const aom_cdf_prob default_cfl_alpha_cdf[CDF_SIZE(CFL_ALPHABET_SIZE)] = {
-  AOM_ICDF(4646),  AOM_ICDF(8045),  AOM_ICDF(11243), AOM_ICDF(17092),
-  AOM_ICDF(22690), AOM_ICDF(24193), AOM_ICDF(26118), AOM_ICDF(27310),
-  AOM_ICDF(28206), AOM_ICDF(29118), AOM_ICDF(30997), AOM_ICDF(31439),
-  AOM_ICDF(32189), AOM_ICDF(32601), AOM_ICDF(32768), 0
+static const aom_cdf_prob default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = {
+  AOM_ICDF(1892),  AOM_ICDF(2229),  AOM_ICDF(11464),
+  AOM_ICDF(14116), AOM_ICDF(25661), AOM_ICDF(26409),
+  AOM_ICDF(32508), AOM_ICDF(32768), 0
 };
+
+static const aom_cdf_prob
+    default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
+      { AOM_ICDF(16215), AOM_ICDF(27740), AOM_ICDF(31726), AOM_ICDF(32606),
+        AOM_ICDF(32736), AOM_ICDF(32751), AOM_ICDF(32757), AOM_ICDF(32759),
+        AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
+        AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+      { AOM_ICDF(15213), AOM_ICDF(24615), AOM_ICDF(29704), AOM_ICDF(31974),
+        AOM_ICDF(32545), AOM_ICDF(32673), AOM_ICDF(32713), AOM_ICDF(32746),
+        AOM_ICDF(32753), AOM_ICDF(32756), AOM_ICDF(32758), AOM_ICDF(32761),
+        AOM_ICDF(32763), AOM_ICDF(32764), AOM_ICDF(32766), AOM_ICDF(32768), 0 },
+      { AOM_ICDF(13250), AOM_ICDF(24677), AOM_ICDF(29113), AOM_ICDF(31666),
+        AOM_ICDF(32408), AOM_ICDF(32578), AOM_ICDF(32628), AOM_ICDF(32711),
+        AOM_ICDF(32730), AOM_ICDF(32738), AOM_ICDF(32744), AOM_ICDF(32749),
+        AOM_ICDF(32752), AOM_ICDF(32756), AOM_ICDF(32759), AOM_ICDF(32768), 0 },
+      { AOM_ICDF(24593), AOM_ICDF(30787), AOM_ICDF(32062), AOM_ICDF(32495),
+        AOM_ICDF(32656), AOM_ICDF(32707), AOM_ICDF(32735), AOM_ICDF(32747),
+        AOM_ICDF(32752), AOM_ICDF(32757), AOM_ICDF(32760), AOM_ICDF(32763),
+        AOM_ICDF(32764), AOM_ICDF(32765), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+      { AOM_ICDF(19883), AOM_ICDF(27419), AOM_ICDF(30100), AOM_ICDF(31392),
+        AOM_ICDF(31896), AOM_ICDF(32184), AOM_ICDF(32299), AOM_ICDF(32511),
+        AOM_ICDF(32568), AOM_ICDF(32602), AOM_ICDF(32628), AOM_ICDF(32664),
+        AOM_ICDF(32680), AOM_ICDF(32691), AOM_ICDF(32708), AOM_ICDF(32768), 0 },
+      { AOM_ICDF(15939), AOM_ICDF(24151), AOM_ICDF(27754), AOM_ICDF(29680),
+        AOM_ICDF(30651), AOM_ICDF(31267), AOM_ICDF(31527), AOM_ICDF(31868),
+        AOM_ICDF(32001), AOM_ICDF(32090), AOM_ICDF(32181), AOM_ICDF(32284),
+        AOM_ICDF(32314), AOM_ICDF(32366), AOM_ICDF(32486), AOM_ICDF(32768), 0 }
+    };
 #endif
 
 // CDF version of 'av1_kf_y_mode_prob'.
@@ -5122,6 +5149,7 @@
 #endif
 #endif  // CONFIG_DELTA_Q
 #if CONFIG_CFL
+  av1_copy(fc->cfl_sign_cdf, default_cfl_sign_cdf);
   av1_copy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf);
 #endif
 #if CONFIG_INTRABC
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 3253247..0a07087 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -366,7 +366,8 @@
   od_adapt_ctx pvq_context;
 #endif  // CONFIG_PVQ
 #if CONFIG_CFL
-  aom_cdf_prob cfl_alpha_cdf[CDF_SIZE(CFL_ALPHABET_SIZE)];
+  aom_cdf_prob cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)];
+  aom_cdf_prob cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)];
 #endif
 } FRAME_CONTEXT;
 
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 14b0309..b71ae4d 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -334,11 +334,35 @@
 typedef enum { PLANE_TYPE_Y = 0, PLANE_TYPE_UV = 1, PLANE_TYPES } PLANE_TYPE;
 
 #if CONFIG_CFL
-#define CFL_ALPHABET_SIZE 15
-#define CFL_MAGS_SIZE 7
+#define CFL_ALPHABET_SIZE_LOG2 4
+#define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2)
+#define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1)
+#define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2)
+#define CFL_IDX_V(idx) (idx & (CFL_ALPHABET_SIZE - 1))
 
 typedef enum { CFL_PRED_U = 0, CFL_PRED_V = 1, CFL_PRED_PLANES } CFL_PRED_TYPE;
-typedef enum { CFL_SIGN_NEG = 0, CFL_SIGN_POS = 1, CFL_SIGNS } CFL_SIGN_TYPE;
+
+typedef enum {
+  CFL_SIGN_ZERO = 0,
+  CFL_SIGN_NEG = 1,
+  CFL_SIGN_POS = 2,
+  CFL_SIGNS
+} CFL_SIGN_TYPE;
+
+// CFL_SIGN_ZERO,CFL_SIGN_ZERO is invalid
+#define CFL_JOINT_SIGNS (CFL_SIGNS * CFL_SIGNS - 1)
+// CFL_SIGN_U is equivalent to (js + 1) / 3 for js in 0 to 8
+#define CFL_SIGN_U(js) (((js + 1) * 11) >> 5)
+// CFL_SIGN_V is equivalent to (js + 1) % 3 for js in 0 to 8
+#define CFL_SIGN_V(js) ((js + 1) - CFL_SIGNS * CFL_SIGN_U(js))
+
+// There is no context when the alpha for a given plane is zero.
+// So there are 2 fewer contexts than joint signs.
+#define CFL_ALPHA_CONTEXTS (CFL_JOINT_SIGNS + 1 - CFL_SIGNS)
+#define CFL_CONTEXT_U(js) (js + 1 - CFL_SIGNS)
+// Also, the contexts are symmetric under swapping the planes.
+#define CFL_CONTEXT_V(js) \
+  (CFL_SIGN_V(js) * CFL_SIGNS + CFL_SIGN_U(js) - CFL_SIGNS)
 #endif
 
 #if CONFIG_PALETTE
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index ceb54e2..cff90b5 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -156,20 +156,22 @@
 
 #if CONFIG_CFL
 static int read_cfl_alphas(FRAME_CONTEXT *const ec_ctx, aom_reader *r,
-                           CFL_SIGN_TYPE signs_out[CFL_PRED_PLANES]) {
-  const int ind =
-      aom_read_symbol(r, ec_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE, "cfl:alpha");
-  // Signs are only coded for nonzero values
-  // sign == 0 implies negative alpha
-  // sign == 1 implies positive alpha
-  signs_out[CFL_PRED_U] = cfl_alpha_codes[ind][CFL_PRED_U]
-                              ? aom_read_bit(r, "cfl:sign")
-                              : CFL_SIGN_POS;
-  signs_out[CFL_PRED_V] = cfl_alpha_codes[ind][CFL_PRED_V]
-                              ? aom_read_bit(r, "cfl:sign")
-                              : CFL_SIGN_POS;
-
-  return ind;
+                           int *signs_out) {
+  const int joint_sign =
+      aom_read_symbol(r, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS, "cfl:signs");
+  int idx = 0;
+  // Magnitudes are only coded for nonzero values
+  if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
+    aom_cdf_prob *cdf_u = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
+    idx = aom_read_symbol(r, cdf_u, CFL_ALPHABET_SIZE, "cfl:alpha_u")
+          << CFL_ALPHABET_SIZE_LOG2;
+  }
+  if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
+    aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
+    idx += aom_read_symbol(r, cdf_v, CFL_ALPHABET_SIZE, "cfl:alpha_v");
+  }
+  *signs_out = joint_sign;
+  return idx;
 }
 #endif
 
@@ -1214,7 +1216,7 @@
 #if CONFIG_CFL
     // TODO(ltrudeau) support PALETTE
     if (mbmi->uv_mode == UV_CFL_PRED) {
-      mbmi->cfl_alpha_idx = read_cfl_alphas(ec_ctx, r, mbmi->cfl_alpha_signs);
+      mbmi->cfl_alpha_idx = read_cfl_alphas(ec_ctx, r, &mbmi->cfl_alpha_signs);
     }
 #endif  // CONFIG_CFL
 
@@ -1809,7 +1811,7 @@
 #if CONFIG_CFL
     if (mbmi->uv_mode == UV_CFL_PRED) {
       mbmi->cfl_alpha_idx =
-          read_cfl_alphas(xd->tile_ctx, r, mbmi->cfl_alpha_signs);
+          read_cfl_alphas(xd->tile_ctx, r, &mbmi->cfl_alpha_signs);
     }
 #endif  // CONFIG_CFL
 
diff --git a/av1/decoder/inspection.c b/av1/decoder/inspection.c
index c243e06..b0a5002 100644
--- a/av1/decoder/inspection.c
+++ b/av1/decoder/inspection.c
@@ -103,8 +103,7 @@
 #if CONFIG_CFL
       if (mbmi->uv_mode == UV_CFL_PRED) {
         mi->cfl_alpha_idx = mbmi->cfl_alpha_idx;
-        mi->cfl_alpha_sign = (mbmi->cfl_alpha_signs[CFL_PRED_V] << CFL_PRED_V) +
-                             mbmi->cfl_alpha_signs[CFL_PRED_U];
+        mi->cfl_alpha_sign = mbmi->cfl_alpha_signs;
       } else {
         mi->cfl_alpha_idx = 0;
         mi->cfl_alpha_sign = 0;
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 43b2521..bd3591c 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1642,23 +1642,18 @@
 }
 
 #if CONFIG_CFL
-static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, int ind,
-                             const CFL_SIGN_TYPE signs[CFL_SIGNS],
-                             aom_writer *w) {
-  // Check for uninitialized signs
-  if (cfl_alpha_codes[ind][CFL_PRED_U] == 0)
-    assert(signs[CFL_PRED_U] == CFL_SIGN_POS);
-  if (cfl_alpha_codes[ind][CFL_PRED_V] == 0)
-    assert(signs[CFL_PRED_V] == CFL_SIGN_POS);
-
-  // Write a symbol representing a combination of alpha Cb and alpha Cr.
-  aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
-
-  // Signs are only signaled for nonzero codes.
-  if (cfl_alpha_codes[ind][CFL_PRED_U] != 0)
-    aom_write_bit(w, signs[CFL_PRED_U]);
-  if (cfl_alpha_codes[ind][CFL_PRED_V] != 0)
-    aom_write_bit(w, signs[CFL_PRED_V]);
+static void write_cfl_alphas(FRAME_CONTEXT *const ec_ctx, int idx,
+                             int joint_sign, aom_writer *w) {
+  aom_write_symbol(w, joint_sign, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS);
+  // Magnitudes are only signaled for nonzero codes.
+  if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
+    aom_cdf_prob *cdf_u = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
+    aom_write_symbol(w, CFL_IDX_U(idx), cdf_u, CFL_ALPHABET_SIZE);
+  }
+  if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
+    aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
+    aom_write_symbol(w, CFL_IDX_V(idx), cdf_v, CFL_ALPHABET_SIZE);
+  }
 }
 #endif
 
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 7f468ea..c8e5811 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5685,19 +5685,25 @@
 }
 
 static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
-  assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
-         AOM_ICDF(CDF_PROB_TOP));
-
-  aom_cdf_prob prev_cdf = 0;
-
-  for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
-    const int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) +
-                              (cfl_alpha_codes[c][CFL_PRED_V] != 0);
-
-    aom_cdf_prob prob = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - prev_cdf;
-    prev_cdf = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]);
-
-    cfl->costs[c] = av1_cost_symbol(prob) + av1_cost_literal(sign_bit_cost);
+  int sign_cost[CFL_JOINT_SIGNS];
+  av1_cost_tokens_from_cdf(sign_cost, ec_ctx->cfl_sign_cdf, NULL);
+  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
+    const aom_cdf_prob *cdf_u =
+        ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
+    const aom_cdf_prob *cdf_v =
+        ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
+    int *cost_u = cfl->costs[joint_sign][CFL_PRED_U];
+    int *cost_v = cfl->costs[joint_sign][CFL_PRED_V];
+    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO)
+      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
+    else
+      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
+    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO)
+      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
+    else
+      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
+    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
+      cost_u[u] += sign_cost[joint_sign];
   }
 }
 
@@ -5722,8 +5728,6 @@
   const int *y_averages_q3 = cfl->y_averages_q3;
   const uint8_t *y_pix = cfl->y_down_pix;
 
-  CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;
-
   cfl_update_costs(cfl, ec_ctx);
 
   int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
@@ -5734,47 +5738,54 @@
       cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v,
                      width, height, tx_size, dc_pred_v, 0, NULL);
 
-  for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
-    assert(cfl_alpha_mags_q3[m + 1] == -cfl_alpha_mags_q3[m]);
+  for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
+    const int m = c * 2 + 1;
+    const int abs_alpha_q3 = c + 1;
     sse[CFL_PRED_U][m] = cfl_alpha_dist(
         y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height,
-        tx_size, dc_pred_u, cfl_alpha_mags_q3[m], &sse[CFL_PRED_U][m + 1]);
+        tx_size, dc_pred_u, abs_alpha_q3, &sse[CFL_PRED_U][m + 1]);
     sse[CFL_PRED_V][m] = cfl_alpha_dist(
         y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height,
-        tx_size, dc_pred_v, cfl_alpha_mags_q3[m], &sse[CFL_PRED_V][m + 1]);
+        tx_size, dc_pred_v, abs_alpha_q3, &sse[CFL_PRED_V][m + 1]);
   }
 
   int64_t dist;
   int64_t cost;
-  int64_t best_cost;
+  int64_t best_cost = INT64_MAX;
+  int best_rate = 0;
 
   // Compute least squares parameter of the entire block
   int ind = 0;
-  signs[CFL_PRED_U] = CFL_SIGN_POS;
-  signs[CFL_PRED_V] = CFL_SIGN_POS;
-  best_cost = INT64_MAX;
+  int signs = 0;
 
-  for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
-    const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
-    const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
-    for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
-      for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
+  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
+    const int sign_u = CFL_SIGN_U(joint_sign);
+    const int sign_v = CFL_SIGN_V(joint_sign);
+    const int size_u = (sign_u == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
+    const int size_v = (sign_v == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
+    for (int u = 0; u < size_u; u++) {
+      const int idx_u = (sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1;
+      for (int v = 0; v < size_v; v++) {
+        const int idx_v = (sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1;
         dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
                sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
         dist *= 16;
-        cost = RDCOST(x->rdmult, cfl->costs[c], dist);
+        const int rate = cfl->costs[joint_sign][CFL_PRED_U][u] +
+                         cfl->costs[joint_sign][CFL_PRED_V][v];
+        cost = RDCOST(x->rdmult, rate, dist);
         if (cost < best_cost) {
           best_cost = cost;
-          ind = c;
-          signs[CFL_PRED_U] = sign_u;
-          signs[CFL_PRED_V] = sign_v;
+          best_rate = rate;
+          ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
+          signs = joint_sign;
         }
       }
     }
   }
 
   mbmi->cfl_alpha_idx = ind;
-  return cfl->costs[ind];
+  mbmi->cfl_alpha_signs = signs;
+  return best_rate;
 }
 #endif  // CONFIG_CFL