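Port optimize_b cache optimizations to AV1.

Bring the following libvpx commits to aom:
  e446ffd Cache optimizations in optimize_b()
  50d3629 Repack vp9_token_state

The separate best_index array in optimize_b() is folded into av1_token_state as a uint8_t member, the next field is narrowed to int16_t, and the struct fields are reordered. Saves 24600 bytes of stack in the default configuration.

Change-Id: If9d6506cf3fe1c34ab639dedb3ef62a996293781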
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index c2c68a5..23607e2 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c
@@ -73,12 +73,13 @@ } typedef struct av1_token_state { - int rate; int64_t error; - int next; + int rate; + int16_t next; int16_t token; tran_low_t qc; tran_low_t dqc; + uint8_t best_index; } av1_token_state; // These numbers are empirically obtained. @@ -114,7 +115,6 @@ struct macroblockd_plane *const pd = &xd->plane[plane]; const int ref = is_inter_block(&xd->mi[0]->mbmi); av1_token_state tokens[MAX_TX_SQUARE + 1][2]; - unsigned best_index[MAX_TX_SQUARE + 1][2]; uint8_t token_cache[MAX_TX_SQUARE]; const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); @@ -242,7 +242,7 @@ tokens[i][0].next = next; tokens[i][0].qc = x; tokens[i][0].dqc = dqcoeff[rc]; - best_index[i][0] = best; + tokens[i][0].best_index = best; /* Evaluate the second possibility for this state. */ rate0 = tokens[next][0].rate; @@ -282,7 +282,6 @@ x -= 2 * sz + 1; } else { tokens[i][1] = tokens[i][0]; - best_index[i][1] = best_index[i][0]; next = i; if (UNLIKELY(!(--band_left))) { @@ -392,7 +391,7 @@ tokens[i][1].dqc = 0; } - best_index[i][1] = best; + tokens[i][1].best_index = best; /* Finally, make this the new head of the trellis. */ next = i; } else { @@ -411,7 +410,7 @@ tokens[next][1].rate += get_token_bit_costs(*token_costs, 1, pt, t1); tokens[next][1].token = ZERO_TOKEN; } - best_index[i][0] = best_index[i][1] = 0; + tokens[i][0].best_index = tokens[i][1].best_index = 0; shortcut = (tokens[next][0].rate != tokens[next][1].rate); /* Don't update next, because we didn't add a new node. */ } @@ -445,7 +444,7 @@ dqcoeff[rc] = tokens[i][best].dqc; next = tokens[i][best].next; - best = best_index[i][best]; + best = tokens[i][best].best_index; } final_eob++;