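Optimize get_nz_mag

Unroll the loop over the SIG_REF_DIFF_OFFSET_NUM neighbor offsets into
explicit branches per tx_class, and replace AOMMIN(level, 3) with a
lookup into a 256-entry clipping table (clip_max3).

Change-Id: If7d5b82f82768dcbaf859eb719d050509ae9e84f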
diff --git a/av1/common/txb_common.h b/av1/common/txb_common.h
index 336e1cc..b466cf3 100644
--- a/av1/common/txb_common.h
+++ b/av1/common/txb_common.h
@@ -436,29 +436,41 @@
{ 0, 2 }, { 0, 3 }, { 0, 4 }
};
+static const uint8_t clip_max3[256] = {
+ 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+};
+
static INLINE int get_nz_mag(const uint8_t *const levels, const int bwl,
const TX_CLASS tx_class) {
int mag;
// Note: AOMMIN(level, 3) is useless for decoder since level < 3.
- mag = AOMMIN(levels[1], 3); // { 0, 1 }
- mag += AOMMIN(levels[(1 << bwl) + TX_PAD_HOR], 3); // { 1, 0 }
+ mag = clip_max3[levels[1]]; // { 0, 1 }
+ mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR]]; // { 1, 0 }
- for (int idx = 0; idx < SIG_REF_DIFF_OFFSET_NUM; ++idx) {
- const int row_offset =
- ((tx_class == TX_CLASS_2D) ? sig_ref_diff_offset[idx][0]
- : ((tx_class == TX_CLASS_VERT)
- ? sig_ref_diff_offset_vert[idx][0]
- : sig_ref_diff_offset_horiz[idx][0]));
- const int col_offset =
- ((tx_class == TX_CLASS_2D) ? sig_ref_diff_offset[idx][1]
- : ((tx_class == TX_CLASS_VERT)
- ? sig_ref_diff_offset_vert[idx][1]
- : sig_ref_diff_offset_horiz[idx][1]));
- const int nb_pos =
- (row_offset << bwl) + (row_offset << TX_PAD_HOR_LOG2) + col_offset;
- mag += AOMMIN(levels[nb_pos], 3);
+ if (tx_class == TX_CLASS_2D) {
+ mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR + 1]]; // { 1, 1 }
+ mag += clip_max3[levels[2]]; // { 0, 2 }
+ mag += clip_max3[levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)]]; // { 2, 0 }
+ } else if (tx_class == TX_CLASS_VERT) {
+ mag += clip_max3[levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)]]; // { 2, 0 }
+ mag += clip_max3[levels[(3 << bwl) + (3 << TX_PAD_HOR_LOG2)]]; // { 3, 0 }
+ mag += clip_max3[levels[(4 << bwl) + (4 << TX_PAD_HOR_LOG2)]]; // { 4, 0 }
+ } else {
+ mag += clip_max3[levels[2]]; // { 0, 2 }
+ mag += clip_max3[levels[3]]; // { 0, 3 }
+ mag += clip_max3[levels[4]]; // { 0, 4 }
}
+
return mag;
}