/*
 * Copyright (c) 2025, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * aomedia.org/license/patent-license/.
 */
| |
#include <assert.h>
#include <immintrin.h>

#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/entdec.h"
#include "aom_dsp/entcode.h"
| |
| /*Decodes a symbol given an inverse cumulative distribution function (CDF) |
| table in Q15. |
| icdf: CDF_PROB_TOP minus the CDF, such that symbol s falls in the range |
| [s > 0 ? (CDF_PROB_TOP - icdf[s - 1]) : 0, CDF_PROB_TOP - icdf[s]). |
| The values must be monotonically non-increasing, and icdf[nsyms - 1] |
| must be 0. |
| nsyms: The number of symbols in the alphabet. |
| This should be at most 16. |
| Return: The decoded symbol s.*/ |
| int od_ec_decode_cdf_q15_avx2(od_ec_dec *dec, const uint16_t *icdf, int nsyms) { |
| uint16_t scaled_cdf[16]; |
| |
| __m256i cdf = _mm256_lddqu_si256((__m256i *)icdf); |
| cdf = _mm256_srli_epi16(cdf, EC_PROB_SHIFT); |
| cdf = _mm256_slli_epi16(cdf, EC_PROB_SHIFT - 2); |
| __m256i inc = _mm256_lddqu_si256((__m256i *)av1_prob_inc_tbl[nsyms - 2]); |
| __m256i mask = _mm256_srai_epi16(inc, 15); |
| inc = _mm256_slli_epi16(inc, EC_PROB_SHIFT - 6); |
| cdf = _mm256_add_epi16(cdf, inc); |
| __m256i rng = _mm256_set1_epi16(dec->rng); |
| __m256i rngv = _mm256_srli_epi16(rng, 8); |
| rngv = _mm256_slli_epi16(rngv, 8); |
| __m256i sc_cdf = _mm256_mulhi_epu16(cdf, rngv); |
| sc_cdf = _mm256_slli_epi16(sc_cdf, 3); |
| od_ec_window dif = dec->dif; |
| __m256i difv = _mm256_set1_epi16((int16_t)(dif >> (OD_EC_WINDOW_SIZE - 16))); |
| difv = _mm256_or_si256(mask, difv); |
| __m256i cmp_min = _mm256_min_epu16(sc_cdf, difv); |
| __m256i gt = _mm256_cmpeq_epi16(sc_cdf, cmp_min); |
| __m256i retv = _mm256_hadd_epi16(gt, gt); |
| retv = _mm256_hadd_epi16(retv, retv); |
| retv = _mm256_hadd_epi16(retv, retv); |
| __m128i retv_hi = _mm256_extractf128_si256(retv, 1); |
| __m128i retv_lo = _mm256_castsi256_si128(retv); |
| retv_lo = _mm_add_epi16(retv_lo, retv_hi); |
| int16_t ret = (int16_t)_mm_extract_epi16(retv_lo, 0); |
| ret = 16 + ret; |
| |
| __m256i sc_cdf1 = _mm256_permute2x128_si256(sc_cdf, rng, 0x02); |
| sc_cdf1 = _mm256_alignr_epi8(sc_cdf, sc_cdf1, 14); |
| __m256i sc_cdf_diff = _mm256_sub_epi16(sc_cdf1, sc_cdf); |
| _mm256_storeu_si256((__m256i *)scaled_cdf, sc_cdf); |
| uint16_t scaled_cdf_diff[16]; |
| _mm256_storeu_si256((__m256i *)scaled_cdf_diff, sc_cdf_diff); |
| unsigned v = scaled_cdf[ret]; |
| unsigned r = scaled_cdf_diff[ret]; |
| dif -= (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16); |
| |
| return od_ec_dec_normalize(dec, dif, r, ret); |
| } |