| /* | 
 |  * Copyright (c) 2025, Alliance for Open Media. All rights reserved | 
 |  * | 
 |  * This source code is subject to the terms of the BSD 3-Clause Clear License | 
 |  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear | 
 |  * License was not distributed with this source code in the LICENSE file, you | 
 |  * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the | 
 |  * Alliance for Open Media Patent License 1.0 was not distributed with this | 
 |  * source code in the PATENTS file, you can obtain it at | 
 |  * aomedia.org/license/patent-license/. | 
 |  */ | 
 |  | 
 | #include <immintrin.h> | 
 |  | 
 | #include "config/aom_dsp_rtcd.h" | 
 | #include "aom_dsp/entdec.h" | 
 | #include "aom_dsp/entcode.h" | 
 |  | 
 | /*Decodes a symbol given an inverse cumulative distribution function (CDF) | 
 |    table in Q15. | 
 |   icdf: CDF_PROB_TOP minus the CDF, such that symbol s falls in the range | 
 |          [s > 0 ? (CDF_PROB_TOP - icdf[s - 1]) : 0, CDF_PROB_TOP - icdf[s]). | 
 |         The values must be monotonically non-increasing, and icdf[nsyms - 1] | 
 |          must be 0. | 
 |   nsyms: The number of symbols in the alphabet. | 
 |          This should be at most 16. | 
 |   Return: The decoded symbol s.*/ | 
 | int od_ec_decode_cdf_q15_avx2(od_ec_dec *dec, const uint16_t *icdf, int nsyms) { | 
 |   uint16_t scaled_cdf[16]; | 
 |  | 
 |   __m256i cdf = _mm256_lddqu_si256((__m256i *)icdf); | 
 |   cdf = _mm256_srli_epi16(cdf, EC_PROB_SHIFT); | 
 |   cdf = _mm256_slli_epi16(cdf, EC_PROB_SHIFT - 2); | 
 |   __m256i inc = _mm256_lddqu_si256((__m256i *)av1_prob_inc_tbl[nsyms - 2]); | 
 |   __m256i mask = _mm256_srai_epi16(inc, 15); | 
 |   inc = _mm256_slli_epi16(inc, EC_PROB_SHIFT - 6); | 
 |   cdf = _mm256_add_epi16(cdf, inc); | 
 |   __m256i rng = _mm256_set1_epi16(dec->rng); | 
 |   __m256i rngv = _mm256_srli_epi16(rng, 8); | 
 |   rngv = _mm256_slli_epi16(rngv, 8); | 
 |   __m256i sc_cdf = _mm256_mulhi_epu16(cdf, rngv); | 
 |   sc_cdf = _mm256_slli_epi16(sc_cdf, 3); | 
 |   od_ec_window dif = dec->dif; | 
 |   __m256i difv = _mm256_set1_epi16((int16_t)(dif >> (OD_EC_WINDOW_SIZE - 16))); | 
 |   difv = _mm256_or_si256(mask, difv); | 
 |   __m256i cmp_min = _mm256_min_epu16(sc_cdf, difv); | 
 |   __m256i gt = _mm256_cmpeq_epi16(sc_cdf, cmp_min); | 
 |   __m256i retv = _mm256_hadd_epi16(gt, gt); | 
 |   retv = _mm256_hadd_epi16(retv, retv); | 
 |   retv = _mm256_hadd_epi16(retv, retv); | 
 |   __m128i retv_hi = _mm256_extractf128_si256(retv, 1); | 
 |   __m128i retv_lo = _mm256_castsi256_si128(retv); | 
 |   retv_lo = _mm_add_epi16(retv_lo, retv_hi); | 
 |   int16_t ret = (int16_t)_mm_extract_epi16(retv_lo, 0); | 
 |   ret = 16 + ret; | 
 |  | 
 |   __m256i sc_cdf1 = _mm256_permute2x128_si256(sc_cdf, rng, 0x02); | 
 |   sc_cdf1 = _mm256_alignr_epi8(sc_cdf, sc_cdf1, 14); | 
 |   __m256i sc_cdf_diff = _mm256_sub_epi16(sc_cdf1, sc_cdf); | 
 |   _mm256_storeu_si256((__m256i *)scaled_cdf, sc_cdf); | 
 |   uint16_t scaled_cdf_diff[16]; | 
 |   _mm256_storeu_si256((__m256i *)scaled_cdf_diff, sc_cdf_diff); | 
 |   unsigned v = scaled_cdf[ret]; | 
 |   unsigned r = scaled_cdf_diff[ret]; | 
 |   dif -= (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16); | 
 |  | 
 |   return od_ec_dec_normalize(dec, dif, r, ret); | 
 | } |