// blob: 57b1074cb413569f2bc122589c61057418922de8 [file] [log] [blame]
/*
* Copyright 2020 Google LLC
*
*/
/*
* Copyright (c) 2020, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Read/write byte-address buffer bound to UAV slot u0. The shader entry point
// both loads its input from and stores its output to this one buffer; the
// input and output regions are selected by the offsets below.
RWByteAddressBuffer pool : register(u0);
// Per-dispatch constants, bound to constant-buffer slot b0.
// NOTE(review): the buffer name mentions "sort", but the shader visible in
// this file only repacks bit fields -- presumably a leftover name; confirm
// with the host-side binding code before renaming.
cbuffer cb_sort_data : register(b0) {
// Number of work items to process; threads whose dispatch id is >= this
// value return without touching the buffer.
uint cb_wi_count;
// Byte address inside `pool` added to every source read.
uint cb_src_offset;
// Byte distance between consecutive source rows (multiplied by the row index).
uint cb_src_stride;
// Number of work items per row; the dispatch id is split into a column
// (id % width) and a row (id / width) using this value.
uint cb_dst_width;
// Byte address inside `pool` added to every destination write.
uint cb_dst_offset;
// Byte distance between consecutive destination rows (multiplied by the row index).
uint cb_dst_stride;
};
// Mask selecting a single 10-bit field.
static const uint kField10Mask = 0x3FFu;

// Returns the 10-bit field stored in bits 0..9 of `word`.
uint field_lo(uint word) { return word & kField10Mask; }

// Returns the 10-bit field stored in bits 16..25 of `word`.
uint field_hi(uint word) { return (word >> 16) & kField10Mask; }

// Places three 10-bit fields at bits 0..9, 10..19 and 20..29 of one word.
uint pack3(uint f0, uint f1, uint f2) { return f0 | (f1 << 10) | (f2 << 20); }

// Converts three source words (six 10-bit fields, two per word) into two
// destination words (three fields per word), keeping the field order.
uint2 pack_six(uint3 src) {
  return uint2(pack3(field_lo(src.x), field_hi(src.x), field_lo(src.y)),
               pack3(field_hi(src.y), field_lo(src.z), field_hi(src.z)));
}

// Entry point: one thread per work item. Each work item reads 48 bytes
// (twelve words, two 10-bit fields each) from `pool` and writes them back as
// 32 bytes (eight words, three 10-bit fields each).
[numthreads(64, 1, 1)] void main(uint3 thread
                                 : SV_DispatchThreadID) {
  const uint item = thread.x;
  if (item >= cb_wi_count) return;

  // Split the linear work-item index into a column and a row.
  const uint col = item % cb_dst_width;
  const uint row = item / cb_dst_width;

  // Byte addresses of this work item's input and output inside `pool`.
  const uint src_addr = cb_src_offset + row * cb_src_stride + col * 48;
  const uint dst_addr = cb_dst_offset + row * cb_dst_stride + col * 32;

  // Fetch the whole 48-byte input before writing anything.
  const uint4 in0 = pool.Load4(src_addr);
  const uint4 in1 = pool.Load4(src_addr + 16);
  const uint4 in2 = pool.Load4(src_addr + 32);

  // Source words 0..5 -> destination words 0..3.
  const uint4 out0 = uint4(pack_six(in0.xyz), pack_six(uint3(in0.w, in1.xy)));
  pool.Store4(dst_addr, out0);

  // Source words 6..11 -> destination words 4..7.
  const uint4 out1 = uint4(pack_six(uint3(in1.zw, in2.x)), pack_six(in2.yzw));
  pool.Store4(dst_addr + 16, out1);
}