blob: d087b86f81db01b3d3828a83df24f552d94bac25 [file] [log] [blame]
/*
* Copyright 2020 Google LLC
*
*/
/*
* Copyright (c) 2020, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "dx/types.h"
#include "dx/av1_core.h"
#include "dx/av1_memory.h"
#include "dx/av1_compute.h"
#include "av1\common\scan.h"
#include "av1\common\idct.h"
// Per-tile parameter block for the block-sorting compute pass. av1_idct_run()
// writes one instance directly into a constant-buffer allocation and binds it
// for a single tile's sort dispatch.
// NOTE(review): the field order/padding presumably mirrors a constant buffer
// declared in the sort shader (16-byte packing) - confirm against the shader
// before changing anything here.
struct cb_sort_data {
// Number of blocks to sort for the tile (set from tile->idct_blocks_ptr).
int count;
// Set from tile->blocks_offset.
// NOTE(review): presumably the tile's first entry in the unordered block
// buffer - confirm against the shader.
int src_offset;
// Never written by av1_idct_run(); presumably padding - TODO confirm.
int reserved[2];
// offsets[i][0] receives tile->idct_blocks_sizes[i] for i in
// [0, TX_SIZES_ALL]; components [1..3] are never written by av1_idct_run().
int offsets[20][4];
};
// Per-frame parameter block bound as a constant buffer for the inverse
// transform dispatches issued by av1_idct_run().
// NOTE(review): layout presumably mirrors a constant buffer declared in the
// IDCT shaders - confirm against the shaders before changing anything here.
struct idct_frame_data {
// One row per plane (0..2): [p][0] = plane res_stride, [p][1] = plane
// res_offset. Components [2] and [3] are never written by av1_idct_run().
int planes[3][4];
// Copied from the frame's bitdepth.
int bitdepth;
};
// Records the compute work for the inverse-transform stage of the current
// frame into dec->compute.command_list.
//
// Steps, in recording order:
//   1. Convert the per-tile, per-transform-type block counts stored in
//      tile_data[t].idct_blocks_sizes into exclusive prefix offsets (in place).
//   2. Dispatch the fill shader over dec->idct_residuals.
//   3. Dispatch the sort shader once per non-empty tile, reading
//      td->idct_blocks_unordered and writing dec->idct_blocks.
//   4. UAV barriers on idct_blocks and idct_residuals.
//   5. Dispatch one inverse-transform shader per transform type that has
//      blocks, each with its own table of scan orders.
//   6. Final UAV barrier on idct_residuals.
//
// dec: decoder core; dec->curr_frame_data, dec->shader_lib and the idct_*
//      buffers are dereferenced without null checks.
//
// Side effect: tile_data[t].idct_blocks_sizes is overwritten with offsets, so
// the counts are no longer available after this call.
void av1_idct_run(Av1Core *dec) {
  av1_frame_thread_data *td = dec->curr_frame_data;
  const int tile_count = td->tile_count;

  // Exclusive prefix sum, ordered by transform type first and tile second.
  // All tiles' ranges for one type are therefore adjacent, and tile 0's
  // entries for `type` and `type + 1` bound the whole range of that type
  // (used by the transform loop below). The extra index TX_SIZES_ALL + 1
  // holds the end sentinel for the last type.
  int offset = 0;
  for (int i = 0; i <= TX_SIZES_ALL + 1; ++i) {
    for (int t = 0; t < tile_count; ++t) {
      const int count = td->tile_data[t].idct_blocks_sizes[i];
      td->tile_data[t].idct_blocks_sizes[i] = offset;
      offset += count;
    }
  }

  ComputeCommandBuffer *cb = &td->command_buffer;
  Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> command_list = dec->compute.command_list;

  // --- Fill pass over the residual buffer -----------------------------------
  ComputeShader *shader = &dec->shader_lib->shader_fill_buffer;
  command_list->SetComputeRootSignature(shader->signaturePtr.Get());
  command_list->SetPipelineState(shader->pso.Get());
  command_list->SetComputeRootUnorderedAccessView(0, dec->idct_residuals->dev->GetGPUVirtualAddress());
  // value[0]: buffer size in 16-byte units; value[1]: 0.
  // NOTE(review): value[1] is presumably the fill value and the shader
  // presumably runs 64 threads per group - confirm against the shader.
  int value[2];
  value[0] = static_cast<int>(dec->idct_residuals->size >> 4);
  value[1] = 0;
  command_list->SetComputeRoot32BitConstants(1, 2, value, 0);
  command_list->Dispatch((value[0] + 63) / 64, 1, 1);

  // --- Sort pass: one dispatch per tile that produced blocks ----------------
  shader = &dec->shader_lib->shader_idct_sort;
  command_list->SetComputeRootSignature(shader->signaturePtr.Get());
  command_list->SetPipelineState(shader->pso.Get());
  command_list->SetComputeRootShaderResourceView(0, td->idct_blocks_unordered->dev->GetGPUVirtualAddress());
  command_list->SetComputeRootUnorderedAccessView(1, dec->idct_blocks->dev->GetGPUVirtualAddress());
  for (int t = 0; t < tile_count; ++t) {
    // The constant-buffer slot is allocated for every tile (even empty ones),
    // matching the original allocation pattern.
    ConstantBufferObject sort_cbo = cb->Alloc(sizeof(cb_sort_data));
    cb_sort_data *sort_data = reinterpret_cast<cb_sort_data *>(sort_cbo.host_ptr);
    av1_tile_data *tile = td->tile_data + t;
    for (int i = 0; i < TX_SIZES_ALL + 1; ++i) {
      sort_data->offsets[i][0] = tile->idct_blocks_sizes[i];
    }
    sort_data->count = tile->idct_blocks_ptr;
    sort_data->src_offset = tile->blocks_offset;
    if (tile->idct_blocks_ptr) {
      command_list->SetComputeRootConstantBufferView(2, sort_cbo.dev_address);
      command_list->Dispatch((tile->idct_blocks_ptr + 63) / 64, 1, 1);
    }
  }

  // The transform pass reads idct_blocks and writes idct_residuals; both were
  // written as UAVs above, so those writes must complete first.
  D3D12_RESOURCE_BARRIER barriers[] = {CD3DX12_RESOURCE_BARRIER::UAV(dec->idct_blocks->dev),
                                       CD3DX12_RESOURCE_BARRIER::UAV(dec->idct_residuals->dev)};
  command_list->ResourceBarrier(2, barriers);

  // --- Inverse transform pass ------------------------------------------------
  ConstantBufferObject frame_cbo = cb->Alloc(sizeof(idct_frame_data));
  idct_frame_data *data = reinterpret_cast<idct_frame_data *>(frame_cbo.host_ptr);
  data->planes[0][0] = td->frame_buffer->planes[0].res_stride;
  data->planes[0][1] = td->frame_buffer->planes[0].res_offset;
  data->planes[1][0] = td->frame_buffer->planes[1].res_stride;
  data->planes[1][1] = td->frame_buffer->planes[1].res_offset;
  data->planes[2][0] = td->frame_buffer->planes[2].res_stride;
  data->planes[2][1] = td->frame_buffer->planes[2].res_offset;
  data->bitdepth = td->bitdepth;

  // Tile 0 is enough here: its prefix offsets delimit each type's full range.
  av1_tile_data *tdata = td->tile_data;
  command_list->SetComputeRootSignature(dec->shader_lib->sig_idct.Get());
  command_list->SetComputeRootShaderResourceView(0, dec->idct_blocks->dev->GetGPUVirtualAddress());
  command_list->SetComputeRootUnorderedAccessView(1, dec->idct_residuals->dev->GetGPUVirtualAddress());
  command_list->SetComputeRootUnorderedAccessView(2, dec->idct_coefs->dev->GetGPUVirtualAddress());
  command_list->SetComputeRootConstantBufferView(4, frame_cbo.dev_address);
  for (int type = 0; type <= TX_SIZES_ALL; ++type) {
    const int first_block = tdata->idct_blocks_sizes[type];
    const int count = tdata->idct_blocks_sizes[type + 1] - first_block;
    if (count == 0) continue;

    // The extra type index TX_SIZES_ALL uses the 4x4 geometry and scans but
    // its own pipeline state (shader_idct[TX_SIZES_ALL]).
    const int tx_size = type == TX_SIZES_ALL ? TX_4X4 : type;
    const int block_w = tx_size_wide[tx_size];
    const int block_h = tx_size_high[tx_size];
    const int block_size = block_w * block_h;
    const int wi_per_block = AOMMIN(block_w, block_h);

    // Three scan tables of block_size 32-bit entries each (4 bytes per entry).
    ConstantBufferObject scan_cbo = cb->Alloc(block_size * 3 * 4);
    int *gpu_scans = reinterpret_cast<int *>(scan_cbo.host_ptr);
    for (int s = 0; s < 3; ++s) {
      int *dst = gpu_scans + s * block_size;
      const int16_t *scan = av1_idct_scans[tx_size][s];
      if (block_w > 32) {
        // Source positions are indexed with a row pitch of 32; re-express them
        // with a row pitch of 64 (column = pos & 31, row = pos / 32). Only the
        // first 32 * block_h entries are written in this case.
        for (int i = 0; i < 32 * block_h; ++i) {
          const int pos = scan[i];
          dst[i] = (pos & 31) + (pos / 32) * 64;
        }
      } else {
        for (int i = 0; i < block_size; ++i) dst[i] = scan[i];
      }
    }

    // Root constants: index of the first block of this type, and the total
    // number of work items (wi_per_block per block).
    int const_inline[2];
    const_inline[0] = first_block;
    const_inline[1] = wi_per_block * count;
    command_list->SetPipelineState(dec->shader_lib->shader_idct[type].pso.Get());
    command_list->SetComputeRootConstantBufferView(3, scan_cbo.dev_address);
    command_list->SetComputeRoot32BitConstants(5, 2, const_inline, 0);
    command_list->Dispatch((const_inline[1] + 63) / 64, 1, 1);
  }

  // Keep the barrier in a named local: taking the address of the temporary
  // returned by CD3DX12_RESOURCE_BARRIER::UAV() is ill-formed in standard C++
  // and only compiles through a compiler extension.
  const D3D12_RESOURCE_BARRIER residuals_barrier = CD3DX12_RESOURCE_BARRIER::UAV(dec->idct_residuals->dev);
  command_list->ResourceBarrier(1, &residuals_barrier);
}