| /* |
| * Copyright 2020 Google LLC |
| * |
| */ |
| |
| /* |
| * Copyright (c) 2020, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include "dx/types.h" |
| #include "dx/av1_core.h" |
| #include "dx/av1_memory.h" |
| #include "dx/av1_compute.h" |
| |
| #include "av1/common/restoration.h" |
| #include "av1/common/av1_loopfilter.h" |
| |
// Constant-buffer payload that av1_loopfilter_gpu() uploads for the
// loopfilter_v / loopfilter_h shader passes.
| struct LoopfilterData { |
// Copied with memcpy from the frame buffer's per-plane descriptors (td->frame_buffer->planes).
| int planes[3][4]; |
// Per filter level 0..63: [0] = lim, [1] = mblim, [2] = hev_thr (first byte of each threshold).
// Component [3] is not written by the host code in this file section.
| int limits[64][4]; |
| }; |
| |
// Root constants for the loopfilter_v / loopfilter_h dispatches in av1_loopfilter_gpu().
// Uploaded as five 32-bit values, so the member count and order must stay as they are.
| struct LoopfilterSRT { |
// Work-item count of the dispatch; the host launches (wicount + 63) >> 6 groups.
| int wicount; |
// Plane index, 0..2.
| int plane; |
// Index of this plane's first block (an entry of the host-side blk_offsets[] table).
| int offset_base; |
// Blocks per row for this plane and edge direction (an entry of the host-side blk_cols[] table).
| int block_cols; |
// 0 for the first loopfilter_v pass, 1 for the second; still 1 when loopfilter_h is dispatched.
| int block_id_offset; |
| }; |
| |
// Constant-buffer payload for the gen_lf_vert / gen_lf_hor shaders, filled by av1_loopfilter_gpu().
// For the arrays whose last dimension is [4], the host only writes the leading component(s)
// noted below; the remaining components are left untouched.
| struct GenLfData { |
// cm->mi_stride.
| int mi_stride; |
// Host pointer of the mode-info pool, converted to uint64_t and then narrowed to int.
| int mi_addr_base; |
// cm->delta_q_info.delta_lf_present_flag / delta_lf_multi.
| int delta_q_info_delta_lf_present_flag; |
| int delta_q_info_delta_lf_multi; |
// cm->lf.mode_ref_delta_enabled.
| int lf_mode_ref_delta_enabled; |
// Not written by the host code in this file section.
// NOTE(review): presumably alignment padding - confirm against the shader-side layout.
| int r[3]; |
| |
// cm->lf.filter_level[0..1], filter_level_u and filter_level_v.
| int lf_filter_level[2]; |
| int lf_filter_level_u; |
| int lf_filter_level_v; |
| |
// [i][0] = cm->lf.mode_deltas[i].
| int lf_mode_deltas[2][4]; |
// [i][0] = cm->lf.ref_deltas[i].
| int lf_ref_deltas[8][4]; |
// [i][0] = xd->lossless[i]; [i][1] = cm->seg.feature_mask[i], or 0 when segmentation is disabled.
| int seg_features[8][4]; |
// [i][j][0] = cm->seg.feature_data[i][j].
| int seg_data[8][8][4]; |
// [p][s][r][m][d] = cm->lf_info.lvl[p][s][d][r][m] for d = 0, 1.
| int lfi_n_lvl[3][8][8][2][4]; |
| }; |
| |
// Root constants for the gen_lf_vert / gen_lf_hor dispatches in av1_loopfilter_gpu().
// Uploaded as six 32-bit values, so the member count and order must stay as they are.
| struct GenLfSRT { |
// Work-item count of the dispatch; the host launches (wicount + 63) >> 6 groups.
| int wicount; |
// Mode-info columns / rows of the plane being processed.
| int mi_cols; |
| int mi_rows; |
// Plane index, 0..2.
| int plane; |
// blk_offsets[] entry for the plane and direction, multiplied by 32.
| int dst_offset; |
// blk_cols[] entry multiplied by 32 (vertical pass) or by 2 (horizontal pass).
| int dst_stride; |
| }; |
| |
| void av1_loopfilter_gpu(Av1Core *dec, AV1_COMMON *cm, MACROBLOCKD *xd) { |
| if (!(cm->lf.filter_level[0] || cm->lf.filter_level[1])) { |
| return; |
| } |
| av1_loop_filter_frame_init(cm, 0, 3); |
| |
| av1_frame_thread_data *td = dec->curr_frame_data; |
| HwFrameBuffer *dst = td->dst_frame_buffer; |
| ComputeCommandBuffer *cb = &td->command_buffer; |
| Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> command_list = dec->compute.command_list; |
| ComputeShader *shader; |
| |
| const int mi_cols_y = AOMMIN(cm->mi_cols, (dst->y_crop_width + 3) >> 2); |
| const int mi_rows_y = AOMMIN(cm->mi_rows, (dst->y_crop_height + 3) >> 2); |
| const int mi_cols_uv = AOMMIN((cm->mi_cols + 1) >> 1, (dst->uv_crop_width + 3) >> 2); |
| const int mi_rows_uv = AOMMIN((cm->mi_rows + 1) >> 1, (dst->uv_crop_height + 3) >> 2); |
| |
| const int blk_cols[] = { |
| (mi_cols_y + 15) >> 4, (mi_cols_uv + 15) >> 4, (mi_cols_uv + 15) >> 4, mi_cols_y, mi_cols_uv, mi_cols_uv}; |
| |
| int blk_count[] = { |
| blk_cols[0] * mi_rows_y, |
| blk_cols[1] * mi_rows_uv, |
| blk_cols[2] * mi_rows_uv, |
| blk_cols[3] * ((mi_rows_y + 15) >> 4), |
| blk_cols[4] * ((mi_rows_uv + 15) >> 4), |
| blk_cols[5] * ((mi_rows_uv + 15) >> 4), |
| }; |
| |
| int blk_offsets[6]; |
| blk_offsets[0] = 0; |
| for (int i = 1; i < 6; ++i) blk_offsets[i] = blk_offsets[i - 1] + blk_count[i - 1]; |
| |
| ConstantBufferObject cbo = cb->Alloc(sizeof(GenLfData)); |
| GenLfData *gen_data = (GenLfData *)cbo.host_ptr; |
| gen_data->mi_stride = cm->mi_stride; |
| gen_data->mi_addr_base = static_cast<int>((uint64_t)dec->mode_info_pool->host_ptr); |
| gen_data->delta_q_info_delta_lf_multi = cm->delta_q_info.delta_lf_multi; |
| gen_data->delta_q_info_delta_lf_present_flag = cm->delta_q_info.delta_lf_present_flag; |
| gen_data->lf_mode_ref_delta_enabled = cm->lf.mode_ref_delta_enabled; |
| gen_data->lf_filter_level[0] = cm->lf.filter_level[0]; |
| gen_data->lf_filter_level[1] = cm->lf.filter_level[1]; |
| gen_data->lf_filter_level_u = cm->lf.filter_level_u; |
| gen_data->lf_filter_level_v = cm->lf.filter_level_v; |
| gen_data->lf_mode_deltas[0][0] = cm->lf.mode_deltas[0]; |
| gen_data->lf_mode_deltas[1][0] = cm->lf.mode_deltas[1]; |
| for (int i = 0; i < 8; ++i) gen_data->lf_ref_deltas[i][0] = cm->lf.ref_deltas[i]; |
| for (int i = 0; i < 8; ++i) { |
| gen_data->seg_features[i][0] = xd->lossless[i]; |
| gen_data->seg_features[i][1] = cm->seg.enabled ? cm->seg.feature_mask[i] : 0; |
| for (int j = 0; j < 8; ++j) gen_data->seg_data[i][j][0] = cm->seg.feature_data[i][j]; |
| } |
| for (int p = 0; p < 3; ++p) |
| for (int s = 0; s < 8; ++s) |
| for (int r = 0; r < 8; ++r) |
| for (int m = 0; m < 2; ++m) { |
| gen_data->lfi_n_lvl[p][s][r][m][0] = cm->lf_info.lvl[p][s][0][r][m]; |
| gen_data->lfi_n_lvl[p][s][r][m][1] = cm->lf_info.lvl[p][s][1][r][m]; |
| } |
| |
| GenLfSRT gen_srt; |
| shader = &dec->shader_lib->shader_gen_lf_vert; |
| |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetComputeRootShaderResourceView(0, dec->mode_info_pool->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(1, td->mode_info_grid->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootUnorderedAccessView(2, dec->loopfilter_blocks->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootConstantBufferView(3, cbo.dev_address); |
| |
| command_list->SetPipelineState(shader->pso.Get()); |
| for (int plane = 0; plane < 3; ++plane) { |
| if ((plane == 1 && !cm->lf.filter_level_u) || (plane == 2 && !cm->lf.filter_level_v)) continue; |
| |
| const int mi_cols = plane ? mi_cols_uv : mi_cols_y; |
| const int mi_rows = plane ? mi_rows_uv : mi_rows_y; |
| gen_srt.plane = plane; |
| gen_srt.mi_cols = mi_cols; |
| gen_srt.mi_rows = mi_rows; |
| gen_srt.dst_offset = blk_offsets[plane] * 32; |
| gen_srt.dst_stride = blk_cols[plane] * 32; |
| gen_srt.wicount = mi_rows * ((mi_cols + 15) >> 4); |
| command_list->SetComputeRoot32BitConstants(4, 6, &gen_srt, 0); |
| command_list->Dispatch((gen_srt.wicount + 63) >> 6, 1, 1); |
| } |
| shader = &dec->shader_lib->shader_gen_lf_hor; |
| command_list->SetPipelineState(shader->pso.Get()); |
| for (int plane = 0; plane < 3; ++plane) { |
| if ((plane == 1 && !cm->lf.filter_level_u) || (plane == 2 && !cm->lf.filter_level_v)) continue; |
| |
| const int mi_cols = plane ? mi_cols_uv : mi_cols_y; |
| const int mi_rows = plane ? mi_rows_uv : mi_rows_y; |
| gen_srt.plane = plane; |
| gen_srt.mi_cols = mi_cols; |
| gen_srt.mi_rows = mi_rows; |
| gen_srt.dst_offset = blk_offsets[plane + 3] * 32; |
| gen_srt.dst_stride = blk_cols[plane + 3] * 2; |
| gen_srt.wicount = mi_cols * ((mi_rows + 15) >> 4); |
| command_list->SetComputeRoot32BitConstants(4, 6, &gen_srt, 0); |
| command_list->Dispatch((gen_srt.wicount + 63) >> 6, 1, 1); |
| } |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->loopfilter_blocks->dev)); |
| |
| cbo = cb->Alloc(sizeof(LoopfilterData)); |
| LoopfilterData *data = (LoopfilterData *)cbo.host_ptr; |
| memcpy(data->planes, td->frame_buffer->planes, sizeof(data->planes)); |
| for (int lvl = 0; lvl < 64; ++lvl) { |
| data->limits[lvl][0] = cm->lf_info.lfthr[lvl].lim[0]; |
| data->limits[lvl][1] = cm->lf_info.lfthr[lvl].mblim[0]; |
| data->limits[lvl][2] = cm->lf_info.lfthr[lvl].hev_thr[0]; |
| } |
| |
| LoopfilterSRT srt; |
| shader = &td->shaders->loopfilter_v; |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetComputeRootShaderResourceView(0, dec->loopfilter_blocks->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootUnorderedAccessView(1, dec->frame_buffer_pool->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootConstantBufferView(2, cbo.dev_address); |
| command_list->SetPipelineState(shader->pso.Get()); |
| srt.block_id_offset = 0; |
| for (int p = 0; p < 3; ++p) { |
| if (p == 1 && !cm->lf.filter_level_u) continue; |
| if (p == 2 && !cm->lf.filter_level_v) continue; |
| srt.plane = p; |
| srt.offset_base = blk_offsets[p]; |
| srt.block_cols = blk_cols[p]; |
| srt.wicount = ((blk_count[p] + 1) >> 1) * 4; |
| command_list->SetComputeRoot32BitConstants(3, 5, &srt, 0); |
| command_list->Dispatch((srt.wicount + 63) >> 6, 1, 1); |
| } |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->frame_buffer_pool->dev)); |
| |
| srt.block_id_offset = 1; |
| for (int p = 0; p < 3; ++p) { |
| if (p == 1 && !cm->lf.filter_level_u) continue; |
| if (p == 2 && !cm->lf.filter_level_v) continue; |
| srt.plane = p; |
| srt.offset_base = blk_offsets[p]; |
| srt.block_cols = blk_cols[p]; |
| srt.wicount = (blk_count[p] >> 1) * 4; |
| command_list->SetComputeRoot32BitConstants(3, 5, &srt, 0); |
| command_list->Dispatch((srt.wicount + 63) >> 6, 1, 1); |
| } |
| |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->frame_buffer_pool->dev)); |
| |
| shader = &td->shaders->loopfilter_h; |
| command_list->SetPipelineState(shader->pso.Get()); |
| for (int p = 0; p < 3; ++p) { |
| if (p == 1 && !cm->lf.filter_level_u) continue; |
| if (p == 2 && !cm->lf.filter_level_v) continue; |
| int type = p + 3; |
| srt.plane = p; |
| srt.offset_base = blk_offsets[type]; |
| srt.block_cols = blk_cols[type]; |
| srt.wicount = blk_count[type] * 4; |
| command_list->SetComputeRoot32BitConstants(3, 5, &srt, 0); |
| command_list->Dispatch((srt.wicount + 63) >> 6, 1, 1); |
| } |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->frame_buffer_pool->dev)); |
| } |
| |
// Constant-buffer payload for the upscale shader, filled by av1_superres().
// Both arrays start as a memcpy of the frame buffers' per-plane descriptors; the host then
// overwrites component [2] with the plane's crop width and, for the source only,
// component [3] with src->width (luma) or (src->width + 1) >> 1 (chroma).
| struct UpscaleData { |
| int src_planes[3][4]; |
| int dst_planes[3][4]; |
| }; |
| |
// Root constants for the upscale dispatches in av1_superres().
// Uploaded as three 32-bit values, so the member count and order must stay as they are.
| struct UpscaleSRT { |
// Plane index, 0..2.
| int plane; |
// Work-item count: crop height * ((crop width + 3) >> 2) of the destination plane.
| int wi_count; |
// Copied from the destination buffer's hbd field.
| int hbd; |
| }; |
| |
| void av1_superres(Av1Core *dec, HwFrameBuffer *src, HwFrameBuffer *dst, int iter) { |
| av1_frame_thread_data *td = dec->curr_frame_data; |
| ComputeCommandBuffer *cb = &td->command_buffer; |
| Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> command_list = dec->compute.command_list; |
| ConstantBufferObject cbo = cb->Alloc(sizeof(UpscaleData)); |
| UpscaleData *data = (UpscaleData *)cbo.host_ptr; |
| memcpy(data->src_planes, src->planes, sizeof(src->planes)); |
| memcpy(data->dst_planes, dst->planes, sizeof(dst->planes)); |
| data->dst_planes[0][2] = dst->y_crop_width; |
| data->dst_planes[1][2] = dst->uv_crop_width; |
| data->dst_planes[2][2] = dst->uv_crop_width; |
| data->src_planes[0][2] = src->y_crop_width; |
| data->src_planes[1][2] = src->uv_crop_width; |
| data->src_planes[2][2] = src->uv_crop_width; |
| data->src_planes[0][3] = src->width; |
| data->src_planes[1][3] = (src->width + 1) >> 1; |
| data->src_planes[2][3] = (src->width + 1) >> 1; |
| |
| UpscaleSRT srt; |
| ComputeShader *shader = &dec->shader_lib->shader_upscale; |
| command_list->SetPipelineState(shader->pso.Get()); |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetComputeRootUnorderedAccessView(0, dec->frame_buffer_pool->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootConstantBufferView(1, cbo.dev_address); |
| for (int plane = 0; plane < 3; ++plane) { |
| srt.plane = plane; |
| const int w = plane ? dst->uv_crop_width : dst->y_crop_width; |
| const int h = plane ? dst->uv_crop_height : dst->y_crop_height; |
| srt.wi_count = h * ((w + 3) >> 2); |
| srt.hbd = dst->hbd; |
| command_list->SetComputeRoot32BitConstants(2, 3, &srt, 0); |
| command_list->Dispatch((srt.wi_count + 63) >> 6, 1, 1); |
| } |
| |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->frame_buffer_pool->dev)); |
| } |
| |
// Per-plane geometry embedded in PlaneRestorationData; filled by av1_looprestoration().
| struct PlaneInfo1 { |
// Stride of the destination buffer's plane.
| int stride; |
// Offset of the source buffer's plane.
| int offset; |
// Crop width / height of the destination buffer's plane.
| int width; |
| int height; |
| }; |
| |
// Restoration-unit grid of one plane, embedded in PlaneRestorationData.
// Only filled by av1_looprestoration() for planes whose restoration type is not RESTORE_NONE.
| struct UnitsInfo { |
// rsi->vert_units_per_tile.
| int Rows; |
// rsi->horz_units_per_tile.
| int Cols; |
// rsi->restoration_unit_size.
| int Size; |
// Also rsi->horz_units_per_tile.
| int Stride; |
| }; |
| |
// Per-plane record of the second loop-restoration constant buffer; av1_looprestoration()
// allocates and fills an array of three of these.
| struct PlaneRestorationData { |
| PlaneInfo1 plane; |
| UnitsInfo units; |
| |
// Plane offset inside the buffer the host selects as pp_buffer.
| int pp_offset; |
// Plane offset inside the destination frame buffer.
| int dst_offset; |
// Index of this plane's first unit in the loop_rest_types / loop_rest_wiener arrays.
// Only written for planes whose restoration type is not RESTORE_NONE.
| int Lr_buffer_offset; |
// 0 for plane 0, 1 for planes 1 and 2.
| int subsampling; |
// td->is_hbd and td->bitdepth.
| int hbd; |
| int bit_depth; |
// Not written by the host code in this file section.
| int pad[2]; |
| }; |
| |
// First loop-restoration constant buffer; av1_looprestoration() fills it with a memcpy of
// the sgr_params table.
| struct RestorationData { |
| int Sgr_Params[16][4]; |
| }; |
| |
// Constant-buffer payload for the CDEF shader, filled by av1_cdef_filter_run().
| struct CDefData { |
// Source luma plane: [0] = stride, [1] = offset, [2] = src->width, [3] = src->height.
| int plane[4]; |
// Source chroma stride (taken from plane 1).
| int uv_stride; |
// Destination offsets of planes 0..2.
| int dst_offset[3]; |
| |
// Source offsets of planes 1 and 2.
| int uv_offset[2]; |
// Row stride of the cdef_indexes array (number of 64x64 blocks per row).
| int index_stride; |
// Row stride of the cdef_skips array (index_stride * 16).
| int skips_stride; |
| |
// cdef_info->cdef_pri_damping / cdef_sec_damping.
| int pri_damping; |
| int sec_damping; |
// Not written by the host code in this file section (the only assignment is commented out).
| int pli; |
// td->is_hbd.
| int hbd; |
| |
// td->bitdepth.
| int bit_depth; |
// Not written by the host code in this file section.
| int _dummie[3]; |
| |
// Direction offsets: the host's 8-entry table is stored twice (entries i and 8 + i), with
// each pair of values written in swapped order into components [1] and [0].
| int cdef_directions[16][2][4]; |
// [i][0] = cdef_info->cdef_strengths[i].
| int cdef_strength[16][4]; |
// [i][0] = cdef_info->cdef_uv_strengths[i].
| int cdef_uv_strength[16][4]; |
| }; |
| |
| void av1_cdef_filter_run(Av1Core *dec, AV1_COMMON *cm, HwFrameBuffer *src, HwFrameBuffer *dst) { |
| av1_frame_thread_data *td = dec->curr_frame_data; |
| if (!td->do_cdef) { |
| PutPerfMarker(td, &td->perf_markers[4]); |
| return; |
| } |
| |
| const CdefInfo *const cdef_info = &cm->cdef_info; |
| |
| int *cdef_indexes = (int *)td->cdef_indexes->host_ptr; |
| int *cdef_skips = (int *)td->cdef_skips->host_ptr; |
| const int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
| const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
| |
| for (int fbr = 0; fbr < nvfb; fbr++) { |
| for (int fbc = 0; fbc < nhfb; fbc++) { |
| cdef_indexes[fbr * nhfb + fbc] = |
| cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->cdef_strength; |
| } |
| } |
| for (int r = 0; r < AOMMIN(nvfb * 16, cm->mi_rows / 2); r++) { |
| for (int c = 0; c < AOMMIN(nhfb * 16, cm->mi_cols / 2); c++) { |
| cdef_skips[r * nhfb * 16 + c] = cm->mi_grid_visible[(r * 2 + 0) * cm->mi_stride + c * 2 + 0]->skip && |
| cm->mi_grid_visible[(r * 2 + 0) * cm->mi_stride + c * 2 + 1]->skip && |
| cm->mi_grid_visible[(r * 2 + 1) * cm->mi_stride + c * 2 + 0]->skip && |
| cm->mi_grid_visible[(r * 2 + 1) * cm->mi_stride + c * 2 + 1]->skip; |
| } |
| } |
| |
| ComputeCommandBuffer *cb = &td->command_buffer; |
| ConstantBufferObject cbo = cb->Alloc(sizeof(CDefData)); |
| CDefData *data = reinterpret_cast<CDefData *>(cbo.host_ptr); |
| |
| /* Generated from gen_filter_tables.c. */ |
| const int cdef_directions_new[8][4] = {{-1, +1, -2, +2}, {0, +1, -1, +2}, {0, +1, 0, +2}, {0, +1, 1, +2}, |
| {1, +1, 2, +2}, {1, +0, 2, +1}, {1, +0, 2, +0}, {1, +0, 2, -1}}; |
| for (int i = 0; i < 8; i++) { |
| data->cdef_directions[i][0][1] = cdef_directions_new[i][0]; |
| data->cdef_directions[i][0][0] = cdef_directions_new[i][1]; |
| data->cdef_directions[i][1][1] = cdef_directions_new[i][2]; |
| data->cdef_directions[i][1][0] = cdef_directions_new[i][3]; |
| data->cdef_directions[8 + i][0][1] = cdef_directions_new[i][0]; |
| data->cdef_directions[8 + i][0][0] = cdef_directions_new[i][1]; |
| data->cdef_directions[8 + i][1][1] = cdef_directions_new[i][2]; |
| data->cdef_directions[8 + i][1][0] = cdef_directions_new[i][3]; |
| } |
| for (int i = 0; i < 16; i++) { |
| data->cdef_strength[i][0] = cdef_info->cdef_strengths[i]; |
| data->cdef_uv_strength[i][0] = cdef_info->cdef_uv_strengths[i]; |
| } |
| |
| data->index_stride = nhfb; |
| data->skips_stride = nhfb * 16; |
| data->hbd = td->is_hbd; |
| data->bit_depth = td->bitdepth; |
| |
| data->pri_damping = cdef_info->cdef_pri_damping; |
| data->sec_damping = cdef_info->cdef_sec_damping; |
| int w = src->width; |
| int h = src->height; |
| data->plane[0] = src->planes[0].stride; |
| data->plane[1] = src->planes[0].offset; |
| data->plane[2] = w; |
| data->plane[3] = h; |
| data->uv_stride = src->planes[1].stride; |
| data->uv_offset[0] = src->planes[1].offset; |
| data->uv_offset[1] = src->planes[2].offset; |
| |
| // srt.data->pli = i; |
| data->dst_offset[0] = dst->planes[0].offset; |
| data->dst_offset[1] = dst->planes[1].offset; |
| data->dst_offset[2] = dst->planes[2].offset; |
| |
| Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> command_list = dec->compute.command_list; |
| ComputeShader *shader = &dec->shader_lib->shader_cdef_filter; |
| command_list->SetPipelineState(shader->pso.Get()); |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetComputeRootShaderResourceView(0, td->cdef_indexes->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(1, td->cdef_skips->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootUnorderedAccessView(2, dec->frame_buffer_pool->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootConstantBufferView(3, cbo.dev_address); |
| command_list->Dispatch((w + 31) >> 5, (h + 31) >> 5, 1); |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->frame_buffer_pool->dev)); |
| PutPerfMarker(td, &td->perf_markers[4]); |
| } |
| |
| void av1_looprestoration(Av1Core *dec, AV1_COMMON *cm, void *lr_ctxt) { |
| av1_frame_thread_data *td = dec->curr_frame_data; |
| if (!td->do_loop_rest) { |
| PutPerfMarker(td, &td->perf_markers[5]); |
| return; |
| } |
| |
| if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE || |
| cm->rst_info[1].frame_restoration_type != RESTORE_NONE || cm->rst_info[2].frame_restoration_type != RESTORE_NONE) |
| av1_loop_restoration_filter_frame_init((AV1LrStruct *)lr_ctxt, (YV12_BUFFER_CONFIG *)&cm->cur_frame->buf, cm, 0, 3); |
| |
| AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt; |
| int lr_ptr = 0; |
| // PlaneRestorationData pl_data[3] = { 0, 0, 0 }; |
| int pl_width[3] = {0, 0, 0}; |
| int pl_height[3] = {0, 0, 0}; |
| HwFrameBuffer *dst_buffer = td->dst_frame_buffer; |
| HwFrameBuffer *src_buffer = (td->do_cdef == td->do_superres) ? td->frame_buffer : &dec->back_buffer1; |
| HwFrameBuffer *pp_buffer = td->do_superres ? &dec->back_buffer1 : td->frame_buffer; |
| |
| ComputeCommandBuffer *cb = &td->command_buffer; |
| ConstantBufferObject cbo1 = cb->Alloc(sizeof(RestorationData)); |
| RestorationData *data = reinterpret_cast<RestorationData *>(cbo1.host_ptr); |
| ConstantBufferObject cbo2 = cb->Alloc(sizeof(PlaneRestorationData) * 3); |
| PlaneRestorationData *pl_data = reinterpret_cast<PlaneRestorationData *>(cbo2.host_ptr); |
| |
| for (int p = 0; p < 3; ++p) { |
| int subsampling = p != 0; |
| pl_width[p] = dst_buffer->width >> subsampling; |
| pl_height[p] = dst_buffer->height >> subsampling; |
| |
| pl_data[p].plane.width = p ? dst_buffer->uv_crop_width : dst_buffer->y_crop_width; // pl_width[p]; |
| pl_data[p].plane.height = p ? dst_buffer->uv_crop_height : dst_buffer->y_crop_height; // pl_height[p]; |
| pl_data[p].plane.stride = dst_buffer->planes[p].stride; |
| pl_data[p].plane.offset = src_buffer->planes[p].offset; |
| pl_data[p].pp_offset = pp_buffer->planes[p].offset; |
| pl_data[p].dst_offset = dst_buffer->planes[p].offset; |
| pl_data[p].subsampling = p != 0; |
| pl_data[p].hbd = td->is_hbd; |
| pl_data[p].bit_depth = td->bitdepth; |
| |
| if (cm->rst_info[p].frame_restoration_type == RESTORE_NONE) { |
| continue; |
| } |
| |
| FilterFrameCtxt *ctx = &loop_rest_ctxt->ctxt[p]; |
| pl_data[p].units.Rows = ctx->rsi->vert_units_per_tile; |
| pl_data[p].units.Cols = ctx->rsi->horz_units_per_tile; |
| pl_data[p].units.Size = ctx->rsi->restoration_unit_size; |
| pl_data[p].units.Stride = ctx->rsi->horz_units_per_tile; |
| pl_data[p].Lr_buffer_offset = lr_ptr; |
| |
| int *lr_type = (int *)td->loop_rest_types->host_ptr; |
| int *lr_wiener = (int *)td->loop_rest_wiener->host_ptr; |
| for (int u = 0; u < ctx->rsi->units_per_tile; ++u) { |
| RestorationUnitInfo *unit = ctx->rsi->unit_info + u; |
| int *dst_type = lr_type + lr_ptr * 4; |
| int *dst_wiener = lr_wiener + lr_ptr * 16; |
| ++lr_ptr; |
| dst_type[0] = unit->restoration_type; |
| dst_type[1] = unit->sgrproj_info.xqd[0]; |
| dst_type[2] = unit->sgrproj_info.xqd[1]; |
| dst_type[3] = unit->sgrproj_info.ep; |
| if (unit->restoration_type == RESTORE_WIENER) { |
| for (int i = 0; i < 8; ++i) dst_wiener[i] = unit->wiener_info.hfilter[i]; |
| for (int i = 0; i < 8; ++i) dst_wiener[i + 8] = unit->wiener_info.vfilter[i]; |
| } |
| } |
| } |
| |
| { |
| struct CBuffer { |
| int do_restoration; |
| int plane; |
| } cBuffer; |
| memcpy(data->Sgr_Params, sgr_params, sizeof(sgr_params)); |
| |
| Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> command_list = dec->compute.command_list; |
| ComputeShader *shader = &dec->shader_lib->shader_loop_rest; |
| command_list->SetPipelineState(shader->pso.Get()); |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetComputeRootShaderResourceView(0, dec->frame_buffer_pool->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(1, td->loop_rest_types->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(2, td->loop_rest_wiener->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootUnorderedAccessView(3, dec->frame_buffer_pool->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootConstantBufferView(4, cbo1.dev_address); |
| command_list->SetComputeRootConstantBufferView(5, cbo2.dev_address); |
| |
| for (int p = 0; p < 3; ++p) { |
| int w = pl_width[p]; |
| int h = pl_height[p]; |
| cBuffer.do_restoration = cm->rst_info[p].frame_restoration_type != RESTORE_NONE; |
| cBuffer.plane = p; |
| command_list->SetComputeRoot32BitConstants(6, 2, &cBuffer, 0); |
| command_list->Dispatch((w + 15) >> 4, (h + 3) >> 2, 1); |
| } |
| |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->frame_buffer_pool->dev)); |
| } |
| PutPerfMarker(td, &td->perf_markers[5]); |
| } |
| |
| void av1_cdef_looprestoration(Av1Core *dec, AV1_COMMON *cm, void *lr_ctxt) { |
| av1_frame_thread_data *td = dec->curr_frame_data; |
| ComputeCommandBuffer *cb = &td->command_buffer; |
| Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> command_list = dec->compute.command_list; |
| |
| PlaneRestorationData *pl_data[3] = {0, 0, 0}; |
| RestorationData *lr_data = NULL; |
| HwFrameBuffer *cdef_dst = (td->do_loop_rest == td->do_superres) ? td->dst_frame_buffer : &dec->back_buffer1; |
| HwFrameBuffer *dst_fb = td->dst_frame_buffer; |
| |
| av1_cdef_filter_run(dec, cm, td->frame_buffer, cdef_dst); |
| |
| if (td->do_superres) { |
| const int h_border = 16; |
| const int bpp = 1 + dst_fb->hbd; |
| const int upscaled_width = cm->superres_upscaled_width; |
| const int y_height = (dst_fb->height + 127) & ~127; |
| const int superres_w = (upscaled_width + 127) & ~127; |
| const int superres_y_stride = (superres_w + 2 * h_border) * bpp; |
| const int superres_uv_stride = ((superres_w >> 1) + 2 * h_border) * bpp; |
| const int superres_y_size = y_height * superres_y_stride; |
| const int superres_uv_size = (y_height >> 1) * superres_uv_stride; |
| if (td->do_loop_rest) { |
| // upscale pre cdef frame (td->frame_buffer) to temp buffer |
| HwFrameBuffer *dst = &dec->back_buffer1; |
| assert(dst->size >= (superres_y_size + 2 * superres_uv_size)); |
| dst->y_crop_width = upscaled_width; |
| dst->uv_crop_width = (upscaled_width + 1) >> 1; |
| dst->y_crop_height = dst_fb->y_crop_height; |
| dst->uv_crop_height = dst_fb->uv_crop_height; |
| dst->planes[0].stride = superres_y_stride; |
| dst->planes[1].stride = superres_uv_stride; |
| dst->planes[2].stride = superres_uv_stride; |
| dst->planes[0].offset = static_cast<int>(dst->base_offset + h_border * bpp); |
| dst->planes[1].offset = dst->planes[0].offset + superres_y_size; |
| dst->planes[2].offset = dst->planes[1].offset + superres_uv_size; |
| dst->hbd = dst_fb->hbd; |
| av1_superres(dec, td->frame_buffer, dst, 2); |
| } |
| |
| if (td->do_cdef || (td->do_cdef == 0 && td->do_loop_rest == 0)) { |
| // upscale post cdef frame (cdef_dst) or src (td->frame_buffer) to: |
| HwFrameBuffer *src = td->do_cdef ? cdef_dst : td->frame_buffer; |
| HwFrameBuffer *dst = td->do_loop_rest ? td->frame_buffer : dst_fb; |
| assert(dst->size >= (superres_y_size + 2 * superres_uv_size)); |
| dst->y_crop_width = upscaled_width; |
| dst->uv_crop_width = (upscaled_width + 1) >> 1; |
| dst->y_crop_height = dst_fb->y_crop_height; |
| dst->uv_crop_height = dst_fb->uv_crop_height; |
| dst->planes[0].stride = superres_y_stride; |
| dst->planes[1].stride = superres_uv_stride; |
| dst->planes[2].stride = superres_uv_stride; |
| dst->planes[0].offset = static_cast<int>(dst->base_offset + h_border * bpp); |
| dst->planes[1].offset = dst->planes[0].offset + superres_y_size; |
| dst->planes[2].offset = dst->planes[1].offset + superres_uv_size; |
| dst->hbd = dst_fb->hbd; |
| av1_superres(dec, src, dst, 3); |
| } |
| |
| // update dst fb if not yet updated |
| dst_fb->y_crop_width = upscaled_width; |
| dst_fb->uv_crop_width = (upscaled_width + 1) >> 1; |
| dst_fb->width = (upscaled_width + 7) & ~7; |
| dst_fb->planes[0].stride = superres_y_stride; |
| dst_fb->planes[1].stride = superres_uv_stride; |
| dst_fb->planes[2].stride = superres_uv_stride; |
| dst_fb->planes[0].offset = static_cast<int>(dst_fb->base_offset + h_border * bpp); |
| dst_fb->planes[1].offset = dst_fb->planes[0].offset + superres_y_size; |
| dst_fb->planes[2].offset = dst_fb->planes[1].offset + superres_uv_size; |
| YV12_BUFFER_CONFIG *const buf = &cm->cur_frame->buf; |
| buf->y_crop_width = dst_fb->y_crop_width; |
| buf->uv_crop_width = dst_fb->uv_crop_width; |
| buf->y_width = dst_fb->width; |
| buf->uv_width = buf->y_width >> 1; |
| /* |
| YV12_BUFFER_CONFIG *const buf = &cm->cur_frame->buf; |
| buf->y_crop_width = upscaled_width; |
| buf->uv_crop_width = (upscaled_width + 1) >> 1; |
| buf->y_width = (upscaled_width + 7) & ~7; |
| buf->uv_width = buf->y_width >> 1;*/ |
| } |
| |
| av1_looprestoration(dec, cm, lr_ctxt); |
| } |
| |
// Film grain parameters in the layout shared by FilmGrainGenData and FilmGrainData.
// av1_filmgrain_run() fills the members from the frame's aom_film_grain_t.
| struct GrainParams { |
| // This structure is compared element-by-element in the function |
| // av1_check_grain_params_equiv: this function must be updated if any changes |
| // are made to this structure. |
| int apply_grain; |
| int update_parameters; |
| int num_y_points; // value: 0..14 |
| int num_cb_points; // value: 0..10 |
| |
| int num_cr_points; // value: 0..10 |
| int scaling_shift; // values : 8..11 |
| int ar_coeff_lag; // values: 0..3 |
| int ar_coeff_shift; // values : 6..9 |
| |
| // Shift value: AR coeffs range |
| // 6: [-2, 2) |
| // 7: [-1, 1) |
| // 8: [-0.5, 0.5) |
| // 9: [-0.25, 0.25) |
| |
| int cb_mult; // 8 bits |
| int cb_luma_mult; // 8 bits |
| int cb_offset; // 9 bits |
| int cr_mult; // 8 bits |
| |
| int cr_luma_mult; // 8 bits |
| int cr_offset; // 9 bits |
| int overlap_flag; |
| int clip_to_restricted_range; |
| |
| unsigned int bit_depth; // video bit depth |
| int chroma_scaling_from_luma; |
| int grain_scale_shift; |
| unsigned int random_seed; |
| |
| // Y 8 bit values 14*2 = 7*4 |
| // UV 8 bit values 10*2 = 5*4+2 (padding 2) |
// Host layout: [point][0 = first / 1 = second value of the point][0 = Y, 1 = Cb, 2 = Cr];
// the Cb and Cr components are only written for the first 10 points.
| int scaling_points[14][2][4]; |
| |
| // Y 8 bit values 24 = 6*4 |
| // UV 8 bit values 25 = 6*4 + 1 (padding 3) |
// Host layout: [plane][coefficient][0]; entry 24 is only written for Cb and Cr.
| int ar_coeffs[3][25][4]; // xyz = y, cb, cr |
| }; |
| |
// First constant buffer allocated and filled by av1_filmgrain_run().
// NOTE(review): by its name this feeds the grain-generation shader - confirm against the dispatch code.
| struct FilmGrainGenData { |
| GrainParams params; |
| |
// Grain block dimensions computed by the host from the sub-block size and the paddings below;
// each grain stride is set equal to the matching block width.
| int luma_block_size_y; |
| int luma_block_size_x; |
| int luma_grain_stride; |
| int chroma_block_size_y; |
| |
| int chroma_block_size_x; |
| int chroma_grain_stride; |
// Paddings; the host sets left, top and right to 3 and bottom to 0.
| int left_pad; |
| int top_pad; |
| |
| int right_pad; |
| int bottom_pad; |
// dst_offset_u = luma block area; dst_offset_v = luma block area + chroma block area.
| int dst_offset_u; |
| int dst_offset_v; |
| |
// [position][0 = row, 1 = col, 2 = third value (0, or 1 for the extra entry)][0 or 1].
// The host writes components [0] and [1] identically for the causal neighbourhood defined by
// ar_coeff_lag; when num_y_points > 0 one extra entry (0, 0, 1) is written to component [1] only.
| int pred_pos[25][3][4]; |
| }; |
| |
// Second constant buffer allocated by av1_filmgrain_run(). In the visible part of that
// function only `params` is assigned (copied from the FilmGrainGenData buffer); the code that
// fills the remaining members lies outside this file section.
| struct FilmGrainData { |
| GrainParams params; |
| |
| int src_planes[3][4]; |
| int dst_planes[3][4]; |
| |
| int enable_chroma; |
| int random_offset_stride; |
| int width; |
| int height; |
| |
| int mc_identity; |
| int luma_grain_stride; |
| int chroma_grain_stride; |
| int left_pad; |
| |
| int right_pad; |
| int top_pad; |
| int bottom_pad; |
| int ar_padding; |
| |
| int grain_offset_u; |
| int grain_offset_v; |
| int is_10x3; |
| int pad; |
| |
// NOTE(review): the [256][4] shape matches the scaling_lut parameter of init_scaling_function(),
// which fills one component column per call - confirm that this is how it is populated.
| int scaling_lut[256][4]; |
| }; |
| |
// Builds one component column of a 256-entry scaling look-up table by
// piecewise-linear interpolation between scaling points.
//
// scaling_points: array of {x, y} pairs; x is expected to increase from one
//                 point to the next and to lie in [0, 255].
// num_points:     number of valid entries in scaling_points. When it is zero
//                 (or negative) the whole column is cleared to 0.
// scaling_lut:    table of 256 rows; only component `p` of each row is written.
// p:              component (column) of scaling_lut to fill.
//
// Entries below the first point take the first point's y, entries from the
// last point onwards take the last point's y. Segments whose x does not
// increase are skipped (this avoids a division by zero for duplicated x
// values), and writes are limited to rows 0..255.
static void init_scaling_function(const int scaling_points[][2], int num_points, int scaling_lut[][4], int p) {
  const int kLutSize = 256;

  if (num_points <= 0) {
    for (int i = 0; i < kLutSize; i++) scaling_lut[i][p] = 0;
    return;
  }

  // Flat region before the first point.
  const int first_x = scaling_points[0][0];
  const int first_y = scaling_points[0][1];
  for (int i = 0; i < first_x && i < kLutSize; i++) scaling_lut[i][p] = first_y;

  // Linear interpolation between consecutive points, in 16.16 fixed point.
  for (int point = 0; point < num_points - 1; point++) {
    const int x0 = scaling_points[point][0];
    const int y0 = scaling_points[point][1];
    const int delta_y = scaling_points[point + 1][1] - y0;
    const int delta_x = scaling_points[point + 1][0] - x0;
    if (delta_x <= 0) continue;  // nothing to fill, and delta_x == 0 would divide by zero

    const int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
    for (int x = 0; x < delta_x; x++) {
      const int idx = x0 + x;
      if (idx < 0) continue;
      if (idx >= kLutSize) break;
      scaling_lut[idx][p] = y0 + static_cast<int>((x * delta + 32768) >> 16);
    }
  }

  // Flat region from the last point to the end of the table.
  const int last_x = scaling_points[num_points - 1][0];
  const int last_y = scaling_points[num_points - 1][1];
  for (int i = (last_x < 0 ? 0 : last_x); i < kLutSize; i++) scaling_lut[i][p] = last_y;
}
| |
// 16-bit linear-feedback shift register used as a pseudo-random number source.
struct RNG {
  uint16_t random_register;

  // Seeds the register from `seed`, then mixes in the index of the 32-line
  // stripe that contains `luma_line` (high byte and low byte separately).
  void init(int luma_line, uint16_t seed) {
    const int stripe = luma_line >> 5;
    const uint16_t high_byte = (seed >> 8) & 255;
    const uint16_t low_byte = seed & 255;
    random_register = (high_byte << 8) + low_byte;

    const int high_mix = (stripe * 37 + 178) & 255;
    const int low_mix = (stripe * 173 + 105) & 255;
    // The two masks occupy disjoint bytes, so one XOR applies both.
    random_register ^= (high_mix << 8) | low_mix;
  }

  // Advances the register by one step and returns its top `bits` bits.
  int get_random_number(int bits) {
    // Feedback bit: XOR of register bits 0, 1, 3 and 12.
    const uint16_t feedback =
        (random_register ^ (random_register >> 1) ^ (random_register >> 3) ^ (random_register >> 12)) & 1;
    random_register = (random_register >> 1) | (feedback << 15);

    const int mask = (1 << bits) - 1;
    const int top_bits = random_register >> (16 - bits);
    return top_bits & mask;
  }
};
| |
| void av1_filmgrain_run(Av1Core *dec, AV1_COMMON *cm, int enable_chorma) { |
| av1_frame_thread_data *td = dec->curr_frame_data; |
| aom_film_grain_t *params = &cm->film_grain_params; |
| |
| ComputeCommandBuffer *cb = &td->command_buffer; |
| ConstantBufferObject cbo1 = cb->Alloc(sizeof(FilmGrainGenData)); |
| FilmGrainGenData *gen_data = reinterpret_cast<FilmGrainGenData *>(cbo1.host_ptr); |
| |
| gen_data->params.apply_grain = params->apply_grain; |
| gen_data->params.ar_coeff_lag = params->ar_coeff_lag; |
| gen_data->params.ar_coeff_shift = params->ar_coeff_shift; |
| gen_data->params.bit_depth = params->bit_depth; |
| gen_data->params.cb_luma_mult = params->cb_luma_mult; |
| gen_data->params.cb_mult = params->cb_mult; |
| gen_data->params.cb_offset = params->cb_offset; |
| gen_data->params.chroma_scaling_from_luma = params->chroma_scaling_from_luma; |
| gen_data->params.clip_to_restricted_range = params->clip_to_restricted_range; |
| gen_data->params.cr_luma_mult = params->cr_luma_mult; |
| gen_data->params.cr_mult = params->cr_mult; |
| gen_data->params.cr_offset = params->cr_offset; |
| gen_data->params.grain_scale_shift = params->grain_scale_shift; |
| gen_data->params.num_cb_points = params->num_cb_points; |
| gen_data->params.num_cr_points = params->num_cr_points; |
| gen_data->params.num_y_points = params->num_y_points; |
| gen_data->params.overlap_flag = params->overlap_flag; |
| gen_data->params.random_seed = params->random_seed; |
| gen_data->params.update_parameters = params->update_parameters; |
| gen_data->params.scaling_shift = params->scaling_shift; |
| for (int i = 0; i < 24; ++i) { |
| gen_data->params.ar_coeffs[0][i][0] = params->ar_coeffs_y[i]; |
| gen_data->params.ar_coeffs[1][i][0] = params->ar_coeffs_cb[i]; |
| gen_data->params.ar_coeffs[2][i][0] = params->ar_coeffs_cr[i]; |
| } |
| gen_data->params.ar_coeffs[1][24][0] = params->ar_coeffs_cb[24]; |
| gen_data->params.ar_coeffs[2][24][0] = params->ar_coeffs_cr[24]; |
| |
| for (int i = 0; i < 14; ++i) { |
| gen_data->params.scaling_points[i][0][0] = params->scaling_points_y[i][0]; |
| gen_data->params.scaling_points[i][1][0] = params->scaling_points_y[i][1]; |
| if (i < 10) { |
| gen_data->params.scaling_points[i][0][1] = params->scaling_points_cb[i][0]; |
| gen_data->params.scaling_points[i][1][1] = params->scaling_points_cb[i][1]; |
| gen_data->params.scaling_points[i][0][2] = params->scaling_points_cr[i][0]; |
| gen_data->params.scaling_points[i][1][2] = params->scaling_points_cr[i][1]; |
| } |
| } |
| |
| const int left_pad = 3; |
| const int right_pad = 3; // padding to offset for AR coefficients |
| const int top_pad = 3; |
| const int bottom_pad = 0; |
| const int ar_padding = 3; // maximum lag used for stabilization of AR coefficients |
| const int luma_subblock_size_y = 32; |
| const int luma_subblock_size_x = 32; |
| const int chroma_subblock_size_y = luma_subblock_size_y >> 1; |
| const int chroma_subblock_size_x = luma_subblock_size_x >> 1; |
| const int luma_block_size_y = top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad; |
| const int luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 + 2 * ar_padding + right_pad; |
| const int chroma_block_size_y = top_pad + (2 >> 1) * ar_padding + chroma_subblock_size_y * 2 + bottom_pad; |
| const int chroma_block_size_x = |
| left_pad + (2 >> 1) * ar_padding + chroma_subblock_size_x * 2 + (2 >> 1) * ar_padding + right_pad; |
| |
| gen_data->luma_block_size_y = luma_block_size_y; |
| gen_data->luma_block_size_x = luma_block_size_x; |
| gen_data->luma_grain_stride = luma_block_size_x; |
| gen_data->chroma_block_size_y = chroma_block_size_y; |
| gen_data->chroma_block_size_x = chroma_block_size_x; |
| gen_data->chroma_grain_stride = chroma_block_size_x; |
| gen_data->left_pad = left_pad; |
| gen_data->top_pad = top_pad; |
| gen_data->right_pad = right_pad; |
| gen_data->bottom_pad = bottom_pad; |
| gen_data->dst_offset_u = luma_block_size_x * luma_block_size_y; |
| gen_data->dst_offset_v = luma_block_size_x * luma_block_size_y + chroma_block_size_x * chroma_block_size_y; |
| int pos_ar_index = 0; |
| for (int row = -params->ar_coeff_lag; row < 0; row++) { |
| for (int col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1; col++) { |
| gen_data->pred_pos[pos_ar_index][0][0] = row; |
| gen_data->pred_pos[pos_ar_index][1][0] = col; |
| gen_data->pred_pos[pos_ar_index][2][0] = 0; |
| |
| gen_data->pred_pos[pos_ar_index][0][1] = row; |
| gen_data->pred_pos[pos_ar_index][1][1] = col; |
| gen_data->pred_pos[pos_ar_index][2][1] = 0; |
| ++pos_ar_index; |
| } |
| } |
| |
| for (int col = -params->ar_coeff_lag; col < 0; col++) { |
| gen_data->pred_pos[pos_ar_index][0][0] = 0; |
| gen_data->pred_pos[pos_ar_index][1][0] = col; |
| gen_data->pred_pos[pos_ar_index][2][0] = 0; |
| |
| gen_data->pred_pos[pos_ar_index][0][1] = 0; |
| gen_data->pred_pos[pos_ar_index][1][1] = col; |
| gen_data->pred_pos[pos_ar_index][2][1] = 0; |
| ++pos_ar_index; |
| } |
| if (params->num_y_points > 0) { |
| gen_data->pred_pos[pos_ar_index][0][1] = 0; |
| gen_data->pred_pos[pos_ar_index][1][1] = 0; |
| gen_data->pred_pos[pos_ar_index][2][1] = 1; |
| } |
| |
| ConstantBufferObject cbo2 = cb->Alloc(sizeof(FilmGrainData)); |
| FilmGrainData *data = reinterpret_cast<FilmGrainData *>(cbo2.host_ptr); |
| |
| data->params = gen_data->params; |
| data->enable_chroma = enable_chorma; |
| |
| memcpy(data->dst_planes, dec->back_buffer1.planes, sizeof(dec->back_buffer1.planes)); |
| |
| HwFrameBuffer *src = td->dst_frame_buffer; |
| memcpy(data->src_planes, src->planes, sizeof(src->planes)); |
| |
| data->luma_grain_stride = luma_block_size_x; |
| data->chroma_grain_stride = chroma_block_size_x; |
| data->grain_offset_u = luma_block_size_x * luma_block_size_y; |
| data->grain_offset_v = luma_block_size_x * luma_block_size_y + chroma_block_size_x * chroma_block_size_y; |
| data->mc_identity = cm->cur_frame->buf.matrix_coefficients == AOM_CICP_MC_IDENTITY ? 1 : 0; |
| data->left_pad = left_pad; |
| data->right_pad = right_pad; |
| data->top_pad = top_pad; |
| data->bottom_pad = bottom_pad; |
| data->ar_padding = ar_padding; |
| data->width = src->y_crop_width; |
| data->height = src->y_crop_height; |
| data->random_offset_stride = (src->width / 32) + 1; |
| data->is_10x3 = dec->tryhdr10x3 && src->hbd; |
| // data->scaling_lut[256][4]; |
| init_scaling_function(params->scaling_points_y, params->num_y_points, data->scaling_lut, 0); |
| if (params->chroma_scaling_from_luma) { |
| for (int i = 0; i < 256; ++i) { |
| data->scaling_lut[i][1] = data->scaling_lut[i][0]; |
| data->scaling_lut[i][2] = data->scaling_lut[i][0]; |
| } |
| } else { |
| init_scaling_function(params->scaling_points_cb, params->num_cb_points, data->scaling_lut, 1); |
| init_scaling_function(params->scaling_points_cr, params->num_cr_points, data->scaling_lut, 2); |
| } |
| |
| RNG rng; |
| // const int random_offset_width = src->width / luma_subblock_size_x; |
| int *random_offset = (int *)td->filmgrain_rand_offset->host_ptr; |
| for (int y = 0; y < src->height / 2; y += (luma_subblock_size_y >> 1)) { |
| rng.init(y * 2, params->random_seed); |
| for (int x = 0; x < src->width / 2; x += (luma_subblock_size_x >> 1)) { |
| random_offset[(y / (luma_subblock_size_x >> 1)) * data->random_offset_stride + x / (luma_subblock_size_x >> 1)] = |
| rng.get_random_number(8); |
| } |
| } |
| |
| Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> command_list = dec->compute.command_list; |
| ComputeShader *shader = &dec->shader_lib->shader_filmgrain_luma_gen; |
| command_list->SetPipelineState(shader->pso.Get()); |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetComputeRootShaderResourceView(0, dec->filmgrain_random_luma->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(1, dec->filmgrain_random_chroma->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(2, dec->filmgrain_gaus->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootUnorderedAccessView(3, dec->filmgrain_noise->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootConstantBufferView(4, cbo1.dev_address); |
| command_list->Dispatch(1, 1, 1); |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->filmgrain_noise->dev)); |
| |
| shader = &dec->shader_lib->shader_filmgrain_chroma_gen; |
| command_list->SetPipelineState(shader->pso.Get()); |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetComputeRootShaderResourceView(0, dec->filmgrain_random_luma->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(1, dec->filmgrain_random_chroma->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(2, dec->filmgrain_gaus->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootUnorderedAccessView(3, dec->filmgrain_noise->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootConstantBufferView(4, cbo1.dev_address); |
| command_list->Dispatch(1, 2, 1); |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->filmgrain_noise->dev)); |
| |
| shader = &dec->shader_lib->shader_filmgrain_filter; |
| command_list->SetPipelineState(shader->pso.Get()); |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetComputeRootShaderResourceView(0, dec->frame_buffer_pool->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(1, dec->filmgrain_noise->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootShaderResourceView(2, td->filmgrain_rand_offset->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootUnorderedAccessView(3, dec->frame_buffer_pool->dev->GetGPUVirtualAddress()); |
| command_list->SetComputeRootConstantBufferView(4, cbo2.dev_address); |
| command_list->Dispatch(src->width / 32 + 1, src->height / 32 + 1, 1); |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->frame_buffer_pool->dev)); |
| |
| PutPerfMarker(td, &td->perf_markers[7]); |
| } |
| |
// Root constants for the plane-copy compute shaders. The struct is uploaded
// verbatim with SetComputeRoot32BitConstants by av1_postprocess_copy_output,
// so the order and size of the fields are part of the shader interface.
struct CopyPlaneData {
  unsigned int cb_wi_count;    // total work items for the plane (items per row * rows)
  unsigned int cb_src_offset;  // offset of the source plane, taken from HwFrameBuffer::planes[].offset
  unsigned int cb_src_stride;  // stride of the source plane
  unsigned int cb_dst_width;   // work items per destination row
  unsigned int cb_dst_offset;  // offset of the destination plane
  unsigned int cb_dst_stride;  // stride of the destination plane
};

// Root constants are counted in 32-bit values; any field added, removed or
// widened here must be matched by the upload count and by the shader side.
static_assert(sizeof(CopyPlaneData) == 6 * 4, "CopyPlaneData must be exactly six 32-bit root constants");
| |
| int av1_postprocess_copy_output(Av1Core *dec, AV1_COMMON *cm) { |
| av1_frame_thread_data *td = dec->curr_frame_data; |
| cm->cur_frame->buf.hw_show_image = NULL; |
| if (!cm->showable_frame && !cm->show_frame) return 0; |
| |
| HwFrameBuffer *src = td->dst_frame_buffer; |
| const int is_monochrome = cm->cur_frame->buf.monochrome; |
| const int is_10x3 = dec->tryhdr10x3 && src->hbd; |
| const int align1 = 255; |
| int y_stride = 0; |
| int uv_stride = 0; |
| const int bpp = src->hbd ? (is_10x3 ? 4 : 2) : 1; |
| int dst_y_texture_width = is_10x3 ? (src->y_crop_width + 2) / 3 : src->y_crop_width; |
| int dst_uv_texture_width = is_10x3 ? (src->uv_crop_width + 2) / 3 : src->uv_crop_width; |
| if (is_monochrome) { |
| y_stride = ((dst_y_texture_width * bpp + align1) & (~align1)); |
| } else { |
| uv_stride = ((dst_uv_texture_width * bpp + align1) & (~align1)); |
| y_stride = uv_stride * 2; |
| } |
| |
| const int num_planes = is_monochrome ? 1 : 3; |
| const int y_size = (y_stride * src->y_crop_height + 511) & ~511; |
| const int uv_size = (uv_stride * src->uv_crop_height + 511) & ~511; |
| const int size = y_size + (num_planes - 1) * uv_size; |
| // callback here: |
| av1_decoded_frame_buffer_t fb = {0}; |
| |
| int err = 1; |
| frame_buffer_type fb_type = td->is_hbd ? (is_10x3 ? fbt10x3 : fbt10bit) : fbt8bit; |
| if (dec->cb_get_output_image) |
| err = dec->cb_get_output_image(dec->image_alloc_priv, size, src->y_crop_width, src->y_crop_height, fb_type, &fb); |
| if (err || (fb.dx12_buffer == 0 && fb.dx12_texture[0] == 0)) { |
| aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate output frame."); |
| } |
| |
| HwOutputImage *img = (HwOutputImage *)MTQueueGet(&dec->image_pool); |
| img->fb_ptr = (uint8_t *)fb.buffer_host_ptr; |
| img->alloc_priv = fb.priv; |
| img->is_valid = 1; |
| img->y_crop_width = src->y_crop_width; |
| img->y_crop_height = src->y_crop_height; |
| img->uv_crop_width = src->uv_crop_width; |
| img->uv_crop_height = src->uv_crop_height; |
| img->frame_number = src->frame_number; |
| img->hbd = td->is_hbd; |
| img->monochrome = is_monochrome; |
| img->planes[0].stride = y_stride; |
| img->planes[1].stride = uv_stride; |
| img->planes[2].stride = uv_stride; |
| img->planes[0].offset = 0; |
| img->planes[1].offset = y_size; |
| img->planes[2].offset = y_size + uv_size; |
| img->is_valid = 1; |
| |
| if (is_monochrome) { |
| img->planes[1].offset = 0; |
| img->planes[2].offset = 0; |
| } |
| img->size = fb.buffer_size; |
| cm->cur_frame->buf.hw_show_image = img; |
| |
| const int film_grain_chroma = td->do_filmgrain && !is_monochrome && |
| (cm->film_grain_params.num_cb_points || cm->film_grain_params.chroma_scaling_from_luma); |
| |
| HwFrameBuffer *dst = &dec->back_buffer1; |
| dst->planes[0].stride = img->planes[0].stride; |
| dst->planes[1].stride = img->planes[1].stride; |
| dst->planes[2].stride = img->planes[2].stride; |
| dst->planes[0].offset = static_cast<int>(img->planes[0].offset + dst->base_offset); |
| dst->planes[1].offset = static_cast<int>(img->planes[1].offset + dst->base_offset); |
| dst->planes[2].offset = static_cast<int>(img->planes[2].offset + dst->base_offset); |
| if (td->do_filmgrain) { |
| av1_filmgrain_run(dec, cm, film_grain_chroma); |
| } |
| |
| Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> command_list = dec->compute.command_list; |
| if (!td->do_filmgrain || !film_grain_chroma) { |
| ComputeShader *shader = |
| is_10x3 ? &dec->shader_lib->shader_copy_plane_10bit10x3 : &dec->shader_lib->shader_copy_plane; |
| command_list->SetComputeRootSignature(shader->signaturePtr.Get()); |
| command_list->SetPipelineState(shader->pso.Get()); |
| command_list->SetComputeRootUnorderedAccessView(0, dec->frame_buffer_pool->dev->GetGPUVirtualAddress()); |
| |
| CopyPlaneData data; |
| |
| for (int plane = td->do_filmgrain; plane < num_planes; ++plane) { |
| const int w = is_10x3 ? (((plane == 0 ? img->y_crop_width : img->uv_crop_width) + 23) / 24) |
| : (((plane == 0 ? img->y_crop_width : img->uv_crop_width) * bpp + 15) >> 4); |
| const int h = plane == 0 ? img->y_crop_height : img->uv_crop_height; |
| data.cb_wi_count = w * h; |
| data.cb_src_offset = src->planes[plane].offset; |
| data.cb_src_stride = src->planes[plane].stride; |
| data.cb_dst_width = w; |
| data.cb_dst_offset = dst->planes[plane].offset; |
| data.cb_dst_stride = dst->planes[plane].stride; |
| command_list->SetComputeRoot32BitConstants(1, 6, &data, 0); |
| command_list->Dispatch((data.cb_wi_count + 63) >> 6, 1, 1); |
| } |
| command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::UAV(dec->frame_buffer_pool->dev)); |
| PutPerfMarker(td, &td->perf_markers[6]); |
| PutPerfMarker(td, &td->perf_markers[7]); |
| } |
| |
| command_list->ResourceBarrier( |
| 1, &CD3DX12_RESOURCE_BARRIER::Transition(dec->frame_buffer_pool->dev, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| D3D12_RESOURCE_STATE_COPY_SOURCE)); |
| if (fb.dx12_buffer) { |
| command_list->CopyBufferRegion(static_cast<ID3D12Resource *>(fb.dx12_buffer), 0, dec->frame_buffer_pool->dev, |
| dst->base_offset, size); |
| } |
| |
| if (fb.dx12_texture[0]) { |
| for (int i = 0; i < num_planes; i++) { |
| ID3D12Resource *dst_texture = static_cast<ID3D12Resource *>(fb.dx12_texture[i]); |
| if (!dst_texture) continue; |
| UINT64 RequiredSize = 0; |
| D3D12_PLACED_SUBRESOURCE_FOOTPRINT buffer_layout = {}; |
| buffer_layout.Offset = dst->planes[i].offset; |
| buffer_layout.Footprint.Depth = 1; |
| buffer_layout.Footprint.Format = |
| img->hbd ? (is_10x3 ? DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_R16_UNORM) : DXGI_FORMAT_R8_UNORM; |
| buffer_layout.Footprint.Width = i ? dst_uv_texture_width : dst_y_texture_width; |
| buffer_layout.Footprint.Height = i ? img->uv_crop_height : img->y_crop_height; |
| buffer_layout.Footprint.RowPitch = dst->planes[i].stride; |
| |
| CD3DX12_TEXTURE_COPY_LOCATION Dst(dst_texture, 0); |
| CD3DX12_TEXTURE_COPY_LOCATION Src(dec->frame_buffer_pool->dev, buffer_layout); |
| command_list->CopyTextureRegion(&Dst, 0, 0, 0, &Src, nullptr); |
| } |
| } |
| command_list->ResourceBarrier( |
| 1, &CD3DX12_RESOURCE_BARRIER::Transition(dec->frame_buffer_pool->dev, D3D12_RESOURCE_STATE_COPY_SOURCE, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); |
| return 0; |
| } |