| /* | 
 |  * Copyright (c) 2016, Alliance for Open Media. All rights reserved | 
 |  * | 
 |  * This source code is subject to the terms of the BSD 2 Clause License and | 
 |  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
 |  * was not distributed with this source code in the LICENSE file, you can | 
 |  * obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
 |  * Media Patent License 1.0 was not distributed with this source code in the | 
 |  * PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
 |  */ | 
 |  | 
 | #include <string.h> | 
 | #include <math.h> | 
 |  | 
 | #include "./aom_scale_rtcd.h" | 
 | #include "aom/aom_integer.h" | 
 | #include "av1/common/dering.h" | 
 | #include "av1/common/onyxc_int.h" | 
 | #include "av1/common/reconinter.h" | 
 | #include "av1/common/od_dering.h" | 
 |  | 
 | int compute_level_from_index(int global_level, int gi) { | 
 |   static const int dering_gains[DERING_REFINEMENT_LEVELS] = { 0, 11, 16, 22 }; | 
 |   int level; | 
 |   if (global_level == 0) return 0; | 
 |   level = (global_level * dering_gains[gi] + 8) >> 4; | 
 |   return clamp(level, gi, MAX_DERING_LEVEL - 1); | 
 | } | 
 |  | 
 | int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) { | 
 |   int r, c; | 
 |   int maxc, maxr; | 
 |   int skip = 1; | 
 |   maxc = cm->mi_cols - mi_col; | 
 |   maxr = cm->mi_rows - mi_row; | 
 | #if CONFIG_EXT_PARTITION | 
 |   if (maxr > cm->mib_size_log2) maxr = cm->mib_size_log2; | 
 |   if (maxc > cm->mib_size_log2) maxc = cm->mib_size_log2; | 
 | #else | 
 |   if (maxr > MAX_MIB_SIZE) maxr = MAX_MIB_SIZE; | 
 |   if (maxc > MAX_MIB_SIZE) maxc = MAX_MIB_SIZE; | 
 | #endif | 
 |  | 
 |   for (r = 0; r < maxr; r++) { | 
 |     for (c = 0; c < maxc; c++) { | 
 |       skip = skip && | 
 |              cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c] | 
 |                  ->mbmi.skip; | 
 |     } | 
 |   } | 
 |   return skip; | 
 | } | 
 |  | 
 | int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col, | 
 |                            dering_list *dlist) { | 
 |   int r, c; | 
 |   int maxc, maxr; | 
 |   MODE_INFO **grid; | 
 |   int count = 0; | 
 |   grid = cm->mi_grid_visible; | 
 |   maxc = cm->mi_cols - mi_col; | 
 |   maxr = cm->mi_rows - mi_row; | 
 | #if CONFIG_EXT_PARTITION | 
 |   if (maxr > cm->mib_size_log2) maxr = cm->mib_size_log2; | 
 |   if (maxc > cm->mib_size_log2) maxc = cm->mib_size_log2; | 
 | #else | 
 |   if (maxr > MAX_MIB_SIZE) maxr = MAX_MIB_SIZE; | 
 |   if (maxc > MAX_MIB_SIZE) maxc = MAX_MIB_SIZE; | 
 | #endif | 
 |   for (r = 0; r < maxr; r++) { | 
 |     MODE_INFO **grid_row; | 
 |     grid_row = &grid[(mi_row + r) * cm->mi_stride + mi_col]; | 
 |     for (c = 0; c < maxc; c++) { | 
 |       if (!grid_row[c]->mbmi.skip) { | 
 |         dlist[count].by = r; | 
 |         dlist[count].bx = c; | 
 |         count++; | 
 |       } | 
 |     } | 
 |   } | 
 |   return count; | 
 | } | 
 |  | 
 | static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride, | 
 |                                           int16_t *src, int sstride) { | 
 |   int i, j; | 
 |   for (i = 0; i < 8; i++) | 
 |     for (j = 0; j < 8; j++) | 
 |       dst[i * dstride + j] = (uint8_t)src[i * sstride + j]; | 
 | } | 
 |  | 
 | static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride, | 
 |                                           int16_t *src, int sstride) { | 
 |   int i, j; | 
 |   for (i = 0; i < 4; i++) | 
 |     for (j = 0; j < 4; j++) | 
 |       dst[i * dstride + j] = (uint8_t)src[i * sstride + j]; | 
 | } | 
 |  | 
 | /* TODO: Optimize this function for SSE. */ | 
 | void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, int16_t *src, | 
 |                                dering_list *dlist, int dering_count, | 
 |                                int bsize) { | 
 |   int bi, bx, by; | 
 |   if (bsize == 3) { | 
 |     for (bi = 0; bi < dering_count; bi++) { | 
 |       by = dlist[bi].by; | 
 |       bx = dlist[bi].bx; | 
 |       copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride, | 
 |                              &src[bi << 2 * bsize], 1 << bsize); | 
 |     } | 
 |   } else { | 
 |     for (bi = 0; bi < dering_count; bi++) { | 
 |       by = dlist[bi].by; | 
 |       bx = dlist[bi].bx; | 
 |       copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride, | 
 |                              &src[bi << 2 * bsize], 1 << bsize); | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | /* TODO: Optimize this function for SSE. */ | 
 | static void copy_sb8_16(AV1_COMMON *cm, int16_t *dst, int dstride, | 
 |                         const uint8_t *src, int src_voffset, int src_hoffset, | 
 |                         int sstride, int vsize, int hsize) { | 
 |   int r, c; | 
 |   (void)cm; | 
 | #if CONFIG_AOM_HIGHBITDEPTH | 
 |   if (cm->use_highbitdepth) { | 
 |     const uint16_t *base = | 
 |         &CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset]; | 
 |     for (r = 0; r < vsize; r++) { | 
 |       for (c = 0; c < hsize; c++) { | 
 |         dst[r * dstride + c] = base[r * sstride + c]; | 
 |       } | 
 |     } | 
 |   } else | 
 | #endif | 
 |   { | 
 |     const uint8_t *base = &src[src_voffset * sstride + src_hoffset]; | 
 |     for (r = 0; r < vsize; r++) { | 
 |       for (c = 0; c < hsize; c++) { | 
 |         dst[r * dstride + c] = base[r * sstride + c]; | 
 |       } | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, | 
 |                       MACROBLOCKD *xd, int global_level) { | 
 |   int r, c; | 
 |   int sbr, sbc; | 
 |   int nhsb, nvsb; | 
 |   int16_t src[OD_DERING_INBUF_SIZE]; | 
 |   int16_t *linebuf[3]; | 
 |   int16_t colbuf[3][OD_BSIZE_MAX + 2 * OD_FILT_VBORDER][OD_FILT_HBORDER]; | 
 |   dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE]; | 
 |   unsigned char *row_dering, *prev_row_dering, *curr_row_dering; | 
 |   int dering_count; | 
 |   int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } }; | 
 |   int stride; | 
 |   int bsize[3]; | 
 |   int dec[3]; | 
 |   int pli; | 
 |   int dering_left; | 
 |   int coeff_shift = AOMMAX(cm->bit_depth - 8, 0); | 
 |   int nplanes; | 
 |   if (xd->plane[1].subsampling_x == xd->plane[1].subsampling_y && | 
 |       xd->plane[2].subsampling_x == xd->plane[2].subsampling_y) | 
 |     nplanes = 3; | 
 |   else | 
 |     nplanes = 1; | 
 |   nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; | 
 |   nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; | 
 |   av1_setup_dst_planes(xd->plane, frame, 0, 0); | 
 |   row_dering = aom_malloc(sizeof(*row_dering) * nhsb * 2); | 
 |   memset(row_dering, 1, sizeof(*row_dering) * (nhsb + 2) * 2); | 
 |   prev_row_dering = row_dering + 1; | 
 |   curr_row_dering = prev_row_dering + nhsb + 2; | 
 |   for (pli = 0; pli < nplanes; pli++) { | 
 |     dec[pli] = xd->plane[pli].subsampling_x; | 
 |     bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli]; | 
 |   } | 
 |   stride = (cm->mi_cols << bsize[0]) + 2 * OD_FILT_HBORDER; | 
 |   for (pli = 0; pli < nplanes; pli++) { | 
 |     linebuf[pli] = aom_malloc(sizeof(*linebuf) * OD_FILT_VBORDER * stride); | 
 |   } | 
 |   for (sbr = 0; sbr < nvsb; sbr++) { | 
 |     for (pli = 0; pli < nplanes; pli++) { | 
 |       for (r = 0; r < (MAX_MIB_SIZE << bsize[pli]) + 2 * OD_FILT_VBORDER; r++) { | 
 |         for (c = 0; c < OD_FILT_HBORDER; c++) { | 
 |           colbuf[pli][r][c] = OD_DERING_VERY_LARGE; | 
 |         } | 
 |       } | 
 |     } | 
 |     dering_left = 1; | 
 |     for (sbc = 0; sbc < nhsb; sbc++) { | 
 |       int level; | 
 |       int nhb, nvb; | 
 |       int cstart = 0; | 
 |       if (!dering_left) cstart = -OD_FILT_HBORDER; | 
 |       nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc); | 
 |       nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr); | 
 |       level = compute_level_from_index( | 
 |           global_level, cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride + | 
 |                                             MAX_MIB_SIZE * sbc] | 
 |                             ->mbmi.dering_gain); | 
 |       curr_row_dering[sbc] = 0; | 
 |       if (level == 0 || | 
 |           (dering_count = sb_compute_dering_list( | 
 |                cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist)) == 0) { | 
 |         dering_left = 0; | 
 |         continue; | 
 |       } | 
 |       curr_row_dering[sbc] = 1; | 
 |       for (pli = 0; pli < nplanes; pli++) { | 
 |         int16_t dst[OD_BSIZE_MAX * OD_BSIZE_MAX]; | 
 |         int threshold; | 
 |         int coffset; | 
 |         int rend, cend; | 
 |         if (sbc == nhsb - 1) | 
 |           cend = (nhb << bsize[pli]); | 
 |         else | 
 |           cend = (nhb << bsize[pli]) + OD_FILT_HBORDER; | 
 |         if (sbr == nvsb - 1) | 
 |           rend = (nvb << bsize[pli]); | 
 |         else | 
 |           rend = (nvb << bsize[pli]) + OD_FILT_VBORDER; | 
 |         coffset = sbc * MAX_MIB_SIZE << bsize[pli]; | 
 |         if (sbc == nhsb - 1) { | 
 |           /* On the last superblock column, fill in the right border with | 
 |              OD_DERING_VERY_LARGE to avoid filtering with the outside. */ | 
 |           for (r = 0; r < rend + OD_FILT_VBORDER; r++) { | 
 |             for (c = cend; c < (nhb << bsize[pli]) + OD_FILT_HBORDER; ++c) { | 
 |               src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] = | 
 |                   OD_DERING_VERY_LARGE; | 
 |             } | 
 |           } | 
 |         } | 
 |         if (sbr == nvsb - 1) { | 
 |           /* On the last superblock row, fill in the bottom border with | 
 |              OD_DERING_VERY_LARGE to avoid filtering with the outside. */ | 
 |           for (r = rend; r < rend + OD_FILT_VBORDER; r++) { | 
 |             for (c = 0; c < (nhb << bsize[pli]) + 2 * OD_FILT_HBORDER; c++) { | 
 |               src[(r + OD_FILT_VBORDER) * OD_FILT_BSTRIDE + c] = | 
 |                   OD_DERING_VERY_LARGE; | 
 |             } | 
 |           } | 
 |         } | 
 |         /* Copy in the pixels we need from the current superblock for | 
 |            deringing.*/ | 
 |         copy_sb8_16( | 
 |             cm, | 
 |             &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart], | 
 |             OD_FILT_BSTRIDE, xd->plane[pli].dst.buf, | 
 |             (MAX_MIB_SIZE << bsize[pli]) * sbr, coffset + cstart, | 
 |             xd->plane[pli].dst.stride, rend, cend - cstart); | 
 |         if (!prev_row_dering[sbc]) { | 
 |           copy_sb8_16(cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE, | 
 |                       xd->plane[pli].dst.buf, | 
 |                       (MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER, | 
 |                       coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, | 
 |                       nhb << bsize[pli]); | 
 |         } else if (sbr > 0) { | 
 |           for (r = 0; r < OD_FILT_VBORDER; r++) { | 
 |             for (c = 0; c < nhb << bsize[pli]; c++) { | 
 |               src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] = | 
 |                   linebuf[pli][r * stride + coffset + c]; | 
 |             } | 
 |           } | 
 |         } else { | 
 |           for (r = 0; r < OD_FILT_VBORDER; r++) { | 
 |             for (c = 0; c < nhb << bsize[pli]; c++) { | 
 |               src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] = | 
 |                   OD_DERING_VERY_LARGE; | 
 |             } | 
 |           } | 
 |         } | 
 |         if (!prev_row_dering[sbc - 1]) { | 
 |           copy_sb8_16(cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf, | 
 |                       (MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER, | 
 |                       coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride, | 
 |                       OD_FILT_VBORDER, OD_FILT_HBORDER); | 
 |         } else if (sbr > 0 && sbc > 0) { | 
 |           for (r = 0; r < OD_FILT_VBORDER; r++) { | 
 |             for (c = -OD_FILT_HBORDER; c < 0; c++) { | 
 |               src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] = | 
 |                   linebuf[pli][r * stride + coffset + c]; | 
 |             } | 
 |           } | 
 |         } else { | 
 |           for (r = 0; r < OD_FILT_VBORDER; r++) { | 
 |             for (c = -OD_FILT_HBORDER; c < 0; c++) { | 
 |               src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] = | 
 |                   OD_DERING_VERY_LARGE; | 
 |             } | 
 |           } | 
 |         } | 
 |         if (!prev_row_dering[sbc + 1]) { | 
 |           copy_sb8_16(cm, &src[OD_FILT_HBORDER + (nhb << bsize[pli])], | 
 |                       OD_FILT_BSTRIDE, xd->plane[pli].dst.buf, | 
 |                       (MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER, | 
 |                       coffset + (nhb << bsize[pli]), xd->plane[pli].dst.stride, | 
 |                       OD_FILT_VBORDER, OD_FILT_HBORDER); | 
 |         } else if (sbr > 0 && sbc < nhsb - 1) { | 
 |           for (r = 0; r < OD_FILT_VBORDER; r++) { | 
 |             for (c = nhb << bsize[pli]; | 
 |                  c < (nhb << bsize[pli]) + OD_FILT_HBORDER; c++) { | 
 |               src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] = | 
 |                   linebuf[pli][r * stride + coffset + c]; | 
 |             } | 
 |           } | 
 |         } else { | 
 |           for (r = 0; r < OD_FILT_VBORDER; r++) { | 
 |             for (c = nhb << bsize[pli]; | 
 |                  c < (nhb << bsize[pli]) + OD_FILT_HBORDER; c++) { | 
 |               src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] = | 
 |                   OD_DERING_VERY_LARGE; | 
 |             } | 
 |           } | 
 |         } | 
 |         if (dering_left) { | 
 |           /* If we deringed the superblock on the left then we need to copy in | 
 |              saved pixels. */ | 
 |           for (r = 0; r < rend + OD_FILT_VBORDER; r++) { | 
 |             for (c = 0; c < OD_FILT_HBORDER; c++) { | 
 |               src[r * OD_FILT_BSTRIDE + c] = colbuf[pli][r][c]; | 
 |             } | 
 |           } | 
 |         } | 
 |         for (r = 0; r < rend + OD_FILT_VBORDER; r++) { | 
 |           for (c = 0; c < OD_FILT_HBORDER; c++) { | 
 |             /* Saving pixels in case we need to dering the superblock on the | 
 |                right. */ | 
 |             colbuf[pli][r][c] = | 
 |                 src[r * OD_FILT_BSTRIDE + c + (nhb << bsize[pli])]; | 
 |           } | 
 |         } | 
 |         copy_sb8_16(cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf, | 
 |                     (MAX_MIB_SIZE << bsize[pli]) * (sbr + 1) - OD_FILT_VBORDER, | 
 |                     coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, | 
 |                     (nhb << bsize[pli])); | 
 |  | 
 |         /* FIXME: This is a temporary hack that uses more conservative | 
 |            deringing for chroma. */ | 
 |         if (pli) | 
 |           threshold = (level * 5 + 4) >> 3 << coeff_shift; | 
 |         else | 
 |           threshold = level << coeff_shift; | 
 |         if (threshold == 0) continue; | 
 |         od_dering( | 
 |             dst, &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER], | 
 |             dec[pli], dir, pli, dlist, dering_count, threshold, coeff_shift); | 
 | #if CONFIG_AOM_HIGHBITDEPTH | 
 |         if (cm->use_highbitdepth) { | 
 |           copy_dering_16bit_to_16bit( | 
 |               (int16_t *)&CONVERT_TO_SHORTPTR( | 
 |                   xd->plane[pli] | 
 |                       .dst.buf)[xd->plane[pli].dst.stride * | 
 |                                     (MAX_MIB_SIZE * sbr << bsize[pli]) + | 
 |                                 (sbc * MAX_MIB_SIZE << bsize[pli])], | 
 |               xd->plane[pli].dst.stride, dst, dlist, dering_count, | 
 |               3 - dec[pli]); | 
 |         } else { | 
 | #endif | 
 |           copy_dering_16bit_to_8bit( | 
 |               &xd->plane[pli].dst.buf[xd->plane[pli].dst.stride * | 
 |                                           (MAX_MIB_SIZE * sbr << bsize[pli]) + | 
 |                                       (sbc * MAX_MIB_SIZE << bsize[pli])], | 
 |               xd->plane[pli].dst.stride, dst, dlist, dering_count, bsize[pli]); | 
 | #if CONFIG_AOM_HIGHBITDEPTH | 
 |         } | 
 | #endif | 
 |       } | 
 |       dering_left = 1; | 
 |     } | 
 |     { | 
 |       unsigned char *tmp; | 
 |       tmp = prev_row_dering; | 
 |       prev_row_dering = curr_row_dering; | 
 |       curr_row_dering = tmp; | 
 |     } | 
 |   } | 
 |   aom_free(row_dering); | 
 |   for (pli = 0; pli < nplanes; pli++) { | 
 |     aom_free(linebuf[pli]); | 
 |   } | 
 | } |