av1/encoder/aq_variance.c - aom - Git at Google

 /*
  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  * was not distributed with this source code in the LICENSE file, you can
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

 #include <math.h>
 #include <stdlib.h>

 #include "aom_ports/mem.h"

 #include "av1/encoder/aq_variance.h"
 #include "av1/common/seg_common.h"
 #include "av1/encoder/encodeframe.h"
 #include "av1/encoder/ratectrl.h"
 #include "av1/encoder/rd.h"
 #include "av1/encoder/segmentation.h"
 #include "av1/encoder/dwt.h"
 #include "config/aom_config.h"

 #if !CONFIG_REALTIME_ONLY
 // Rate ratio for each segment, indexed by segment id. av1_vaq_frame_setup()
 // divides each entry by the entry selected from the frame's average energy
 // before converting it to a per-segment q-index delta.
 static const double rate_ratio[MAX_SEGMENTS] = { 2.2, 1.7, 1.3, 1.0,
                                                  0.9, .8,  .7,  .6 };

 // Rate ratios consumed by av1_compute_q_from_energy_level_deltaq_mode(),
 // indexed by rate level.
 static const double deltaq_rate_ratio[MAX_SEGMENTS] = { 2.5,  2.0, 1.5, 1.0,
                                                         0.75, 1.0, 1.0, 1.0 };
 // Inclusive bounds of the block energy level and the number of levels spanned.
 #define ENERGY_MIN (-4)
 #define ENERGY_MAX (1)
 #define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
 // Asserts that an energy level lies within [ENERGY_MIN, ENERGY_MAX].
 #define ENERGY_IN_BOUNDS(energy) \
   assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)

 // Lookup table from energy level (offset by ENERGY_MIN) to segment id.
 static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 };

 // Maps an energy level in [ENERGY_MIN, ENERGY_MAX] to its segment_id entry.
 #define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN]

 // Per-frame setup of the variance-AQ segmentation.
 //
 // If the frame dimensions differ from the previous frame, the segment map is
 // zeroed and segmentation is disabled. Otherwise, on intra-only frames,
 // error-resilient frames, alt-ref refreshes, and golden refreshes that are not
 // a source alt-ref, segmentation is enabled and every segment receives an
 // ALT_Q delta derived from rate_ratio[].
 void av1_vaq_frame_setup(AV1_COMP *cpi) {
   AV1_COMMON *cm = &cpi->common;
   struct segmentation *seg = &cm->seg;

   if (cm->prev_frame && (cm->width != cm->prev_frame->width ||
                          cm->height != cm->prev_frame->height)) {
     // Resolution changed: reset the map and switch segmentation off.
     memset(cpi->enc_seg.map, 0, cm->mi_params.mi_rows * cm->mi_params.mi_cols);
     av1_clearall_segfeatures(seg);
     av1_disable_segmentation(seg);
     return;
   }

   const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
   const int refresh_segments =
       frame_is_intra_only(cm) || cm->features.error_resilient_mode ||
       refresh_frame->alt_ref_frame ||
       (refresh_frame->golden_frame && !cpi->rc.is_src_frame_alt_ref);
   if (!refresh_segments) return;

   // Pick the rate ratio of the "average" segment; the index is clamped to the
   // valid range of rate_ratio[].
   int avg_energy = (int)(cpi->twopass_frame.mb_av_energy - 2);
   if (avg_energy < 0) {
     avg_energy = 0;
   } else if (avg_energy > 7) {
     avg_energy = 7;
   }
   const double avg_ratio = rate_ratio[avg_energy];
   const int base_qindex = cm->quant_params.base_qindex;

   cpi->vaq_refresh = 1;

   av1_enable_segmentation(seg);
   av1_clearall_segfeatures(seg);

   for (int seg_idx = 0; seg_idx < MAX_SEGMENTS; ++seg_idx) {
     // Normalize so the average segment has a ratio of 1.0 and the other
     // segments are adjusted around it.
     int q_delta =
         av1_compute_qdelta_by_rate(cpi, cm->current_frame.frame_type,
                                    base_qindex, rate_ratio[seg_idx] / avg_ratio);

     // A segment must not land on qindex 0 unless the base is already 0.
     // Q index 0 (lossless) implies 4x4 encoding only, and in AQ mode a segment
     // Q delta is sometimes applied without going back around the rd loop,
     // which could yield an illegal combination of partition size and q.
     if (base_qindex != 0 && base_qindex + q_delta == 0) {
       q_delta = 1 - base_qindex;
     }

     av1_set_segdata(seg, seg_idx, SEG_LVL_ALT_Q, q_delta);
     av1_enable_segfeature(seg, seg_idx, SEG_LVL_ALT_Q);
   }
 }

 // Returns the integer average of the 16-bit luma samples of block `bs` located
 // at (mi_row, mi_col), counting only samples that fall inside the picture.
 // Returns 0 when no sample lies inside the picture.
 int av1_log_block_avg(const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bs,
                       int mi_row, int mi_col) {
   const uint16_t *const src16 = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
   const int stride = x->plane[0].src.stride;

   // Block extent in pixels, clipped to the picture dimensions.
   const int row_start = mi_row << MI_SIZE_LOG2;
   const int col_start = mi_col << MI_SIZE_LOG2;
   int row_end = row_start + MI_SIZE * mi_size_high[bs];
   int col_end = col_start + MI_SIZE * mi_size_wide[bs];
   if (row_end > cpi->common.height) row_end = cpi->common.height;
   if (col_end > cpi->common.width) col_end = cpi->common.width;

   unsigned int total = 0;
   unsigned int count = 0;
   for (int r = row_start; r < row_end; ++r) {
     for (int c = col_start; c < col_end; ++c) {
       total += src16[r * stride + c];
       ++count;
     }
   }

   return (count != 0) ? (int)(total / count) : 0;
 }

 // Energy midpoint that av1_block_wavelet_energy_level() subtracts from the
 // block's log wavelet energy when is_stat_consumption_stage_twopass() is false.
 #define DEFAULT_E_MIDPOINT 10.0

 // Returns the value reported by av1_haar_ac_sad_mxn_uint8_input() for the
 // source block in plane 0, scaled by 256 and normalized by the block's pixel
 // count (a right shift by num_pels_log2_lookup[bs]).
 static unsigned int haar_ac_energy(const MACROBLOCK *x, BLOCK_SIZE bs) {
   const MACROBLOCKD *xd = &x->e_mbd;
   int stride = x->plane[0].src.stride;
   const uint8_t *buf = x->plane[0].src.buf;
   const int num_8x8_cols = block_size_wide[bs] / 8;
   const int num_8x8_rows = block_size_high[bs] / 8;
   const int hbd = is_cur_buf_hbd(xd);

   int64_t var = av1_haar_ac_sad_mxn_uint8_input(buf, stride, hbd, num_8x8_rows,
                                                 num_8x8_cols);

   // Normalize in 64 bits and only then narrow. A cast binds tighter than >>,
   // so casting first would drop the high bits of a large product before the
   // shift is applied.
   return (unsigned int)(((uint64_t)var * 256) >> num_pels_log2_lookup[bs]);
 }

 // Returns log(1 + e), where e is the block's haar_ac_energy().
 static double log_block_wavelet_energy(const MACROBLOCK *x, BLOCK_SIZE bs) {
   return log1p(haar_ac_energy(x, bs));
 }

 // Returns the block's wavelet energy level: its log wavelet energy minus a
 // midpoint, rounded to the nearest integer and clamped to
 // [ENERGY_MIN, ENERGY_MAX]. The midpoint is the frame's average Haar energy in
 // the two-pass stat consumption stage, and DEFAULT_E_MIDPOINT otherwise.
 int av1_block_wavelet_energy_level(const AV1_COMP *cpi, const MACROBLOCK *x,
                                    BLOCK_SIZE bs) {
   double midpoint = DEFAULT_E_MIDPOINT;
   if (is_stat_consumption_stage_twopass(cpi)) {
     midpoint = cpi->twopass_frame.frame_avg_haar_energy;
   }

   const double centered = log_block_wavelet_energy(x, bs) - midpoint;
   const int level = (int)round(centered);
   return clamp(level, ENERGY_MIN, ENERGY_MAX);
 }

 // Returns the q index for a block given its variance/energy level: the frame's
 // base q index plus the delta that av1_compute_qdelta_by_rate() yields for the
 // matching deltaq_rate_ratio[] entry. With perceptual modulation the level is
 // first mapped through SEGMENT_ID(); otherwise it indexes the table directly.
 int av1_compute_q_from_energy_level_deltaq_mode(const AV1_COMP *const cpi,
                                                 int block_var_level) {
   const AV1_COMMON *const cm = &cpi->common;
   const int base_qindex = cm->quant_params.base_qindex;

   int rate_level = block_var_level;
   if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
     ENERGY_IN_BOUNDS(block_var_level);
     rate_level = SEGMENT_ID(block_var_level);
   }

   const int q_delta =
       av1_compute_qdelta_by_rate(cpi, cm->current_frame.frame_type, base_qindex,
                                  deltaq_rate_ratio[rate_level]);

   // Never return qindex 0 unless the base q index is itself 0; use 1 instead.
   int qindex = base_qindex + q_delta;
   if (base_qindex != 0 && qindex == 0) {
     qindex = 1;
   }
   return qindex;
 }

 // qsort() comparison callback for arrays of unsigned int, ascending order.
 // Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
 static int comp_unsigned_int(const void *a, const void *b) {
   const unsigned int lhs = *(const unsigned int *)a;
   const unsigned int rhs = *(const unsigned int *)b;

   if (lhs < rhs) return -1;
   if (lhs > rhs) return 1;
   return 0;
 }

 unsigned int av1_get_variance_boost_block_variance(const AV1_COMP *cpi,
                                                    const MACROBLOCK *x) {
 #define SUPERBLOCK_SIZE 64
 #define SUBBLOCK_SIZE 8
 #define SUBBLOCKS_IN_SB_DIM (SUPERBLOCK_SIZE / SUBBLOCK_SIZE)
 #define SUBBLOCKS_IN_SB (SUBBLOCKS_IN_SB_DIM * SUBBLOCKS_IN_SB_DIM)
 #define SUBBLOCKS_IN_OCTILE (SUBBLOCKS_IN_SB / 8)
   DECLARE_ALIGNED(16, static const uint16_t,
                   av1_highbd_all_zeros[SUBBLOCK_SIZE]) = { 0 };
   DECLARE_ALIGNED(16, static const uint8_t,
                   av1_all_zeros[SUBBLOCK_SIZE]) = { 0 };

   const MACROBLOCKD *xd = &x->e_mbd;
   unsigned int sse;
   // Octile is currently hard-coded and optimized for still pictures. In the
   // future, we might want to expose this as a parameter that can be fine-tuned
   // by the caller.
   // An octile of 5 was chosen because it was found to strike the best balance
   // between quality and consistency. Lower octiles tend to score lower in
   // SSIMU2, while higher octiles tend to harm subjective quality consistency,
   // especially in <1 MP images.
   const int octile = 5;
   const uint8_t *all_zeros = is_cur_buf_hbd(xd)
                                  ? CONVERT_TO_BYTEPTR(av1_highbd_all_zeros)
                                  : av1_all_zeros;
   unsigned int variances[SUBBLOCKS_IN_SB];

   // TODO: bug https://crbug.com/aomedia/375221136 - the current implementation
   // truncates variances to integers during normalization, similar to SVT-AV1's
   // counterpart. A possible improvement would be to use rounding: `(n + 32) /
   // 64`, or just return variances as doubles.
   aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_8X8].vf;
   for (int subb_i = 0; subb_i < SUBBLOCKS_IN_SB_DIM; subb_i++) {
     int i = subb_i * SUBBLOCK_SIZE;
     for (int subb_j = 0; subb_j < SUBBLOCKS_IN_SB_DIM; subb_j++) {
       int j = subb_j * SUBBLOCK_SIZE;
       variances[subb_i * SUBBLOCKS_IN_SB_DIM + subb_j] =
           vf(x->plane[0].src.buf + i * x->plane[0].src.stride + j,
              x->plane[0].src.stride, all_zeros, 0, &sse) /
           64;
     }
   }

   // Order the 8x8 SB values from smallest to largest variance.
   qsort(variances, SUBBLOCKS_IN_SB, sizeof(unsigned int), comp_unsigned_int);

   // Sample three 8x8 variance values: at the specified octile, previous octile,
   // and next octile. Make sure we use the last subblock in each octile as the
   // representative of the octile.
   assert(octile >= 1 && octile <= 8);
   const int middle_index = octile * SUBBLOCKS_IN_OCTILE - 1;
   const int lower_index =
       AOMMAX(SUBBLOCKS_IN_OCTILE - 1, middle_index - SUBBLOCKS_IN_OCTILE);
   const int upper_index =
       AOMMIN(SUBBLOCKS_IN_SB - 1, middle_index + SUBBLOCKS_IN_OCTILE);

   // Weigh the three variances in a 1:2:1 ratio, with rounding (the +2 term).
   // This allows for smoother delta-q transitions among superblocks with
   // mixed-variance features.
   const unsigned int variance =
       (variances[lower_index] + (variances[middle_index] * 2) +
        variances[upper_index] + 2) /
       4;

   return variance;
 }
 #endif  // !CONFIG_REALTIME_ONLY

 // Returns a local-variance score for block (x, bs): the average, over the 4x4
 // sub-blocks inside the frame, of log(1 + v / 16), where v is the value
 // returned by the BLOCK_4X4 variance function against an all-zero reference.
 // The average is capped at 7 and truncated to an integer.
 //
 // This is used for segmentation to avoid situations in which a large block
 // with a gentle gradient gets marked high variance even though each sub-block
 // has a low variance. It lets the same sort of area receive the same segment
 // number regardless of how the partitioning goes.
 int av1_log_block_var(const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bs) {
   DECLARE_ALIGNED(16, static const uint16_t,
                   av1_highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
   DECLARE_ALIGNED(16, static const uint8_t, av1_all_zeros[MAX_SB_SIZE]) = { 0 };

   const MACROBLOCKD *xd = &x->e_mbd;
   const uint8_t *const src = x->plane[0].src.buf;
   const int stride = x->plane[0].src.stride;

   // Trim the block by the amount it extends past the right/bottom frame edge
   // (the mb_to_*_edge values are shifted down by 3 to get pixels).
   int bw = MI_SIZE * mi_size_wide[bs];
   int bh = MI_SIZE * mi_size_high[bs];
   if (xd->mb_to_right_edge < 0) bw -= (-xd->mb_to_right_edge) >> 3;
   if (xd->mb_to_bottom_edge < 0) bh -= (-xd->mb_to_bottom_edge) >> 3;

   // All-zero reference matching the bit depth of the current buffer.
   const uint8_t *const zero_ref = is_cur_buf_hbd(xd)
                                       ? CONVERT_TO_BYTEPTR(av1_highbd_all_zeros)
                                       : av1_all_zeros;

   aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_4X4].vf;
   double log_var_sum = 0;
   unsigned int sse;
   for (int row = 0; row < bh; row += 4) {
     const uint8_t *const src_row = src + row * stride;
     for (int col = 0; col < bw; col += 4) {
       log_var_sum +=
           log1p(vf(src_row + col, stride, zero_ref, 0, &sse) / 16.0);
     }
   }

   // Use average of 4x4 log variance. The range for 8 bit 0 - 9.704121561.
   const double avg = log_var_sum / (bw / 4 * bh / 4);
   return (int)(avg > 7 ? 7 : avg);
 }
	/*
	* Copyright (c) 2016, Alliance for Open Media. All rights reserved.
	*
	* This source code is subject to the terms of the BSD 2 Clause License and
	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
	* was not distributed with this source code in the LICENSE file, you can
	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
	* Media Patent License 1.0 was not distributed with this source code in the
	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
	*/

	#include <math.h>
	#include <stdlib.h>

	#include "aom_ports/mem.h"

	#include "av1/encoder/aq_variance.h"
	#include "av1/common/seg_common.h"
	#include "av1/encoder/encodeframe.h"
	#include "av1/encoder/ratectrl.h"
	#include "av1/encoder/rd.h"
	#include "av1/encoder/segmentation.h"
	#include "av1/encoder/dwt.h"
	#include "config/aom_config.h"

	#if !CONFIG_REALTIME_ONLY
	// Rate ratio for each segment, indexed by segment id. av1_vaq_frame_setup()
	// divides each entry by the entry selected from the frame's average energy
	// before converting it to a per-segment q-index delta.
	static const double rate_ratio[MAX_SEGMENTS] = { 2.2, 1.7, 1.3, 1.0,
	0.9, .8, .7, .6 };

	// Rate ratios consumed by av1_compute_q_from_energy_level_deltaq_mode(),
	// indexed by rate level.
	static const double deltaq_rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0,
	0.75, 1.0, 1.0, 1.0 };
	// Inclusive bounds of the block energy level and the number of levels spanned.
	#define ENERGY_MIN (-4)
	#define ENERGY_MAX (1)
	#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
	// Asserts that an energy level lies within [ENERGY_MIN, ENERGY_MAX].
	#define ENERGY_IN_BOUNDS(energy) \
	assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)

	// Lookup table from energy level (offset by ENERGY_MIN) to segment id.
	static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 };

	// Maps an energy level in [ENERGY_MIN, ENERGY_MAX] to its segment_id entry.
	#define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN]

	// Per-frame setup of the variance-AQ segmentation.
	//
	// If the frame dimensions differ from the previous frame, the segment map is
	// zeroed and segmentation is disabled. Otherwise, on intra-only frames,
	// error-resilient frames, alt-ref refreshes, and golden refreshes that are not
	// a source alt-ref, segmentation is enabled and every segment receives an
	// ALT_Q delta derived from rate_ratio[].
	void av1_vaq_frame_setup(AV1_COMP *cpi) {
	  AV1_COMMON *cm = &cpi->common;
	  const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
	  const int base_qindex = cm->quant_params.base_qindex;
	  struct segmentation *seg = &cm->seg;
	  int i;

	  int resolution_change =
	      cm->prev_frame && (cm->width != cm->prev_frame->width ||
	                         cm->height != cm->prev_frame->height);
	  // Pick the rate ratio of the "average" segment; the index is clamped to the
	  // valid range of rate_ratio[].
	  int avg_energy = (int)(cpi->twopass_frame.mb_av_energy - 2);
	  double avg_ratio;
	  if (avg_energy > 7) avg_energy = 7;
	  if (avg_energy < 0) avg_energy = 0;
	  avg_ratio = rate_ratio[avg_energy];

	  if (resolution_change) {
	    // Resolution changed: reset the map and switch segmentation off.
	    memset(cpi->enc_seg.map, 0, cm->mi_params.mi_rows * cm->mi_params.mi_cols);
	    av1_clearall_segfeatures(seg);
	    av1_disable_segmentation(seg);
	    return;
	  }
	  if (frame_is_intra_only(cm) || cm->features.error_resilient_mode ||
	      refresh_frame->alt_ref_frame ||
	      (refresh_frame->golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
	    cpi->vaq_refresh = 1;

	    av1_enable_segmentation(seg);
	    av1_clearall_segfeatures(seg);

	    for (i = 0; i < MAX_SEGMENTS; ++i) {
	      // Set up avg segment id to be 1.0 and adjust the other segments around
	      // it.
	      int qindex_delta =
	          av1_compute_qdelta_by_rate(cpi, cm->current_frame.frame_type,
	                                     base_qindex, rate_ratio[i] / avg_ratio);

	      // We don't allow qindex 0 in a segment if the base value is not 0.
	      // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment
	      // Q delta is sometimes applied without going back around the rd loop.
	      // This could lead to an illegal combination of partition size and q.
	      if ((base_qindex != 0) && ((base_qindex + qindex_delta) == 0)) {
	        qindex_delta = -base_qindex + 1;
	      }

	      av1_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta);
	      av1_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
	    }
	  }
	}

	// Returns the integer average of the 16-bit luma samples of block `bs` located
	// at (mi_row, mi_col), counting only samples that fall inside the picture.
	// Returns 0 when no sample lies inside the picture.
	int av1_log_block_avg(const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bs,
	                      int mi_row, int mi_col) {
	  unsigned int sum, avg, num_pix;
	  int r, c;
	  const int pic_w = cpi->common.width;
	  const int pic_h = cpi->common.height;
	  const int bw = MI_SIZE * mi_size_wide[bs];
	  const int bh = MI_SIZE * mi_size_high[bs];
	  const uint16_t *x16 = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

	  sum = 0;
	  num_pix = 0;
	  avg = 0;
	  // Top-left corner of the block in pixels.
	  int row = mi_row << MI_SIZE_LOG2;
	  int col = mi_col << MI_SIZE_LOG2;
	  // Accumulate samples, stopping at the block or picture boundary.
	  for (r = row; (r < (row + bh)) && (r < pic_h); r++) {
	    for (c = col; (c < (col + bw)) && (c < pic_w); c++) {
	      sum += *(x16 + r * x->plane[0].src.stride + c);
	      num_pix++;
	    }
	  }
	  if (num_pix != 0) {
	    avg = sum / num_pix;
	  }
	  return avg;
	}

	// Energy midpoint that av1_block_wavelet_energy_level() subtracts from the
	// block's log wavelet energy when is_stat_consumption_stage_twopass() is false.
	#define DEFAULT_E_MIDPOINT 10.0

	// Returns the value reported by av1_haar_ac_sad_mxn_uint8_input() for the
	// source block in plane 0, scaled by 256 and normalized by the block's pixel
	// count (a right shift by num_pels_log2_lookup[bs]).
	static unsigned int haar_ac_energy(const MACROBLOCK *x, BLOCK_SIZE bs) {
	  const MACROBLOCKD *xd = &x->e_mbd;
	  int stride = x->plane[0].src.stride;
	  const uint8_t *buf = x->plane[0].src.buf;
	  const int num_8x8_cols = block_size_wide[bs] / 8;
	  const int num_8x8_rows = block_size_high[bs] / 8;
	  const int hbd = is_cur_buf_hbd(xd);

	  int64_t var = av1_haar_ac_sad_mxn_uint8_input(buf, stride, hbd, num_8x8_rows,
	                                                num_8x8_cols);

	  // Normalize in 64 bits and only then narrow. A cast binds tighter than >>,
	  // so casting first would drop the high bits of a large product before the
	  // shift is applied.
	  return (unsigned int)(((uint64_t)var * 256) >> num_pels_log2_lookup[bs]);
	}

	// Returns log(1 + e), where e is the block's haar_ac_energy().
	static double log_block_wavelet_energy(const MACROBLOCK *x, BLOCK_SIZE bs) {
	  return log1p(haar_ac_energy(x, bs));
	}

	// Returns the block's wavelet energy level: its log wavelet energy minus a
	// midpoint, rounded to the nearest integer and clamped to
	// [ENERGY_MIN, ENERGY_MAX]. The midpoint is the frame's average Haar energy in
	// the two-pass stat consumption stage, and DEFAULT_E_MIDPOINT otherwise.
	int av1_block_wavelet_energy_level(const AV1_COMP *cpi, const MACROBLOCK *x,
	                                   BLOCK_SIZE bs) {
	  double energy, energy_midpoint;
	  energy_midpoint = (is_stat_consumption_stage_twopass(cpi))
	                        ? cpi->twopass_frame.frame_avg_haar_energy
	                        : DEFAULT_E_MIDPOINT;
	  energy = log_block_wavelet_energy(x, bs) - energy_midpoint;
	  return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
	}

	// Returns the q index for a block given its variance/energy level: the frame's
	// base q index plus the delta that av1_compute_qdelta_by_rate() yields for the
	// matching deltaq_rate_ratio[] entry. With perceptual modulation the level is
	// first mapped through SEGMENT_ID(); otherwise it indexes the table directly.
	int av1_compute_q_from_energy_level_deltaq_mode(const AV1_COMP *const cpi,
	                                                int block_var_level) {
	  const AV1_COMMON *const cm = &cpi->common;
	  const int base_qindex = cm->quant_params.base_qindex;

	  int rate_level = block_var_level;
	  if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
	    ENERGY_IN_BOUNDS(block_var_level);
	    rate_level = SEGMENT_ID(block_var_level);
	  }

	  const int q_delta =
	      av1_compute_qdelta_by_rate(cpi, cm->current_frame.frame_type, base_qindex,
	                                 deltaq_rate_ratio[rate_level]);

	  // Never return qindex 0 unless the base q index is itself 0; use 1 instead.
	  int qindex = base_qindex + q_delta;
	  if (base_qindex != 0 && qindex == 0) {
	    qindex = 1;
	  }
	  return qindex;
	}

	// Comparer used by qsort() to order an array of unsigned int from smallest to
	// largest. Returns -1, 0 or 1 when *a is less than, equal to or greater than
	// *b.
	static int comp_unsigned_int(const void *a, const void *b) {
	  unsigned int arg1 = *(const unsigned int *)a;
	  unsigned int arg2 = *(const unsigned int *)b;

	  // Compare without subtracting, so large unsigned values cannot wrap.
	  return (arg1 > arg2) - (arg1 < arg2);
	}

	unsigned int av1_get_variance_boost_block_variance(const AV1_COMP *cpi,
	const MACROBLOCK *x) {
	#define SUPERBLOCK_SIZE 64
	#define SUBBLOCK_SIZE 8
	#define SUBBLOCKS_IN_SB_DIM (SUPERBLOCK_SIZE / SUBBLOCK_SIZE)
	#define SUBBLOCKS_IN_SB (SUBBLOCKS_IN_SB_DIM * SUBBLOCKS_IN_SB_DIM)
	#define SUBBLOCKS_IN_OCTILE (SUBBLOCKS_IN_SB / 8)
	DECLARE_ALIGNED(16, static const uint16_t,
	av1_highbd_all_zeros[SUBBLOCK_SIZE]) = { 0 };
	DECLARE_ALIGNED(16, static const uint8_t,
	av1_all_zeros[SUBBLOCK_SIZE]) = { 0 };

	const MACROBLOCKD *xd = &x->e_mbd;
	unsigned int sse;
	// Octile is currently hard-coded and optimized for still pictures. In the
	// future, we might want to expose this as a parameter that can be fine-tuned
	// by the caller.
	// An octile of 5 was chosen because it was found to strike the best balance
	// between quality and consistency. Lower octiles tend to score lower in
	// SSIMU2, while higher octiles tend to harm subjective quality consistency,
	// especially in <1 MP images.
	const int octile = 5;
	const uint8_t *all_zeros = is_cur_buf_hbd(xd)
	? CONVERT_TO_BYTEPTR(av1_highbd_all_zeros)
	: av1_all_zeros;
	unsigned int variances[SUBBLOCKS_IN_SB];

	// TODO: bug https://crbug.com/aomedia/375221136 - the current implementation
	// truncates variances to integers during normalization, similar to SVT-AV1's
	// counterpart. A possible improvement would be to use rounding: `(n + 32) /
	// 64`, or just return variances as doubles.
	aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_8X8].vf;
	for (int subb_i = 0; subb_i < SUBBLOCKS_IN_SB_DIM; subb_i++) {
	int i = subb_i * SUBBLOCK_SIZE;
	for (int subb_j = 0; subb_j < SUBBLOCKS_IN_SB_DIM; subb_j++) {
	int j = subb_j * SUBBLOCK_SIZE;
	variances[subb_i * SUBBLOCKS_IN_SB_DIM + subb_j] =
	vf(x->plane[0].src.buf + i * x->plane[0].src.stride + j,
	x->plane[0].src.stride, all_zeros, 0, &sse) /
	64;
	}
	}

	// Order the 8x8 SB values from smallest to largest variance.
	qsort(variances, SUBBLOCKS_IN_SB, sizeof(unsigned int), comp_unsigned_int);

	// Sample three 8x8 variance values: at the specified octile, previous octile,
	// and next octile. Make sure we use the last subblock in each octile as the
	// representative of the octile.
	assert(octile >= 1 && octile <= 8);
	const int middle_index = octile * SUBBLOCKS_IN_OCTILE - 1;
	const int lower_index =
	AOMMAX(SUBBLOCKS_IN_OCTILE - 1, middle_index - SUBBLOCKS_IN_OCTILE);
	const int upper_index =
	AOMMIN(SUBBLOCKS_IN_SB - 1, middle_index + SUBBLOCKS_IN_OCTILE);

	// Weigh the three variances in a 1:2:1 ratio, with rounding (the +2 term).
	// This allows for smoother delta-q transitions among superblocks with
	// mixed-variance features.
	const unsigned int variance =
	(variances[lower_index] + (variances[middle_index] * 2) +
	variances[upper_index] + 2) /
	4;

	return variance;
	}
	#endif // !CONFIG_REALTIME_ONLY

	// Returns a local-variance score for block (x, bs): the average, over the 4x4
	// sub-blocks inside the frame, of log(1 + v / 16), where v is the value
	// returned by the BLOCK_4X4 variance function against an all-zero reference.
	// The average is capped at 7 and truncated to an integer.
	//
	// This is used for segmentation to avoid situations in which a large block
	// with a gentle gradient gets marked high variance even though each sub-block
	// has a low variance. It lets the same sort of area receive the same segment
	// number regardless of how the partitioning goes.
	int av1_log_block_var(const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bs) {
	  DECLARE_ALIGNED(16, static const uint16_t,
	                  av1_highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
	  DECLARE_ALIGNED(16, static const uint8_t, av1_all_zeros[MAX_SB_SIZE]) = { 0 };

	  const MACROBLOCKD *xd = &x->e_mbd;
	  double var = 0;
	  unsigned int sse;
	  int i, j;

	  // Amount by which the block extends past the right/bottom frame edge (the
	  // mb_to_*_edge values are shifted down by 3 to get pixels).
	  int right_overflow =
	      (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
	  int bottom_overflow =
	      (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;

	  const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow;
	  const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow;

	  aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_4X4].vf;
	  for (i = 0; i < bh; i += 4) {
	    for (j = 0; j < bw; j += 4) {
	      // The all-zero reference must match the bit depth of the source buffer.
	      if (is_cur_buf_hbd(xd)) {
	        var += log1p(vf(x->plane[0].src.buf + i * x->plane[0].src.stride + j,
	                        x->plane[0].src.stride,
	                        CONVERT_TO_BYTEPTR(av1_highbd_all_zeros), 0, &sse) /
	                     16.0);
	      } else {
	        var += log1p(vf(x->plane[0].src.buf + i * x->plane[0].src.stride + j,
	                        x->plane[0].src.stride, av1_all_zeros, 0, &sse) /
	                     16.0);
	      }
	    }
	  }
	  // Use average of 4x4 log variance. The range for 8 bit 0 - 9.704121561.
	  var /= (bw / 4 * bh / 4);
	  if (var > 7) var = 7;

	  return (int)(var);
	}