aom_dsp/inv_txfm.c - aom - Git at Google

 /*
  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  * was not distributed with this source code in the LICENSE file, you can
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

 #include <math.h>
 #include <string.h>

 #include "./aom_dsp_rtcd.h"
 #include "aom_dsp/inv_txfm.h"

 /* Inverse 4x4 Walsh-Hadamard transform over all 16 coefficients of `input`;
  * the result is combined into the 4x4 block of 8-bit samples at `dest`
  * (consecutive rows are `stride` elements apart) through clip_pixel_add().
  *
  * 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
  * 0.5 shifts per pixel. */
 void aom_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
   tran_low_t pass1[16];
   int k;

   /* First pass: one 1-D transform per group of four consecutive
      coefficients, each scaled down by UNIT_QUANT_SHIFT beforehand. */
   for (k = 0; k < 4; ++k) {
     const tran_low_t *src = input + 4 * k;
     tran_low_t *dst = pass1 + 4 * k;
     const tran_high_t in0 = src[0] >> UNIT_QUANT_SHIFT;
     const tran_high_t in1 = src[1] >> UNIT_QUANT_SHIFT;
     const tran_high_t in2 = src[2] >> UNIT_QUANT_SHIFT;
     const tran_high_t in3 = src[3] >> UNIT_QUANT_SHIFT;
     const tran_high_t sum = in0 + in1;
     const tran_high_t diff = in2 - in3;
     const tran_high_t half = (sum - diff) >> 1;
     const tran_high_t out1 = half - in3;
     const tran_high_t out2 = half - in1;
     const tran_high_t out0 = sum - out1;
     const tran_high_t out3 = diff + out2;

     dst[0] = WRAPLOW(out0);
     dst[1] = WRAPLOW(out1);
     dst[2] = WRAPLOW(out2);
     dst[3] = WRAPLOW(out3);
   }

   /* Second pass: same butterfly, reading the intermediate with a step of
      four and writing down one column of the destination block. */
   for (k = 0; k < 4; ++k) {
     const tran_low_t *src = pass1 + k;
     uint8_t *px = dest + k;
     const tran_high_t in0 = src[4 * 0];
     const tran_high_t in1 = src[4 * 1];
     const tran_high_t in2 = src[4 * 2];
     const tran_high_t in3 = src[4 * 3];
     const tran_high_t sum = in0 + in1;
     const tran_high_t diff = in2 - in3;
     const tran_high_t half = (sum - diff) >> 1;
     const tran_high_t out1 = half - in3;
     const tran_high_t out2 = half - in1;
     const tran_high_t out0 = sum - out1;
     const tran_high_t out3 = diff + out2;

     px[stride * 0] = clip_pixel_add(px[stride * 0], WRAPLOW(out0));
     px[stride * 1] = clip_pixel_add(px[stride * 1], WRAPLOW(out1));
     px[stride * 2] = clip_pixel_add(px[stride * 2], WRAPLOW(out2));
     px[stride * 3] = clip_pixel_add(px[stride * 3], WRAPLOW(out3));
   }
 }

 /* Inverse 4x4 Walsh-Hadamard transform that reads only in[0]; the result
  * is combined into the 4x4 block of 8-bit samples at `dest` (rows are
  * `dest_stride` elements apart) through clip_pixel_add(). */
 void aom_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
   tran_low_t first_pass[4];
   const tran_high_t dc = in[0] >> UNIT_QUANT_SHIFT;
   const tran_high_t dc_half = dc >> 1;
   const tran_high_t dc_rest = dc - dc_half;
   int col;

   /* First pass: element 0 receives the remainder, the other three share
      the halved value. */
   first_pass[0] = WRAPLOW(dc_rest);
   first_pass[3] = WRAPLOW(dc_half);
   first_pass[2] = first_pass[3];
   first_pass[1] = first_pass[2];

   /* Second pass: split every first-pass value the same way and apply it
      down one column of the destination. */
   for (col = 0; col < 4; ++col) {
     const tran_high_t lower = first_pass[col] >> 1;
     const tran_high_t upper = first_pass[col] - lower;
     uint8_t *px = dest + col;

     px[dest_stride * 0] = clip_pixel_add(px[dest_stride * 0], upper);
     px[dest_stride * 1] = clip_pixel_add(px[dest_stride * 1], lower);
     px[dest_stride * 2] = clip_pixel_add(px[dest_stride * 2], lower);
     px[dest_stride * 3] = clip_pixel_add(px[dest_stride * 3], lower);
   }
 }

 /* 1-D 4-point inverse DCT: reads input[0..3] and writes output[0..3].
  * Every input is read before any output is stored. */
 void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
   // stage 1: two rotations, each product rounded by dct_const_round_shift().
   const tran_high_t even_sum = (input[0] + input[2]) * cospi_16_64;
   const tran_high_t even_diff = (input[0] - input[2]) * cospi_16_64;
   const tran_high_t odd_lo = input[1] * cospi_24_64 - input[3] * cospi_8_64;
   const tran_high_t odd_hi = input[1] * cospi_8_64 + input[3] * cospi_24_64;
   const tran_low_t e0 = WRAPLOW(dct_const_round_shift(even_sum));
   const tran_low_t e1 = WRAPLOW(dct_const_round_shift(even_diff));
   const tran_low_t o0 = WRAPLOW(dct_const_round_shift(odd_lo));
   const tran_low_t o1 = WRAPLOW(dct_const_round_shift(odd_hi));

   // stage 2: combine the even and odd halves.
   output[0] = WRAPLOW(e0 + o1);
   output[1] = WRAPLOW(e1 + o0);
   output[2] = WRAPLOW(e1 - o0);
   output[3] = WRAPLOW(e0 - o1);
 }

 /* 2-D 4x4 inverse DCT of the 16 coefficients in `input`, built from two
  * passes of aom_idct4_c(); the rounded result is combined into the 4x4
  * block of 8-bit samples at `dest` (rows `stride` elements apart). */
 void aom_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
   tran_low_t pass1[4 * 4];
   tran_low_t col_in[4], col_out[4];
   int r, c;

   // Rows: transform each group of four consecutive coefficients.
   for (r = 0; r < 4; ++r) aom_idct4_c(input + 4 * r, pass1 + 4 * r);

   // Columns: gather one column, transform it, then round off the low four
   // bits and apply the value to the destination.
   for (c = 0; c < 4; ++c) {
     col_in[0] = pass1[0 * 4 + c];
     col_in[1] = pass1[1 * 4 + c];
     col_in[2] = pass1[2 * 4 + c];
     col_in[3] = pass1[3 * 4 + c];
     aom_idct4_c(col_in, col_out);
     for (r = 0; r < 4; ++r) {
       uint8_t *px = &dest[r * stride + c];
       *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 4));
     }
   }
 }

 /* 4x4 inverse DCT that reads only input[0]: a single offset is derived
  * from it and applied to all 16 samples of the block at `dest` (rows
  * `dest_stride` elements apart). Nothing is written when the offset is 0. */
 void aom_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
                          int dest_stride) {
   tran_low_t dc;
   tran_high_t delta;
   uint8_t *row = dest;
   int r, c;

   // Scale by cospi_16_64 twice (once per 1-D pass), rounding after each.
   dc = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
   dc = WRAPLOW(dct_const_round_shift(dc * cospi_16_64));
   delta = ROUND_POWER_OF_TWO(dc, 4);

   if (delta != 0) {
     for (r = 0; r < 4; ++r, row += dest_stride) {
       for (c = 0; c < 4; ++c) row[c] = clip_pixel_add(row[c], delta);
     }
   }
 }

 /* 1-D 4-point inverse ADST: reads input[0..3] and writes output[0..3].
  * An all-zero input short-circuits to an all-zero output. */
 void aom_iadst4_c(const tran_low_t *input, tran_low_t *output) {
   const tran_low_t x0 = input[0];
   const tran_low_t x1 = input[1];
   const tran_low_t x2 = input[2];
   const tran_low_t x3 = input[3];

   if ((x0 | x1 | x2 | x3) == 0) {
     output[0] = output[1] = output[2] = output[3] = 0;
   } else {
     const tran_high_t p1_x0 = sinpi_1_9 * x0;
     const tran_high_t p2_x0 = sinpi_2_9 * x0;
     const tran_high_t p3_x1 = sinpi_3_9 * x1;
     const tran_high_t p4_x2 = sinpi_4_9 * x2;
     const tran_high_t p1_x2 = sinpi_1_9 * x2;
     const tran_high_t p2_x3 = sinpi_2_9 * x3;
     const tran_high_t p4_x3 = sinpi_4_9 * x3;
     const tran_high_t mix = WRAPLOW(x0 - x2 + x3);

     const tran_high_t t0 = p1_x0 + p4_x2 + p2_x3;
     const tran_high_t t1 = p2_x0 - p1_x2 - p4_x3;
     const tran_high_t t2 = sinpi_3_9 * mix;
     const tran_high_t t3 = p3_x1;

     // 1-D transform scaling factor is sqrt(2).
     // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
     // + 1b (addition) = 29b.
     // Hence the output bit depth is 15b.
     output[0] = WRAPLOW(dct_const_round_shift(t0 + t3));
     output[1] = WRAPLOW(dct_const_round_shift(t1 + t3));
     output[2] = WRAPLOW(dct_const_round_shift(t2));
     output[3] = WRAPLOW(dct_const_round_shift(t0 + t1 - t3));
   }
 }

 /* High-bitdepth inverse 4x4 Walsh-Hadamard transform over all 16
  * coefficients of `input`. `dest8` is turned into a uint16_t pointer with
  * CONVERT_TO_SHORTPTR(); the result is combined into that 4x4 block (rows
  * `stride` elements apart) through highbd_clip_pixel_add() at depth `bd`.
  *
  * 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
  * 0.5 shifts per pixel. */
 void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int bd) {
   tran_low_t pass1[16];
   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
   int k;

   /* First pass: one 1-D transform per group of four consecutive
      coefficients, each scaled down by UNIT_QUANT_SHIFT beforehand. */
   for (k = 0; k < 4; ++k) {
     const tran_low_t *src = input + 4 * k;
     tran_low_t *dst = pass1 + 4 * k;
     const tran_high_t in0 = src[0] >> UNIT_QUANT_SHIFT;
     const tran_high_t in1 = src[1] >> UNIT_QUANT_SHIFT;
     const tran_high_t in2 = src[2] >> UNIT_QUANT_SHIFT;
     const tran_high_t in3 = src[3] >> UNIT_QUANT_SHIFT;
     const tran_high_t sum = in0 + in1;
     const tran_high_t diff = in2 - in3;
     const tran_high_t half = (sum - diff) >> 1;
     const tran_high_t out1 = half - in3;
     const tran_high_t out2 = half - in1;
     const tran_high_t out0 = sum - out1;
     const tran_high_t out3 = diff + out2;

     dst[0] = HIGHBD_WRAPLOW(out0, bd);
     dst[1] = HIGHBD_WRAPLOW(out1, bd);
     dst[2] = HIGHBD_WRAPLOW(out2, bd);
     dst[3] = HIGHBD_WRAPLOW(out3, bd);
   }

   /* Second pass: same butterfly, reading the intermediate with a step of
      four and writing down one column of the destination block. */
   for (k = 0; k < 4; ++k) {
     const tran_low_t *src = pass1 + k;
     uint16_t *px = dest + k;
     const tran_high_t in0 = src[4 * 0];
     const tran_high_t in1 = src[4 * 1];
     const tran_high_t in2 = src[4 * 2];
     const tran_high_t in3 = src[4 * 3];
     const tran_high_t sum = in0 + in1;
     const tran_high_t diff = in2 - in3;
     const tran_high_t half = (sum - diff) >> 1;
     const tran_high_t out1 = half - in3;
     const tran_high_t out2 = half - in1;
     const tran_high_t out0 = sum - out1;
     const tran_high_t out3 = diff + out2;

     px[stride * 0] =
         highbd_clip_pixel_add(px[stride * 0], HIGHBD_WRAPLOW(out0, bd), bd);
     px[stride * 1] =
         highbd_clip_pixel_add(px[stride * 1], HIGHBD_WRAPLOW(out1, bd), bd);
     px[stride * 2] =
         highbd_clip_pixel_add(px[stride * 2], HIGHBD_WRAPLOW(out2, bd), bd);
     px[stride * 3] =
         highbd_clip_pixel_add(px[stride * 3], HIGHBD_WRAPLOW(out3, bd), bd);
   }
 }

 /* High-bitdepth inverse 4x4 Walsh-Hadamard transform that reads only
  * in[0]. `dest8` is turned into a uint16_t pointer with
  * CONVERT_TO_SHORTPTR(); the result is combined into that 4x4 block (rows
  * `dest_stride` elements apart) through highbd_clip_pixel_add() at depth
  * `bd`. */
 void aom_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
                                 int dest_stride, int bd) {
   tran_low_t first_pass[4];
   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
   const tran_high_t dc = in[0] >> UNIT_QUANT_SHIFT;
   const tran_high_t dc_half = dc >> 1;
   const tran_high_t dc_rest = dc - dc_half;
   int col;
   /* NOTE(review): bd is used below; this cast presumably silences an
      unused-parameter warning in some build configuration - confirm. */
   (void)bd;

   /* First pass: element 0 receives the remainder, the other three share
      the halved value. */
   first_pass[0] = HIGHBD_WRAPLOW(dc_rest, bd);
   first_pass[3] = HIGHBD_WRAPLOW(dc_half, bd);
   first_pass[2] = first_pass[3];
   first_pass[1] = first_pass[2];

   /* Second pass: split every first-pass value the same way and apply it
      down one column of the destination. */
   for (col = 0; col < 4; ++col) {
     const tran_high_t lower = first_pass[col] >> 1;
     const tran_high_t upper = first_pass[col] - lower;
     uint16_t *px = dest + col;

     px[dest_stride * 0] = highbd_clip_pixel_add(px[dest_stride * 0], upper, bd);
     px[dest_stride * 1] = highbd_clip_pixel_add(px[dest_stride * 1], lower, bd);
     px[dest_stride * 2] = highbd_clip_pixel_add(px[dest_stride * 2], lower, bd);
     px[dest_stride * 3] = highbd_clip_pixel_add(px[dest_stride * 3], lower, bd);
   }
 }
	/*
	* Copyright (c) 2016, Alliance for Open Media. All rights reserved
	*
	* This source code is subject to the terms of the BSD 2 Clause License and
	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
	* was not distributed with this source code in the LICENSE file, you can
	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
	* Media Patent License 1.0 was not distributed with this source code in the
	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
	*/

	#include <math.h>
	#include <string.h>

	#include "./aom_dsp_rtcd.h"
	#include "aom_dsp/inv_txfm.h"

	/* Inverse 4x4 Walsh-Hadamard transform over all 16 coefficients of `input`;
	 * the result is combined into the 4x4 block of 8-bit samples at `dest`
	 * (consecutive rows are `stride` elements apart) through clip_pixel_add().
	 *
	 * `input` and `dest` are pointers: the body indexes both, so the scalar
	 * declarations this copy previously carried could not compile.
	 *
	 * 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
	 * 0.5 shifts per pixel. */
	void aom_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
	  int i;
	  tran_low_t output[16];
	  tran_high_t a1, b1, c1, d1, e1;
	  const tran_low_t *ip = input;
	  tran_low_t *op = output;

	  /* First pass: transform each group of four consecutive coefficients,
	     scaling every coefficient down by UNIT_QUANT_SHIFT first. */
	  for (i = 0; i < 4; i++) {
	    a1 = ip[0] >> UNIT_QUANT_SHIFT;
	    c1 = ip[1] >> UNIT_QUANT_SHIFT;
	    d1 = ip[2] >> UNIT_QUANT_SHIFT;
	    b1 = ip[3] >> UNIT_QUANT_SHIFT;
	    a1 += c1;
	    d1 -= b1;
	    e1 = (a1 - d1) >> 1;
	    b1 = e1 - b1;
	    c1 = e1 - c1;
	    a1 -= b1;
	    d1 += c1;
	    op[0] = WRAPLOW(a1);
	    op[1] = WRAPLOW(b1);
	    op[2] = WRAPLOW(c1);
	    op[3] = WRAPLOW(d1);
	    ip += 4;
	    op += 4;
	  }

	  /* Second pass: same butterfly over the intermediate, read with a step
	     of four and written down one column of the destination. */
	  ip = output;
	  for (i = 0; i < 4; i++) {
	    a1 = ip[4 * 0];
	    c1 = ip[4 * 1];
	    d1 = ip[4 * 2];
	    b1 = ip[4 * 3];
	    a1 += c1;
	    d1 -= b1;
	    e1 = (a1 - d1) >> 1;
	    b1 = e1 - b1;
	    c1 = e1 - c1;
	    a1 -= b1;
	    d1 += c1;
	    dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1));
	    dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1));
	    dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1));
	    dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1));

	    ip++;
	    dest++;
	  }
	}

	/* Inverse 4x4 Walsh-Hadamard transform that reads only in[0]; the result
	 * is combined into the 4x4 block of 8-bit samples at `dest` (rows are
	 * `dest_stride` elements apart) through clip_pixel_add().
	 *
	 * `in` and `dest` are pointers: the body indexes both, so the scalar
	 * declarations this copy previously carried could not compile. */
	void aom_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
	  int i;
	  tran_high_t a1, e1;
	  tran_low_t tmp[4];
	  const tran_low_t *ip = in;
	  tran_low_t *op = tmp;

	  /* First pass: element 0 receives the remainder, the other three share
	     the halved value. */
	  a1 = ip[0] >> UNIT_QUANT_SHIFT;
	  e1 = a1 >> 1;
	  a1 -= e1;
	  op[0] = WRAPLOW(a1);
	  op[1] = op[2] = op[3] = WRAPLOW(e1);

	  /* Second pass: split each first-pass value the same way and apply it
	     down one column of the destination. */
	  ip = tmp;
	  for (i = 0; i < 4; i++) {
	    e1 = ip[0] >> 1;
	    a1 = ip[0] - e1;
	    dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1);
	    dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1);
	    dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1);
	    dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1);
	    ip++;
	    dest++;
	  }
	}

	/* 1-D 4-point inverse DCT: reads input[0..3] and writes output[0..3].
	 * Every input is read before any output is stored.
	 *
	 * `input` and `output` are pointers: the body indexes both, so the scalar
	 * declarations this copy previously carried could not compile. */
	void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
	  tran_low_t step[4];
	  tran_high_t temp1, temp2;
	  // stage 1
	  temp1 = (input[0] + input[2]) * cospi_16_64;
	  temp2 = (input[0] - input[2]) * cospi_16_64;
	  step[0] = WRAPLOW(dct_const_round_shift(temp1));
	  step[1] = WRAPLOW(dct_const_round_shift(temp2));
	  temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
	  temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
	  step[2] = WRAPLOW(dct_const_round_shift(temp1));
	  step[3] = WRAPLOW(dct_const_round_shift(temp2));

	  // stage 2
	  output[0] = WRAPLOW(step[0] + step[3]);
	  output[1] = WRAPLOW(step[1] + step[2]);
	  output[2] = WRAPLOW(step[1] - step[2]);
	  output[3] = WRAPLOW(step[0] - step[3]);
	}

	/* 2-D 4x4 inverse DCT of the 16 coefficients in `input`, built from two
	 * passes of aom_idct4_c(); the rounded result is combined into the 4x4
	 * block of 8-bit samples at `dest` (rows `stride` elements apart).
	 *
	 * `input` and `dest` are pointers: the body advances and indexes them, so
	 * the scalar declarations this copy previously carried could not compile. */
	void aom_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
	  tran_low_t out[4 * 4];
	  tran_low_t *outptr = out;
	  int i, j;
	  tran_low_t temp_in[4], temp_out[4];

	  // Rows
	  for (i = 0; i < 4; ++i) {
	    aom_idct4_c(input, outptr);
	    input += 4;
	    outptr += 4;
	  }

	  // Columns
	  for (i = 0; i < 4; ++i) {
	    for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
	    aom_idct4_c(temp_in, temp_out);
	    for (j = 0; j < 4; ++j) {
	      // Round off the low four bits before applying to the sample.
	      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
	                                            ROUND_POWER_OF_TWO(temp_out[j], 4));
	    }
	  }
	}

	/* 4x4 inverse DCT that reads only input[0]: a single offset is derived
	 * from it and applied to all 16 samples of the block at `dest` (rows
	 * `dest_stride` elements apart). Returns without writing when the offset
	 * is 0.
	 *
	 * `input` and `dest` are pointers: the body indexes both, so the scalar
	 * declarations this copy previously carried could not compile. */
	void aom_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
	                         int dest_stride) {
	  int i;
	  tran_high_t a1;
	  // Scale by cospi_16_64 twice (once per 1-D pass), rounding after each.
	  tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
	  out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
	  a1 = ROUND_POWER_OF_TWO(out, 4);

	  if (a1 == 0) return;

	  for (i = 0; i < 4; i++) {
	    dest[0] = clip_pixel_add(dest[0], a1);
	    dest[1] = clip_pixel_add(dest[1], a1);
	    dest[2] = clip_pixel_add(dest[2], a1);
	    dest[3] = clip_pixel_add(dest[3], a1);
	    dest += dest_stride;
	  }
	}

	/* 1-D 4-point inverse ADST: reads input[0..3] and writes output[0..3].
	 * An all-zero input short-circuits to an all-zero output.
	 *
	 * `input` and `output` are pointers: the body indexes both, so the scalar
	 * declarations this copy previously carried could not compile. The zero
	 * test uses plain bitwise OR; the escaped `\|` form was not valid C. */
	void aom_iadst4_c(const tran_low_t *input, tran_low_t *output) {
	  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;

	  tran_low_t x0 = input[0];
	  tran_low_t x1 = input[1];
	  tran_low_t x2 = input[2];
	  tran_low_t x3 = input[3];

	  if (!(x0 | x1 | x2 | x3)) {
	    output[0] = output[1] = output[2] = output[3] = 0;
	    return;
	  }

	  s0 = sinpi_1_9 * x0;
	  s1 = sinpi_2_9 * x0;
	  s2 = sinpi_3_9 * x1;
	  s3 = sinpi_4_9 * x2;
	  s4 = sinpi_1_9 * x2;
	  s5 = sinpi_2_9 * x3;
	  s6 = sinpi_4_9 * x3;
	  s7 = WRAPLOW(x0 - x2 + x3);

	  s0 = s0 + s3 + s5;
	  s1 = s1 - s4 - s6;
	  s3 = s2;
	  s2 = sinpi_3_9 * s7;

	  // 1-D transform scaling factor is sqrt(2).
	  // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
	  // + 1b (addition) = 29b.
	  // Hence the output bit depth is 15b.
	  output[0] = WRAPLOW(dct_const_round_shift(s0 + s3));
	  output[1] = WRAPLOW(dct_const_round_shift(s1 + s3));
	  output[2] = WRAPLOW(dct_const_round_shift(s2));
	  output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3));
	}

	/* High-bitdepth inverse 4x4 Walsh-Hadamard transform over all 16
	 * coefficients of `input`. `dest8` is turned into a uint16_t pointer with
	 * CONVERT_TO_SHORTPTR(); the result is combined into that 4x4 block (rows
	 * `stride` elements apart) through highbd_clip_pixel_add() at depth `bd`.
	 *
	 * `input` and `dest8` are pointers: the body indexes the former and
	 * converts the latter, so the scalar declarations this copy previously
	 * carried could not compile.
	 *
	 * 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
	 * 0.5 shifts per pixel. */
	void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
	                                 int stride, int bd) {
	  int i;
	  tran_low_t output[16];
	  tran_high_t a1, b1, c1, d1, e1;
	  const tran_low_t *ip = input;
	  tran_low_t *op = output;
	  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

	  /* First pass: transform each group of four consecutive coefficients,
	     scaling every coefficient down by UNIT_QUANT_SHIFT first. */
	  for (i = 0; i < 4; i++) {
	    a1 = ip[0] >> UNIT_QUANT_SHIFT;
	    c1 = ip[1] >> UNIT_QUANT_SHIFT;
	    d1 = ip[2] >> UNIT_QUANT_SHIFT;
	    b1 = ip[3] >> UNIT_QUANT_SHIFT;
	    a1 += c1;
	    d1 -= b1;
	    e1 = (a1 - d1) >> 1;
	    b1 = e1 - b1;
	    c1 = e1 - c1;
	    a1 -= b1;
	    d1 += c1;
	    op[0] = HIGHBD_WRAPLOW(a1, bd);
	    op[1] = HIGHBD_WRAPLOW(b1, bd);
	    op[2] = HIGHBD_WRAPLOW(c1, bd);
	    op[3] = HIGHBD_WRAPLOW(d1, bd);
	    ip += 4;
	    op += 4;
	  }

	  /* Second pass: same butterfly over the intermediate, read with a step
	     of four and written down one column of the destination. */
	  ip = output;
	  for (i = 0; i < 4; i++) {
	    a1 = ip[4 * 0];
	    c1 = ip[4 * 1];
	    d1 = ip[4 * 2];
	    b1 = ip[4 * 3];
	    a1 += c1;
	    d1 -= b1;
	    e1 = (a1 - d1) >> 1;
	    b1 = e1 - b1;
	    c1 = e1 - c1;
	    a1 -= b1;
	    d1 += c1;
	    dest[stride * 0] =
	        highbd_clip_pixel_add(dest[stride * 0], HIGHBD_WRAPLOW(a1, bd), bd);
	    dest[stride * 1] =
	        highbd_clip_pixel_add(dest[stride * 1], HIGHBD_WRAPLOW(b1, bd), bd);
	    dest[stride * 2] =
	        highbd_clip_pixel_add(dest[stride * 2], HIGHBD_WRAPLOW(c1, bd), bd);
	    dest[stride * 3] =
	        highbd_clip_pixel_add(dest[stride * 3], HIGHBD_WRAPLOW(d1, bd), bd);

	    ip++;
	    dest++;
	  }
	}

	/* High-bitdepth inverse 4x4 Walsh-Hadamard transform that reads only
	 * in[0]. `dest8` is turned into a uint16_t pointer with
	 * CONVERT_TO_SHORTPTR(); the result is combined into that 4x4 block (rows
	 * `dest_stride` elements apart) through highbd_clip_pixel_add() at depth
	 * `bd`.
	 *
	 * `in` and `dest8` are pointers: the body indexes the former and converts
	 * the latter, so the scalar declarations this copy previously carried
	 * could not compile. */
	void aom_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
	                                int dest_stride, int bd) {
	  int i;
	  tran_high_t a1, e1;
	  tran_low_t tmp[4];
	  const tran_low_t *ip = in;
	  tran_low_t *op = tmp;
	  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
	  /* NOTE(review): bd is used below; this cast presumably silences an
	     unused-parameter warning in some build configuration - confirm. */
	  (void)bd;

	  /* First pass: element 0 receives the remainder, the other three share
	     the halved value. */
	  a1 = ip[0] >> UNIT_QUANT_SHIFT;
	  e1 = a1 >> 1;
	  a1 -= e1;
	  op[0] = HIGHBD_WRAPLOW(a1, bd);
	  op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd);

	  /* Second pass: split each first-pass value the same way and apply it
	     down one column of the destination. */
	  ip = tmp;
	  for (i = 0; i < 4; i++) {
	    e1 = ip[0] >> 1;
	    a1 = ip[0] - e1;
	    dest[dest_stride * 0] =
	        highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd);
	    dest[dest_stride * 1] =
	        highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd);
	    dest[dest_stride * 2] =
	        highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd);
	    dest[dest_stride * 3] =
	        highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd);
	    ip++;
	    dest++;
	  }
	}