Film grain synthesis algorithm
Change-Id: I7758fc79c1b6709c630f5641705dfd07fc4478e8
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index e21a6d9..8ffb7ff 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -293,6 +293,12 @@
"${AOM_ROOT}/aom_dsp/daalaboolreader.h"
"${AOM_ROOT}/aom_dsp/entdec.c"
"${AOM_ROOT}/aom_dsp/entdec.h")
+
+ set(AOM_DSP_DECODER_SOURCES
+ ${AOM_DSP_DECODER_SOURCES}
+ "${AOM_ROOT}/aom_dsp/grain_synthesis.c"
+ "${AOM_ROOT}/aom_dsp/grain_synthesis.h")
+
endif ()
if (CONFIG_AV1_ENCODER)
diff --git a/aom_dsp/grain_synthesis.c b/aom_dsp/grain_synthesis.c
new file mode 100644
index 0000000..8dbc242
--- /dev/null
+++ b/aom_dsp/grain_synthesis.c
@@ -0,0 +1,1233 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/*!\file
+ * \brief Describes film grain parameters and film grain synthesis
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "aom_dsp/grain_synthesis.h"
+#include "aom_mem/aom_mem.h"
+
+// Samples with Gaussian distribution in the range of [-2048, 2047] (12 bits)
+// with zero mean and standard deviation of about 512.
+// should be divided by 4 for 10-bit range and 16 for 8-bit range.
+static const int gaussian_sequence[2048] = {
+ 711, -320, 81, 420, 447, 1637, -197, 410, 128, 952, -937,
+ -250, 94, 320, 32, 1021, -437, 214, 452, -306, 515, 262,
+ -81, 169, 371, 748, 65, 326, 250, -398, 1012, -513, -316,
+ -803, -1051, 885, -373, -163, 900, 85, 433, -725, -61, -322,
+ 179, 96, 130, -628, 432, 1103, 345, -143, -346, 373, 159,
+ 458, -511, -722, -604, 911, -588, 924, -323, -442, 19, -32,
+ 211, 437, 417, 305, 767, 246, 806, 492, 52, -488, -606,
+ 655, -78, 119, 129, -711, 721, 191, 307, 548, 100, -469,
+ -229, 583, -66, 12, 149, -119, 619, -488, -739, -425, -786,
+ -95, -597, -90, -8, -49, 568, -954, 252, -177, 605, -117,
+ 780, 552, -211, 288, 375, -371, -705, -130, 307, 45, 21,
+ -507, -551, 169, 375, 321, 45, -332, 634, 348, -598, 26,
+ 369, 451, -739, -814, -410, -222, 286, 746, 51, 1308, -196,
+ -605, -468, -31, 691, 277, 92, -34, -770, -938, 451, -218,
+ -728, 1164, -709, -1613, -156, 800, 89, 367, -153, -409, -147,
+ -116, 152, -575, 317, 715, -510, -794, -590, 140, -477, -625,
+ -146, 550, 443, 397, -1320, 340, -172, -173, -6, 269, -85,
+ 161, 420, -393, 351, 43, 152, -757, -60, -480, 598, 208,
+ -154, 129, -555, 305, -45, 398, 89, 222, -237, -375, 1076,
+ -556, -1171, -376, 495, -537, 287, 879, -365, -815, 579, -138,
+ 164, -188, 811, 309, 287, -300, -241, -189, -1026, -1603, -1364,
+ 151, -370, -344, -974, -277, 982, 296, -428, -638, 262, -3,
+ -1445, -31, 533, 882, -410, 189, -321, -390, -814, -597, 340,
+ 957, 123, 29, 531, -689, 407, -483, -371, 783, 68, 593,
+ 540, -472, 311, -31, 595, 566, -243, 524, -212, -617, 287,
+ -63, -461, -593, -434, 1218, -310, 994, 691, 228, -641, -501,
+ 41, 74, 205, -657, -67, -324, -4, -946, 850, -1065, 370,
+ 135, -853, -259, -82, 23, -491, 521, 537, -149, -359, 446,
+ 447, 468, -1017, -322, -195, 250, 152, -370, 198, 541, 293,
+ -721, -140, -585, -335, -4, 414, -793, -563, -420, 367, 536,
+ -464, 491, 802, -227, 575, -379, 36, 668, -444, 126, 146,
+ 348, 171, -522, -915, -52, 70, 104, -332, 605, 470, -764,
+ 232, 316, 72, 400, 1697, 600, 529, -5, 100, 119, -491,
+ -414, 1119, -221, 658, 329, -280, -210, 329, 492, 397, 233,
+ -49, -590, 38, 365, 669, 1199, -35, 44, 357, 304, -404,
+ -3, -102, 111, -204, 111, -736, 1148, -619, -314, 148, 579,
+ 34, -566, 450, -526, 208, -794, -30, -327, -180, 164, 170,
+ 181, 349, 64, -101, -1142, 154, 33, 1127, -104, 333, 390,
+ 941, 474, -84, 105, 1207, 478, 575, -414, 1554, -298, 1077,
+ 947, -458, 312, -258, -701, -27, 934, 329, 1047, -345, -870,
+ 353, 506, -507, 140, 160, -243, -717, -299, 564, -291, -70,
+ -446, 12, -335, 173, -365, -895, 273, 354, 572, 401, -90,
+ -333, 513, -418, -225, 150, 16, 832, -856, 19, -200, -818,
+ 423, 545, -320, 246, -159, 1095, -24, 474, 500, 442, -751,
+ 250, 31, 390, 1117, -308, 560, -190, -228, -1230, -79, 121,
+ 806, -657, 694, -999, -17, -191, 475, 483, -1369, -76, 293,
+ 56, -161, -793, 630, 381, 176, 374, 368, 374, 265, 146,
+ -363, 170, 8, 358, 883, -458, -30, -299, 285, -439, -361,
+ -255, -71, -141, -319, -178, 42, -269, -512, 42, 272, 268,
+ 12, -174, -780, -143, 201, 108, 578, -905, 78, -183, -522,
+ 345, -435, -291, 399, 888, 422, 10, -221, -683, 570, -3,
+ 264, 163, 146, -148, 708, 140, 269, 334, -1045, -820, 942,
+ -350, 289, -381, 1262, -152, 1054, 336, 246, -107, 157, 23,
+ 561, -389, 492, 55, -318, 339, 561, 787, -911, 309, 118,
+ 75, 80, -10, -198, -7, -15, -411, 341, 386, -120, 287,
+ -33, -22, -753, 465, 271, 245, 517, -414, -526, 9, 146,
+ 535, -377, 319, 281, 13, 126, 258, 112, 18, -809, 545,
+ -217, 546, 500, -161, 139, -741, 112, -144, -565, -266, 246,
+ 1308, 300, -357, 1172, 311, -290, 136, 855, 414, 258, 165,
+ 138, -56, 8, -231, 790, -363, 338, 618, -366, -86, 158,
+ 632, 552, -568, -170, -284, -22, 342, -87, 669, -230, 387,
+ -1240, 222, 435, -247, -965, -172, -419, 428, -735, 465, 32,
+ -508, 16, -428, -161, 1091, 260, -416, -580, -283, -431, -183,
+ -952, -853, 166, 1148, 590, -362, -217, -476, -400, 341, -413,
+ -543, 46, 247, -184, -94, -475, -503, 174, -225, 85, -459,
+ -124, 269, 436, 445, -958, 546, 59, 167, 226, 13, -757,
+ -158, -289, 175, 109, 456, -34, -39, 45, 677, 87, 792,
+ -89, -401, 1132, -319, 45, 143, -344, -1164, 539, -72, -399,
+ 301, 255, -587, -112, -476, 350, 204, 650, 232, -240, 524,
+ 250, -242, -229, -449, -460, 895, -754, -167, 286, -904, 216,
+ 572, -180, 1082, -311, 202, 442, 674, 86, -841, 713, -923,
+ 1640, 633, -624, 195, 603, 33, -760, 66, 133, -967, -6,
+ -597, 317, -311, 577, -410, -657, 323, -373, 23, -711, 1230,
+ -740, 406, 594, -41, -493, -1113, -491, 488, 699, -419, -462,
+ -203, -133, -346, -407, 399, -127, 225, -29, 193, -351, 650,
+ -276, 352, -577, 1060, 90, -475, -469, 26, 761, 401, -447,
+ -154, 1043, 377, -21, 546, -199, 290, 388, -544, 1060, -12,
+ 555, -484, 12, -185, 225, -1053, 492, 672, 491, -650, -83,
+ -8, 310, -352, 477, 404, 406, -485, 75, 733, 268, 121,
+ 14, -331, 28, 234, -282, -150, -558, 709, 190, 414, 66,
+ -11, -49, -113, 130, -196, 551, 677, 532, -122, 57, -165,
+ 167, 74, -257, 812, -1073, -767, -616, 282, 219, 326, -351,
+ 147, 762, -301, 41, 1015, 29, -383, -21, -1384, 110, -785,
+ 103, -52, -151, -584, -850, -328, -571, 449, 230, -208, 253,
+ 409, 73, -262, -141, 179, -135, 1248, 143, -570, -169, 377,
+ -66, 119, 335, 530, 582, -493, 235, 391, -1227, -1067, 780,
+ -160, 409, 52, 270, 1210, 69, 58, 1390, 733, -240, 119,
+ -111, -265, 91, -256, -84, -563, 521, -6, 28, 239, -720,
+ -855, -407, -661, -682, -814, 442, -405, 319, 877, -140, -120,
+ -115, -302, -134, 222, -448, -405, -185, -20, 193, 97, -266,
+ 440, 850, -429, -645, -388, -462, -589, 659, -421, -96, 57,
+ 212, 407, -259, -451, 276, 387, 455, -42, -624, 333, -690,
+ -102, 818, 368, -194, -214, -35, 236, -589, 270, 21, -197,
+ 507, -706, 124, -32, -226, 8, 301, 79, 9, -177, 606,
+ 197, -269, -1075, -162, 1052, 499, -51, 1312, -1074, 52, -141,
+ 351, -261, -898, -513, -253, 45, 844, 534, 199, -176, -490,
+ -305, 894, -229, 41, 328, -73, -57, -4, 517, -522, 126,
+ -50, -63, 478, -787, -377, 192, 470, -449, 396, -129, 233,
+ 411, 118, 257, 174, 508, -542, 440, -446, 24, 407, 570,
+ -81, -41, 14, -696, 273, -584, -398, -186, -96, -860, -69,
+ 223, -905, -309, 70, 91, 433, -1017, -228, 60, -655, -388,
+ -357, -500, -1595, 486, 824, 967, -52, -46, 265, -540, -230,
+ -621, 334, -420, -756, 848, -191, -1000, -780, 146, -666, 270,
+ -549, 600, -202, 40, 834, -586, 94, 104, 367, 112, 591,
+ -204, 128, 259, 418, 456, -257, 726, 405, 827, -77, 89,
+ 833, -372, 421, 176, -138, 86, -414, 302, -457, -1222, 431,
+ -271, 117, 196, -487, -1082, -762, -823, -703, 288, -290, 321,
+ 34, -85, 563, -327, -7, -1304, 892, 505, 258, -154, -669,
+ 108, -543, 352, 273, 334, 404, -464, -529, 335, -271, -34,
+ -736, 165, -636, -642, -7, -521, -921, -1262, -324, 533, 233,
+ 603, 89, -352, 219, 379, -182, -180, -43, -60, 1208, 247,
+ -215, -484, 417, 384, -90, 757, -137, 224, 296, 475, -480,
+ 903, 667, -1184, -69, -299, 502, 990, -56, 243, 580, -16,
+ -248, 346, -148, 723, -423, 150, 198, -80, -136, 681, -157,
+ 199, 198, -37, -214, -356, 65, 657, 99, -1086, 329, 733,
+ 200, 1099, 1514, -315, 812, 284, -579, -465, 871, -296, 983,
+ 797, -36, 284, 157, 144, 940, 333, -298, 582, -484, -114,
+ -75, 263, -895, -38, -858, -196, -681, -383, -261, -8, -85,
+ 453, 558, -198, 454, 558, -131, -342, -769, 864, -382, 504,
+ 938, 603, -437, -123, -741, 141, 595, 113, -523, -2, 32,
+ -224, -481, -19, -20, -43, 244, 408, 946, -1092, 27, 664,
+ 34, 328, -768, 841, 308, -3, -747, 631, -431, -104, 551,
+ -219, 372, 618, 855, 104, -548, 54, -251, -529, 306, -444,
+ -927, -300, -25, 51, 256, 112, -248, -318, 68, 154, 179,
+ -6, -79, 417, -64, -233, -438, -22, 114, -243, -284, -748,
+ 5, 794, -345, -373, -317, -173, 73, -273, -396, -168, 632,
+ 30, 328, -753, 205, -69, 223, -546, -797, -334, -242, 362,
+ 376, -131, -15, -989, -77, -578, -443, -848, -244, 18, 141,
+ -378, 699, -907, -198, 103, 1029, 575, 25, -440, 662, -631,
+ 917, 164, 378, -21, -973, 68, 354, -788, 213, -592, 495,
+ 349, 608, 180, 312, 315, 848, -78, -24, -311, 95, -141,
+ -795, 561, -383, 1456, -136, -228, -578, 150, -840, -149, -286,
+ 25, 738, 608, 762, 221, -586, -44, 102, -47, 280, -152,
+ -164, -147, 360, 1001, 348, 391, 233, -605, 283, -52, -139,
+ -640, -594, 816, -401, 835, -81, -152, 223, -178, -46, -166,
+ -16, 266, 434, 127, 99, -468, 472, -6, 412, 9, 100,
+ 489, -852, -1052, -277, 1017, 353, -259, -537, 568, 45, -152,
+ -188, 713, 860, -60, -767, -41, -490, 689, -933, 689, -67,
+ -751, -276, -411, 842, -472, -556, 178, -517, 228, -474, 348,
+ 74, 982, 299, -590, 805, 518, 303, -548, -261, 743, 1179,
+ 480, 286, 280, 474, -53, 478, -161, 339, -44, 374, 17,
+ -800, -122, 287, -825, -272, 196, -19, -348, -49, -499, 273,
+ -224, -11, -846, 485, 1, 86, -1027, 203, -605, -1159, -42,
+ 171, 520, -75, 84, -759, -519, -473, -650, 348, -228, -68,
+ 592, 330, -168, -606, 318, 146, -255, -688, -500, -540, 823,
+ -250, -703, 632, 177, -315, -212, 97, -160, 107, -640, 449,
+ -72, -875, 511, 174, 207, 901, 678, -889, -124, -295, -1132,
+ -911, -251, -370, 537, 1298, -165, 326, 518, -157, 333, 354,
+ -523, -122, -318, 22, -200, -402, 135, 316, 11, 317, -570,
+ -288, 535, 575, -416, 189, -127, -218, 466, 414, 328, 257,
+ -665, -1015, -275, 1230, -493, -1791, -883, 362, 78, 300, -31,
+ -885, -149, 410, -727, -387, -890, -304, -44, -441, 699, -110,
+ 251, 960, 686, 367, 60, -508, 331, 382, 138, -151, -745,
+ 32, 618, -131, 462, -244, 383, 996, -493, -150, -594, 500,
+ -363, 102, 662, 137, -613, 512, 402, 967, 936, 371, 666,
+ 343, -171, 56, -224, -719, 731, -874, 47, -254, 639, 324,
+ -176, 191, -376, -295, 678, 703, 113, -386, -461, 285, -147,
+ -990, -701, 293, -675, -576, 298, -838, 713, -489, -386, 617,
+ 818, 548, -281, 59, 201, 253, 657, -537, -554, -224, -489,
+ -854, -56, -261, 660, 312, 282, -778, -73, 680, 13, -37,
+ -202, 999, -498, 215, -194, -334, -201, 626, -823, -339, 639,
+ -355, 655, -980, -614, 781, -319, -439, -25, -7, -383, 522,
+ 228, 153, 766, 913, -130, 717, 538, -489, 353, 411, 239,
+ 509, -420, -850, 883, 461, 928, 368, -702, -1114, -35, 112,
+ -153, 642, 55, -494, -621, -774, -148, 818, -107, -454, -207,
+ 88, -569, 385, 793, 215, 549, -320, 936, -312, -690, 973,
+ -562, -411, 675, 250, 153, 90, -372, 547, -1029, 503, -60,
+ 263, -322, 401, 621, 388, 511, 296, 173, -78, -416, 98,
+ -69, -368, 801, -160, 871, 248, 44, 617, 1098, 175, 312,
+ -750, -149, 866, -151, 406, -428, 221, -214, -287, -822, 262,
+ -783, 682, -179, 752, 445, -197, -181, -825, -1293, 213, -142,
+ 211, -340, -606, 892, -567, -235, 781, -703, -276, 739, 565,
+ -262, 433, -616, -530, 427, -532, 931, -49, -81, -1357, -402,
+ 530, 303, 59, 876, -377, -998, 339, -680, -49, -157, -213,
+ -850, 507, -290, 196, 622, -523, 268, 370, -132, -749, 91,
+ -558, 5, -240, -55, -8, -447, -291, 552, -238, 283, -93,
+ 224, 873, 263, -416, 229, -229, 202, -113, 839, 396, -143,
+ -184, 564, 186, 240, -96, -791, 225, -68, 532, 9, -441,
+ 670, 156, 703, -322, -1190, -362, -1010, -633, -265, -484, 20,
+ -369, -325, -379, -286, -67, -122, 449, -845, 81, 436, 116,
+ -521, -59, -1379, 400, 852, -471, -880, 638, -875, 155, -375,
+ -183, 1032, 63, -266, -272, 298, -186, 504, 273, 235, 116,
+ 364, 671, -619, -174, -740, -535, 394, 124, -877, -288, -146,
+ 37, 641, -183, 498, 142, -109, -92, 540, -587, -170, -469,
+ -1347, 852, -313, 176, 366, 220, -403, -384, 135, -321, 706,
+ 254, 575, 511, -6, -363, -497, -611, -457, 475, 245, 374,
+ 1811, 123
+};
+
+static const int gauss_bits = 11;
+
+static const int luma_subblock_size = 32;
+static const int chroma_subblock_size = 16;
+
+static const int min_luma_legal_range = 16;
+static const int max_luma_legal_range = 235;
+
+static const int min_chroma_legal_range = 16;
+static const int max_chroma_legal_range = 240;
+
+static int scaling_lut_y[256];
+static int scaling_lut_cb[256];
+static int scaling_lut_cr[256];
+
+static int grain_center;
+static int grain_min;
+static int grain_max;
+
+static uint16_t random_register = 0; // random number generator register
+
+static void init_arrays(aom_film_grain_t *params, int luma_stride,
+ int chroma_stride, int ***pred_pos_luma_p,
+ int ***pred_pos_chroma_p, int **luma_grain_block,
+ int **cb_grain_block, int **cr_grain_block,
+ int **y_line_buf, int **cb_line_buf, int **cr_line_buf,
+ int **y_col_buf, int **cb_col_buf, int **cr_col_buf,
+ int luma_grain_samples, int chroma_grain_samples) {
+ memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
+ memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
+ memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
+
+ int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
+ int num_pos_chroma = num_pos_luma + 1;
+
+ int **pred_pos_luma;
+ int **pred_pos_chroma;
+
+ pred_pos_luma = (int **)aom_malloc(sizeof(*pred_pos_luma) * num_pos_luma);
+
+ for (int row = 0; row < num_pos_luma; row++) {
+ pred_pos_luma[row] = (int *)aom_malloc(sizeof(**pred_pos_luma) * 3);
+ }
+
+ pred_pos_chroma =
+ (int **)aom_malloc(sizeof(*pred_pos_chroma) * num_pos_chroma);
+
+ for (int row = 0; row < num_pos_chroma; row++) {
+ pred_pos_chroma[row] = (int *)aom_malloc(sizeof(**pred_pos_chroma) * 3);
+ }
+
+ int pos_ar_index = 0;
+
+ for (int row = -params->ar_coeff_lag; row < 0; row++) {
+ for (int col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1;
+ col++) {
+ pred_pos_luma[pos_ar_index][0] = row;
+ pred_pos_luma[pos_ar_index][1] = col;
+ pred_pos_luma[pos_ar_index][2] = 0;
+
+ pred_pos_chroma[pos_ar_index][0] = row;
+ pred_pos_chroma[pos_ar_index][1] = col;
+ pred_pos_chroma[pos_ar_index][2] = 0;
+ ++pos_ar_index;
+ }
+ }
+
+ for (int col = -params->ar_coeff_lag; col < 0; col++) {
+ pred_pos_luma[pos_ar_index][0] = 0;
+ pred_pos_luma[pos_ar_index][1] = col;
+ pred_pos_luma[pos_ar_index][2] = 0;
+
+ pred_pos_chroma[pos_ar_index][0] = 0;
+ pred_pos_chroma[pos_ar_index][1] = col;
+ pred_pos_chroma[pos_ar_index][2] = 0;
+
+ ++pos_ar_index;
+ }
+
+ pred_pos_chroma[pos_ar_index][0] = 0;
+ pred_pos_chroma[pos_ar_index][1] = 0;
+ pred_pos_chroma[pos_ar_index][2] = 1;
+
+ *pred_pos_luma_p = pred_pos_luma;
+ *pred_pos_chroma_p = pred_pos_chroma;
+
+ *y_line_buf = (int *)aom_malloc(sizeof(**y_line_buf) * luma_stride * 2);
+ *cb_line_buf = (int *)aom_malloc(sizeof(**cb_line_buf) * chroma_stride);
+ *cr_line_buf = (int *)aom_malloc(sizeof(**cr_line_buf) * chroma_stride);
+
+ *y_col_buf =
+ (int *)aom_malloc(sizeof(**y_col_buf) * (luma_subblock_size + 2) * 2);
+ *cb_col_buf =
+ (int *)aom_malloc(sizeof(**cb_col_buf) * (chroma_subblock_size + 1));
+ *cr_col_buf =
+ (int *)aom_malloc(sizeof(**cr_col_buf) * (chroma_subblock_size + 1));
+
+ *luma_grain_block =
+ (int *)aom_malloc(sizeof(**luma_grain_block) * luma_grain_samples);
+ *cb_grain_block =
+ (int *)aom_malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
+ *cr_grain_block =
+ (int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
+}
+
+static void dealloc_arrays(aom_film_grain_t *params, int ***pred_pos_luma,
+ int ***pred_pos_chroma, int **luma_grain_block,
+ int **cb_grain_block, int **cr_grain_block,
+ int **y_line_buf, int **cb_line_buf,
+ int **cr_line_buf, int **y_col_buf, int **cb_col_buf,
+ int **cr_col_buf) {
+ int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
+ int num_pos_chroma = num_pos_luma + 1;
+
+ for (int row = 0; row < num_pos_luma; row++) {
+ aom_free((*pred_pos_luma)[row]);
+ }
+ aom_free(*pred_pos_luma);
+
+ for (int row = 0; row < num_pos_chroma; row++) {
+ aom_free((*pred_pos_chroma)[row]);
+ }
+ aom_free((*pred_pos_chroma));
+
+ aom_free(*y_line_buf);
+
+ aom_free(*cb_line_buf);
+
+ aom_free(*cr_line_buf);
+
+ aom_free(*y_col_buf);
+
+ aom_free(*cb_col_buf);
+
+ aom_free(*cr_col_buf);
+
+ aom_free(*luma_grain_block);
+
+ aom_free(*cb_grain_block);
+
+ aom_free(*cr_grain_block);
+}
+
+// get a number between 0 and 2^bits - 1
+int get_random_number(int bits) {
+ uint16_t bit;
+ bit = ((random_register >> 0) ^ (random_register >> 1) ^
+ (random_register >> 3) ^ (random_register >> 12)) &
+ 1;
+ random_register = (random_register >> 1) | (bit << 15);
+ return (random_register >> (16 - bits)) & ((1 << bits) - 1);
+}
+
+static void init_random_generator(int luma_line, uint16_t seed) {
+ // same for the picture
+
+ uint16_t msb = (seed >> 8) & 255;
+ uint16_t lsb = seed & 255;
+
+ random_register = (msb << 8) + lsb;
+
+ // changes for each row
+ int luma_num = luma_line >> 5;
+
+ random_register ^= ((luma_num * 37 + 178) & 255) << 8;
+ random_register ^= ((luma_num * 173 + 105) & 255);
+}
+
+static void generate_luma_grain_block(
+ aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
+ int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
+ int left_pad, int top_pad, int right_pad, int bottom_pad) {
+ int bit_depth = params->bit_depth;
+
+ int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
+ int rounding_offset = (1 << (params->ar_coeff_shift - 1));
+
+ for (int i = 0; i < luma_block_size_y; i++)
+ for (int j = 0; j < luma_block_size_x; j++)
+ luma_grain_block[i * luma_grain_stride + j] =
+ (gaussian_sequence[get_random_number(gauss_bits)] +
+ ((1 << (12 - bit_depth)) >> 1)) >>
+ (12 - bit_depth);
+
+ for (int i = top_pad; i < luma_block_size_y - bottom_pad; i++)
+ for (int j = left_pad; j < luma_block_size_x - right_pad; j++) {
+ int wsum = 0;
+ for (int pos = 0; pos < num_pos_luma; pos++) {
+ wsum = wsum +
+ params->ar_coeffs_y[pos] *
+ luma_grain_block[(i + pred_pos_luma[pos][0]) *
+ luma_grain_stride +
+ j + pred_pos_luma[pos][1]];
+ }
+ luma_grain_block[i * luma_grain_stride + j] =
+ clamp(luma_grain_block[i * luma_grain_stride + j] +
+ ((wsum + rounding_offset) >> params->ar_coeff_shift),
+ grain_min, grain_max);
+ }
+}
+
+static void generate_chroma_grain_blocks(
+ aom_film_grain_t *params,
+ // int** pred_pos_luma,
+ int **pred_pos_chroma, int *luma_grain_block, int *cb_grain_block,
+ int *cr_grain_block, int luma_grain_stride, int chroma_block_size_y,
+ int chroma_block_size_x, int chroma_grain_stride, int left_pad, int top_pad,
+ int right_pad, int bottom_pad) {
+ int bit_depth = params->bit_depth;
+
+ int num_pos_chroma =
+ 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1) + 1;
+ int rounding_offset = (1 << (params->ar_coeff_shift - 1));
+
+ for (int i = 0; i < chroma_block_size_y; i++)
+ for (int j = 0; j < chroma_block_size_x; j++) {
+ cb_grain_block[i * chroma_grain_stride + j] =
+ (gaussian_sequence[get_random_number(gauss_bits)] +
+ ((1 << (12 - bit_depth)) >> 1)) >>
+ (12 - bit_depth);
+ cr_grain_block[i * chroma_grain_stride + j] =
+ (gaussian_sequence[get_random_number(gauss_bits)] +
+ ((1 << (12 - bit_depth)) >> 1)) >>
+ (12 - bit_depth);
+ }
+
+ for (int i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
+ for (int j = left_pad; j < chroma_block_size_x - right_pad; j++) {
+ int wsum_cb = 0;
+ int wsum_cr = 0;
+ for (int pos = 0; pos < num_pos_chroma; pos++) {
+ if (pred_pos_chroma[pos][2] == 0) {
+ wsum_cb = wsum_cb +
+ params->ar_coeffs_cb[pos] *
+ cb_grain_block[(i + pred_pos_chroma[pos][0]) *
+ chroma_grain_stride +
+ j + pred_pos_chroma[pos][1]];
+ wsum_cr = wsum_cr +
+ params->ar_coeffs_cr[pos] *
+ cr_grain_block[(i + pred_pos_chroma[pos][0]) *
+ chroma_grain_stride +
+ j + pred_pos_chroma[pos][1]];
+ } else if (pred_pos_chroma[pos][2] == 1) {
+ int av_luma =
+ (luma_grain_block[(((i - top_pad) << 1) + top_pad) *
+ luma_grain_stride +
+ ((j - left_pad) << 1) + left_pad] +
+ luma_grain_block[(((i - top_pad) << 1) + top_pad + 1) *
+ luma_grain_stride +
+ ((j - left_pad) << 1) + left_pad] +
+ luma_grain_block[(((i - top_pad) << 1) + top_pad) *
+ luma_grain_stride +
+ ((j - left_pad) << 1) + left_pad + 1] +
+ luma_grain_block[(((i - top_pad) << 1) + top_pad + 1) *
+ luma_grain_stride +
+ ((j - left_pad) << 1) + left_pad + 1] +
+ 2) >>
+ 2;
+
+ wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma;
+ wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma;
+ } else {
+ printf(
+ "Grain synthesis: prediction between two chroma components is "
+ "not supported!");
+ exit(1);
+ }
+ }
+ cb_grain_block[i * chroma_grain_stride + j] =
+ clamp(cb_grain_block[i * chroma_grain_stride + j] +
+ ((wsum_cb + rounding_offset) >> params->ar_coeff_shift),
+ grain_min, grain_max);
+ cr_grain_block[i * chroma_grain_stride + j] =
+ clamp(cr_grain_block[i * chroma_grain_stride + j] +
+ ((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
+ grain_min, grain_max);
+ }
+}
+
+static void init_scaling_function(int scaling_points[][2], int num_points,
+ int scaling_lut[]) {
+ for (int i = 0; i < scaling_points[0][0]; i++)
+ scaling_lut[i] = scaling_points[0][1];
+
+ for (int point = 0; point < num_points - 1; point++) {
+ int delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
+ int delta_x = scaling_points[point + 1][0] - scaling_points[point][0];
+
+ int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
+
+ for (int x = 0; x < delta_x; x++) {
+ scaling_lut[scaling_points[point][0] + x] =
+ scaling_points[point][1] + (int)((x * delta + 32768) >> 16);
+ }
+ }
+
+ for (int i = scaling_points[num_points - 1][0]; i < 256; i++)
+ scaling_lut[i] = scaling_points[num_points - 1][1];
+}
+
+// function that extracts samples from a LUT (and interpolates intemediate
+// frames for 10 bit video)
+static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
+ int x = index >> (bit_depth - 8);
+
+ if (!(bit_depth - 8) || x >= 255)
+ return scaling_lut[x];
+ else
+ return scaling_lut[x] +
+ (((scaling_lut[x + 1] - scaling_lut[x]) * (index & 3) + 2) >> 2);
+}
+
+static void add_noise_to_block(aom_film_grain_t *params, uint8_t *luma,
+ uint8_t *cb, uint8_t *cr, int luma_stride,
+ int chroma_stride, int *luma_grain,
+ int *cb_grain, int *cr_grain,
+ int luma_grain_stride, int chroma_grain_stride,
+ int chroma_height, int chroma_width,
+ int bit_depth) {
+ int cb_mult = params->cb_mult - 128; // fixed scale
+ int cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
+ int cb_offset = params->cb_offset - 256;
+
+ int cr_mult = params->cr_mult - 128; // fixed scale
+ int cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
+ int cr_offset = params->cr_offset - 256;
+
+ int rounding_offset = (1 << (params->scaling_shift - 1));
+
+ if (params->chroma_scaling_from_luma) {
+ cb_mult = 0; // fixed scale
+ cb_luma_mult = 64; // fixed scale
+ cb_offset = 0;
+
+ cr_mult = 0; // fixed scale
+ cr_luma_mult = 64; // fixed scale
+ cr_offset = 0;
+ }
+
+ int min_luma, max_luma, min_chroma, max_chroma;
+
+ if (params->clip_to_restricted_range) {
+ min_luma = min_luma_legal_range;
+ max_luma = max_luma_legal_range;
+
+ min_chroma = min_chroma_legal_range;
+ max_chroma = max_chroma_legal_range;
+ } else {
+ min_luma = min_chroma = 0;
+ max_luma = max_chroma = 255;
+ }
+
+ for (int i = 0; i < chroma_height; i++) {
+ for (int j = 0; j < chroma_width; j++) {
+ int average_luma = (luma[(i << 1) * luma_stride + (j << 1)] +
+ luma[((i << 1)) * luma_stride + (j << 1) + 1] + 1) >>
+ 1;
+
+ luma[((i) << 1) * luma_stride + ((j) << 1)] = clamp(
+ luma[((i) << 1) * luma_stride + ((j) << 1)] +
+ ((scale_LUT(scaling_lut_y,
+ luma[((i) << 1) * luma_stride + ((j) << 1)], 8) *
+ luma_grain[(i << 1) * luma_grain_stride + (j << 1)] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1)] = clamp(
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1)] +
+ ((scale_LUT(scaling_lut_y,
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1)],
+ 8) *
+ luma_grain[((i << 1) + 1) * luma_grain_stride + (j << 1)] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
+ luma[(((i) << 1)) * luma_stride + ((j) << 1) + 1] = clamp(
+ luma[(((i) << 1)) * luma_stride + ((j) << 1) + 1] +
+ ((scale_LUT(scaling_lut_y,
+ luma[(((i) << 1)) * luma_stride + ((j) << 1) + 1],
+ 8) *
+ luma_grain[(i << 1) * luma_grain_stride + (j << 1) + 1] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1) + 1] = clamp(
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1) + 1] +
+ ((scale_LUT(scaling_lut_y,
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1) + 1],
+ 8) *
+ luma_grain[((i << 1) + 1) * luma_grain_stride + (j << 1) +
+ 1] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
+
+ cb[i * chroma_stride + j] =
+ clamp(cb[i * chroma_stride + j] +
+ ((scale_LUT(scaling_lut_cb,
+ clamp(((average_luma * cb_luma_mult +
+ cb_mult * cb[i * chroma_stride + j]) >>
+ 6) +
+ cb_offset,
+ 0, (256 << (bit_depth - 8)) - 1),
+ 8) *
+ cb_grain[i * chroma_grain_stride + j] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_chroma, max_chroma);
+
+ cr[i * chroma_stride + j] =
+ clamp(cr[i * chroma_stride + j] +
+ ((scale_LUT(scaling_lut_cr,
+ clamp(((average_luma * cr_luma_mult +
+ cr_mult * cr[i * chroma_stride + j]) >>
+ 6) +
+ cr_offset,
+ 0, (256 << (bit_depth - 8)) - 1),
+ 8) *
+ cr_grain[i * chroma_grain_stride + j] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_chroma, max_chroma);
+ }
+ }
+}
+
+static void add_noise_to_block_hbd(aom_film_grain_t *params, uint16_t *luma,
+ uint16_t *cb, uint16_t *cr, int luma_stride,
+ int chroma_stride, int *luma_grain,
+ int *cb_grain, int *cr_grain,
+ int luma_grain_stride,
+ int chroma_grain_stride, int chroma_height,
+ int chroma_width, int bit_depth) {
+ int cb_mult = params->cb_mult - 128; // fixed scale
+ int cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
+ // offset value depends on the bit depth
+ int cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);
+
+ int cr_mult = params->cr_mult - 128; // fixed scale
+ int cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
+ // offset value depends on the bit depth
+ int cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);
+
+ int rounding_offset = (1 << (params->scaling_shift - 1));
+
+ if (params->chroma_scaling_from_luma) {
+ cb_mult = 0; // fixed scale
+ cb_luma_mult = 64; // fixed scale
+ cb_offset = 0;
+
+ cr_mult = 0; // fixed scale
+ cr_luma_mult = 64; // fixed scale
+ cr_offset = 0;
+ }
+
+ int min_luma, max_luma, min_chroma, max_chroma;
+
+ if (params->clip_to_restricted_range) {
+ min_luma = min_luma_legal_range << (bit_depth - 8);
+ max_luma = max_luma_legal_range << (bit_depth - 8);
+
+ min_chroma = min_chroma_legal_range << (bit_depth - 8);
+ max_chroma = max_chroma_legal_range << (bit_depth - 8);
+ } else {
+ min_luma = min_chroma = 0;
+ max_luma = max_chroma = (256 << (bit_depth - 8)) - 1;
+ }
+
+ for (int i = 0; i < chroma_height; i++) {
+ for (int j = 0; j < chroma_width; j++) {
+ int average_luma = (luma[(i << 1) * luma_stride + (j << 1)] +
+ luma[((i << 1)) * luma_stride + (j << 1) + 1] + 1) >>
+ 1;
+
+ luma[((i) << 1) * luma_stride + ((j) << 1)] =
+ clamp(luma[((i) << 1) * luma_stride + ((j) << 1)] +
+ ((scale_LUT(scaling_lut_y,
+ luma[((i) << 1) * luma_stride + ((j) << 1)],
+ bit_depth) *
+ luma_grain[(i << 1) * luma_grain_stride + (j << 1)] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1)] = clamp(
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1)] +
+ ((scale_LUT(scaling_lut_y,
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1)],
+ bit_depth) *
+ luma_grain[((i << 1) + 1) * luma_grain_stride + (j << 1)] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
+ luma[(((i) << 1)) * luma_stride + ((j) << 1) + 1] = clamp(
+ luma[(((i) << 1)) * luma_stride + ((j) << 1) + 1] +
+ ((scale_LUT(scaling_lut_y,
+ luma[(((i) << 1)) * luma_stride + ((j) << 1) + 1],
+ bit_depth) *
+ luma_grain[(i << 1) * luma_grain_stride + (j << 1) + 1] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1) + 1] = clamp(
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1) + 1] +
+ ((scale_LUT(scaling_lut_y,
+ luma[(((i) << 1) + 1) * luma_stride + ((j) << 1) + 1],
+ bit_depth) *
+ luma_grain[((i << 1) + 1) * luma_grain_stride + (j << 1) +
+ 1] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_luma, max_luma);
+
+ cb[i * chroma_stride + j] =
+ clamp(cb[i * chroma_stride + j] +
+ ((scale_LUT(scaling_lut_cb,
+ clamp(((average_luma * cb_luma_mult +
+ cb_mult * cb[i * chroma_stride + j]) >>
+ 6) +
+ cb_offset,
+ 0, (256 << (bit_depth - 8)) - 1),
+ bit_depth) *
+ cb_grain[i * chroma_grain_stride + j] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_chroma, max_chroma);
+
+ cr[i * chroma_stride + j] =
+ clamp(cr[i * chroma_stride + j] +
+ ((scale_LUT(scaling_lut_cr,
+ clamp(((average_luma * cr_luma_mult +
+ cr_mult * cr[i * chroma_stride + j]) >>
+ 6) +
+ cr_offset,
+ 0, (256 << (bit_depth - 8)) - 1),
+ bit_depth) *
+ cr_grain[i * chroma_grain_stride + j] +
+ rounding_offset) >>
+ params->scaling_shift),
+ min_chroma, max_chroma);
+ }
+ }
+}
+
+static void copy_rect(uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int width, int height,
+ int high_bit_depth) {
+ int hbd_coeff = high_bit_depth ? 2 : 0;
+ while (height) {
+ memcpy(dst, src, width * sizeof(uint8_t) * hbd_coeff);
+ src += src_stride;
+ dst += dst_stride;
+ --height;
+ }
+ return;
+}
+
+void av1_add_film_grain(aom_film_grain_t *params, aom_image_t *src,
+ aom_image_t *dst) {
+ uint8_t *luma, *cb, *cr;
+ int height, width, luma_stride, chroma_stride;
+
+ if (!(src->fmt == AOM_IMG_FMT_I42016) && !(src->fmt == AOM_IMG_FMT_I420)) {
+ printf("Film grain error: only 4:2:0 is currently supported!");
+ exit(1);
+ }
+
+ dst->r_w = src->r_w;
+ dst->r_h = src->r_h;
+
+ copy_rect(src->planes[AOM_PLANE_Y], src->stride[AOM_PLANE_Y],
+ dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], dst->d_w,
+ dst->d_h, src->bit_depth);
+
+ copy_rect(src->planes[AOM_PLANE_U], src->stride[AOM_PLANE_U],
+ dst->planes[AOM_PLANE_U], dst->stride[AOM_PLANE_U], dst->d_w / 2,
+ dst->d_h / 2, src->bit_depth);
+
+ copy_rect(src->planes[AOM_PLANE_V], src->stride[AOM_PLANE_V],
+ dst->planes[AOM_PLANE_V], dst->stride[AOM_PLANE_V], dst->d_w / 2,
+ dst->d_h / 2, src->bit_depth);
+
+ luma = dst->planes[AOM_PLANE_Y];
+ cb = dst->planes[AOM_PLANE_U];
+ cr = dst->planes[AOM_PLANE_V];
+ luma_stride = (dst->fmt == AOM_IMG_FMT_I42016) ? dst->stride[AOM_PLANE_Y] / 2
+ : dst->stride[AOM_PLANE_Y];
+ chroma_stride = (dst->fmt == AOM_IMG_FMT_I42016)
+ ? dst->stride[AOM_PLANE_U] / 2
+ : dst->stride[AOM_PLANE_U];
+ width = dst->d_w;
+ height = dst->d_h;
+ params->bit_depth = dst->bit_depth;
+
+ int use_high_bit_depth = 0;
+
+ if (dst->fmt == AOM_IMG_FMT_I42016) {
+ use_high_bit_depth = 1;
+ }
+
+ av1_add_film_grain_run(params, luma, cb, cr, height, width, luma_stride,
+ chroma_stride, use_high_bit_depth);
+ return;
+}
+
+void av1_add_film_grain_run(aom_film_grain_t *params, uint8_t *luma,
+ uint8_t *cb, uint8_t *cr, int height, int width,
+ int luma_stride, int chroma_stride,
+ int use_high_bit_depth) {
+ int **pred_pos_luma;
+ int **pred_pos_chroma;
+ int *luma_grain_block;
+ int *cb_grain_block;
+ int *cr_grain_block;
+
+ int *y_line_buf;
+ int *cb_line_buf;
+ int *cr_line_buf;
+
+ int *y_col_buf;
+ int *cb_col_buf;
+ int *cr_col_buf;
+
+ random_register = params->random_seed;
+
+ int left_pad = 3;
+ int right_pad = 3; // padding to offset for AR coefficients
+ int top_pad = 3;
+ int bottom_pad = 0;
+
+ int ar_padding = 3; // maximum lag used for stabilization of AR coefficients
+
+ // Initial padding is only needed for generation of
+ // film grain templates (to stabilize the AR process)
+ // Only a 64x64 luma and 32x32 chroma part of a template
+ // is used later for adding grain, padding can be discarded
+
+ int luma_block_size_y =
+ top_pad + 2 * ar_padding + luma_subblock_size * 2 + bottom_pad;
+ int luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size * 2 +
+ 2 * ar_padding + right_pad;
+
+ int chroma_block_size_y =
+ top_pad + ar_padding + chroma_subblock_size * 2 + bottom_pad;
+ int chroma_block_size_x =
+ left_pad + ar_padding + chroma_subblock_size * 2 + ar_padding + right_pad;
+
+ int luma_grain_stride = luma_block_size_x;
+ int chroma_grain_stride = chroma_block_size_x;
+
+ int overlap = params->overlap_flag;
+ int bit_depth = params->bit_depth;
+
+ grain_center = 128 << (bit_depth - 8);
+ grain_min = 0 - grain_center;
+ grain_max = (256 << (bit_depth - 8)) - 1 - grain_center;
+
+ init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
+ &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
+ &cr_grain_block, &y_line_buf, &cb_line_buf, &cr_line_buf,
+ &y_col_buf, &cb_col_buf, &cr_col_buf,
+ luma_block_size_y * luma_block_size_x,
+ chroma_block_size_y * chroma_block_size_x);
+
+ generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
+ luma_block_size_y, luma_block_size_x,
+ luma_grain_stride, left_pad, top_pad, right_pad,
+ bottom_pad);
+
+ generate_chroma_grain_blocks(
+ params,
+ // pred_pos_luma,
+ pred_pos_chroma, luma_grain_block, cb_grain_block, cr_grain_block,
+ luma_grain_stride, chroma_block_size_y, chroma_block_size_x,
+ chroma_grain_stride, left_pad, top_pad, right_pad, bottom_pad);
+
+ init_scaling_function(params->scaling_points_y, params->num_y_points,
+ scaling_lut_y);
+
+ if (params->chroma_scaling_from_luma) {
+ memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
+ memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
+ } else {
+ init_scaling_function(params->scaling_points_cb, params->num_cb_points,
+ scaling_lut_cb);
+ init_scaling_function(params->scaling_points_cr, params->num_cr_points,
+ scaling_lut_cr);
+ }
+ for (int y = 0; y < height / 2; y += chroma_subblock_size) {
+ init_random_generator(y * 2, params->random_seed);
+
+ for (int x = 0; x < width / 2; x += chroma_subblock_size) {
+ int offset_y = get_random_number(8);
+ int offset_x = (offset_y >> 4) & 15;
+ offset_y &= 15;
+
+ int luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
+ int luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);
+
+ int chroma_offset_y = top_pad + ar_padding + offset_y;
+ int chroma_offset_x = left_pad + ar_padding + offset_x;
+
+ if (overlap && x) {
+ for (int i = 0; i < AOMMIN(chroma_subblock_size, height / 2 - y); i++) {
+ y_col_buf[(i << 1) * 2] =
+ clamp((27 * y_col_buf[(i << 1) * 2] +
+ 17 * luma_grain_block[(luma_offset_y + (i << 1)) *
+ luma_grain_stride +
+ luma_offset_x] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+
+ y_col_buf[(i << 1) * 2 + 1] =
+ clamp((17 * y_col_buf[(i << 1) * 2 + 1] +
+ 27 * luma_grain_block[(luma_offset_y + (i << 1)) *
+ luma_grain_stride +
+ luma_offset_x + 1] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+
+ y_col_buf[(i << 1) * 2 + 2] =
+ clamp((27 * y_col_buf[(i << 1) * 2 + 2] +
+ 17 * luma_grain_block[(luma_offset_y + (i << 1) + 1) *
+ luma_grain_stride +
+ luma_offset_x] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ y_col_buf[(i << 1) * 2 + 3] =
+ clamp((17 * y_col_buf[(i << 1) * 2 + 3] +
+ 27 * luma_grain_block[(luma_offset_y + (i << 1) + 1) *
+ luma_grain_stride +
+ luma_offset_x + 1] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+
+ cb_col_buf[i] = clamp(
+ (23 * cb_col_buf[i] +
+ 22 * cb_grain_block[(chroma_offset_y + i) * chroma_grain_stride +
+ chroma_offset_x] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ cr_col_buf[i] = clamp(
+ (23 * cr_col_buf[i] +
+ 22 * cr_grain_block[(chroma_offset_y + i) * chroma_grain_stride +
+ chroma_offset_x] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ }
+
+ int i = y ? 1 : 0;
+
+ if (use_high_bit_depth) {
+ add_noise_to_block_hbd(
+ params,
+ (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
+ (uint16_t *)cb + (y + i) * chroma_stride + x,
+ (uint16_t *)cr + (y + i) * chroma_stride + x, luma_stride,
+ chroma_stride, y_col_buf + i * 4, cb_col_buf + i, cr_col_buf + i,
+ 2, 1, AOMMIN(chroma_subblock_size, height / 2 - y) - i, 1,
+ bit_depth);
+ } else {
+ add_noise_to_block(
+ params, luma + ((y + i) << 1) * luma_stride + (x << 1),
+ cb + (y + i) * chroma_stride + x,
+ cr + (y + i) * chroma_stride + x, luma_stride, chroma_stride,
+ y_col_buf + i * 4, cb_col_buf + i, cr_col_buf + i, 2, 1,
+ AOMMIN(chroma_subblock_size, height / 2 - y) - i, 1, bit_depth);
+ }
+ }
+
+ if (overlap && y) {
+ if (x) {
+ y_line_buf[x << 1] =
+ clamp((17 * y_col_buf[0] + 27 * y_line_buf[x << 1] + 16) >> 5,
+ grain_min, grain_max);
+ y_line_buf[(x << 1) + 1] = clamp(
+ (17 * y_col_buf[1] + 27 * y_line_buf[(x << 1) + 1] + 16) >> 5,
+ grain_min, grain_max);
+ y_line_buf[luma_stride + (x << 1)] =
+ clamp((27 * y_col_buf[2] +
+ 17 * y_line_buf[luma_stride + (x << 1)] + 16) >>
+ 5,
+ grain_min, grain_max);
+ y_line_buf[luma_stride + (x << 1) + 1] =
+ clamp((27 * y_col_buf[3] +
+ 17 * y_line_buf[luma_stride + (x << 1) + 1] + 16) >>
+ 5,
+ grain_min, grain_max);
+
+ cb_line_buf[x] =
+ clamp((22 * cb_col_buf[0] + 23 * cb_line_buf[x] + 16) >> 5,
+ grain_min, grain_max);
+ cr_line_buf[x] =
+ clamp((22 * cr_col_buf[0] + 23 * cr_line_buf[x] + 16) >> 5,
+ grain_min, grain_max);
+ }
+
+ for (int j = x ? 1 : 0; j < AOMMIN(chroma_subblock_size, width / 2 - x);
+ j++) {
+ y_line_buf[(x + j) << 1] =
+ clamp((27 * y_line_buf[(x + j) << 1] +
+ 17 * luma_grain_block[luma_offset_y * luma_grain_stride +
+ luma_offset_x + (j << 1)] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ y_line_buf[((x + j) << 1) + 1] =
+ clamp((27 * y_line_buf[((x + j) << 1) + 1] +
+ 17 * luma_grain_block[luma_offset_y * luma_grain_stride +
+ luma_offset_x + (j << 1) + 1] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+
+ y_line_buf[luma_stride + ((x + j) << 1)] = clamp(
+ (17 * y_line_buf[luma_stride + ((x + j) << 1)] +
+ 27 * luma_grain_block[(luma_offset_y + 1) * luma_grain_stride +
+ luma_offset_x + (j << 1)] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ y_line_buf[luma_stride + ((x + j) << 1) + 1] = clamp(
+ (17 * y_line_buf[luma_stride + ((x + j) << 1) + 1] +
+ 27 * luma_grain_block[(luma_offset_y + 1) * luma_grain_stride +
+ luma_offset_x + (j << 1) + 1] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+
+ cb_line_buf[x + j] =
+ clamp((23 * cb_line_buf[x + j] +
+ 22 * cb_grain_block[chroma_offset_y * chroma_grain_stride +
+ chroma_offset_x + j] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ cr_line_buf[x + j] =
+ clamp((23 * cr_line_buf[x + j] +
+ 22 * cr_grain_block[chroma_offset_y * chroma_grain_stride +
+ chroma_offset_x + j] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ }
+
+ if (use_high_bit_depth) {
+ add_noise_to_block_hbd(
+ params, (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
+ (uint16_t *)cb + y * chroma_stride + x,
+ (uint16_t *)cr + y * chroma_stride + x, luma_stride,
+ chroma_stride, y_line_buf + (x << 1), cb_line_buf + x,
+ cr_line_buf + x, luma_stride, chroma_stride, 1,
+ AOMMIN(chroma_subblock_size, width / 2 - x), bit_depth);
+ } else {
+ add_noise_to_block(
+ params, luma + (y << 1) * luma_stride + (x << 1),
+ cb + y * chroma_stride + x, cr + y * chroma_stride + x,
+ luma_stride, chroma_stride, y_line_buf + (x << 1),
+ cb_line_buf + x, cr_line_buf + x, luma_stride, chroma_stride, 1,
+ AOMMIN(chroma_subblock_size, width / 2 - x), bit_depth);
+ }
+ }
+
+ int i = overlap && y ? 1 : 0;
+ int j = overlap && x ? 1 : 0;
+
+ if (use_high_bit_depth) {
+ add_noise_to_block_hbd(
+ params,
+ (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
+ (uint16_t *)cb + (y + i) * chroma_stride + x + j,
+ (uint16_t *)cr + (y + i) * chroma_stride + x + j, luma_stride,
+ chroma_stride,
+ luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
+ luma_offset_x + (j << 1),
+ cb_grain_block + (chroma_offset_y + i) * chroma_grain_stride +
+ chroma_offset_x + j,
+ cr_grain_block + (chroma_offset_y + i) * chroma_grain_stride +
+ chroma_offset_x + j,
+ luma_grain_stride, chroma_grain_stride,
+ AOMMIN(chroma_subblock_size, height / 2 - y) - i,
+ AOMMIN(chroma_subblock_size, width / 2 - x) - j, bit_depth);
+ } else {
+ add_noise_to_block(
+ params, luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
+ cb + (y + i) * chroma_stride + x + j,
+ cr + (y + i) * chroma_stride + x + j, luma_stride, chroma_stride,
+ luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
+ luma_offset_x + (j << 1),
+ cb_grain_block + (chroma_offset_y + i) * chroma_grain_stride +
+ chroma_offset_x + j,
+ cr_grain_block + (chroma_offset_y + i) * chroma_grain_stride +
+ chroma_offset_x + j,
+ luma_grain_stride, chroma_grain_stride,
+ AOMMIN(chroma_subblock_size, height / 2 - y) - i,
+ AOMMIN(chroma_subblock_size, width / 2 - x) - j, bit_depth);
+ }
+
+ if (overlap) {
+ if (x) {
+ y_line_buf[x << 1] = clamp(
+ (27 * y_line_buf[x << 1] +
+ 17 * luma_grain_block[(luma_offset_y + luma_subblock_size) *
+ luma_grain_stride +
+ luma_offset_x] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ y_line_buf[(x << 1) + 1] = clamp(
+ (17 * y_line_buf[(x << 1) + 1] +
+ 27 * luma_grain_block[(luma_offset_y + luma_subblock_size) *
+ luma_grain_stride +
+ luma_offset_x + 1] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ y_line_buf[luma_stride + (x << 1)] = clamp(
+ (27 * y_line_buf[luma_stride + (x << 1)] +
+ 17 * luma_grain_block[(luma_offset_y + luma_subblock_size + 1) *
+ luma_grain_stride +
+ luma_offset_x] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ y_line_buf[luma_stride + (x << 1) + 1] = clamp(
+ (17 * y_line_buf[luma_stride + (x << 1) + 1] +
+ 27 * luma_grain_block[(luma_offset_y + luma_subblock_size + 1) *
+ luma_grain_stride +
+ luma_offset_x + 1] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+
+ cb_line_buf[x] = clamp(
+ (23 * cb_col_buf[x] +
+ 22 * cb_grain_block[(chroma_offset_y + chroma_subblock_size) *
+ chroma_grain_stride +
+ chroma_offset_x] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ cr_line_buf[x] = clamp(
+ (23 * cr_col_buf[x] +
+ 22 * cr_grain_block[(chroma_offset_y + chroma_subblock_size) *
+ chroma_grain_stride +
+ chroma_offset_x] +
+ 16) >>
+ 5,
+ grain_min, grain_max);
+ }
+
+ for (int m = overlap && x ? 1 : 0;
+ m < AOMMIN(chroma_subblock_size + 1, width / 2 - x); m++) {
+ y_line_buf[(x + m) << 1] =
+ luma_grain_block[(luma_offset_y + luma_subblock_size) *
+ luma_grain_stride +
+ luma_offset_x + (m << 1)];
+ y_line_buf[((x + m) << 1) + 1] =
+ luma_grain_block[(luma_offset_y + luma_subblock_size) *
+ luma_grain_stride +
+ luma_offset_x + (m << 1) + 1];
+ y_line_buf[luma_stride + ((x + m) << 1)] =
+ luma_grain_block[(luma_offset_y + luma_subblock_size + 1) *
+ luma_grain_stride +
+ luma_offset_x + (m << 1)];
+ y_line_buf[luma_stride + ((x + m) << 1) + 1] =
+ luma_grain_block[(luma_offset_y + luma_subblock_size + 1) *
+ luma_grain_stride +
+ luma_offset_x + (m << 1) + 1];
+
+ cb_line_buf[x + m] =
+ cb_grain_block[(chroma_offset_y + chroma_subblock_size) *
+ chroma_grain_stride +
+ chroma_offset_x + m];
+ cr_line_buf[x + m] =
+ cr_grain_block[(chroma_offset_y + chroma_subblock_size) *
+ chroma_grain_stride +
+ chroma_offset_x + m];
+ }
+
+ for (int n = 0; n < AOMMIN(chroma_subblock_size + 1, height / 2 - y);
+ n++) {
+ y_col_buf[(n << 1) * 2] =
+ luma_grain_block[(luma_offset_y + (n << 1)) * luma_grain_stride +
+ luma_offset_x + luma_subblock_size];
+ y_col_buf[(n << 1) * 2 + 1] =
+ luma_grain_block[(luma_offset_y + (n << 1)) * luma_grain_stride +
+ luma_offset_x + luma_subblock_size + 1];
+
+ y_col_buf[((n << 1) + 1) * 2] =
+ luma_grain_block[(luma_offset_y + (n << 1) + 1) *
+ luma_grain_stride +
+ luma_offset_x + luma_subblock_size];
+ y_col_buf[((n << 1) + 1) * 2 + 1] =
+ luma_grain_block[(luma_offset_y + (n << 1) + 1) *
+ luma_grain_stride +
+ luma_offset_x + luma_subblock_size + 1];
+
+ cb_col_buf[n] =
+ cb_grain_block[(chroma_offset_y + n) * chroma_grain_stride +
+ chroma_offset_x + chroma_subblock_size];
+ cr_col_buf[n] =
+ cr_grain_block[(chroma_offset_y + n) * chroma_grain_stride +
+ chroma_offset_x + chroma_subblock_size];
+ }
+ }
+ }
+ }
+
+ dealloc_arrays(params, &pred_pos_luma, &pred_pos_chroma, &luma_grain_block,
+ &cb_grain_block, &cr_grain_block, &y_line_buf, &cb_line_buf,
+ &cr_line_buf, &y_col_buf, &cb_col_buf, &cr_col_buf);
+}
diff --git a/aom_dsp/grain_synthesis.h b/aom_dsp/grain_synthesis.h
new file mode 100644
index 0000000..6bad640
--- /dev/null
+++ b/aom_dsp/grain_synthesis.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/*!\file
+ * \brief Describes film grain parameters and film grain synthesis
+ *
+ */
+#ifndef AOM_AOM_GRAIN_SYNTHESIS_H_
+#define AOM_AOM_GRAIN_SYNTHESIS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom/aom_image.h"
+
+/*!\brief Structure containing film grain synthesis parameters for a frame
+ *
+ * This structure contains input parameters for film grain synthesis
+ */
+typedef struct {
+ int apply_grain;
+
+ int update_parameters;
+
+ // 8 bit values
+ int scaling_points_y[14][2];
+ int num_y_points; // value: 0..14
+
+ // 8 bit values
+ int scaling_points_cb[10][2];
+ int num_cb_points; // value: 0..10
+
+ // 8 bit values
+ int scaling_points_cr[10][2];
+ int num_cr_points; // value: 0..10
+
+ int scaling_shift; // values : 8..11
+
+ int ar_coeff_lag; // values: 0..3
+
+ // 8 bit values
+ int ar_coeffs_y[24];
+ int ar_coeffs_cb[25];
+ int ar_coeffs_cr[25];
+
+ // Shift value: AR coeffs range
+ // 6: [-2, 2)
+ // 7: [-1, 1)
+ // 8: [-0.5, 0.5)
+ // 9: [-0.25, 0.25)
+ int ar_coeff_shift; // values : 6..9
+
+ int cb_mult; // 8 bits
+ int cb_luma_mult; // 8 bits
+ int cb_offset; // 9 bits
+
+ int cr_mult; // 8 bits
+ int cr_luma_mult; // 8 bits
+ int cr_offset; // 9 bits
+
+ int overlap_flag;
+
+ int clip_to_restricted_range;
+
+ int bit_depth; // video bit depth
+
+ int chroma_scaling_from_luma;
+
+ uint16_t random_seed;
+} aom_film_grain_t;
+
+/*!\brief Add film grain
+ *
+ * Add film grain to an image
+ *
+ * \param[in] grain_params Grain parameters
+ * \param[in] luma luma plane
+ * \param[in] cb cb plane
+ * \param[in] cr cr plane
+ * \param[in] height luma plane height
+ * \param[in] width luma plane width
+ * \param[in] luma_stride luma plane stride
+ * \param[in] chroma_stride chroma plane stride
+ */
+void av1_add_film_grain_run(aom_film_grain_t *grain_params, uint8_t *luma,
+ uint8_t *cb, uint8_t *cr, int height, int width,
+ int luma_stride, int chroma_stride,
+ int use_high_bit_depth);
+
+/*!\brief Add film grain
+ *
+ * Add film grain to an image
+ *
+ * \param[in] grain_params Grain parameters
+ * \param[in] src Source image
+ * \param[in] dst Resulting image with grain
+ */
+void av1_add_film_grain(aom_film_grain_t *grain_params, aom_image_t *src,
+ aom_image_t *dst);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AOM_AOM_GRAIN_SYNTHESIS_H_