Perform first pass analysis in av1_init_second_pass.
The estimated noise variances and correlation coefficients of the frames are
now available for all twopass stats right after the second pass is initialized.
Change-Id: I9e34784e5e8ce4373ef6834c497a3a99bfb4e968
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index ea1b89d..f1b859d 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -108,6 +108,9 @@
section->new_mv_count = 0.0;
section->count = 0.0;
section->duration = 1.0;
+ section->is_flash = 0;
+ section->noise_var = 0;
+ section->cor_coeff = 1.0;
}
void av1_accumulate_stats(FIRSTPASS_STATS *section,
@@ -814,6 +817,9 @@
fps.inactive_zone_rows = (double)stats->image_data_start_row;
fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix
fps.raw_error_stdev = raw_err_stdev;
+ fps.is_flash = 0;
+ fps.noise_var = (double)0;
+ fps.cor_coeff = (double)1.0;
if (stats->mv_count > 0) {
fps.MVr = (double)stats->sum_mvr / stats->mv_count;
diff --git a/av1/encoder/firstpass.h b/av1/encoder/firstpass.h
index e3706d9..46c13b3 100644
--- a/av1/encoder/firstpass.h
+++ b/av1/encoder/firstpass.h
@@ -152,6 +152,18 @@
* standard deviation for (0, 0) motion prediction error
*/
double raw_error_stdev;
+ /*!
+ * Whether the frame contains a flash
+ */
+ int64_t is_flash;
+ /*!
+ * Estimated noise variance
+ */
+ double noise_var;
+ /*!
+ * Correlation coefficient with the previous frame
+ */
+ double cor_coeff;
} FIRSTPASS_STATS;
/*!\cond */
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 209c744..2fe41df 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -1016,17 +1016,16 @@
0.242, 0.061, 0.006 };
// Smooth filter intra_error and coded_error in firstpass stats.
-// If ignore[i]==1, the ith element should not be used in the filtering.
-static void smooth_filter_stats(const FIRSTPASS_STATS *stats, const int *ignore,
- int start_idx, int last_idx,
- double *filt_intra_err,
+// If stats[i].is_flash==1, the ith element should not be used in the filtering.
+static void smooth_filter_stats(const FIRSTPASS_STATS *stats, int start_idx,
+ int last_idx, double *filt_intra_err,
double *filt_coded_err) {
int i, j;
for (i = start_idx; i <= last_idx; i++) {
double total_wt = 0;
for (j = -HALF_FILT_LEN; j <= HALF_FILT_LEN; j++) {
int idx = AOMMIN(AOMMAX(i + j, start_idx), last_idx);
- if (ignore[idx]) continue;
+ if (stats[idx].is_flash) continue;
filt_intra_err[i] +=
smooth_filt[j + HALF_FILT_LEN] * stats[idx].intra_error;
@@ -1043,7 +1042,7 @@
for (j = -HALF_FILT_LEN; j <= HALF_FILT_LEN; j++) {
int idx = AOMMIN(AOMMAX(i + j, start_idx), last_idx);
// Coded error involves idx and idx - 1.
- if (ignore[idx] || (idx > 0 && ignore[idx - 1])) continue;
+ if (stats[idx].is_flash || (idx > 0 && stats[idx - 1].is_flash)) continue;
filt_coded_err[i] +=
smooth_filt[j + HALF_FILT_LEN] * stats[idx].coded_error;
@@ -1072,7 +1071,7 @@
}
static int find_next_scenecut(const FIRSTPASS_STATS *const stats_start,
- int first, int last, int *ignore) {
+ int first, int last) {
// Identify unstable areas caused by scenecuts.
// Find the max and 2nd max coded error, and the average of the rest frames.
// If there is only one frame that yields a huge coded error, it is likely a
@@ -1083,14 +1082,16 @@
if (last - first == 0) return -1;
for (int i = first; i <= last; i++) {
- if (ignore[i] || (i > 0 && ignore[i - 1])) continue;
+ if (stats_start[i].is_flash || (i > 0 && stats_start[i - 1].is_flash))
+ continue;
double temp_intra = AOMMAX(stats_start[i].intra_error, 0.01);
this_ratio = stats_start[i].coded_error / temp_intra;
// find the avg ratio in the preceding neighborhood
max_prev_ratio = 0;
max_prev_coded = 0;
for (int j = AOMMAX(first, i - HALF_WIN); j < i; j++) {
- if (ignore[j] || (j > 0 && ignore[j - 1])) continue;
+ if (stats_start[j].is_flash || (j > 0 && stats_start[j - 1].is_flash))
+ continue;
temp_intra = AOMMAX(stats_start[j].intra_error, 0.01);
double temp_ratio = stats_start[j].coded_error / temp_intra;
if (temp_ratio > max_prev_ratio) {
@@ -1104,7 +1105,8 @@
max_next_ratio = 0;
max_next_coded = 0;
for (int j = i + 1; j <= AOMMIN(i + HALF_WIN, last); j++) {
- if (ignore[j] || (j > 0 && ignore[j - 1])) continue;
+ if (stats_start[j].is_flash || (j > 0 && stats_start[j - 1].is_flash))
+ continue;
temp_intra = AOMMAX(stats_start[j].intra_error, 0.01);
double temp_ratio = stats_start[j].coded_error / temp_intra;
if (temp_ratio > max_next_ratio) {
@@ -1137,19 +1139,6 @@
return -1;
}
-static void mark_flashes(const FIRSTPASS_STATS *stats, int start_idx,
- int last_idx, int *is_flash) {
- int i;
- for (i = start_idx; i < last_idx; i++) {
- if (stats[i + 1].pcnt_second_ref > stats[i + 1].pcnt_inter &&
- stats[i + 1].pcnt_second_ref >= 0.5) {
- // this is a new flash frame
- is_flash[i] = 1;
- continue;
- }
- }
-}
-
// Remove the region with index next_region.
// parameter merge: 0: merge with previous; 1: merge with next; 2:
// merge with both, take type from previous if possible
@@ -1222,113 +1211,9 @@
*cur_region_idx = k;
}
-// Estimate the noise variance of each frame from the first pass stats
-// TODO(bohanli): maybe handling of flashes should be done when using the stats,
-// instead of generating them.
-static void estimate_noise(const FIRSTPASS_STATS *stats, const int *is_flash,
- int start, int last, double *noise_arr) {
- double C1, C2, C3, noise;
- int count = 0;
- for (int i = AOMMAX(start, 2); i <= last; i++) {
- noise_arr[i] = 0.0;
- if (is_flash[i] || is_flash[i - 1] || is_flash[i - 2]) continue;
-
- C1 = stats[i - 1].intra_error *
- (stats[i].intra_error - stats[i].coded_error);
- C2 = stats[i - 2].intra_error *
- (stats[i - 1].intra_error - stats[i - 1].coded_error);
- C3 = stats[i - 2].intra_error *
- (stats[i].intra_error - stats[i].sr_coded_error);
- if (C1 <= 0 || C2 <= 0 || C3 <= 0) continue;
- C1 = sqrt(C1);
- C2 = sqrt(C2);
- C3 = sqrt(C3);
-
- noise = stats[i - 1].intra_error - C1 * C2 / C3;
- noise = AOMMAX(noise, 0.01);
- noise_arr[i] = noise;
- count++;
- }
-
- // Copy noise from the neighbor if the noise value is not trustworthy
- for (int i = AOMMAX(start, 2); i <= last; i++) {
- if (is_flash[i] || is_flash[i - 1] || is_flash[i - 2]) continue;
- if (noise_arr[i] < 1) {
- int found = 0;
- for (int c = i + 1; c <= last; c++) {
- if (is_flash[c] || is_flash[c - 1] || is_flash[c - 2] ||
- noise_arr[c] < 1)
- continue;
- found = 1;
- noise_arr[i] = noise_arr[c];
- break;
- }
- if (found) continue;
- for (int c = i - 1; c >= start + 2; c--) {
- if (is_flash[c] || is_flash[c - 1] || is_flash[c - 2] ||
- noise_arr[c] < 1)
- continue;
- found = 1;
- noise_arr[i] = noise_arr[c];
- break;
- }
- if (found) continue;
- noise_arr[i] = 0;
- }
- }
-
- // copy the noise if this is a flash
- for (int i = AOMMAX(start, 2); i <= last; i++) {
- if (is_flash[i] || is_flash[i - 1] || is_flash[i - 2]) {
- int found = 0;
- for (int c = i + 1; c <= last; c++) {
- if (is_flash[c] || is_flash[c - 1] || is_flash[c - 2]) continue;
- found = 1;
- noise_arr[i] = noise_arr[c];
- break;
- }
- if (found) continue;
- for (int c = i - 1; c >= start + 2; c--) {
- if (is_flash[c] || is_flash[c - 1] || is_flash[c - 2]) continue;
- found = 1;
- noise_arr[i] = noise_arr[c];
- break;
- }
- if (found) continue;
- noise_arr[i] = 0;
- }
- }
-
- // if we are at the first 2 frames, copy the noise
- for (int i = start; i < AOMMAX(start, 2); i++) {
- noise_arr[i] = noise_arr[2];
- }
-}
-
-// Estimate correlation coefficient of each frame with its previous frame.
-static void estimate_coeff(const FIRSTPASS_STATS *stats, int start, int last,
- double *noise, double *coeff) {
- for (int i = start; i <= last; i++) {
- const double C =
- sqrt(AOMMAX(stats[i - 1].intra_error *
- (stats[i].intra_error - stats[i].coded_error),
- 0.001));
- const double cor_coeff =
- C / AOMMAX(stats[i - 1].intra_error - noise[i], 0.001);
-
- coeff[i] =
- cor_coeff * sqrt(AOMMAX(stats[i - 1].intra_error - noise[i], 0.001) /
- AOMMAX(stats[i].intra_error - noise[i], 0.001));
- // clip correlation coefficient.
- coeff[i] = AOMMIN(AOMMAX(coeff[i], 0), 1);
- }
-}
-
// Get the average of stats inside a region.
-// Before calling this function, the region's noise variance and correlation
-// coefficients are needed.
static void analyze_region(const FIRSTPASS_STATS *stats, int k,
- REGIONS *regions, double *coeff, double *noise) {
+ REGIONS *regions) {
int i;
regions[k].avg_cor_coeff = 0;
regions[k].avg_sr_fr_ratio = 0;
@@ -1354,25 +1239,26 @@
stats[i].coded_error / (double)(regions[k].last - regions[k].start + 1);
regions[k].avg_cor_coeff +=
- coeff[i] / (double)(regions[k].last - regions[k].start + 1);
+ AOMMAX(stats[i].cor_coeff, 0.001) /
+ (double)(regions[k].last - regions[k].start + 1);
regions[k].avg_noise_var +=
- noise[i] / (double)(regions[k].last - regions[k].start + 1);
+ AOMMAX(stats[i].noise_var, 0.001) /
+ (double)(regions[k].last - regions[k].start + 1);
}
}
// Calculate the regions stats of every region.
static void get_region_stats(const FIRSTPASS_STATS *stats, REGIONS *regions,
- double *coeff, double *noise, int num_regions) {
+ int num_regions) {
for (int k = 0; k < num_regions; k++) {
- analyze_region(stats, k, regions, coeff, noise);
+ analyze_region(stats, k, regions);
}
}
// Find tentative stable regions
static int find_stable_regions(const FIRSTPASS_STATS *stats,
- const double *grad_coded, const int *ignore,
- int this_start, int this_last,
- REGIONS *regions) {
+ const double *grad_coded, int this_start,
+ int this_last, REGIONS *regions) {
int i, j, k = 0;
regions[k].start = this_start;
for (i = this_start; i <= this_last; i++) {
@@ -1382,7 +1268,7 @@
int count = 0;
for (j = -HALF_WIN; j <= HALF_WIN; j++) {
int idx = AOMMIN(AOMMAX(i + j, this_start), this_last);
- if (ignore[idx] || (idx > 0 && ignore[idx - 1])) continue;
+ if (stats[idx].is_flash || (idx > 0 && stats[idx - 1].is_flash)) continue;
mean_intra += stats[idx].intra_error;
var_intra += stats[idx].intra_error * stats[idx].intra_error;
mean_coded += stats[idx].coded_error;
@@ -1456,14 +1342,13 @@
}
static void adjust_unstable_region_bounds(const FIRSTPASS_STATS *stats,
- REGIONS *regions, double *coeff,
- double *noise, int *num_regions) {
+ REGIONS *regions, int *num_regions) {
int i, j, k;
// Remove regions that are too short. Likely noise.
remove_short_regions(regions, num_regions, STABLE_REGION, HALF_WIN);
remove_short_regions(regions, num_regions, HIGH_VAR_REGION, HALF_WIN);
- get_region_stats(stats, regions, coeff, noise, *num_regions);
+ get_region_stats(stats, regions, *num_regions);
// Adjust region boundaries. The thresholds are empirically obtained, but
// overall the performance is not very sensitive to small changes to them.
@@ -1489,7 +1374,7 @@
const int intra_close =
fabs(stats[j].intra_error - avg_intra_err) / avg_intra_err < 0.1;
const int coded_small = stats[j].coded_error / avg_intra_err < 0.1;
- const int coeff_close = coeff[j] > 0.995;
+ const int coeff_close = stats[j].cor_coeff > 0.995;
if (!coeff_close || !coded_small) count_coded--;
if (intra_close && count_coded >= 0 && count_grad >= 0) {
// this frame probably belongs to the previous stable region
@@ -1522,7 +1407,7 @@
fabs(stats[j].intra_error - avg_intra_err) / avg_intra_err < 0.1;
const int coded_small =
stats[j + 1].coded_error / avg_intra_err < 0.1;
- const int coeff_close = coeff[j] > 0.995;
+ const int coeff_close = stats[j].cor_coeff > 0.995;
if (!coeff_close || !coded_small) count_coded--;
if (intra_close && count_coded >= 0 && count_grad >= 0) {
// this frame probably belongs to the next stable region
@@ -1538,7 +1423,7 @@
cleanup_regions(regions, num_regions);
remove_short_regions(regions, num_regions, HIGH_VAR_REGION, HALF_WIN);
- get_region_stats(stats, regions, coeff, noise, *num_regions);
+ get_region_stats(stats, regions, *num_regions);
// If a stable regions has higher error than neighboring high var regions,
// or if the stable region has a lower average correlation,
@@ -1555,7 +1440,7 @@
regions[k].avg_cor_coeff < regions[k + 1].avg_cor_coeff * 0.999)))) {
// merge current region with the previous and next regions
remove_region(2, regions, num_regions, &k);
- analyze_region(stats, k - 1, regions, coeff, noise);
+ analyze_region(stats, k - 1, regions);
} else if (regions[k].type == HIGH_VAR_REGION &&
(regions[k].last - regions[k].start + 1) < 2 * WINDOW_SIZE &&
((k > 0 && // previous regions
@@ -1570,7 +1455,7 @@
regions[k + 1].avg_cor_coeff * 1.001)))) {
// merge current region with the previous and next regions
remove_region(2, regions, num_regions, &k);
- analyze_region(stats, k - 1, regions, coeff, noise);
+ analyze_region(stats, k - 1, regions);
} else {
k++;
}
@@ -1582,9 +1467,7 @@
// Identify blending regions.
static void find_blending_regions(const FIRSTPASS_STATS *stats,
- const int *is_flash, REGIONS *regions,
- int *num_regions, double *coeff,
- double *noise) {
+ REGIONS *regions, int *num_regions) {
int i, k = 0;
// Blending regions will have large content change, therefore will have a
// large consistent change in intra error.
@@ -1599,7 +1482,8 @@
int start = 0, last;
for (i = regions[k].start; i <= regions[k].last; i++) {
// First mark the regions that has consistent large change of intra error.
- if (is_flash[i] || (i > 0 && is_flash[i - 1])) continue;
+ if (k == 0 && i == regions[k].start) continue;
+ if (stats[i].is_flash || (i > 0 && stats[i - 1].is_flash)) continue;
double grad = stats[i].intra_error - stats[i - 1].intra_error;
int large_change = fabs(grad) / AOMMAX(stats[i].intra_error, 0.01) > 0.05;
int this_dir = 0;
@@ -1614,7 +1498,11 @@
insert_region(start, last, BLENDING_REGION, regions, num_regions, &k);
}
dir = this_dir;
- start = i;
+ if (k == 0 && i == regions[k].start + 1) {
+ start = i - 1;
+ } else {
+ start = i;
+ }
}
if (dir != 0) {
last = regions[k].last;
@@ -1625,14 +1513,14 @@
// If the blending region has very low correlation, mark it as high variance
// since we probably cannot benefit from it anyways.
- get_region_stats(stats, regions, coeff, noise, *num_regions);
+ get_region_stats(stats, regions, *num_regions);
for (k = 0; k < *num_regions; k++) {
if (regions[k].type != BLENDING_REGION) continue;
if (regions[k].last == regions[k].start || regions[k].avg_cor_coeff < 0.6 ||
count_stable == 0)
regions[k].type = HIGH_VAR_REGION;
}
- get_region_stats(stats, regions, coeff, noise, *num_regions);
+ get_region_stats(stats, regions, *num_regions);
// It is possible for blending to result in a "dip" in intra error (first
// decrease then increase). Therefore we need to find the dip and combine the
@@ -1661,7 +1549,7 @@
if (regions[k].avg_sr_fr_ratio > ratio_thres) {
regions[k].type = BLENDING_REGION;
remove_region(2, regions, num_regions, &k);
- analyze_region(stats, k - 1, regions, coeff, noise);
+ analyze_region(stats, k - 1, regions);
continue;
}
}
@@ -1719,7 +1607,7 @@
if (to_merge) {
remove_region(0, regions, num_regions, &k);
- analyze_region(stats, k - 1, regions, coeff, noise);
+ analyze_region(stats, k - 1, regions);
continue;
} else {
// These are possibly two separate blending regions. Mark the boundary
@@ -1727,9 +1615,9 @@
int prev_k = k - 1;
insert_region(regions[prev_k].last, regions[prev_k].last,
HIGH_VAR_REGION, regions, num_regions, &prev_k);
- analyze_region(stats, prev_k, regions, coeff, noise);
+ analyze_region(stats, prev_k, regions);
k = prev_k + 1;
- analyze_region(stats, k, regions, coeff, noise);
+ analyze_region(stats, k, regions);
}
}
k++;
@@ -1785,56 +1673,42 @@
// pointing to.
static void identify_regions(const FIRSTPASS_STATS *const stats_start,
int total_frames, int offset, REGIONS *regions,
- int *total_regions, double *cor_coeff,
- double *noise_var) {
+ int *total_regions) {
int k;
if (total_frames <= 1) return;
- double *coeff = cor_coeff + offset;
- double *noise = noise_var + offset;
-
// store the initial decisions
REGIONS temp_regions[MAX_FIRSTPASS_ANALYSIS_FRAMES];
av1_zero_array(temp_regions, MAX_FIRSTPASS_ANALYSIS_FRAMES);
- int is_flash[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
// buffers for filtered stats
double filt_intra_err[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
double filt_coded_err[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
double grad_coded[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
- int cur_region = 0, this_start = 0, this_last = total_frames - 1;
+ int cur_region = 0, this_start = 0, this_last;
- // find possible flash frames
- mark_flashes(stats_start, 0, total_frames - 1, is_flash);
-
- // first get the obvious scenecuts
int next_scenecut = -1;
-
- estimate_noise(stats_start, is_flash, this_start, this_last, noise);
- estimate_coeff(stats_start, this_start, this_last, noise, coeff);
-
do {
+ // first get the obvious scenecuts
next_scenecut =
- find_next_scenecut(stats_start, this_start, total_frames - 1, is_flash);
+ find_next_scenecut(stats_start, this_start, total_frames - 1);
this_last = (next_scenecut >= 0) ? (next_scenecut - 1) : total_frames - 1;
// low-pass filter the needed stats
- smooth_filter_stats(stats_start, is_flash, this_start, this_last,
- filt_intra_err, filt_coded_err);
+ smooth_filter_stats(stats_start, this_start, this_last, filt_intra_err,
+ filt_coded_err);
get_gradient(filt_coded_err, this_start, this_last, grad_coded);
// find tentative stable regions and unstable regions
- int num_regions = find_stable_regions(stats_start, grad_coded, is_flash,
- this_start, this_last, temp_regions);
+ int num_regions = find_stable_regions(stats_start, grad_coded, this_start,
+ this_last, temp_regions);
- adjust_unstable_region_bounds(stats_start, temp_regions, coeff, noise,
- &num_regions);
+ adjust_unstable_region_bounds(stats_start, temp_regions, &num_regions);
- get_region_stats(stats_start, temp_regions, coeff, noise, num_regions);
+ get_region_stats(stats_start, temp_regions, num_regions);
// Try to identify blending regions in the unstable regions
- find_blending_regions(stats_start, is_flash, temp_regions, &num_regions,
- coeff, noise);
+ find_blending_regions(stats_start, temp_regions, &num_regions);
cleanup_blendings(temp_regions, &num_regions);
// The flash points should all be considered high variance points
@@ -1847,7 +1721,7 @@
int start = temp_regions[k].start;
int last = temp_regions[k].last;
for (int i = start; i <= last; i++) {
- if (is_flash[i]) {
+ if (stats_start[i].is_flash) {
insert_region(i, i, HIGH_VAR_REGION, temp_regions, &num_regions, &k);
}
}
@@ -1878,20 +1752,21 @@
} while (next_scenecut >= 0);
*total_regions = cur_region;
- get_region_stats(stats_start, regions, coeff, noise, *total_regions);
+ get_region_stats(stats_start, regions, *total_regions);
for (k = 0; k < *total_regions; k++) {
// If scenecuts are very minor, mark them as high variance.
if (regions[k].type != SCENECUT_REGION ||
regions[k].avg_cor_coeff *
- (1 - noise[regions[k].start] / regions[k].avg_intra_err) <
+ (1 - stats_start[regions[k].start].noise_var /
+ regions[k].avg_intra_err) <
0.8) {
continue;
}
regions[k].type = HIGH_VAR_REGION;
}
cleanup_regions(regions, total_regions);
- get_region_stats(stats_start, regions, coeff, noise, *total_regions);
+ get_region_stats(stats_start, regions, *total_regions);
for (k = 0; k < *total_regions; k++) {
regions[k].start += offset;
@@ -1927,6 +1802,7 @@
TWO_PASS *const twopass = &cpi->ppi->twopass;
FIRSTPASS_STATS next_frame;
const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
+ const FIRSTPASS_STATS *const stats = start_pos - (rc->frames_since_key == 0);
FRAME_INFO *frame_info = &cpi->frame_info;
int i;
@@ -2030,7 +1906,7 @@
// the next gop start from the scenecut with GF
int is_minor_sc =
(regions[scenecut_idx].avg_cor_coeff *
- (1 - rc->noise_var[regions[scenecut_idx].start] /
+ (1 - stats[regions[scenecut_idx].start - offset].noise_var /
regions[scenecut_idx].avg_intra_err) >
0.6);
cur_last = regions[scenecut_idx].last - offset - !is_minor_sc;
@@ -2052,14 +1928,15 @@
double base_score = 0.0;
// Accumulate base_score in
for (int j = cur_start + 1; j < cur_start + min_shrink_int; j++) {
- base_score = (base_score + 1.0) * rc->cor_coeff[j + offset];
+ base_score = (base_score + 1.0) * stats[j].cor_coeff;
}
int met_blending = 0; // Whether we have met blending areas before
int last_blending = 0; // Whether the previous frame if blending
for (int j = cur_start + min_shrink_int; j <= cur_last; j++) {
- base_score = (base_score + 1.0) * rc->cor_coeff[j + offset];
+ base_score = (base_score + 1.0) * stats[j].cor_coeff;
int this_reg =
find_regions_index(regions, num_regions, j + offset);
+ if (this_reg < 0) continue;
// A GOP should include at most 1 blending region.
if (regions[this_reg].type == BLENDING_REGION) {
last_blending = 1;
@@ -2081,20 +1958,23 @@
// following frames
int count_f = 0;
for (int n = j + 1; n <= j + 3 && n <= last_frame; n++) {
- temp_accu_coeff *= rc->cor_coeff[n + offset];
+ if (stats + n >= twopass->stats_buf_ctx->stats_in_end) break;
+ temp_accu_coeff *= stats[n].cor_coeff;
this_score +=
- temp_accu_coeff * (1 - rc->noise_var[n + offset] /
- regions[this_reg].avg_intra_err);
+ temp_accu_coeff *
+ (1 - stats[n].noise_var /
+ AOMMAX(regions[this_reg].avg_intra_err, 0.001));
count_f++;
}
// preceding frames
temp_accu_coeff = 1.0;
- for (int n = j; n > j - 3 * 2 + count_f && n >= first_frame;
- n--) {
- temp_accu_coeff *= rc->cor_coeff[n + offset];
+ for (int n = j; n > j - 3 * 2 + count_f && n > first_frame; n--) {
+ if (stats + n < twopass->stats_buf_ctx->stats_in_start) break;
+ temp_accu_coeff *= stats[n].cor_coeff;
this_score +=
- temp_accu_coeff * (1 - rc->noise_var[n + offset] /
- regions[this_reg].avg_intra_err);
+ temp_accu_coeff *
+ (1 - stats[n].noise_var /
+ AOMMAX(regions[this_reg].avg_intra_err, 0.001));
}
if (this_score > best_score) {
@@ -3505,6 +3385,140 @@
rc->base_frame_target = target_rate;
}
+static void mark_flashes(FIRSTPASS_STATS *first_stats,
+ FIRSTPASS_STATS *last_stats) {
+ FIRSTPASS_STATS *this_stats = first_stats, *next_stats;
+ while (this_stats < last_stats - 1) {
+ next_stats = this_stats + 1;
+ if (next_stats->pcnt_second_ref > next_stats->pcnt_inter &&
+ next_stats->pcnt_second_ref >= 0.5) {
+ this_stats->is_flash = 1;
+ } else {
+ this_stats->is_flash = 0;
+ }
+ this_stats = next_stats;
+ }
+ // We always treat the last one as non-flash.
+ if (last_stats - 1 >= first_stats) {
+ (last_stats - 1)->is_flash = 0;
+ }
+}
+
+// Estimate the noise variance of each frame from the first pass stats
+static void estimate_noise(FIRSTPASS_STATS *first_stats,
+ FIRSTPASS_STATS *last_stats) {
+ FIRSTPASS_STATS *this_stats, *next_stats;
+ double C1, C2, C3, noise;
+ int count = 0;
+ for (this_stats = first_stats + 2; this_stats < last_stats; this_stats++) {
+ this_stats->noise_var = 0.0;
+ // flashes tend to have high correlation of innovations, so ignore them.
+ if (this_stats->is_flash || (this_stats - 1)->is_flash ||
+ (this_stats - 2)->is_flash)
+ continue;
+
+ C1 = (this_stats - 1)->intra_error *
+ (this_stats->intra_error - this_stats->coded_error);
+ C2 = (this_stats - 2)->intra_error *
+ ((this_stats - 1)->intra_error - (this_stats - 1)->coded_error);
+ C3 = (this_stats - 2)->intra_error *
+ (this_stats->intra_error - this_stats->sr_coded_error);
+ if (C1 <= 0 || C2 <= 0 || C3 <= 0) continue;
+ C1 = sqrt(C1);
+ C2 = sqrt(C2);
+ C3 = sqrt(C3);
+
+ noise = (this_stats - 1)->intra_error - C1 * C2 / C3;
+ noise = AOMMAX(noise, 0.01);
+ this_stats->noise_var = noise;
+ count++;
+ }
+
+ // Copy noise from the neighbor if the noise value is not trustworthy
+ for (this_stats = first_stats + 2; this_stats < last_stats; this_stats++) {
+ if (this_stats->is_flash || (this_stats - 1)->is_flash ||
+ (this_stats - 2)->is_flash)
+ continue;
+ if (this_stats->noise_var < 1.0) {
+ int found = 0;
+ // TODO(bohanli): consider expanding to two directions at the same time
+ for (next_stats = this_stats + 1; next_stats < last_stats; next_stats++) {
+ if (next_stats->is_flash || (next_stats - 1)->is_flash ||
+ (next_stats - 2)->is_flash || next_stats->noise_var < 1.0)
+ continue;
+ found = 1;
+ this_stats->noise_var = next_stats->noise_var;
+ break;
+ }
+ if (found) continue;
+ for (next_stats = this_stats - 1; next_stats >= first_stats + 2;
+ next_stats--) {
+ if (next_stats->is_flash || (next_stats - 1)->is_flash ||
+ (next_stats - 2)->is_flash || next_stats->noise_var < 1.0)
+ continue;
+ this_stats->noise_var = next_stats->noise_var;
+ break;
+ }
+ }
+ }
+
+ // copy the noise if this is a flash
+ for (this_stats = first_stats + 2; this_stats < last_stats; this_stats++) {
+ if (this_stats->is_flash || (this_stats - 1)->is_flash ||
+ (this_stats - 2)->is_flash) {
+ int found = 0;
+ for (next_stats = this_stats + 1; next_stats < last_stats; next_stats++) {
+ if (next_stats->is_flash || (next_stats - 1)->is_flash ||
+ (next_stats - 2)->is_flash)
+ continue;
+ found = 1;
+ this_stats->noise_var = next_stats->noise_var;
+ break;
+ }
+ if (found) continue;
+ for (next_stats = this_stats - 1; next_stats >= first_stats + 2;
+ next_stats--) {
+ if (next_stats->is_flash || (next_stats - 1)->is_flash ||
+ (next_stats - 2)->is_flash)
+ continue;
+ this_stats->noise_var = next_stats->noise_var;
+ break;
+ }
+ }
+ }
+
+ // if we are at the first 2 frames, copy the noise
+ for (this_stats = first_stats;
+ this_stats < first_stats + 2 && (first_stats + 2) < last_stats;
+ this_stats++) {
+ this_stats->noise_var = (first_stats + 2)->noise_var;
+ }
+}
+
+// Estimate correlation coefficient of each frame with its previous frame.
+static void estimate_coeff(FIRSTPASS_STATS *first_stats,
+ FIRSTPASS_STATS *last_stats) {
+ FIRSTPASS_STATS *this_stats;
+ for (this_stats = first_stats + 1; this_stats < last_stats; this_stats++) {
+ const double C =
+ sqrt(AOMMAX((this_stats - 1)->intra_error *
+ (this_stats->intra_error - this_stats->coded_error),
+ 0.001));
+ const double cor_coeff =
+ C /
+ AOMMAX((this_stats - 1)->intra_error - this_stats->noise_var, 0.001);
+
+ this_stats->cor_coeff =
+ cor_coeff *
+ sqrt(AOMMAX((this_stats - 1)->intra_error - this_stats->noise_var,
+ 0.001) /
+ AOMMAX(this_stats->intra_error - this_stats->noise_var, 0.001));
+ // clip correlation coefficient.
+ this_stats->cor_coeff = AOMMIN(AOMMAX(this_stats->cor_coeff, 0), 1);
+ }
+ first_stats->cor_coeff = 1.0;
+}
+
void av1_get_second_pass_params(AV1_COMP *cpi,
EncodeFrameParams *const frame_params,
const EncodeFrameInput *const frame_input,
@@ -3631,30 +3645,34 @@
: MAX_GF_LENGTH_LAP;
// Identify regions if needed.
+ // TODO(bohanli): identify regions for all stats available.
if (rc->frames_since_key == 0 || rc->frames_since_key == 1 ||
(rc->frames_till_regions_update - rc->frames_since_key <
rc->frames_to_key &&
rc->frames_till_regions_update - rc->frames_since_key <
max_gop_length + 1)) {
- int is_first_stat =
- twopass->stats_in == twopass->stats_buf_ctx->stats_in_start;
- const FIRSTPASS_STATS *stats_start = twopass->stats_in + is_first_stat;
- // offset of stats_start from the current frame
- int offset = is_first_stat || (rc->frames_since_key == 0);
- // offset of the region indices from the previous key frame
- rc->regions_offset = rc->frames_since_key;
// how many frames we can analyze from this frame
int rest_frames = AOMMIN(rc->frames_to_key + rc->next_is_fwd_key,
MAX_FIRSTPASS_ANALYSIS_FRAMES);
- rest_frames =
- AOMMIN(rest_frames,
- (int)(twopass->stats_buf_ctx->stats_in_end - stats_start + 1) +
- offset);
-
+ rest_frames = AOMMIN(
+ rest_frames, (int)(twopass->stats_buf_ctx->stats_in_end -
+ twopass->stats_in + (rc->frames_since_key == 0)));
rc->frames_till_regions_update = rest_frames;
- identify_regions(stats_start, rest_frames - offset, offset, rc->regions,
- &rc->num_regions, rc->cor_coeff, rc->noise_var);
+ if (cpi->ppi->lap_enabled) {
+ mark_flashes(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ estimate_noise(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ estimate_coeff(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ identify_regions(twopass->stats_in, rest_frames,
+ (rc->frames_since_key == 0), rc->regions,
+ &rc->num_regions);
+ } else {
+ identify_regions(twopass->stats_in - (rc->frames_since_key == 0),
+ rest_frames, 0, rc->regions, &rc->num_regions);
+ }
}
int cur_region_idx =
@@ -3789,6 +3807,13 @@
if (!twopass->stats_buf_ctx->stats_in_end) return;
+ mark_flashes(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ estimate_noise(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ estimate_coeff(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+
stats = twopass->stats_buf_ctx->total_stats;
*stats = *twopass->stats_buf_ctx->stats_in_end;
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
index 9c96c8d..544a71c 100644
--- a/av1/encoder/ratectrl.h
+++ b/av1/encoder/ratectrl.h
@@ -206,8 +206,6 @@
/*!\cond */
int num_regions;
REGIONS regions[MAX_FIRSTPASS_ANALYSIS_FRAMES];
- double cor_coeff[MAX_FIRSTPASS_ANALYSIS_FRAMES];
- double noise_var[MAX_FIRSTPASS_ANALYSIS_FRAMES];
int regions_offset; // offset of regions from the last keyframe
int frames_till_regions_update;
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 9689e08..29cc43a 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -957,10 +957,10 @@
AOMMAX(cpi->rc.frames_to_key - arf_src_offset - 1, 0);
// Number of buffered frames before the to-filter frame.
- const int max_before = AOMMIN(filter_frame_lookahead_idx, key_to_curframe);
+ int max_before = AOMMIN(filter_frame_lookahead_idx, key_to_curframe);
// Number of buffered frames after the to-filter frame.
- const int max_after =
+ int max_after =
AOMMIN(lookahead_depth - filter_frame_lookahead_idx - 1, curframe_to_key);
// Estimate noises for each plane.
@@ -976,22 +976,30 @@
}
// Get quantization factor.
const int q = av1_get_q(cpi);
- // Get correlation estimates from first-pass
- RATE_CONTROL *rc = &cpi->rc;
- const double *coeff = rc->cor_coeff;
- const int offset = rc->regions_offset;
- int cur_frame_idx =
- filter_frame_lookahead_idx + rc->frames_since_key - offset;
-
+ // Get correlation estimates from first-pass.
+ const FIRSTPASS_STATS *stats =
+ cpi->ppi->twopass.stats_in - (cpi->rc.frames_since_key == 0);
double accu_coeff0 = 1.0, accu_coeff1 = 1.0;
for (int i = 1; i <= max_after; i++) {
- accu_coeff1 *= coeff[cur_frame_idx + i];
+ if (stats + filter_frame_lookahead_idx + i >=
+ cpi->ppi->twopass.stats_buf_ctx->stats_in_end) {
+ max_after = i - 1;
+ break;
+ }
+ accu_coeff1 *=
+ AOMMAX(stats[filter_frame_lookahead_idx + i].cor_coeff, 0.001);
}
if (max_after >= 1) {
accu_coeff1 = pow(accu_coeff1, 1.0 / (double)max_after);
}
for (int i = 1; i <= max_before; i++) {
- accu_coeff0 *= coeff[cur_frame_idx - i + 1];
+ if (stats + filter_frame_lookahead_idx - i + 1 <=
+ cpi->ppi->twopass.stats_buf_ctx->stats_in_start) {
+ max_before = i - 1;
+ break;
+ }
+ accu_coeff0 *=
+ AOMMAX(stats[filter_frame_lookahead_idx - i + 1].cor_coeff, 0.001);
}
if (max_before >= 1) {
accu_coeff0 = pow(accu_coeff0, 1.0 / (double)max_before);