| #!/usr/bin/python |
| # |
| # Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
| # |
| # This source code is subject to the terms of the BSD 2 Clause License and |
| # the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| # was not distributed with this source code in the LICENSE file, you can |
| # obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| # Media Patent License 1.0 was not distributed with this source code in the |
| # PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| # |
| |
| """Converts video encoding result data from text files to visualization |
| data source.""" |
| |
| __author__ = "jzern@google.com (James Zern)," |
| __author__ += "jimbankoski@google.com (Jim Bankoski)" |
| |
| import fnmatch |
| import numpy as np |
| import scipy as sp |
| import scipy.interpolate |
| import os |
| import re |
| import string |
| import sys |
| import math |
| import warnings |
| |
| import gviz_api |
| |
| from os.path import basename |
| from os.path import splitext |
| |
# Silence the noisy numeric warnings this script can trigger.
# RankWarning lives in np.exceptions on NumPy >= 2.0 (the top-level alias was
# removed there) and directly on the numpy module in older releases, so look
# it up in both places instead of failing at import time.
_rank_warning = getattr(getattr(np, 'exceptions', np), 'RankWarning',
                        getattr(np, 'RankWarning', None))
if _rank_warning is not None:
    warnings.simplefilter('ignore', _rank_warning)
warnings.simplefilter('ignore', RuntimeWarning)
| |
def bdsnr2(metric_set1, metric_set2):
    """
    BJONTEGAARD  Bjontegaard metric calculation adapted
    Bjontegaard's snr metric allows to compute the average gain in decibels
    between two rate-distortion curves [1].  This is an adaptation of that
    method that fixes inconsistencies when the curve fit operation goes awry
    by replacing the curve fit function with a Piecewise Cubic Hermite
    Interpolating Polynomial and then integrating that by evaluating that
    function at small intervals using the trapezoid method to calculate
    the integral.

    metric_set1 - list of tuples ( bitrate, metric ) for first graph
    metric_set2 - list of tuples ( bitrate, metric ) for second graph

    Returns the average of (metric of set 2 - metric of set 1) over the
    log-bitrate range covered by both curves.  Returns 0.0 when either set
    is empty, when the curves do not overlap, or when the data cannot be
    interpolated (e.g. a zero bitrate or duplicate bitrates).  The input
    lists are not modified.
    """
    if not metric_set1 or not metric_set2:
        return 0.0

    try:
        # pchip_interpolate requires the x values in increasing order.  The
        # x axis here is the (log) bitrate, which is the first tuple member,
        # so a plain tuple sort is enough.  sorted() works on a copy so the
        # caller's lists keep their order.
        points1 = sorted(metric_set1)
        points2 = sorted(metric_set2)

        # Pull the log of the rate and clamped psnr from the metric sets.
        log_rate1 = [math.log(x[0]) for x in points1]
        metric1 = [100.0 if x[1] == float('inf') else x[1] for x in points1]
        log_rate2 = [math.log(x[0]) for x in points2]
        metric2 = [100.0 if x[1] == float('inf') else x[1] for x in points2]

        # Integration interval.  This metric only works on the area that's
        # overlapping.  Extrapolation of these things is sketchy so we avoid.
        min_int = max(min(log_rate1), min(log_rate2))
        max_int = min(max(log_rate1), max(log_rate2))

        # No overlap means no sensible metric possible.
        if max_int <= min_int:
            return 0.0

        # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation
        # to create 100 new sample points separated by interval.
        samples, interval = np.linspace(min_int, max_int, num=100,
                                        retstep=True)
        v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
        v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)

        # Trapezoid rule on evenly spaced samples, written out by hand so it
        # does not depend on np.trapz (deprecated in NumPy 2.0).
        int_v1 = interval * (np.sum(v1) - 0.5 * (v1[0] + v1[-1]))
        int_v2 = interval * (np.sum(v2) - 0.5 * (v2[0] + v2[-1]))

        # Calculate the average improvement.
        avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

    except (TypeError, ZeroDivisionError, ValueError):
        return 0.0

    return float(avg_exp_diff)
| |
def bdrate2(metric_set1, metric_set2):
    """
    BJONTEGAARD  Bjontegaard metric calculation adapted
    Bjontegaard's metric allows to compute the average % saving in bitrate
    between two rate-distortion curves [1].  This is an adaptation of that
    method that fixes inconsistencies when the curve fit operation goes awry
    by replacing the curve fit function with a Piecewise Cubic Hermite
    Interpolating Polynomial and then integrating that by evaluating that
    function at small intervals using the trapezoid method to calculate
    the integral.

    metric_set1 - list of tuples ( bitrate, metric ) for first graph
    metric_set2 - list of tuples ( bitrate, metric ) for second graph

    Returns the average bitrate difference of set 2 relative to set 1, as a
    percentage, over the metric range covered by both curves.  Returns 0.0
    when either set is empty, when the curves do not overlap, or when the
    data cannot be interpolated (e.g. a zero bitrate or repeated metric
    values).  The input lists are not modified.
    """
    if not metric_set1 or not metric_set2:
        return 0.0

    try:
        # pchip_interpolate requires the x values in increasing order.  The
        # x axis here is the metric, not the bitrate, so sort by metric.
        # sorted() works on a copy so the caller's lists keep their order.
        points1 = sorted(metric_set1, key=lambda tup: tup[1])
        points2 = sorted(metric_set2, key=lambda tup: tup[1])

        # Pull the log of the rate and clamped psnr from the metric sets.
        log_rate1 = [math.log(x[0]) for x in points1]
        metric1 = [100.0 if x[1] == float('inf') else x[1] for x in points1]
        log_rate2 = [math.log(x[0]) for x in points2]
        metric2 = [100.0 if x[1] == float('inf') else x[1] for x in points2]

        # Integration interval.  This metric only works on the area that's
        # overlapping.  Extrapolation of these things is sketchy so we avoid.
        min_int = max(min(metric1), min(metric2))
        max_int = min(max(metric1), max(metric2))

        # No overlap means no sensible metric possible.
        if max_int <= min_int:
            return 0.0

        # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation
        # to create 100 new sample points separated by interval.
        samples, interval = np.linspace(min_int, max_int, num=100,
                                        retstep=True)
        v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
        v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)

        # Trapezoid rule on evenly spaced samples, written out by hand so it
        # does not depend on np.trapz (deprecated in NumPy 2.0).
        int_v1 = interval * (np.sum(v1) - 0.5 * (v1[0] + v1[-1]))
        int_v2 = interval * (np.sum(v2) - 0.5 * (v2[0] + v2[-1]))

        # Calculate the average improvement (still in the log domain).
        avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

    except (TypeError, ZeroDivisionError, ValueError):
        return 0.0

    # Leave the log domain and convert the ratio to a percentage.
    avg_diff = (math.exp(avg_exp_diff) - 1) * 100

    return avg_diff
| |
| |
| |
def FillForm(string_for_substitution, dictionary_of_vars):
    """
    This function substitutes all matches of the command string //%% ... %%//
    with the variable represented by ... .

    string_for_substitution - template text containing the placeholders
    dictionary_of_vars - mapping from placeholder name to replacement string

    Returns the template with every placeholder replaced.  Raises KeyError
    when a placeholder names a key that is missing from dictionary_of_vars.
    """
    def _lookup(match):
        # Returning the value from a function makes re.sub insert it
        # verbatim; a plain replacement string would have its backslash
        # escapes interpreted, which corrupts JSON/JavaScript payloads.
        return dictionary_of_vars[match.group(1)]

    # Non-greedy so that several placeholders on one line are matched
    # individually rather than as one span from the first to the last.
    return re.sub(r"//%%(.*?)%%//", _lookup, string_for_substitution)
| |
| |
def HasMetrics(line):
    """
    Returns True when the first word on the line starts with a digit.

    In the metrics files shown in the HandleFiles sample, header lines start
    with a word ("Bitrate ...") while data lines start with a number, so this
    separates data rows from headers and from blank lines.
    """
    # split() with no arguments ignores leading whitespace and returns an
    # empty list for empty or all-blank lines.
    words = line.split()
    return bool(words) and words[0][:1].isdigit()
| |
def GetMetrics(file_name):
    """
    Returns the whitespace-separated words of the first line of file_name.

    HandleFiles uses the result as the list of metric column names.  An
    empty file yields an empty list.
    """
    # The with block closes the file even if reading fails.
    with open(file_name, "r") as metric_file:
        return metric_file.readline().split()
| |
def ParseMetricFile(file_name, metric_column):
    """
    Reads the data rows of a metrics file as ( bitrate, metric ) tuples.

    file_name - path of the metrics file to read
    metric_column - index of the column holding the metric of interest

    Only lines accepted by HasMetrics are used.  The bitrate is taken from
    column 0.  When the requested column is missing or is not a number the
    metric is recorded as 0.  Returns a sorted list of unique tuples.
    """
    points = set()
    with open(file_name, "r") as metric_file:
        for line in metric_file:
            if not HasMetrics(line):
                continue
            fields = line.split()
            bitrate = float(fields[0])
            try:
                value = float(fields[metric_column])
            except (IndexError, ValueError):
                # Column absent on this row, or not parseable as a number.
                value = 0
            points.add((bitrate, value))
    return sorted(points)
| |
| |
def FileBetter(file_name_1, file_name_2, metric_column, method):
    """
    Compares the metric data of two files and returns a single number
    describing how they differ.

    file_name_1, file_name_2 - metrics files to compare
    metric_column - index of the metric column to compare
    method - 'avg' for the point-by-point bitrate comparison done by
             GraphBetter, 'dsnr' for bdsnr2, anything else for bdrate2
    """
    # Parse both files into sorted lists of unique ( bitrate, metric ) tuples.
    metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
    metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)

    def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
        """
        For every point of the first set, find a pair of neighbouring points
        in the second set whose metrics bracket it, linearly interpolate the
        second set's bitrate at that metric, and average the relative
        bitrate differences.  Every pair is scanned for every point; the
        lists are small."""
        running_total = 0.0
        matched = 0
        neighbours = list(zip(metric_set2_sorted, metric_set2_sorted[1:]))

        for bitrate, metric in metric_set1_sorted:
            if bitrate == 0:
                continue

            for (rate_lo, metric_lo), (rate_hi, metric_hi) in neighbours:
                # Only a pair that brackets our metric is of interest.
                if not (metric_lo < metric <= metric_hi):
                    continue

                # Slope of bitrate against metric across the pair.
                metric_span = metric_hi - metric_lo
                if metric_span != 0:
                    slope = (rate_hi - rate_lo) / metric_span
                else:
                    slope = 0

                estimate = rate_lo + (metric - metric_lo) * slope
                if estimate == 0:
                    # Unusable estimate; keep looking at later pairs.
                    continue

                # The difference is expressed relative to whichever set is
                # the base.
                if base_is_set_2 == 0:
                    base_rate = bitrate
                else:
                    base_rate = estimate

                running_total += (bitrate - estimate) / base_rate
                matched += 1
                break

        # Average over the points that found a bracketing pair.
        if matched == 0:
            return 0.0
        return running_total / matched

    if method == 'avg':
        # Be fair to both graphs by testing all the points in each.
        forward = GraphBetter(metric_set1_sorted, metric_set2_sorted, 1)
        backward = GraphBetter(metric_set2_sorted, metric_set1_sorted, 0)
        return 50 * (forward - backward)

    if method == 'dsnr':
        return bdsnr2(metric_set1_sorted, metric_set2_sorted)

    return bdrate2(metric_set2_sorted, metric_set1_sorted)
| |
| |
def HandleFiles(variables):
    """
    This script creates html for displaying metric data produced from data
    in a video stats file, as created by the AOM project when enable_psnr
    is turned on:

    Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]

    The script parses each metrics file [see below] that matches the
    statfile_pattern in the baseline directory and looks for the file that
    matches that same file in each of the sub_dirs, and compares the resultant
    metrics bitrate, avg psnr, glb psnr, and ssim.

    It provides a table in which each row is a file in the baseline directory,
    and a column for each subdir, with the cells representing how that clip
    compares to baseline for that subdir.  A graph is given for each which
    compares filesize to that metric.  If you click on a point in the graph it
    zooms in on that point.

    a SAMPLE metrics file:

    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
     25.911   38.242   38.104   38.258   38.121   75.790    14103
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
     49.982   41.264   41.129   41.255   41.122   83.993    19817
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
     74.967   42.911   42.767   42.899   42.756   87.928    17332
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
    100.012   43.983   43.838   43.881   43.738   89.695    25389
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
    149.980   45.338   45.203   45.184   45.043   91.591    25438
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
    199.852   46.225   46.123   46.113   45.999   92.679    28302
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
    249.922   46.864   46.773   46.777   46.673   93.334    27244
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
    299.998   47.366   47.281   47.317   47.220   93.844    27137
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
    349.769   47.746   47.677   47.722   47.648   94.178    32226
    Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
    399.773   48.032   47.971   48.013   47.946   94.362    36203

    sample use:
    visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
    """
    # NOTE: the template is filled from vars() at the end of this function,
    # so the names of the string locals it refers to (metrics_js, snrs,
    # formatters, filestable_avg, filestable_dpsnr, filestable_drate) are
    # part of the template contract and must not be renamed.

    # The template file is the html file into which we will write the
    # data from the stats file, formatted correctly for the gviz_api.
    with open(variables[1], "r") as template_file:
        page_template = template_file.read()

    # This is the path match pattern for finding stats files amongst
    # all the other files it could be.  eg: *.stt
    file_pattern = variables[2]

    # This is the directory with files that we will use to do the comparison
    # against.
    baseline_dir = variables[3]
    snrs = ''
    filestable = {'dsnr': '', 'drate': '', 'avg': ''}

    # Dirs is directories after the baseline to compare to the base.
    dirs = variables[4:]

    # Find the metric files in the baseline directory.
    dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))
    if not dir_list:
        # Same exception type the unguarded dir_list[0] used to raise, but
        # with a message that says what is actually wrong.
        raise IndexError("no files matching %r found in %r" %
                         (file_pattern, baseline_dir))

    # The first line of the first file supplies the metric column names.
    metrics = GetMetrics(os.path.join(baseline_dir, dir_list[0]))

    metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'

    # Column 0 is the bitrate; every other column is a metric to compare.
    for column in range(1, len(metrics)):

        for metric in ['avg', 'dsnr', 'drate']:
            description = {"file": ("string", "File")}

            # Go through each directory and add a column header to our
            # description.
            countoverall = {}
            sumoverall = {}

            for directory in dirs:
                description[directory] = ("number", directory)
                countoverall[directory] = 0
                sumoverall[directory] = 0.0

            # Data holds the data for the visualization, name given comes
            # from gviz_api sample code.
            data = []
            for filename in dir_list:
                row = {'file': splitext(basename(filename))[0]}
                baseline_file_name = os.path.join(baseline_dir, filename)

                # Read the metric file from each of the directories in our
                # list.
                for directory in dirs:
                    metric_file_name = os.path.join(directory, filename)

                    # If there is a metric file in the current directory,
                    # open it and calculate its overall difference between
                    # it and the baseline directory's metric file.
                    if os.path.isfile(metric_file_name):
                        overall = FileBetter(baseline_file_name,
                                             metric_file_name,
                                             column, metric)
                        row[directory] = overall

                        sumoverall[directory] += overall
                        countoverall[directory] += 1

                data.append(row)

            # Add the overall numbers.
            row = {"file": "OVERALL"}
            for directory in dirs:
                # A directory with none of the files has no average; leave
                # its cell out, exactly as per-file rows do for a missing
                # file, instead of dividing by zero.
                if countoverall[directory]:
                    row[directory] = (sumoverall[directory] /
                                      countoverall[directory])
            data.append(row)

            # write the tables out
            data_table = gviz_api.DataTable(description)
            data_table.LoadData(data)

            filestable[metric] = (filestable[metric] + "filestable_" +
                                  metric + "[" + str(column) + "]=" +
                                  data_table.ToJSon(
                                      columns_order=["file"] + dirs) + "\n")

        # Now we collect all the data for all the graphs.  First the column
        # headers which will be Datarate and then each directory.
        description = {"datarate": ("number", "Datarate")}
        for directory in dirs:
            description[directory] = ("number", directory)

        description[baseline_dir] = ("number", baseline_dir)

        snrs = snrs + "snrs[" + str(column) + "] = ["

        # Now collect the data for the graphs, file by file.
        for filename in dir_list:

            data = []

            # Collect the file in each directory and store all of its metrics
            # in the associated gviz metrics table.
            all_dirs = dirs + [baseline_dir]
            for directory in all_dirs:

                metric_file_name = os.path.join(directory, filename)
                if not os.path.isfile(metric_file_name):
                    continue

                # Read and parse the metrics file storing it to the data
                # we'll use for the gviz_api.Datatable.
                points = ParseMetricFile(metric_file_name, column)
                for bitrate, value in points:
                    data.append({"datarate": bitrate, directory: value})

            data_table = gviz_api.DataTable(description)
            data_table.LoadData(data)
            snrs = snrs + "'" + data_table.ToJSon(
                columns_order=tuple(["datarate", baseline_dir] + dirs)) + "',"

        snrs = snrs + "]\n"

    # Defined after the loop so they exist even when the metrics files have
    # no metric columns; the values are the same as assigning on each pass.
    filestable_avg = filestable['avg']
    filestable_dpsnr = filestable['dsnr']
    filestable_drate = filestable['drate']

    formatters = ""
    for i in range(len(dirs)):
        formatters = "%s formatter.format(better, %d);" % (formatters, i + 1)

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(FillForm(page_template, vars()))
| |
# HandleFiles indexes argv[1], argv[2] and argv[3] (template, file pattern,
# baseline directory), so fewer than four entries can only end in an
# IndexError; print the usage text instead.
if len(sys.argv) < 4:
    print(HandleFiles.__doc__)
else:
    HandleFiles(sys.argv)