|  | #!/usr/bin/python | 
|  | # | 
|  | # Copyright (c) 2016, Alliance for Open Media. All rights reserved. | 
|  | # | 
|  | # This source code is subject to the terms of the BSD 2 Clause License and | 
|  | # the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
|  | # was not distributed with this source code in the LICENSE file, you can | 
|  | # obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
|  | # Media Patent License 1.0 was not distributed with this source code in the | 
|  | # PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
|  | # | 
|  |  | 
|  | """Converts video encoding result data from text files to visualization | 
|  | data source.""" | 
|  |  | 
|  | __author__ = "jzern@google.com (James Zern)," | 
|  | __author__ += "jimbankoski@google.com (Jim Bankoski)" | 
|  |  | 
|  | import fnmatch | 
|  | import numpy as np | 
|  | import scipy as sp | 
|  | import scipy.interpolate | 
|  | import os | 
|  | import re | 
|  | import string | 
|  | import sys | 
|  | import math | 
|  | import warnings | 
|  |  | 
|  | import gviz_api | 
|  |  | 
|  | from os.path import basename | 
|  | from os.path import splitext | 
|  |  | 
|  | warnings.simplefilter('ignore', np.RankWarning) | 
|  | warnings.simplefilter('ignore', RuntimeWarning) | 
|  |  | 
|  | def bdsnr2(metric_set1, metric_set2): | 
|  | """ | 
|  | BJONTEGAARD    Bjontegaard metric calculation adapted | 
|  | Bjontegaard's snr metric allows to compute the average % saving in decibels | 
|  | between two rate-distortion curves [1].  This is an adaptation of that | 
|  | method that fixes inconsistencies when the curve fit operation goes awry | 
|  | by replacing the curve fit function with a Piecewise Cubic Hermite | 
|  | Interpolating Polynomial and then integrating that by evaluating that | 
|  | function at small intervals using the trapezoid method to calculate | 
|  | the integral. | 
|  |  | 
|  | metric_set1 - list of tuples ( bitrate,  metric ) for first graph | 
|  | metric_set2 - list of tuples ( bitrate,  metric ) for second graph | 
|  | """ | 
|  |  | 
|  | if not metric_set1 or not metric_set2: | 
|  | return 0.0 | 
|  |  | 
|  | try: | 
|  |  | 
|  | # pchip_interlopate requires keys sorted by x axis. x-axis will | 
|  | # be our metric not the bitrate so sort by metric. | 
|  | metric_set1.sort() | 
|  | metric_set2.sort() | 
|  |  | 
|  | # Pull the log of the rate and clamped psnr from metric_sets. | 
|  | log_rate1 = [math.log(x[0]) for x in metric_set1] | 
|  | metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1] | 
|  | log_rate2 = [math.log(x[0]) for x in metric_set2] | 
|  | metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2] | 
|  |  | 
|  | # Integration interval.  This metric only works on the area that's | 
|  | # overlapping.   Extrapolation of these things is sketchy so we avoid. | 
|  | min_int = max([min(log_rate1), min(log_rate2)]) | 
|  | max_int = min([max(log_rate1), max(log_rate2)]) | 
|  |  | 
|  | # No overlap means no sensible metric possible. | 
|  | if max_int <= min_int: | 
|  | return 0.0 | 
|  |  | 
|  | # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to | 
|  | # create 100 new samples points separated by interval. | 
|  | lin = np.linspace(min_int, max_int, num=100, retstep=True) | 
|  | interval = lin[1] | 
|  | samples = lin[0] | 
|  | v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples) | 
|  | v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples) | 
|  |  | 
|  | # Calculate the integral using the trapezoid method on the samples. | 
|  | int_v1 = np.trapz(v1, dx=interval) | 
|  | int_v2 = np.trapz(v2, dx=interval) | 
|  |  | 
|  | # Calculate the average improvement. | 
|  | avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int) | 
|  |  | 
|  | except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e: | 
|  | return 0 | 
|  |  | 
|  | return avg_exp_diff | 
|  |  | 
|  | def bdrate2(metric_set1, metric_set2): | 
|  | """ | 
|  | BJONTEGAARD    Bjontegaard metric calculation adapted | 
|  | Bjontegaard's metric allows to compute the average % saving in bitrate | 
|  | between two rate-distortion curves [1].  This is an adaptation of that | 
|  | method that fixes inconsistencies when the curve fit operation goes awry | 
|  | by replacing the curve fit function with a Piecewise Cubic Hermite | 
|  | Interpolating Polynomial and then integrating that by evaluating that | 
|  | function at small intervals using the trapezoid method to calculate | 
|  | the integral. | 
|  |  | 
|  | metric_set1 - list of tuples ( bitrate,  metric ) for first graph | 
|  | metric_set2 - list of tuples ( bitrate,  metric ) for second graph | 
|  | """ | 
|  |  | 
|  | if not metric_set1 or not metric_set2: | 
|  | return 0.0 | 
|  |  | 
|  | try: | 
|  |  | 
|  | # pchip_interlopate requires keys sorted by x axis. x-axis will | 
|  | # be our metric not the bitrate so sort by metric. | 
|  | metric_set1.sort(key=lambda tup: tup[1]) | 
|  | metric_set2.sort(key=lambda tup: tup[1]) | 
|  |  | 
|  | # Pull the log of the rate and clamped psnr from metric_sets. | 
|  | log_rate1 = [math.log(x[0]) for x in metric_set1] | 
|  | metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1] | 
|  | log_rate2 = [math.log(x[0]) for x in metric_set2] | 
|  | metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2] | 
|  |  | 
|  | # Integration interval.  This metric only works on the area that's | 
|  | # overlapping.   Extrapolation of these things is sketchy so we avoid. | 
|  | min_int = max([min(metric1), min(metric2)]) | 
|  | max_int = min([max(metric1), max(metric2)]) | 
|  |  | 
|  | # No overlap means no sensible metric possible. | 
|  | if max_int <= min_int: | 
|  | return 0.0 | 
|  |  | 
|  | # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to | 
|  | # create 100 new samples points separated by interval. | 
|  | lin = np.linspace(min_int, max_int, num=100, retstep=True) | 
|  | interval = lin[1] | 
|  | samples = lin[0] | 
|  | v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples) | 
|  | v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples) | 
|  |  | 
|  | # Calculate the integral using the trapezoid method on the samples. | 
|  | int_v1 = np.trapz(v1, dx=interval) | 
|  | int_v2 = np.trapz(v2, dx=interval) | 
|  |  | 
|  | # Calculate the average improvement. | 
|  | avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int) | 
|  |  | 
|  | except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e: | 
|  | return 0 | 
|  |  | 
|  | # Convert to a percentage. | 
|  | avg_diff = (math.exp(avg_exp_diff) - 1) * 100 | 
|  |  | 
|  | return avg_diff | 
|  |  | 
|  |  | 
|  |  | 
|  | def FillForm(string_for_substitution, dictionary_of_vars): | 
|  | """ | 
|  | This function substitutes all matches of the command string //%% ... %%// | 
|  | with the variable represented by ...  . | 
|  | """ | 
|  | return_string = string_for_substitution | 
|  | for i in re.findall("//%%(.*)%%//", string_for_substitution): | 
|  | return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i], | 
|  | return_string) | 
|  | return return_string | 
|  |  | 
|  |  | 
|  | def HasMetrics(line): | 
|  | """ | 
|  | The metrics files produced by aomenc are started with a B for headers. | 
|  | """ | 
|  | # If the first char of the first word on the line is a digit | 
|  | if len(line) == 0: | 
|  | return False | 
|  | if len(line.split()) == 0: | 
|  | return False | 
|  | if line.split()[0][0:1].isdigit(): | 
|  | return True | 
|  | return False | 
|  |  | 
|  | def GetMetrics(file_name): | 
|  | metric_file = open(file_name, "r") | 
|  | return metric_file.readline().split(); | 
|  |  | 
|  | def ParseMetricFile(file_name, metric_column): | 
|  | metric_set1 = set([]) | 
|  | metric_file = open(file_name, "r") | 
|  | for line in metric_file: | 
|  | metrics = string.split(line) | 
|  | if HasMetrics(line): | 
|  | if metric_column < len(metrics): | 
|  | try: | 
|  | tuple = float(metrics[0]), float(metrics[metric_column]) | 
|  | except: | 
|  | tuple = float(metrics[0]), 0 | 
|  | else: | 
|  | tuple = float(metrics[0]), 0 | 
|  | metric_set1.add(tuple) | 
|  | metric_set1_sorted = sorted(metric_set1) | 
|  | return metric_set1_sorted | 
|  |  | 
|  |  | 
|  | def FileBetter(file_name_1, file_name_2, metric_column, method): | 
|  | """ | 
|  | Compares two data files and determines which is better and by how | 
|  | much. Also produces a histogram of how much better, by PSNR. | 
|  | metric_column is the metric. | 
|  | """ | 
|  | # Store and parse our two files into lists of unique tuples. | 
|  |  | 
|  | # Read the two files, parsing out lines starting with bitrate. | 
|  | metric_set1_sorted = ParseMetricFile(file_name_1, metric_column) | 
|  | metric_set2_sorted = ParseMetricFile(file_name_2, metric_column) | 
|  |  | 
|  |  | 
|  | def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2): | 
|  | """ | 
|  | Search through the sorted metric file for metrics on either side of | 
|  | the metric from file 1.  Since both lists are sorted we really | 
|  | should not have to search through the entire range, but these | 
|  | are small files.""" | 
|  | total_bitrate_difference_ratio = 0.0 | 
|  | count = 0 | 
|  | for bitrate, metric in metric_set1_sorted: | 
|  | if bitrate == 0: | 
|  | continue | 
|  | for i in range(len(metric_set2_sorted) - 1): | 
|  | s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i] | 
|  | s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1] | 
|  | # We have a point on either side of our metric range. | 
|  | if metric > s2_metric_0 and metric <= s2_metric_1: | 
|  |  | 
|  | # Calculate a slope. | 
|  | if s2_metric_1 - s2_metric_0 != 0: | 
|  | metric_slope = ((s2_bitrate_1 - s2_bitrate_0) / | 
|  | (s2_metric_1 - s2_metric_0)) | 
|  | else: | 
|  | metric_slope = 0 | 
|  |  | 
|  | estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) * | 
|  | metric_slope) | 
|  |  | 
|  | if estimated_s2_bitrate == 0: | 
|  | continue | 
|  | # Calculate percentage difference as given by base. | 
|  | if base_is_set_2 == 0: | 
|  | bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) / | 
|  | bitrate) | 
|  | else: | 
|  | bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) / | 
|  | estimated_s2_bitrate) | 
|  |  | 
|  | total_bitrate_difference_ratio += bitrate_difference_ratio | 
|  | count += 1 | 
|  | break | 
|  |  | 
|  | # Calculate the average improvement between graphs. | 
|  | if count != 0: | 
|  | avg = total_bitrate_difference_ratio / count | 
|  |  | 
|  | else: | 
|  | avg = 0.0 | 
|  |  | 
|  | return avg | 
|  |  | 
|  | # Be fair to both graphs by testing all the points in each. | 
|  | if method == 'avg': | 
|  | avg_improvement = 50 * ( | 
|  | GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) - | 
|  | GraphBetter(metric_set2_sorted, metric_set1_sorted, 0)) | 
|  | elif method == 'dsnr': | 
|  | avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted) | 
|  | else: | 
|  | avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted) | 
|  |  | 
|  | return avg_improvement | 
|  |  | 
|  |  | 
|  | def HandleFiles(variables): | 
|  | """ | 
|  | This script creates html for displaying metric data produced from data | 
|  | in a video stats file,  as created by the AOM project when enable_psnr | 
|  | is turned on: | 
|  |  | 
|  | Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..] | 
|  |  | 
|  | The script parses each metrics file [see below] that matches the | 
|  | statfile_pattern  in the baseline directory and looks for the file that | 
|  | matches that same file in each of the sub_dirs, and compares the resultant | 
|  | metrics bitrate, avg psnr, glb psnr, and ssim. " | 
|  |  | 
|  | It provides a table in which each row is a file in the line directory, | 
|  | and a column for each subdir, with the cells representing how that clip | 
|  | compares to baseline for that subdir.   A graph is given for each which | 
|  | compares file size to that metric.  If you click on a point in the graph it | 
|  | zooms in on that point. | 
|  |  | 
|  | a SAMPLE metrics file: | 
|  |  | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 25.911   38.242   38.104   38.258   38.121   75.790    14103 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 49.982   41.264   41.129   41.255   41.122   83.993    19817 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 74.967   42.911   42.767   42.899   42.756   87.928    17332 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 100.012   43.983   43.838   43.881   43.738   89.695    25389 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 149.980   45.338   45.203   45.184   45.043   91.591    25438 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 199.852   46.225   46.123   46.113   45.999   92.679    28302 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 249.922   46.864   46.773   46.777   46.673   93.334    27244 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 299.998   47.366   47.281   47.317   47.220   93.844    27137 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 349.769   47.746   47.677   47.722   47.648   94.178    32226 | 
|  | Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
|  | 399.773   48.032   47.971   48.013   47.946   94.362    36203 | 
|  |  | 
|  | sample use: | 
|  | visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html | 
|  | """ | 
|  |  | 
|  | # The template file is the html file into which we will write the | 
|  | # data from the stats file, formatted correctly for the gviz_api. | 
|  | template_file = open(variables[1], "r") | 
|  | page_template = template_file.read() | 
|  | template_file.close() | 
|  |  | 
|  | # This is the path match pattern for finding stats files amongst | 
|  | # all the other files it could be.  eg: *.stt | 
|  | file_pattern = variables[2] | 
|  |  | 
|  | # This is the directory with files that we will use to do the comparison | 
|  | # against. | 
|  | baseline_dir = variables[3] | 
|  | snrs = '' | 
|  | filestable = {} | 
|  |  | 
|  | filestable['dsnr'] = '' | 
|  | filestable['drate'] = '' | 
|  | filestable['avg'] = '' | 
|  |  | 
|  | # Dirs is directories after the baseline to compare to the base. | 
|  | dirs = variables[4:len(variables)] | 
|  |  | 
|  | # Find the metric files in the baseline directory. | 
|  | dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern)) | 
|  |  | 
|  | metrics = GetMetrics(baseline_dir + "/" + dir_list[0]) | 
|  |  | 
|  | metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];' | 
|  |  | 
|  | for column in range(1, len(metrics)): | 
|  |  | 
|  | for metric in ['avg','dsnr','drate']: | 
|  | description = {"file": ("string", "File")} | 
|  |  | 
|  | # Go through each directory and add a column header to our description. | 
|  | countoverall = {} | 
|  | sumoverall = {} | 
|  |  | 
|  | for directory in dirs: | 
|  | description[directory] = ("number", directory) | 
|  | countoverall[directory] = 0 | 
|  | sumoverall[directory] = 0 | 
|  |  | 
|  | # Data holds the data for the visualization, name given comes from | 
|  | # gviz_api sample code. | 
|  | data = [] | 
|  | for filename in dir_list: | 
|  | row = {'file': splitext(basename(filename))[0] } | 
|  | baseline_file_name = baseline_dir + "/" + filename | 
|  |  | 
|  | # Read the metric file from each of the directories in our list. | 
|  | for directory in dirs: | 
|  | metric_file_name = directory + "/" + filename | 
|  |  | 
|  | # If there is a metric file in the current directory, open it | 
|  | # and calculate its overall difference between it and the baseline | 
|  | # directory's metric file. | 
|  | if os.path.isfile(metric_file_name): | 
|  | overall = FileBetter(baseline_file_name, metric_file_name, | 
|  | column, metric) | 
|  | row[directory] = overall | 
|  |  | 
|  | sumoverall[directory] += overall | 
|  | countoverall[directory] += 1 | 
|  |  | 
|  | data.append(row) | 
|  |  | 
|  | # Add the overall numbers. | 
|  | row = {"file": "OVERALL" } | 
|  | for directory in dirs: | 
|  | row[directory] = sumoverall[directory] / countoverall[directory] | 
|  | data.append(row) | 
|  |  | 
|  | # write the tables out | 
|  | data_table = gviz_api.DataTable(description) | 
|  | data_table.LoadData(data) | 
|  |  | 
|  | filestable[metric] = ( filestable[metric] + "filestable_" + metric + | 
|  | "[" + str(column) + "]=" + | 
|  | data_table.ToJSon(columns_order=["file"]+dirs) + "\n" ) | 
|  |  | 
|  | filestable_avg = filestable['avg'] | 
|  | filestable_dpsnr = filestable['dsnr'] | 
|  | filestable_drate = filestable['drate'] | 
|  |  | 
|  | # Now we collect all the data for all the graphs.  First the column | 
|  | # headers which will be Datarate and then each directory. | 
|  | columns = ("datarate",baseline_dir) | 
|  | description = {"datarate":("number", "Datarate")} | 
|  | for directory in dirs: | 
|  | description[directory] = ("number", directory) | 
|  |  | 
|  | description[baseline_dir] = ("number", baseline_dir) | 
|  |  | 
|  | snrs = snrs + "snrs[" + str(column) + "] = [" | 
|  |  | 
|  | # Now collect the data for the graphs, file by file. | 
|  | for filename in dir_list: | 
|  |  | 
|  | data = [] | 
|  |  | 
|  | # Collect the file in each directory and store all of its metrics | 
|  | # in the associated gviz metrics table. | 
|  | all_dirs = dirs + [baseline_dir] | 
|  | for directory in all_dirs: | 
|  |  | 
|  | metric_file_name = directory + "/" + filename | 
|  | if not os.path.isfile(metric_file_name): | 
|  | continue | 
|  |  | 
|  | # Read and parse the metrics file storing it to the data we'll | 
|  | # use for the gviz_api.Datatable. | 
|  | metrics = ParseMetricFile(metric_file_name, column) | 
|  | for bitrate, metric in metrics: | 
|  | data.append({"datarate": bitrate, directory: metric}) | 
|  |  | 
|  | data_table = gviz_api.DataTable(description) | 
|  | data_table.LoadData(data) | 
|  | snrs = snrs + "'" + data_table.ToJSon( | 
|  | columns_order=tuple(["datarate",baseline_dir]+dirs)) + "'," | 
|  |  | 
|  | snrs = snrs + "]\n" | 
|  |  | 
|  | formatters = "" | 
|  | for i in range(len(dirs)): | 
|  | formatters = "%s   formatter.format(better, %d);" % (formatters, i+1) | 
|  |  | 
|  | print FillForm(page_template, vars()) | 
|  | return | 
|  |  | 
|  | if len(sys.argv) < 3: | 
|  | print HandleFiles.__doc__ | 
|  | else: | 
|  | HandleFiles(sys.argv) |