| #!/usr/bin/python | 
 | # | 
 | # Copyright (c) 2016, Alliance for Open Media. All rights reserved. | 
 | # | 
 | # This source code is subject to the terms of the BSD 2 Clause License and | 
 | # the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
 | # was not distributed with this source code in the LICENSE file, you can | 
 | # obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
 | # Media Patent License 1.0 was not distributed with this source code in the | 
 | # PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
 | # | 
 |  | 
 | """Converts video encoding result data from text files to visualization | 
 | data source.""" | 
 |  | 
 | __author__ = "jzern@google.com (James Zern)," | 
 | __author__ += "jimbankoski@google.com (Jim Bankoski)" | 
 |  | 
 | import fnmatch | 
 | import numpy as np | 
 | import scipy as sp | 
 | import scipy.interpolate | 
 | import os | 
 | import re | 
 | import string | 
 | import sys | 
 | import math | 
 | import warnings | 
 |  | 
 | import gviz_api | 
 |  | 
 | from os.path import basename | 
 | from os.path import splitext | 
 |  | 
 | warnings.simplefilter('ignore', np.RankWarning) | 
 | warnings.simplefilter('ignore', RuntimeWarning) | 
 |  | 
 | def bdsnr2(metric_set1, metric_set2): | 
 |   """ | 
 |   BJONTEGAARD    Bjontegaard metric calculation adapted | 
 |   Bjontegaard's snr metric allows to compute the average % saving in decibels | 
 |   between two rate-distortion curves [1].  This is an adaptation of that | 
 |   method that fixes inconsistencies when the curve fit operation goes awry | 
 |   by replacing the curve fit function with a Piecewise Cubic Hermite | 
 |   Interpolating Polynomial and then integrating that by evaluating that | 
 |   function at small intervals using the trapezoid method to calculate | 
 |   the integral. | 
 |  | 
 |   metric_set1 - list of tuples ( bitrate,  metric ) for first graph | 
 |   metric_set2 - list of tuples ( bitrate,  metric ) for second graph | 
 |   """ | 
 |  | 
 |   if not metric_set1 or not metric_set2: | 
 |     return 0.0 | 
 |  | 
 |   try: | 
 |  | 
 |     # pchip_interlopate requires keys sorted by x axis. x-axis will | 
 |     # be our metric not the bitrate so sort by metric. | 
 |     metric_set1.sort() | 
 |     metric_set2.sort() | 
 |  | 
 |     # Pull the log of the rate and clamped psnr from metric_sets. | 
 |     log_rate1 = [math.log(x[0]) for x in metric_set1] | 
 |     metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1] | 
 |     log_rate2 = [math.log(x[0]) for x in metric_set2] | 
 |     metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2] | 
 |  | 
 |     # Integration interval.  This metric only works on the area that's | 
 |     # overlapping.   Extrapolation of these things is sketchy so we avoid. | 
 |     min_int = max([min(log_rate1), min(log_rate2)]) | 
 |     max_int = min([max(log_rate1), max(log_rate2)]) | 
 |  | 
 |     # No overlap means no sensible metric possible. | 
 |     if max_int <= min_int: | 
 |       return 0.0 | 
 |  | 
 |     # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to | 
 |     # create 100 new samples points separated by interval. | 
 |     lin = np.linspace(min_int, max_int, num=100, retstep=True) | 
 |     interval = lin[1] | 
 |     samples = lin[0] | 
 |     v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples) | 
 |     v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples) | 
 |  | 
 |     # Calculate the integral using the trapezoid method on the samples. | 
 |     int_v1 = np.trapz(v1, dx=interval) | 
 |     int_v2 = np.trapz(v2, dx=interval) | 
 |  | 
 |     # Calculate the average improvement. | 
 |     avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int) | 
 |  | 
 |   except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e: | 
 |     return 0 | 
 |  | 
 |   return avg_exp_diff | 
 |  | 
 | def bdrate2(metric_set1, metric_set2): | 
 |   """ | 
 |   BJONTEGAARD    Bjontegaard metric calculation adapted | 
 |   Bjontegaard's metric allows to compute the average % saving in bitrate | 
 |   between two rate-distortion curves [1].  This is an adaptation of that | 
 |   method that fixes inconsistencies when the curve fit operation goes awry | 
 |   by replacing the curve fit function with a Piecewise Cubic Hermite | 
 |   Interpolating Polynomial and then integrating that by evaluating that | 
 |   function at small intervals using the trapezoid method to calculate | 
 |   the integral. | 
 |  | 
 |   metric_set1 - list of tuples ( bitrate,  metric ) for first graph | 
 |   metric_set2 - list of tuples ( bitrate,  metric ) for second graph | 
 |   """ | 
 |  | 
 |   if not metric_set1 or not metric_set2: | 
 |     return 0.0 | 
 |  | 
 |   try: | 
 |  | 
 |     # pchip_interlopate requires keys sorted by x axis. x-axis will | 
 |     # be our metric not the bitrate so sort by metric. | 
 |     metric_set1.sort(key=lambda tup: tup[1]) | 
 |     metric_set2.sort(key=lambda tup: tup[1]) | 
 |  | 
 |     # Pull the log of the rate and clamped psnr from metric_sets. | 
 |     log_rate1 = [math.log(x[0]) for x in metric_set1] | 
 |     metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1] | 
 |     log_rate2 = [math.log(x[0]) for x in metric_set2] | 
 |     metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2] | 
 |  | 
 |     # Integration interval.  This metric only works on the area that's | 
 |     # overlapping.   Extrapolation of these things is sketchy so we avoid. | 
 |     min_int = max([min(metric1), min(metric2)]) | 
 |     max_int = min([max(metric1), max(metric2)]) | 
 |  | 
 |     # No overlap means no sensible metric possible. | 
 |     if max_int <= min_int: | 
 |       return 0.0 | 
 |  | 
 |     # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to | 
 |     # create 100 new samples points separated by interval. | 
 |     lin = np.linspace(min_int, max_int, num=100, retstep=True) | 
 |     interval = lin[1] | 
 |     samples = lin[0] | 
 |     v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples) | 
 |     v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples) | 
 |  | 
 |     # Calculate the integral using the trapezoid method on the samples. | 
 |     int_v1 = np.trapz(v1, dx=interval) | 
 |     int_v2 = np.trapz(v2, dx=interval) | 
 |  | 
 |     # Calculate the average improvement. | 
 |     avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int) | 
 |  | 
 |   except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e: | 
 |     return 0 | 
 |  | 
 |   # Convert to a percentage. | 
 |   avg_diff = (math.exp(avg_exp_diff) - 1) * 100 | 
 |  | 
 |   return avg_diff | 
 |  | 
 |  | 
 |  | 
 | def FillForm(string_for_substitution, dictionary_of_vars): | 
 |   """ | 
 |   This function substitutes all matches of the command string //%% ... %%// | 
 |   with the variable represented by ...  . | 
 |   """ | 
 |   return_string = string_for_substitution | 
 |   for i in re.findall("//%%(.*)%%//", string_for_substitution): | 
 |     return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i], | 
 |                            return_string) | 
 |   return return_string | 
 |  | 
 |  | 
 | def HasMetrics(line): | 
 |   """ | 
 |   The metrics files produced by aomenc are started with a B for headers. | 
 |   """ | 
 |   # If the first char of the first word on the line is a digit | 
 |   if len(line) == 0: | 
 |     return False | 
 |   if len(line.split()) == 0: | 
 |     return False | 
 |   if line.split()[0][0:1].isdigit(): | 
 |     return True | 
 |   return False | 
 |  | 
 | def GetMetrics(file_name): | 
 |   metric_file = open(file_name, "r") | 
 |   return metric_file.readline().split(); | 
 |  | 
 | def ParseMetricFile(file_name, metric_column): | 
 |   metric_set1 = set([]) | 
 |   metric_file = open(file_name, "r") | 
 |   for line in metric_file: | 
 |     metrics = string.split(line) | 
 |     if HasMetrics(line): | 
 |       if metric_column < len(metrics): | 
 |         try: | 
 |           tuple = float(metrics[0]), float(metrics[metric_column]) | 
 |         except: | 
 |           tuple = float(metrics[0]), 0 | 
 |       else: | 
 |         tuple = float(metrics[0]), 0 | 
 |       metric_set1.add(tuple) | 
 |   metric_set1_sorted = sorted(metric_set1) | 
 |   return metric_set1_sorted | 
 |  | 
 |  | 
 | def FileBetter(file_name_1, file_name_2, metric_column, method): | 
 |   """ | 
 |   Compares two data files and determines which is better and by how | 
 |   much. Also produces a histogram of how much better, by PSNR. | 
 |   metric_column is the metric. | 
 |   """ | 
 |   # Store and parse our two files into lists of unique tuples. | 
 |  | 
 |   # Read the two files, parsing out lines starting with bitrate. | 
 |   metric_set1_sorted = ParseMetricFile(file_name_1, metric_column) | 
 |   metric_set2_sorted = ParseMetricFile(file_name_2, metric_column) | 
 |  | 
 |  | 
 |   def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2): | 
 |     """ | 
 |     Search through the sorted metric file for metrics on either side of | 
 |     the metric from file 1.  Since both lists are sorted we really | 
 |     should not have to search through the entire range, but these | 
 |     are small files.""" | 
 |     total_bitrate_difference_ratio = 0.0 | 
 |     count = 0 | 
 |     for bitrate, metric in metric_set1_sorted: | 
 |       if bitrate == 0: | 
 |         continue | 
 |       for i in range(len(metric_set2_sorted) - 1): | 
 |         s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i] | 
 |         s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1] | 
 |         # We have a point on either side of our metric range. | 
 |         if metric > s2_metric_0 and metric <= s2_metric_1: | 
 |  | 
 |           # Calculate a slope. | 
 |           if s2_metric_1 - s2_metric_0 != 0: | 
 |             metric_slope = ((s2_bitrate_1 - s2_bitrate_0) / | 
 |                             (s2_metric_1 - s2_metric_0)) | 
 |           else: | 
 |             metric_slope = 0 | 
 |  | 
 |           estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) * | 
 |                                   metric_slope) | 
 |  | 
 |           if estimated_s2_bitrate == 0: | 
 |             continue | 
 |           # Calculate percentage difference as given by base. | 
 |           if base_is_set_2 == 0: | 
 |             bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) / | 
 |                                         bitrate) | 
 |           else: | 
 |             bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) / | 
 |                                         estimated_s2_bitrate) | 
 |  | 
 |           total_bitrate_difference_ratio += bitrate_difference_ratio | 
 |           count += 1 | 
 |           break | 
 |  | 
 |     # Calculate the average improvement between graphs. | 
 |     if count != 0: | 
 |       avg = total_bitrate_difference_ratio / count | 
 |  | 
 |     else: | 
 |       avg = 0.0 | 
 |  | 
 |     return avg | 
 |  | 
 |   # Be fair to both graphs by testing all the points in each. | 
 |   if method == 'avg': | 
 |     avg_improvement = 50 * ( | 
 |                        GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) - | 
 |                        GraphBetter(metric_set2_sorted, metric_set1_sorted, 0)) | 
 |   elif method == 'dsnr': | 
 |       avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted) | 
 |   else: | 
 |       avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted) | 
 |  | 
 |   return avg_improvement | 
 |  | 
 |  | 
 | def HandleFiles(variables): | 
 |   """ | 
 |   This script creates html for displaying metric data produced from data | 
 |   in a video stats file,  as created by the AOM project when enable_psnr | 
 |   is turned on: | 
 |  | 
 |   Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..] | 
 |  | 
 |   The script parses each metrics file [see below] that matches the | 
 |   statfile_pattern  in the baseline directory and looks for the file that | 
 |   matches that same file in each of the sub_dirs, and compares the resultant | 
 |   metrics bitrate, avg psnr, glb psnr, and ssim. " | 
 |  | 
 |   It provides a table in which each row is a file in the line directory, | 
 |   and a column for each subdir, with the cells representing how that clip | 
 |   compares to baseline for that subdir.   A graph is given for each which | 
 |   compares file size to that metric.  If you click on a point in the graph it | 
 |   zooms in on that point. | 
 |  | 
 |   a SAMPLE metrics file: | 
 |  | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |    25.911   38.242   38.104   38.258   38.121   75.790    14103 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |    49.982   41.264   41.129   41.255   41.122   83.993    19817 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |    74.967   42.911   42.767   42.899   42.756   87.928    17332 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |   100.012   43.983   43.838   43.881   43.738   89.695    25389 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |   149.980   45.338   45.203   45.184   45.043   91.591    25438 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |   199.852   46.225   46.123   46.113   45.999   92.679    28302 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |   249.922   46.864   46.773   46.777   46.673   93.334    27244 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |   299.998   47.366   47.281   47.317   47.220   93.844    27137 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |   349.769   47.746   47.677   47.722   47.648   94.178    32226 | 
 |   Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us) | 
 |   399.773   48.032   47.971   48.013   47.946   94.362    36203 | 
 |  | 
 |   sample use: | 
 |   visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html | 
 |   """ | 
 |  | 
 |   # The template file is the html file into which we will write the | 
 |   # data from the stats file, formatted correctly for the gviz_api. | 
 |   template_file = open(variables[1], "r") | 
 |   page_template = template_file.read() | 
 |   template_file.close() | 
 |  | 
 |   # This is the path match pattern for finding stats files amongst | 
 |   # all the other files it could be.  eg: *.stt | 
 |   file_pattern = variables[2] | 
 |  | 
 |   # This is the directory with files that we will use to do the comparison | 
 |   # against. | 
 |   baseline_dir = variables[3] | 
 |   snrs = '' | 
 |   filestable = {} | 
 |  | 
 |   filestable['dsnr'] = '' | 
 |   filestable['drate'] = '' | 
 |   filestable['avg'] = '' | 
 |  | 
 |   # Dirs is directories after the baseline to compare to the base. | 
 |   dirs = variables[4:len(variables)] | 
 |  | 
 |   # Find the metric files in the baseline directory. | 
 |   dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern)) | 
 |  | 
 |   metrics = GetMetrics(baseline_dir + "/" + dir_list[0]) | 
 |  | 
 |   metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];' | 
 |  | 
 |   for column in range(1, len(metrics)): | 
 |  | 
 |     for metric in ['avg','dsnr','drate']: | 
 |       description = {"file": ("string", "File")} | 
 |  | 
 |       # Go through each directory and add a column header to our description. | 
 |       countoverall = {} | 
 |       sumoverall = {} | 
 |  | 
 |       for directory in dirs: | 
 |         description[directory] = ("number", directory) | 
 |         countoverall[directory] = 0 | 
 |         sumoverall[directory] = 0 | 
 |  | 
 |       # Data holds the data for the visualization, name given comes from | 
 |       # gviz_api sample code. | 
 |       data = [] | 
 |       for filename in dir_list: | 
 |         row = {'file': splitext(basename(filename))[0] } | 
 |         baseline_file_name = baseline_dir + "/" + filename | 
 |  | 
 |         # Read the metric file from each of the directories in our list. | 
 |         for directory in dirs: | 
 |           metric_file_name = directory + "/" + filename | 
 |  | 
 |           # If there is a metric file in the current directory, open it | 
 |           # and calculate its overall difference between it and the baseline | 
 |           # directory's metric file. | 
 |           if os.path.isfile(metric_file_name): | 
 |             overall = FileBetter(baseline_file_name, metric_file_name, | 
 |                                  column, metric) | 
 |             row[directory] = overall | 
 |  | 
 |             sumoverall[directory] += overall | 
 |             countoverall[directory] += 1 | 
 |  | 
 |         data.append(row) | 
 |  | 
 |       # Add the overall numbers. | 
 |       row = {"file": "OVERALL" } | 
 |       for directory in dirs: | 
 |         row[directory] = sumoverall[directory] / countoverall[directory] | 
 |       data.append(row) | 
 |  | 
 |       # write the tables out | 
 |       data_table = gviz_api.DataTable(description) | 
 |       data_table.LoadData(data) | 
 |  | 
 |       filestable[metric] = ( filestable[metric] + "filestable_" + metric + | 
 |                              "[" + str(column) + "]=" + | 
 |                              data_table.ToJSon(columns_order=["file"]+dirs) + "\n" ) | 
 |  | 
 |     filestable_avg = filestable['avg'] | 
 |     filestable_dpsnr = filestable['dsnr'] | 
 |     filestable_drate = filestable['drate'] | 
 |  | 
 |     # Now we collect all the data for all the graphs.  First the column | 
 |     # headers which will be Datarate and then each directory. | 
 |     columns = ("datarate",baseline_dir) | 
 |     description = {"datarate":("number", "Datarate")} | 
 |     for directory in dirs: | 
 |       description[directory] = ("number", directory) | 
 |  | 
 |     description[baseline_dir] = ("number", baseline_dir) | 
 |  | 
 |     snrs = snrs + "snrs[" + str(column) + "] = [" | 
 |  | 
 |     # Now collect the data for the graphs, file by file. | 
 |     for filename in dir_list: | 
 |  | 
 |       data = [] | 
 |  | 
 |       # Collect the file in each directory and store all of its metrics | 
 |       # in the associated gviz metrics table. | 
 |       all_dirs = dirs + [baseline_dir] | 
 |       for directory in all_dirs: | 
 |  | 
 |         metric_file_name = directory + "/" + filename | 
 |         if not os.path.isfile(metric_file_name): | 
 |           continue | 
 |  | 
 |         # Read and parse the metrics file storing it to the data we'll | 
 |         # use for the gviz_api.Datatable. | 
 |         metrics = ParseMetricFile(metric_file_name, column) | 
 |         for bitrate, metric in metrics: | 
 |           data.append({"datarate": bitrate, directory: metric}) | 
 |  | 
 |       data_table = gviz_api.DataTable(description) | 
 |       data_table.LoadData(data) | 
 |       snrs = snrs + "'" + data_table.ToJSon( | 
 |          columns_order=tuple(["datarate",baseline_dir]+dirs)) + "'," | 
 |  | 
 |     snrs = snrs + "]\n" | 
 |  | 
 |     formatters = "" | 
 |     for i in range(len(dirs)): | 
 |       formatters = "%s   formatter.format(better, %d);" % (formatters, i+1) | 
 |  | 
 |   print FillForm(page_template, vars()) | 
 |   return | 
 |  | 
 | if len(sys.argv) < 3: | 
 |   print HandleFiles.__doc__ | 
 | else: | 
 |   HandleFiles(sys.argv) |