#!/usr/bin/python
#
# Copyright (c) 2016, Alliance for Open Media. All rights reserved
#
# This source code is subject to the terms of the BSD 2 Clause License and
# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
# was not distributed with this source code in the LICENSE file, you can
# obtain it at www.aomedia.org/license/software. If the Alliance for Open
# Media Patent License 1.0 was not distributed with this source code in the
# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
#

"""Converts video encoding result data from text files to visualization
data source."""

__author__ = ("jzern@google.com (James Zern), "
              "jimbankoski@google.com (Jim Bankoski)")

import fnmatch
import math
import os
import re
import sys
import warnings

import numpy as np
import scipy.interpolate

import gviz_api

from os.path import basename
from os.path import splitext

warnings.simplefilter('ignore', np.RankWarning)
warnings.simplefilter('ignore', RuntimeWarning)

def bdsnr2(metric_set1, metric_set2):
  """
  BJONTEGAARD    Bjontegaard metric calculation adapted
  Bjontegaard's snr metric allows one to compute the average gain in
  decibels between two rate-distortion curves [1]. This is an adaptation
  of that method that fixes inconsistencies when the curve fit operation
  goes awry, by replacing the curve fit function with a Piecewise Cubic
  Hermite Interpolating Polynomial and integrating it with the trapezoid
  method over samples taken at small intervals.

  metric_set1 - list of tuples (bitrate, metric) for the first graph
  metric_set2 - list of tuples (bitrate, metric) for the second graph
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires keys sorted by the x axis. Here the x axis
    # is the log of the bitrate, so sorting the tuples by bitrate suffices.
    metric_set1.sort()
    metric_set2.sort()

    # Pull the log of the rate and clamped psnr from metric_sets.
    log_rate1 = [math.log(x[0]) for x in metric_set1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    log_rate2 = [math.log(x[0]) for x in metric_set2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid it.
    min_int = max([min(log_rate1), min(log_rate2)])
    max_int = min([max(log_rate1), max(log_rate2)])

    # No overlap means no sensible metric is possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new sample points separated by a fixed interval.
    lin = np.linspace(min_int, max_int, num=100, retstep=True)
    interval = lin[1]
    samples = lin[0]
    v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
    v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    int_v1 = np.trapz(v1, dx=interval)
    int_v2 = np.trapz(v2, dx=interval)

    # Calculate the average improvement.
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning):
    return 0.0

  return avg_exp_diff
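
# Illustrative example (not part of the original script): bdsnr2 takes two
# rate-distortion curves as (bitrate, metric) tuples and returns the average
# dB difference of the second curve over the first. With made-up sample
# points where curve_b is 1 dB better at every bitrate:
#
#   curve_a = [(100.0, 30.0), (200.0, 33.0), (400.0, 36.0)]
#   curve_b = [(100.0, 31.0), (200.0, 34.0), (400.0, 37.0)]
#   bdsnr2(curve_a, curve_b)  # ~1.0, i.e. a 1 dB average gain for curve_b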

def bdrate2(metric_set1, metric_set2):
  """
  BJONTEGAARD    Bjontegaard metric calculation adapted
  Bjontegaard's metric allows one to compute the average % saving in
  bitrate between two rate-distortion curves [1]. This is an adaptation
  of that method that fixes inconsistencies when the curve fit operation
  goes awry, by replacing the curve fit function with a Piecewise Cubic
  Hermite Interpolating Polynomial and integrating it with the trapezoid
  method over samples taken at small intervals.

  metric_set1 - list of tuples (bitrate, metric) for the first graph
  metric_set2 - list of tuples (bitrate, metric) for the second graph
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires keys sorted by the x axis. Here the x axis
    # is the metric, not the bitrate, so sort by metric.
    metric_set1.sort(key=lambda tup: tup[1])
    metric_set2.sort(key=lambda tup: tup[1])

    # Pull the log of the rate and clamped psnr from metric_sets.
    log_rate1 = [math.log(x[0]) for x in metric_set1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    log_rate2 = [math.log(x[0]) for x in metric_set2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid it.
    min_int = max([min(metric1), min(metric2)])
    max_int = min([max(metric1), max(metric2)])

    # No overlap means no sensible metric is possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new sample points separated by a fixed interval.
    lin = np.linspace(min_int, max_int, num=100, retstep=True)
    interval = lin[1]
    samples = lin[0]
    v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
    v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    int_v1 = np.trapz(v1, dx=interval)
    int_v2 = np.trapz(v2, dx=interval)

    # Calculate the average improvement.
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning):
    return 0.0

  # Convert to a percentage.
  avg_diff = (math.exp(avg_exp_diff) - 1) * 100

  return avg_diff
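
# Illustrative example (not part of the original script): if curve_b reaches
# the same quality as curve_a at half the bitrate everywhere, the BD-rate is
# a 50% bitrate saving:
#
#   curve_a = [(100.0, 30.0), (200.0, 33.0), (400.0, 36.0)]
#   curve_b = [(50.0, 30.0), (100.0, 33.0), (200.0, 36.0)]
#   bdrate2(curve_a, curve_b)  # ~-50.0, i.e. curve_b saves 50% bitrate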


def FillForm(string_for_substitution, dictionary_of_vars):
  """
  This function substitutes all matches of the command string //%% ... %%//
  with the variable represented by ... .
  """
  return_string = string_for_substitution
  for i in re.findall("//%%(.*)%%//", string_for_substitution):
    return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i],
                           return_string)
  return return_string
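
# Illustrative example (not part of the original script): FillForm replaces
# each //%%name%%// token with the string bound to that name:
#
#   FillForm("metrics = //%%metrics_js%%//", {"metrics_js": '["Bitrate"]'})
#   # -> 'metrics = ["Bitrate"]'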


def HasMetrics(line):
  """
  Returns true if the line appears to contain metric data.

  In the metrics files produced by aomenc, header lines start with a
  letter ("Bitrate", ...) while data lines start with a digit.
  """
  # True if the first character of the first word on the line is a digit.
  if len(line) == 0:
    return False
  if len(line.split()) == 0:
    return False
  if line.split()[0][0:1].isdigit():
    return True
  return False
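
# Illustrative example (not part of the original script):
#
#   HasMetrics("25.911 38.242 38.104")     # True: a data line
#   HasMetrics("Bitrate AVGPsnr GLBPsnr")  # False: a header line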

def GetMetrics(file_name):
  """Returns the list of metric names from the header of a metrics file."""
  metric_file = open(file_name, "r")
  metrics = metric_file.readline().split()
  metric_file.close()
  return metrics

def ParseMetricFile(file_name, metric_column):
  """Returns the sorted list of unique (bitrate, metric) tuples parsed from
  the given column of a metrics file."""
  metric_set1 = set([])
  metric_file = open(file_name, "r")
  for line in metric_file:
    metrics = line.split()
    if HasMetrics(line):
      if metric_column < len(metrics):
        try:
          rate_metric = float(metrics[0]), float(metrics[metric_column])
        except ValueError:
          rate_metric = float(metrics[0]), 0.0
      else:
        rate_metric = float(metrics[0]), 0.0
      metric_set1.add(rate_metric)
  metric_file.close()
  metric_set1_sorted = sorted(metric_set1)
  return metric_set1_sorted
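
# Illustrative example (not part of the original script; the path is
# hypothetical): for a metrics file whose data lines read
# "25.911 38.242 38.104 ...", column 1 selects the AVGPsnr field:
#
#   ParseMetricFile("aom/clip.stt", 1)
#   # -> [(25.911, 38.242), (49.982, 41.264), ...]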


def FileBetter(file_name_1, file_name_2, metric_column, method):
  """
  Compares two data files and determines which is better and by how
  much, using the given method ('avg', 'dsnr' or 'drate').
  metric_column selects the metric to compare on.
  """
  # Store and parse our two files into lists of unique tuples.

  # Read the two files, parsing out lines starting with bitrate.
  metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
  metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)

  def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
    """
    Search through the sorted metric set for metrics on either side of
    the metric from file 1. Since both lists are sorted we really
    should not have to search through the entire range, but these
    are small files."""
    total_bitrate_difference_ratio = 0.0
    count = 0
    for bitrate, metric in metric_set1_sorted:
      if bitrate == 0:
        continue
      for i in range(len(metric_set2_sorted) - 1):
        s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i]
        s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1]
        # We have a point on either side of our metric range.
        if metric > s2_metric_0 and metric <= s2_metric_1:

          # Calculate a slope.
          if s2_metric_1 - s2_metric_0 != 0:
            metric_slope = ((s2_bitrate_1 - s2_bitrate_0) /
                            (s2_metric_1 - s2_metric_0))
          else:
            metric_slope = 0

          estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) *
                                  metric_slope)

          if estimated_s2_bitrate == 0:
            continue
          # Calculate percentage difference as given by base.
          if base_is_set_2 == 0:
            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
                                        bitrate)
          else:
            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
                                        estimated_s2_bitrate)

          total_bitrate_difference_ratio += bitrate_difference_ratio
          count += 1
          break

    # Calculate the average improvement between graphs.
    if count != 0:
      avg = total_bitrate_difference_ratio / count
    else:
      avg = 0.0

    return avg
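
  # Worked example (not part of the original script): if the other set has
  # points (bitrate, metric) of (100, 30) and (200, 34), a metric of 32
  # falls between them. The slope is (200 - 100) / (34 - 30) = 25 bitrate
  # units per metric unit, so the estimated matching bitrate is
  # 100 + (32 - 30) * 25 = 150.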

  # Be fair to both graphs by testing all the points in each.
  if method == 'avg':
    avg_improvement = 50 * (
        GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) -
        GraphBetter(metric_set2_sorted, metric_set1_sorted, 0))
  elif method == 'dsnr':
    avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted)
  else:
    avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted)

  return avg_improvement
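
# Illustrative example (not part of the original script; the paths are
# hypothetical): compare column 1 (AVGPsnr in the sample format below) of
# two stat files using the BD-rate method:
#
#   FileBetter("aom/clip.stt", "aom_b/clip.stt", 1, "drate")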


def HandleFiles(variables):
  """
  This script creates html for displaying metric data produced from data
  in a video stats file, as created by the AOM project when enable_psnr
  is turned on:

  Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]

  The script parses each metrics file [see below] that matches the
  statfile_pattern in the baseline directory and looks for the file that
  matches that same file in each of the sub_dirs, and compares the resultant
  metrics bitrate, avg psnr, glb psnr, and ssim.

  It provides a table in which each row is a file in the baseline directory,
  and a column for each subdir, with the cells representing how that clip
  compares to baseline for that subdir. A graph is given for each which
  compares filesize to that metric. If you click on a point in the graph it
  zooms in on that point.

  A sample metrics file:

  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   25.911   38.242   38.104   38.258   38.121   75.790     14103
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   49.982   41.264   41.129   41.255   41.122   83.993     19817
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   74.967   42.911   42.767   42.899   42.756   87.928     17332
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  100.012   43.983   43.838   43.881   43.738   89.695     25389
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  149.980   45.338   45.203   45.184   45.043   91.591     25438
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  199.852   46.225   46.123   46.113   45.999   92.679     28302
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  249.922   46.864   46.773   46.777   46.673   93.334     27244
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  299.998   47.366   47.281   47.317   47.220   93.844     27137
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  349.769   47.746   47.677   47.722   47.648   94.178     32226
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  399.773   48.032   47.971   48.013   47.946   94.362     36203

  Sample use:
  visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
  """

  # The template file is the html file into which we will write the
  # data from the stats file, formatted correctly for the gviz_api.
  template_file = open(variables[1], "r")
  page_template = template_file.read()
  template_file.close()

  # This is the path match pattern for finding stats files amongst
  # all the other files it could be, eg: *.stt
  file_pattern = variables[2]

  # This is the directory with files that we will use to do the comparison
  # against.
  baseline_dir = variables[3]
  snrs = ''
  filestable = {}

  filestable['dsnr'] = ''
  filestable['drate'] = ''
  filestable['avg'] = ''

  # dirs is the list of directories after the baseline to compare to the base.
  dirs = variables[4:]

  # Find the metric files in the baseline directory.
  dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))

  metrics = GetMetrics(baseline_dir + "/" + dir_list[0])

  metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'
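
  # Illustrative example (not part of the original script): for the sample
  # metrics file shown in the docstring above, metrics_js becomes:
  #   metrics = ["Bitrate", "AVGPsnr", "GLBPsnr", "AVPsnrP", "GLPsnrP",
  #              "VPXSSIM", "Time(us)"];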

  for column in range(1, len(metrics)):

    for metric in ['avg', 'dsnr', 'drate']:
      description = {"file": ("string", "File")}

      # Go through each directory and add a column header to our description.
      countoverall = {}
      sumoverall = {}

      for directory in dirs:
        description[directory] = ("number", directory)
        countoverall[directory] = 0
        sumoverall[directory] = 0

      # Data holds the data for the visualization; the name comes from
      # the gviz_api sample code.
      data = []
      for filename in dir_list:
        row = {'file': splitext(basename(filename))[0]}
        baseline_file_name = baseline_dir + "/" + filename

        # Read the metric file from each of the directories in our list.
        for directory in dirs:
          metric_file_name = directory + "/" + filename

          # If there is a metric file in the current directory, open it
          # and calculate its overall difference between it and the baseline
          # directory's metric file.
          if os.path.isfile(metric_file_name):
            overall = FileBetter(baseline_file_name, metric_file_name,
                                 column, metric)
            row[directory] = overall

            sumoverall[directory] += overall
            countoverall[directory] += 1

        data.append(row)

      # Add the overall numbers, guarding against directories that had no
      # matching files.
      row = {"file": "OVERALL"}
      for directory in dirs:
        if countoverall[directory]:
          row[directory] = sumoverall[directory] / countoverall[directory]
      data.append(row)

      # Write the tables out.
      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)

      filestable[metric] = (filestable[metric] + "filestable_" + metric +
                            "[" + str(column) + "]=" +
                            data_table.ToJSon(columns_order=["file"] + dirs) +
                            "\n")

    filestable_avg = filestable['avg']
    filestable_dpsnr = filestable['dsnr']
    filestable_drate = filestable['drate']

    # Now we collect all the data for all the graphs. First the column
    # headers, which will be Datarate and then each directory.
    columns = ("datarate", baseline_dir)
    description = {"datarate": ("number", "Datarate")}
    for directory in dirs:
      description[directory] = ("number", directory)

    description[baseline_dir] = ("number", baseline_dir)

    snrs = snrs + "snrs[" + str(column) + "] = ["

    # Now collect the data for the graphs, file by file.
    for filename in dir_list:

      data = []

      # Collect the file in each directory and store all of its metrics
      # in the associated gviz metrics table.
      all_dirs = dirs + [baseline_dir]
      for directory in all_dirs:

        metric_file_name = directory + "/" + filename
        if not os.path.isfile(metric_file_name):
          continue

        # Read and parse the metrics file, storing it to the data we'll
        # use for the gviz_api.DataTable.
        metrics = ParseMetricFile(metric_file_name, column)
        for bitrate, metric in metrics:
          data.append({"datarate": bitrate, directory: metric})

      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)
      snrs = snrs + "'" + data_table.ToJSon(
          columns_order=tuple(["datarate", baseline_dir] + dirs)) + "',"

    snrs = snrs + "]\n"

  formatters = ""
  for i in range(len(dirs)):
    formatters = "%s formatter.format(better, %d);" % (formatters, i + 1)

  print FillForm(page_template, vars())
  return

if len(sys.argv) < 4:
  print HandleFiles.__doc__
else:
  HandleFiles(sys.argv)