#!/usr/bin/python
2#
3# Copyright (c) 2016, Alliance for Open Media. All rights reserved
4#
5# This source code is subject to the terms of the BSD 2 Clause License and
6# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7# was not distributed with this source code in the LICENSE file, you can
8# obtain it at www.aomedia.org/license/software. If the Alliance for Open
9# Media Patent License 1.0 was not distributed with this source code in the
10# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
11#
12
13"""Converts video encoding result data from text files to visualization
14data source."""
15
16__author__ = "jzern@google.com (James Zern),"
17__author__ += "jimbankoski@google.com (Jim Bankoski)"
18
19import fnmatch
20import numpy as np
21import scipy as sp
22import scipy.interpolate
23import os
24import re
25import string
26import sys
27import math
28import warnings
29
30import gviz_api
31
32from os.path import basename
33from os.path import splitext
34
# Silence the warnings numpy/scipy emit when a fit is poorly conditioned;
# bdsnr2/bdrate2 deal with degenerate data by returning 0 instead.
# NOTE(review): np.RankWarning was removed in NumPy 2.0 (it now lives at
# np.exceptions.RankWarning), so this line fails on modern NumPy -- confirm
# the NumPy version this script is expected to run against.
warnings.simplefilter('ignore', np.RankWarning)
warnings.simplefilter('ignore', RuntimeWarning)
37
def bdsnr2(metric_set1, metric_set2):
  """
  BJONTEGAARD Bjontegaard metric calculation adapted
  Bjontegaard's snr metric allows to compute the average % saving in decibels
  between two rate-distortion curves [1]. This is an adaptation of that
  method that fixes inconsistencies when the curve fit operation goes awry
  by replacing the curve fit function with a Piecewise Cubic Hermite
  Interpolating Polynomial and then integrating that by evaluating that
  function at small intervals using the trapezoid method to calculate
  the integral.

  metric_set1 - list of tuples ( bitrate, metric ) for first graph
  metric_set2 - list of tuples ( bitrate, metric ) for second graph

  Returns the average metric gain (e.g. dB of PSNR) of curve 2 over curve 1
  across the overlapping bitrate range, or 0.0 when the curves do not
  overlap or the input is degenerate.
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires keys sorted by the x axis.  For the snr
    # metric the x axis is the (log) bitrate, so sort by bitrate.  Use
    # sorted() copies so the caller's lists are not mutated.
    metric_set1 = sorted(metric_set1)
    metric_set2 = sorted(metric_set2)

    # Pull the log of the rate and clamped psnr from metric_sets.
    log_rate1 = [math.log(x[0]) for x in metric_set1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    log_rate2 = [math.log(x[0]) for x in metric_set2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid.
    min_int = max([min(log_rate1), min(log_rate2)])
    max_int = min([max(log_rate1), max(log_rate2)])

    # No overlap means no sensible metric possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new samples points separated by interval.
    samples, interval = np.linspace(min_int, max_int, num=100, retstep=True)
    v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
    v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; support both.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    int_v1 = trapezoid(v1, dx=interval)
    int_v2 = trapezoid(v2, dx=interval)

    # Calculate the average improvement.
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError):
    # Degenerate input (non-positive bitrates, duplicate x values, ...) --
    # report "no difference" rather than crashing the report generation.
    return 0.0

  return avg_exp_diff
97
def bdrate2(metric_set1, metric_set2):
  """
  BJONTEGAARD Bjontegaard metric calculation adapted
  Bjontegaard's metric allows to compute the average % saving in bitrate
  between two rate-distortion curves [1]. This is an adaptation of that
  method that fixes inconsistencies when the curve fit operation goes awry
  by replacing the curve fit function with a Piecewise Cubic Hermite
  Interpolating Polynomial and then integrating that by evaluating that
  function at small intervals using the trapezoid method to calculate
  the integral.

  metric_set1 - list of tuples ( bitrate, metric ) for first graph
  metric_set2 - list of tuples ( bitrate, metric ) for second graph

  Returns the average percentage bitrate difference of curve 2 relative to
  curve 1 at equal quality, or 0.0 when the curves do not overlap or the
  input is degenerate.
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires keys sorted by the x axis.  Here the x
    # axis is the quality metric (not the bitrate), so sort by metric.
    # Use sorted() copies so the caller's lists are not mutated.
    metric_set1 = sorted(metric_set1, key=lambda tup: tup[1])
    metric_set2 = sorted(metric_set2, key=lambda tup: tup[1])

    # Pull the log of the rate and clamped psnr from metric_sets.
    log_rate1 = [math.log(x[0]) for x in metric_set1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    log_rate2 = [math.log(x[0]) for x in metric_set2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid.
    min_int = max([min(metric1), min(metric2)])
    max_int = min([max(metric1), max(metric2)])

    # No overlap means no sensible metric possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new samples points separated by interval.
    samples, interval = np.linspace(min_int, max_int, num=100, retstep=True)
    v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
    v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; support both.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    int_v1 = trapezoid(v1, dx=interval)
    int_v2 = trapezoid(v2, dx=interval)

    # Calculate the average improvement in log rate.
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError):
    # Degenerate input (non-positive bitrates, duplicate metric values,
    # ...) -- report "no difference" rather than crashing the report.
    return 0.0

  # Convert the mean log-rate difference to a percentage.
  avg_diff = (math.exp(avg_exp_diff) - 1) * 100

  return avg_diff
160
161
162
def FillForm(string_for_substitution, dictionary_of_vars):
  """
  This function substitutes all matches of the command string //%% ... %%//
  with the variable represented by ... .

  string_for_substitution - template text containing //%%name%%// markers.
  dictionary_of_vars - maps each marker name to its replacement text.

  Raises KeyError if the template references a name that is not present
  in dictionary_of_vars.
  """
  return_string = string_for_substitution
  # Non-greedy match so several //%%...%%// markers on the same line are
  # each found individually.  str.replace is used for the substitution:
  # re.sub would misinterpret regex metacharacters in the marker name and
  # backslashes/group references in the replacement text.
  for name in re.findall(r"//%%(.*?)%%//", string_for_substitution):
    return_string = return_string.replace("//%%" + name + "%%//",
                                          dictionary_of_vars[name])
  return return_string
173
174
def HasMetrics(line):
  """
  The metrics files produced by aomenc are started with a B for headers.
  Returns True only when the line's first whitespace-separated token
  starts with a digit, i.e. when the line carries metric values rather
  than a column-header row.
  """
  tokens = line.split()
  # Empty or whitespace-only lines carry no metrics.
  if not tokens:
    return False
  return tokens[0][:1].isdigit()
187
def GetMetrics(file_name):
  """
  Returns the list of column names from the header (first) line of the
  given metrics file, e.g. ['Bitrate', 'AVGPsnr', ...].
  """
  # 'with' guarantees the handle is closed; the original leaked it.
  with open(file_name, "r") as metric_file:
    return metric_file.readline().split()
191
def ParseMetricFile(file_name, metric_column):
  """
  Parses a metrics file into a sorted list of unique (bitrate, metric)
  tuples, taking the metric from the given column.  Rows whose metric
  column is missing or not numeric contribute a metric value of 0.
  Header rows (see HasMetrics) are skipped.
  """
  metric_set = set()
  # 'with' guarantees the file is closed.  string.split(line) was replaced
  # with the str method, which also works on Python 3 where that string
  # module function no longer exists.
  with open(file_name, "r") as metric_file:
    for line in metric_file:
      if not HasMetrics(line):
        continue
      fields = line.split()
      try:
        entry = (float(fields[0]), float(fields[metric_column]))
      except (IndexError, ValueError):
        # Column absent or unparsable -- fall back to 0, matching the
        # original length check / bare-except behavior.
        entry = (float(fields[0]), 0)
      metric_set.add(entry)
  return sorted(metric_set)
208
209
def FileBetter(file_name_1, file_name_2, metric_column, method):
  """
  Compares two data files and determines which is better and by how
  much. Also produces a histogram of how much better, by PSNR.
  metric_column is the metric.

  file_name_1 - baseline metrics file.
  file_name_2 - comparison metrics file.
  metric_column - index of the metric column to compare (column 0 holds
      the bitrate).
  method - 'avg' for the piecewise-linear interpolation average, 'dsnr'
      for Bjontegaard delta-snr (bdsnr2); any other value selects
      Bjontegaard delta-rate (bdrate2).
  """
  # Store and parse our two files into lists of unique tuples.

  # Read the two files, parsing out lines starting with bitrate.
  metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
  metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)


  def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
    """
    Search through the sorted metric file for metrics on either side of
    the metric from file 1. Since both lists are sorted we really
    should not have to search through the entire range, but these
    are small files."""
    total_bitrate_difference_ratio = 0.0
    count = 0
    # For every point of curve 1, estimate (by linear interpolation) the
    # bitrate curve 2 needs to reach the same metric value, and accumulate
    # the relative bitrate difference.
    for bitrate, metric in metric_set1_sorted:
      if bitrate == 0:
        continue
      for i in range(len(metric_set2_sorted) - 1):
        s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i]
        s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1]
        # We have a point on either side of our metric range.
        if metric > s2_metric_0 and metric <= s2_metric_1:

          # Calculate a slope.
          if s2_metric_1 - s2_metric_0 != 0:
            metric_slope = ((s2_bitrate_1 - s2_bitrate_0) /
                            (s2_metric_1 - s2_metric_0))
          else:
            metric_slope = 0

          # Linear interpolation of curve 2's bitrate at this metric.
          estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) *
                                  metric_slope)

          # Avoid dividing by zero below.
          if estimated_s2_bitrate == 0:
            continue
          # Calculate percentage difference as given by base.
          if base_is_set_2 == 0:
            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
                                        bitrate)
          else:
            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
                                        estimated_s2_bitrate)

          total_bitrate_difference_ratio += bitrate_difference_ratio
          count += 1
          # Each point of curve 1 contributes at most one bracketing
          # segment of curve 2.
          break

    # Calculate the average improvement between graphs.
    if count != 0:
      avg = total_bitrate_difference_ratio / count

    else:
      avg = 0.0

    return avg

  # Be fair to both graphs by testing all the points in each.
  if method == 'avg':
    avg_improvement = 50 * (
        GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) -
        GraphBetter(metric_set2_sorted, metric_set1_sorted, 0))
  elif method == 'dsnr':
    avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted)
  else:
    # NOTE(review): the argument order here is swapped relative to the
    # 'dsnr' branch -- presumably to flip the sign convention of the rate
    # difference; confirm against the report template's expectations.
    avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted)

  return avg_improvement
284
285
def HandleFiles(variables):
  """
  This script creates html for displaying metric data produced from data
  in a video stats file, as created by the AOM project when enable_psnr
  is turned on:

  Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]

  The script parses each metrics file [see below] that matches the
  statfile_pattern in the baseline directory and looks for the file that
  matches that same file in each of the sub_dirs, and compares the resultant
  metrics bitrate, avg psnr, glb psnr, and ssim.

  It provides a table in which each row is a file in the line directory,
  and a column for each subdir, with the cells representing how that clip
  compares to baseline for that subdir. A graph is given for each which
  compares filesize to that metric. If you click on a point in the graph it
  zooms in on that point.

  a SAMPLE metrics file:

  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
   25.911   38.242   38.104   38.258   38.121   75.790     14103
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
   49.982   41.264   41.129   41.255   41.122   83.993     19817
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
   74.967   42.911   42.767   42.899   42.756   87.928     17332
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  100.012   43.983   43.838   43.881   43.738   89.695     25389
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  149.980   45.338   45.203   45.184   45.043   91.591     25438
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  199.852   46.225   46.123   46.113   45.999   92.679     28302
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  249.922   46.864   46.773   46.777   46.673   93.334     27244
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  299.998   47.366   47.281   47.317   47.220   93.844     27137
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  349.769   47.746   47.677   47.722   47.648   94.178     32226
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  399.773   48.032   47.971   48.013   47.946   94.362     36203

  sample use:
  visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
  """

  # The template file is the html file into which we will write the
  # data from the stats file, formatted correctly for the gviz_api.
  # 'with' guarantees the handle is closed even on error.
  with open(variables[1], "r") as template_file:
    page_template = template_file.read()

  # This is the path match pattern for finding stats files amongst
  # all the other files it could be. eg: *.stt
  file_pattern = variables[2]

  # This is the directory with files that we will use to do the comparison
  # against.
  baseline_dir = variables[3]
  snrs = ''
  filestable = {}

  filestable['dsnr'] = ''
  filestable['drate'] = ''
  filestable['avg'] = ''

  # Dirs is directories after the baseline to compare to the base.
  dirs = variables[4:]

  # Find the metric files in the baseline directory.
  dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))

  # The header row of the first baseline file names the metric columns.
  metrics = GetMetrics(baseline_dir + "/" + dir_list[0])

  metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'

  # Column 0 is the bitrate, so the per-metric tables start at column 1.
  for column in range(1, len(metrics)):

    for metric in ['avg', 'dsnr', 'drate']:
      description = {"file": ("string", "File")}

      # Go through each directory and add a column header to our description.
      countoverall = {}
      sumoverall = {}

      for directory in dirs:
        description[directory] = ("number", directory)
        countoverall[directory] = 0
        sumoverall[directory] = 0

      # Data holds the data for the visualization, name given comes from
      # gviz_api sample code.
      data = []
      for filename in dir_list:
        row = {'file': splitext(basename(filename))[0]}
        baseline_file_name = baseline_dir + "/" + filename

        # Read the metric file from each of the directories in our list.
        for directory in dirs:
          metric_file_name = directory + "/" + filename

          # If there is a metric file in the current directory, open it
          # and calculate its overall difference between it and the baseline
          # directory's metric file.
          if os.path.isfile(metric_file_name):
            overall = FileBetter(baseline_file_name, metric_file_name,
                                 column, metric)
            row[directory] = overall

            sumoverall[directory] += overall
            countoverall[directory] += 1

        data.append(row)

      # Add the overall numbers.
      row = {"file": "OVERALL"}
      for directory in dirs:
        # Guard against a directory that contributed no matching files;
        # dividing by a zero count would abort the whole report.
        if countoverall[directory]:
          row[directory] = sumoverall[directory] / countoverall[directory]
        else:
          row[directory] = 0
      data.append(row)

      # write the tables out
      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)

      filestable[metric] = (filestable[metric] + "filestable_" + metric +
                            "[" + str(column) + "]=" +
                            data_table.ToJSon(columns_order=["file"] + dirs) +
                            "\n")

    # These locals are picked up by name when FillForm substitutes vars()
    # into the page template below.
    filestable_avg = filestable['avg']
    filestable_dpsnr = filestable['dsnr']
    filestable_drate = filestable['drate']

    # Now we collect all the data for all the graphs. First the column
    # headers which will be Datarate and then each directory.
    columns = ("datarate", baseline_dir)
    description = {"datarate": ("number", "Datarate")}
    for directory in dirs:
      description[directory] = ("number", directory)

    description[baseline_dir] = ("number", baseline_dir)

    snrs = snrs + "snrs[" + str(column) + "] = ["

    # Now collect the data for the graphs, file by file.
    for filename in dir_list:

      data = []

      # Collect the file in each directory and store all of its metrics
      # in the associated gviz metrics table.
      all_dirs = dirs + [baseline_dir]
      for directory in all_dirs:

        metric_file_name = directory + "/" + filename
        if not os.path.isfile(metric_file_name):
          continue

        # Read and parse the metrics file storing it to the data we'll
        # use for the gviz_api.Datatable.
        metrics = ParseMetricFile(metric_file_name, column)
        for bitrate, metric in metrics:
          data.append({"datarate": bitrate, directory: metric})

      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)
      snrs = snrs + "'" + data_table.ToJSon(
          columns_order=tuple(["datarate", baseline_dir] + dirs)) + "',"

    snrs = snrs + "]\n"

    formatters = ""
    for i in range(len(dirs)):
      formatters = "%s formatter.format(better, %d);" % (formatters, i + 1)

  # Parenthesized print works identically under Python 2 and Python 3 for
  # a single argument.
  print(FillForm(page_template, vars()))
  return
462
if __name__ == "__main__":
  # At least the template file, the stats-file pattern and a baseline
  # directory are required (argv[1..3]); the original '< 3' check still
  # crashed with an IndexError when the baseline directory was missing.
  # HandleFiles' docstring doubles as the usage message.
  if len(sys.argv) < 4:
    print(HandleFiles.__doc__)
  else:
    HandleFiles(sys.argv)