|  | # | 
|  | # Copyright (c) 2016, Alliance for Open Media. All rights reserved | 
|  | # | 
|  | # This source code is subject to the terms of the BSD 2 Clause License and | 
|  | # the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
|  | # was not distributed with this source code in the LICENSE file, you can | 
|  | # obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
|  | # Media Patent License 1.0 was not distributed with this source code in the | 
|  | # PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
|  | # | 
# This simple script pulls test files from the webm homepage.
# It only downloads a file if
#   1) the file (or the test_data folder) does not exist, or
#   2) the local file's SHA does not match the expected one.
|  |  | 
|  | import pycurl | 
|  | import csv | 
|  | import hashlib | 
|  | import re | 
|  | import os.path | 
|  | import time | 
|  | import itertools | 
|  | import sys | 
|  | import getopt | 
|  |  | 
# Globals: command-line settings. All three start out as empty strings and
# are overwritten with the parsed option values further down.
url = file_list_path = local_resource_path = ''
|  |  | 
|  | # Helper functions: | 
|  | # A simple function which returns the sha hash of a file in hex | 
def get_file_sha(filename, chunk_size=None):
    """Return the SHA-1 digest of a file as a hex string.

    The file is read in fixed-size chunks so that large files never have to
    be held in memory at once.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes to read per chunk. When omitted, the
            module-level HASH_CHUNK constant is used (looked up at call
            time, so it may be defined after this function).

    Returns:
        The hex digest string, or None if the file could not be opened or
        read. In that case an error message is printed.
    """
    if chunk_size is None:
        chunk_size = HASH_CHUNK
    sha_hash = hashlib.sha1()
    try:
        with open(filename, 'rb') as stream:
            # iter() with a sentinel stops at the first empty read (EOF).
            for buf in iter(lambda: stream.read(chunk_size), b''):
                sha_hash.update(buf)
    except IOError:
        # A parenthesised single-argument print is valid both as a Python 2
        # statement and as a Python 3 function call.
        print("Error reading " + filename)
        return None
    return sha_hash.hexdigest()
|  |  | 
|  | # Downloads a file from a url, and then checks the sha against the passed | 
|  | # in sha | 
def download_and_check_sha(url, filename, sha):
    """Download url + "/" + filename and compare its hash with sha.

    The file is written to filename inside the module-level
    local_resource_path directory, replacing any existing file there.

    Args:
        url: Base url; the file name is appended after a "/".
        filename: Name of the file, used both remotely and locally.
        sha: Expected hex digest, compared against get_file_sha().

    Returns:
        True if the hash of the downloaded file equals sha, False otherwise.

    Any error raised by pycurl or by opening the file propagates to the
    caller; the curl handle and the output file are released either way.
    """
    path = os.path.join(local_resource_path, filename)
    with open(path, "wb") as fp:
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.URL, url + "/" + filename)
            curl.setopt(pycurl.WRITEDATA, fp)
            curl.perform()
        finally:
            # Release the handle even when the transfer fails.
            curl.close()
    # The file is closed (and therefore flushed) before it is hashed.
    return get_file_sha(path) == sha
|  |  | 
# Constants

# Maximum number of download attempts per file.
ftp_retries = 3

# Layout of a row in the file list: the sha is column 0, the file name is
# column 1. EXPECTED_COL is the number of columns a usable row must have.
SHA_COL, NAME_COL = 0, 1
EXPECTED_COL = 2

# Number of bytes read per chunk when hashing a file (64 KiB).
HASH_CHUNK = 64 * 1024
|  |  | 
# Main script
# Parse the command line. Each setting has a short and a long option:
#   -u / --url, -i / --input_csv, -o / --output_dir
try:
    opts, args = getopt.getopt(
        sys.argv[1:], "u:i:o:", ["url=", "input_csv=", "output_dir="])
except getopt.GetoptError:
    # Only option-parsing errors are handled here; anything else (including
    # KeyboardInterrupt) is left to propagate.
    print('get_files.py -u <url> -i <input_csv> -o <output_dir>')
    sys.exit(2)

for opt, arg in opts:
    if opt in ("-u", "--url"):
        url = arg
    elif opt in ("-i", "--input_csv"):
        file_list_path = arg
    elif opt in ("-o", "--output_dir"):
        local_resource_path = arg

# Validate what was actually parsed rather than counting argv entries, so
# that "--url=x" and "-ux" spellings are accepted and a missing setting is
# caught even when an option was repeated.
if args or not (url and file_list_path and local_resource_path):
    print("Expects two paths and a url!")
    sys.exit(1)
|  |  | 
# Create the output directory if it is not there yet.
if not os.path.isdir(local_resource_path):
    os.makedirs(local_resource_path)

# Parallel lists: file_shas[i] is the expected sha of file_names[i].
file_shas = []
file_names = []

# The file list separates its columns with runs of spaces, but csv.reader
# only accepts a single-character delimiter, so every run of spaces is
# collapsed to one space before the line is handed to the reader.
# The file is opened in text mode so the lines are str on both Python 2 and
# Python 3, and the with-block closes it even if parsing raises.
with open(file_list_path, "r") as file_list_csv:
    file_list_reader = csv.reader(
        (re.sub(' +', ' ', line) for line in file_list_csv), delimiter=' ')
    for row in file_list_reader:
        # Ignore rows that do not have exactly the expected columns.
        if len(row) != EXPECTED_COL:
            continue
        file_shas.append(row[SHA_COL])
        file_names.append(row[NAME_COL])
|  |  | 
# Download every listed file unless a local copy with the expected sha
# already exists. Each file gets up to ftp_retries download attempts.
for filename, sha in zip(file_names, file_shas):
    path = os.path.join(local_resource_path, filename)
    if os.path.isfile(path) and get_file_sha(path) == sha:
        print(path + ' exists, skipping')
        continue
    for retry in range(ftp_retries):
        print("Downloading " + path)
        if download_and_check_sha(url, filename, sha):
            break
        print("Sha does not match, retrying...")
    else:
        # Reached only when no attempt produced a matching sha.
        # NOTE(review): the exit status is deliberately left unchanged here;
        # consider exiting non-zero so callers can detect the failure.
        print("Giving up on " + path + " after " + str(ftp_retries) +
              " attempts")