Blame - test/android/get_files.py - avm

blob: bdae9a315e7edb7d235a13fb6c1e3539383dce84 [file] [log] [blame]

Joshua Litt	c88f1ec	2013-11-11 12:31:42 -0800	[diff] [blame]	1	#
Yaowu Xu	9c01aa1	2016-09-01 14:32:49 -0700	[diff] [blame]	2	# Copyright (c) 2016, Alliance for Open Media. All rights reserved
				3	#
				4	# This source code is subject to the terms of the BSD 2 Clause License and
				5	# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
				6	# was not distributed with this source code in the LICENSE file, you can
				7	# obtain it at www.aomedia.org/license/software. If the Alliance for Open
				8	# Media Patent License 1.0 was not distributed with this source code in the
				9	# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Joshua Litt	c88f1ec	2013-11-11 12:31:42 -0800	[diff] [blame]	10	#
				11	# This simple script pulls test files from the webm homepage
				12	# It is intelligent enough to only pull files if
				13	# 1) File / test_data folder does not exist
				14	# 2) SHA mismatch
				15
				16	import pycurl
				17	import csv
				18	import hashlib
				19	import re
				20	import os.path
				21	import time
				22	import itertools
				23	import sys
				24	import getopt
				25
				26	#globals
				27	url = ''
				28	file_list_path = ''
				29	local_resource_path = ''
				30
				31	# Helper functions:
				32	# A simple function which returns the sha hash of a file in hex
				33	def get_file_sha(filename):
				34	try:
				35	sha_hash = hashlib.sha1()
				36	with open(filename, 'rb') as file:
				37	buf = file.read(HASH_CHUNK)
				38	while len(buf) > 0:
				39	sha_hash.update(buf)
				40	buf = file.read(HASH_CHUNK)
				41	return sha_hash.hexdigest()
				42	except IOError:
				43	print "Error reading " + filename
				44
				45	# Downloads a file from a url, and then checks the sha against the passed
				46	# in sha
				47	def download_and_check_sha(url, filename, sha):
				48	path = os.path.join(local_resource_path, filename)
				49	fp = open(path, "wb")
				50	curl = pycurl.Curl()
				51	curl.setopt(pycurl.URL, url + "/" + filename)
				52	curl.setopt(pycurl.WRITEDATA, fp)
				53	curl.perform()
				54	curl.close()
				55	fp.close()
				56	return get_file_sha(path) == sha
				57
				58	#constants
				59	ftp_retries = 3
				60
				61	SHA_COL = 0
				62	NAME_COL = 1
				63	EXPECTED_COL = 2
				64	HASH_CHUNK = 65536
				65
				66	# Main script
				67	try:
				68	opts, args = \
				69	getopt.getopt(sys.argv[1:], \
				70	"u:i:o:", ["url=", "input_csv=", "output_dir="])
				71	except:
				72	print 'get_files.py -u <url> -i <input_csv> -o <output_dir>'
				73	sys.exit(2)
				74
				75	for opt, arg in opts:
				76	if opt == '-u':
				77	url = arg
				78	elif opt in ("-i", "--input_csv"):
				79	file_list_path = os.path.join(arg)
				80	elif opt in ("-o", "--output_dir"):
				81	local_resource_path = os.path.join(arg)
				82
				83	if len(sys.argv) != 7:
				84	print "Expects two paths and a url!"
				85	exit(1)
				86
				87	if not os.path.isdir(local_resource_path):
				88	os.makedirs(local_resource_path)
				89
				90	file_list_csv = open(file_list_path, "rb")
				91
				92	# Our 'csv' file uses multiple spaces as a delimiter, python's
				93	# csv class only uses single character delimiters, so we convert them below
				94	file_list_reader = csv.reader((re.sub(' +', ' ', line) \
				95	for line in file_list_csv), delimiter = ' ')
				96
				97	file_shas = []
				98	file_names = []
				99
				100	for row in file_list_reader:
				101	if len(row) != EXPECTED_COL:
				102	continue
				103	file_shas.append(row[SHA_COL])
				104	file_names.append(row[NAME_COL])
				105
				106	file_list_csv.close()
				107
				108	# Download files, only if they don't already exist and have correct shas
				109	for filename, sha in itertools.izip(file_names, file_shas):
				110	path = os.path.join(local_resource_path, filename)
				111	if os.path.isfile(path) \
				112	and get_file_sha(path) == sha:
				113	print path + ' exists, skipping'
				114	continue
				115	for retry in range(0, ftp_retries):
				116	print "Downloading " + path
				117	if not download_and_check_sha(url, filename, sha):
				118	print "Sha does not match, retrying..."
				119	else:
				120	break