From 0de0dca042c13773c5a9cb22835864076b79a616 Mon Sep 17 00:00:00 2001 From: Max Stromberg Date: Thu, 2 Jul 2020 18:44:27 +0200 Subject: [PATCH 1/3] improve downloader ram usage and make replace optional --- frameioclient/client.py | 25 +++++++++++++------------ frameioclient/download.py | 23 ++++++++++++++++------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/frameioclient/client.py b/frameioclient/client.py index d058de14..630ffa84 100644 --- a/frameioclient/client.py +++ b/frameioclient/client.py @@ -322,21 +322,22 @@ def upload(self, asset, file): """ uploader = FrameioUploader(asset, file) uploader.upload() - - def download(self, asset, download_folder): - """ - Download an asset. The method will exit once the file is downloaded. - :Args: - asset (object): The asset object. - download_folder (path): The location to download the file to. + def download(self, asset, download_folder, replace=True): + """ + Download an asset. The method will exit once the file is downloaded. - Example:: + :Args: + asset (object): The asset object. + download_folder (path): The location to download the file to. + replace (bool): Replace the file if it exists. - client.download(asset, "~./Downloads") - """ - downloader = FrameioDownloader(asset, download_folder) - downloader.download() + Example:: + + client.download(asset, "~./Downloads") + """ + downloader = FrameioDownloader(asset, download_folder, replace) + downloader.download() def get_comment(self, comment_id, **kwargs): """ diff --git a/frameioclient/download.py b/frameioclient/download.py index b2ed6cba..ca117a5a 100644 --- a/frameioclient/download.py +++ b/frameioclient/download.py @@ -1,18 +1,27 @@ import requests -import math import os class FrameioDownloader(object): - def __init__(self, asset, download_folder): + def __init__(self, asset, download_folder, replace): self.asset = asset self.download_folder = download_folder + self.replace = replace def download(self): original_filename = self.asset['name'] final_destination = os.path.join(self.download_folder, original_filename) - + + if os.path.isfile(final_destination) and not self.replace: + try: + raise FileExistsError # Added in python 3.3 + except NameError: + raise OSError('File exists') + url = self.asset['original'] - r = requests.get(url) - - open(final_destination, 'wb').write(r.content) - \ No newline at end of file + r = requests.get(url, stream=True) + + handle = open(final_destination, 'wb') + + for chunk in r.iter_content(chunk_size=4096): + if chunk: + handle.write(chunk) From 98227e65c1f0c873ed74b61bb60ff247bea4d152 Mon Sep 17 00:00:00 2001 From: Max Stromberg Date: Tue, 7 Jul 2020 18:35:42 +0200 Subject: [PATCH 2/3] add requests.Session() and retry strategy --- frameioclient/download.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/frameioclient/download.py b/frameioclient/download.py index ca117a5a..f564d72c 100644 --- a/frameioclient/download.py +++ b/frameioclient/download.py @@ -1,11 +1,20 @@ -import requests import os +import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + class FrameioDownloader(object): def __init__(self, asset, download_folder, replace): self.asset = asset self.download_folder = download_folder self.replace = replace + self.retry_strategy = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429], + method_whitelist=['GET'] + ) def download(self): original_filename = self.asset['name'] @@ -13,12 +22,21 @@ def download(self): if os.path.isfile(final_destination) and not self.replace: try: - raise FileExistsError # Added in python 3.3 + raise FileExistsError except NameError: - raise OSError('File exists') + raise OSError('File exists') # Python < 3.3 + + adapter = HTTPAdapter(max_retries=self.retry_strategy) + http = requests.Session() + http.mount('https://', adapter) url = self.asset['original'] - r = requests.get(url, stream=True) + + r = http.request( + 'GET', + url, + stream=True + ) handle = open(final_destination, 'wb') From 47e0e985beb8734cf3c3173f554de1678f5d02d6 Mon Sep 17 00:00:00 2001 From: Max Stromberg Date: Fri, 10 Jul 2020 17:30:01 +0200 Subject: [PATCH 3/3] XXHash verify downloads and retry on fail --- frameioclient/download.py | 47 ++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/frameioclient/download.py b/frameioclient/download.py index f564d72c..8f1bc272 100644 --- a/frameioclient/download.py +++ b/frameioclient/download.py @@ -2,6 +2,7 @@ import requests from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry +from frameioclient.utils import calculate_hash class FrameioDownloader(object): @@ -9,11 +10,14 @@ def __init__(self, asset, download_folder, replace): self.asset = asset self.download_folder = download_folder self.replace = replace - self.retry_strategy = Retry( - total=3, - backoff_factor=1, - status_forcelist=[429], - method_whitelist=['GET'] + self.attempts = 0 + self.retry_limit = 3 + + self.http_retry_strategy = Retry( + total=3, + backoff_factor=1, + status_forcelist=[408, 500, 502, 503, 504], + method_whitelist=['GET'] ) def download(self): @@ -26,20 +30,33 @@ def download(self): except NameError: raise OSError('File exists') # Python < 3.3 - adapter = HTTPAdapter(max_retries=self.retry_strategy) + adapter = HTTPAdapter(max_retries=self.http_retry_strategy) http = requests.Session() http.mount('https://', adapter) url = self.asset['original'] - r = http.request( - 'GET', - url, - stream=True - ) + try: + original_checksum = self.asset['checksums']['xx_hash'] + except (TypeError, KeyError): + original_checksum = None + + while self.attempts < self.retry_limit: + r = http.request('GET', url, stream=True) + + with open(final_destination, 'wb') as handle: + try: + for chunk in r.iter_content(chunk_size=4096): + if chunk: + handle.write(chunk) + except requests.exceptions.ChunkedEncodingError: + self.attempts += 1 + continue + + if not original_checksum: + break - handle = open(final_destination, 'wb') + if calculate_hash(final_destination) == original_checksum: + break - for chunk in r.iter_content(chunk_size=4096): - if chunk: - handle.write(chunk) + self.attempts += 1