Skip to content

Commit de74b81

Browse files
authored
Improved download functionality (#79)
* Add helper functions to simplify a couple of tasks
* Fix FrameioHelpers class
* Fix FrameioHelpers import
* Fix __init__ imports
* Finish fixing helpers...
* Don't do multi-part downloads on files smaller than 25 MB
* Add download improvements from strombergdev
* Fix extra prefix in client.assets.download()
* Tweak download function
* Fix missing call to _get_path() to set self.destination
* Add test to Makefile
* Fix download function's keyword via ** keyword args
* Don't do multi-part downloads on files smaller than 25 MB
1 parent ffaa6be commit de74b81

File tree

4 files changed

+101
-29
lines changed

4 files changed

+101
-29
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ bump-patch:
1515

1616
clean:
1717
find . -name "*.pyc" -exec rm -f {} \;
18+
19+
test:
20+
cd tests && pipenv run python integration.py

frameioclient/lib/download.py

Lines changed: 75 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,18 @@
88
import concurrent.futures
99

1010
from .utils import Utils
11-
from .exceptions import DownloadException, WatermarkIDDownloadException, AssetNotFullyUploaded
11+
from .exceptions import (
12+
DownloadException,
13+
WatermarkIDDownloadException,
14+
AssetNotFullyUploaded,
15+
AssetChecksumNotPresent,
16+
AssetChecksumMismatch
17+
)
1218

1319
thread_local = threading.local()
1420

1521
class FrameioDownloader(object):
16-
def __init__(self, asset, download_folder, prefix, multi_part=False, concurrency=5, replace=False):
22+
def __init__(self, asset, download_folder, prefix=None, replace=False, checksum_verification=True, multi_part=False, concurrency=5):
1723
self.multi_part = multi_part
1824
self.asset = asset
1925
self.asset_type = None
@@ -29,8 +35,10 @@ def __init__(self, asset, download_folder, prefix, multi_part=False, concurrency
2935
self.prefix = prefix
3036
self.filename = Utils.normalize_filename(asset["name"])
3137
self.replace = replace
38+
self.checksum_verification = checksum_verification
3239

3340
self._evaluate_asset()
41+
self._get_path()
3442

3543
def _evaluate_asset(self):
3644
if self.asset.get("_type") != "file":
@@ -45,19 +53,39 @@ def _get_session(self):
4553
return thread_local.session
4654

4755
def _create_file_stub(self):
56+
if self.replace == True:
57+
os.remove(self.destination) # Remove the file
58+
self._create_file_stub() # Create a new stub
59+
4860
try:
4961
fp = open(self.destination, "w")
5062
# fp.write(b"\0" * self.file_size) # Disabled to prevent pre-allocatation of disk space
5163
fp.close()
52-
except FileExistsError as e:
53-
if self.replace == True:
54-
os.remove(self.destination) # Remove the file
55-
self._create_file_stub() # Create a new stub
56-
else:
57-
print(e)
58-
raise e
64+
65+
except Exception as e:
66+
raise e
67+
5968
return True
6069

70+
def _get_path(self):
71+
print("prefix:", self.prefix)
72+
if self.prefix != None:
73+
self.filename = self.prefix + self.filename
74+
75+
if self.destination == None:
76+
final_destination = os.path.join(self.download_folder, self.filename)
77+
self.destination = final_destination
78+
79+
return self.destination
80+
81+
def _get_checksum(self):
82+
try:
83+
self.original_checksum = self.asset['checksums']['xx_hash']
84+
except (TypeError, KeyError):
85+
self.original_checksum = None
86+
87+
return self.original_checksum
88+
6189
def get_download_key(self):
6290
try:
6391
url = self.asset['original']
@@ -84,26 +112,27 @@ def get_download_key(self):
84112

85113
return url
86114

87-
def get_path(self):
88-
if self.prefix != None:
89-
self.filename = self.prefix + self.filename
115+
def download_handler(self):
116+
if os.path.isfile(self.destination) and self.replace != True:
117+
try:
118+
raise FileExistsError
119+
except NameError:
120+
raise OSError('File exists') # Python < 3.3
90121

91-
if self.destination == None:
92-
final_destination = os.path.join(self.download_folder, self.filename)
93-
self.destination = final_destination
94-
95-
return self.destination
122+
url = self.get_download_key()
96123

97-
def download_handler(self):
98-
if os.path.isfile(self.get_path()):
99-
print("File already exists at this location.")
100-
return self.destination
124+
if self.watermarked == True:
125+
return self.download(url)
101126
else:
102-
url = self.get_download_key()
103-
104-
if self.watermarked == True:
127+
# Don't use multi-part download for files below 25 MB
128+
if self.asset['filesize'] < 26214400:
105129
return self.download(url)
130+
if self.multi_part == True:
131+
return self.multi_part_download(url)
106132
else:
133+
# Don't use multi-part download for files below 25 MB
134+
if self.asset['filesize'] < 26214400:
135+
return self.download(url)
107136
if self.multi_part == True:
108137
return self.multi_part_download(url)
109138
else:
@@ -114,8 +143,17 @@ def download(self, url):
114143
print("Beginning download -- {} -- {}".format(self.asset["name"], Utils.format_bytes(self.file_size, type="size")))
115144

116145
# Downloading
117-
r = requests.get(url)
118-
open(self.destination, "wb").write(r.content)
146+
session = self._get_session()
147+
r = session.get('GET', url, stream=True)
148+
149+
with open(self.destination, 'wb') as handle:
150+
try:
151+
# TODO make sure this approach works for SBWM download
152+
for chunk in r.iter_content(chunk_size=4096):
153+
if chunk:
154+
handle.write(chunk)
155+
except requests.exceptions.ChunkedEncodingError as e:
156+
raise e
119157

120158
download_time = time.time() - start_time
121159
download_speed = Utils.format_bytes(math.ceil(self.file_size/(download_time)))
@@ -161,7 +199,17 @@ def multi_part_download(self, url):
161199
download_speed = Utils.format_bytes(math.ceil(self.file_size/(download_time)))
162200
print("Downloaded {} at {}".format(Utils.format_bytes(self.file_size, type="size"), download_speed))
163201

164-
return self.destination
202+
if self.checksum_verification == True:
203+
# Check for checksum, if not present throw error
204+
if self._get_checksum() == None:
205+
raise AssetChecksumNotPresent
206+
else:
207+
if Utils.calculate_hash(self.destination) != self.original_checksum:
208+
raise AssetChecksumMismatch
209+
else:
210+
return self.destination
211+
else:
212+
return self.destination
165213

166214
def download_chunk(self, task):
167215
# Download a particular chunk

frameioclient/lib/exceptions.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,24 @@ def __init__(
4040
):
4141
self.message = message
4242
super().__init__(self.message)
43+
44+
class AssetChecksumNotPresent(Exception):
    """Signals that the Frame.io asset carries no checksum to verify against."""

    def __init__(
        self,
        message=(
            "No checksum found on Frame.io for this asset. This could be because it was uploaded "
            "before we introduced the feature, the media pipeline failed to process the asset, "
            "or the asset has yet to finish being processed."
        ),
    ):
        # Hand the text to Exception first, then expose it as ``.message``.
        super().__init__(message)
        self.message = message
54+
55+
class AssetChecksumMismatch(Exception):
    """Signals that a downloaded file's checksum differs from the one on Frame.io."""

    def __init__(
        self,
        message="Checksum mismatch, you should re-download the asset to resolve any corrupt bits.",
    ):
        # Hand the text to Exception first, then expose it as ``.message``.
        super().__init__(message)
        self.message = message

frameioclient/service/assets.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def upload(self, destination_id, filepath, asset=None):
260260

261261
return asset
262262

263-
def download(self, asset, download_folder, prefix=None, multi_part=False, concurrency=5, replace=False):
263+
def download(self, asset, download_folder, **kwargs):
264264
"""
265265
Download an asset. The method will exit once the file is downloaded.
266266
@@ -272,5 +272,5 @@ def download(self, asset, download_folder, prefix=None, multi_part=False, concur
272272
273273
client.assets.download(asset, "~/Downloads")
274274
"""
275-
downloader = FrameioDownloader(asset, download_folder, prefix, multi_part, concurrency)
275+
downloader = FrameioDownloader(asset, download_folder, **kwargs)
276276
return downloader.download_handler()

0 commit comments

Comments
 (0)