Skip to content

Commit 61c5d5f

Browse files
feat: Adding support of single shot download (#1493)
1 parent 5821134 commit 61c5d5f

File tree

10 files changed

+285
-23
lines changed

10 files changed

+285
-23
lines changed

google/cloud/storage/_media/_download.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ class Download(DownloadBase):
140140
``start`` to the end of the media.
141141
headers (Optional[Mapping[str, str]]): Extra headers that should
142142
be sent with the request, e.g. headers for encrypted data.
143-
checksum Optional([str]): The type of checksum to compute to verify
143+
checksum (Optional[str]): The type of checksum to compute to verify
144144
the integrity of the object. The response headers must contain
145145
a checksum of the requested type. If the headers lack an
146146
appropriate checksum (for instance in the case of transcoded or
@@ -157,6 +157,9 @@ class Download(DownloadBase):
157157
See the retry.py source code and docstrings in this package
158158
(google.cloud.storage.retry) for information on retry types and how
159159
to configure them.
160+
single_shot_download (Optional[bool]): If true, download the object in a single request.
161+
Caution: Enabling this buffers the entire object in memory before it is written to the stream, increasing your application's memory overhead.
162+
Enable this only when it suits your use case, e.g. for small objects.
160163
161164
"""
162165

@@ -169,6 +172,7 @@ def __init__(
169172
headers=None,
170173
checksum="auto",
171174
retry=DEFAULT_RETRY,
175+
single_shot_download=False,
172176
):
173177
super(Download, self).__init__(
174178
media_url, stream=stream, start=start, end=end, headers=headers, retry=retry
@@ -178,6 +182,7 @@ def __init__(
178182
self.checksum = (
179183
"crc32c" if _helpers._is_crc32c_available_and_fast() else "md5"
180184
)
185+
self.single_shot_download = single_shot_download
181186
self._bytes_downloaded = 0
182187
self._expected_checksum = None
183188
self._checksum_object = None

google/cloud/storage/_media/requests/download.py

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -132,13 +132,24 @@ def _write_to_stream(self, response):
132132
# the stream is indeed compressed, this will delegate the checksum
133133
# object to the decoder and return a _DoNothingHash here.
134134
local_checksum_object = _add_decoder(response.raw, checksum_object)
135-
body_iter = response.iter_content(
136-
chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False
137-
)
138-
for chunk in body_iter:
139-
self._stream.write(chunk)
140-
self._bytes_downloaded += len(chunk)
141-
local_checksum_object.update(chunk)
135+
136+
# Single-shot mode reads the whole body in one call. It is useful for
137+
# smaller files, or when the user wants to download the entire file in one go.
138+
if self.single_shot_download:
139+
content = response.raw.read(decode_content=True)
140+
self._stream.write(content)
141+
self._bytes_downloaded += len(content)
142+
local_checksum_object.update(content)
143+
response._content_consumed = True
144+
else:
145+
body_iter = response.iter_content(
146+
chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE,
147+
decode_unicode=False,
148+
)
149+
for chunk in body_iter:
150+
self._stream.write(chunk)
151+
self._bytes_downloaded += len(chunk)
152+
local_checksum_object.update(chunk)
142153

143154
# Don't validate the checksum for partial responses.
144155
if (
@@ -345,13 +356,21 @@ def _write_to_stream(self, response):
345356
checksum_object = self._checksum_object
346357

347358
with response:
348-
body_iter = response.raw.stream(
349-
_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
350-
)
351-
for chunk in body_iter:
352-
self._stream.write(chunk)
353-
self._bytes_downloaded += len(chunk)
354-
checksum_object.update(chunk)
359+
# Single-shot mode reads the whole body in one call. It is useful for
360+
# smaller files, or when the user wants to download the entire file in one go.
361+
if self.single_shot_download:
362+
content = response.raw.read()
363+
self._stream.write(content)
364+
self._bytes_downloaded += len(content)
365+
checksum_object.update(content)
366+
else:
367+
body_iter = response.raw.stream(
368+
_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
369+
)
370+
for chunk in body_iter:
371+
self._stream.write(chunk)
372+
self._bytes_downloaded += len(chunk)
373+
checksum_object.update(chunk)
355374
response._content_consumed = True
356375

357376
# Don't validate the checksum for partial responses.

google/cloud/storage/blob.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,7 @@ def _do_download(
987987
timeout=_DEFAULT_TIMEOUT,
988988
checksum="auto",
989989
retry=DEFAULT_RETRY,
990+
single_shot_download=False,
990991
):
991992
"""Perform a download without any error handling.
992993
@@ -1047,13 +1048,20 @@ def _do_download(
10471048
See the retry.py source code and docstrings in this package
10481049
(google.cloud.storage.retry) for information on retry types and how
10491050
to configure them.
1051+
1052+
:type single_shot_download: bool
1053+
:param single_shot_download:
1054+
(Optional) If true, download the object in a single request.
1055+
Caution: Enabling this buffers the entire object in memory before it is written to the stream, increasing your application's memory overhead.
1056+
Enable this only when it suits your use case, e.g. for small objects.
10501057
"""
10511058

10521059
extra_attributes = {
10531060
"url.full": download_url,
10541061
"download.chunk_size": f"{self.chunk_size}",
10551062
"download.raw_download": raw_download,
10561063
"upload.checksum": f"{checksum}",
1064+
"download.single_shot_download": single_shot_download,
10571065
}
10581066
args = {"timeout": timeout}
10591067

@@ -1073,6 +1081,10 @@ def _do_download(
10731081
end=end,
10741082
checksum=checksum,
10751083
retry=retry,
1084+
# NOTE: single_shot_download is only supported in Download and RawDownload
1085+
# classes, i.e., when chunk_size is set to None (the default value). It is
1086+
# not supported for chunked downloads.
1087+
single_shot_download=single_shot_download,
10761088
)
10771089
with create_trace_span(
10781090
name=f"Storage.{download_class}/consume",
@@ -1127,6 +1139,7 @@ def download_to_file(
11271139
timeout=_DEFAULT_TIMEOUT,
11281140
checksum="auto",
11291141
retry=DEFAULT_RETRY,
1142+
single_shot_download=False,
11301143
):
11311144
"""Download the contents of this blob into a file-like object.
11321145
@@ -1222,6 +1235,12 @@ def download_to_file(
12221235
(google.cloud.storage.retry) for information on retry types and how
12231236
to configure them.
12241237
1238+
:type single_shot_download: bool
1239+
:param single_shot_download:
1240+
(Optional) If true, download the object in a single request.
1241+
Caution: Enabling this buffers the entire object in memory before it is written to the stream, increasing your application's memory overhead.
1242+
Enable this only when it suits your use case, e.g. for small objects.
1243+
12251244
:raises: :class:`google.cloud.exceptions.NotFound`
12261245
"""
12271246
with create_trace_span(name="Storage.Blob.downloadToFile"):
@@ -1240,6 +1259,7 @@ def download_to_file(
12401259
timeout=timeout,
12411260
checksum=checksum,
12421261
retry=retry,
1262+
single_shot_download=single_shot_download,
12431263
)
12441264

12451265
def _handle_filename_and_download(self, filename, *args, **kwargs):
@@ -1285,6 +1305,7 @@ def download_to_filename(
12851305
timeout=_DEFAULT_TIMEOUT,
12861306
checksum="auto",
12871307
retry=DEFAULT_RETRY,
1308+
single_shot_download=False,
12881309
):
12891310
"""Download the contents of this blob into a named file.
12901311
@@ -1370,6 +1391,12 @@ def download_to_filename(
13701391
(google.cloud.storage.retry) for information on retry types and how
13711392
to configure them.
13721393
1394+
:type single_shot_download: bool
1395+
:param single_shot_download:
1396+
(Optional) If true, download the object in a single request.
1397+
Caution: Enabling this buffers the entire object in memory before it is written to the stream, increasing your application's memory overhead.
1398+
Enable this only when it suits your use case, e.g. for small objects.
1399+
13731400
:raises: :class:`google.cloud.exceptions.NotFound`
13741401
"""
13751402
with create_trace_span(name="Storage.Blob.downloadToFilename"):
@@ -1388,6 +1415,7 @@ def download_to_filename(
13881415
timeout=timeout,
13891416
checksum=checksum,
13901417
retry=retry,
1418+
single_shot_download=single_shot_download,
13911419
)
13921420

13931421
def download_as_bytes(
@@ -1405,6 +1433,7 @@ def download_as_bytes(
14051433
timeout=_DEFAULT_TIMEOUT,
14061434
checksum="auto",
14071435
retry=DEFAULT_RETRY,
1436+
single_shot_download=False,
14081437
):
14091438
"""Download the contents of this blob as a bytes object.
14101439
@@ -1484,6 +1513,12 @@ def download_as_bytes(
14841513
(google.cloud.storage.retry) for information on retry types and how
14851514
to configure them.
14861515
1516+
:type single_shot_download: bool
1517+
:param single_shot_download:
1518+
(Optional) If true, download the object in a single request.
1519+
Caution: Enabling this buffers the entire object in memory before it is written to the stream, increasing your application's memory overhead.
1520+
Enable this only when it suits your use case, e.g. for small objects.
1521+
14871522
:rtype: bytes
14881523
:returns: The data stored in this blob.
14891524
@@ -1507,6 +1542,7 @@ def download_as_bytes(
15071542
timeout=timeout,
15081543
checksum=checksum,
15091544
retry=retry,
1545+
single_shot_download=single_shot_download,
15101546
)
15111547
return string_buffer.getvalue()
15121548

@@ -1524,6 +1560,7 @@ def download_as_string(
15241560
if_metageneration_not_match=None,
15251561
timeout=_DEFAULT_TIMEOUT,
15261562
retry=DEFAULT_RETRY,
1563+
single_shot_download=False,
15271564
):
15281565
"""(Deprecated) Download the contents of this blob as a bytes object.
15291566
@@ -1594,6 +1631,12 @@ def download_as_string(
15941631
(google.cloud.storage.retry) for information on retry types and how
15951632
to configure them.
15961633
1634+
:type single_shot_download: bool
1635+
:param single_shot_download:
1636+
(Optional) If true, download the object in a single request.
1637+
Caution: Enabling this buffers the entire object in memory before it is written to the stream, increasing your application's memory overhead.
1638+
Enable this only when it suits your use case, e.g. for small objects.
1639+
15971640
:rtype: bytes
15981641
:returns: The data stored in this blob.
15991642
@@ -1616,6 +1659,7 @@ def download_as_string(
16161659
if_metageneration_not_match=if_metageneration_not_match,
16171660
timeout=timeout,
16181661
retry=retry,
1662+
single_shot_download=single_shot_download,
16191663
)
16201664

16211665
def download_as_text(
@@ -1633,6 +1677,7 @@ def download_as_text(
16331677
if_metageneration_not_match=None,
16341678
timeout=_DEFAULT_TIMEOUT,
16351679
retry=DEFAULT_RETRY,
1680+
single_shot_download=False,
16361681
):
16371682
"""Download the contents of this blob as text (*not* bytes).
16381683
@@ -1705,6 +1750,12 @@ def download_as_text(
17051750
(google.cloud.storage.retry) for information on retry types and how
17061751
to configure them.
17071752
1753+
:type single_shot_download: bool
1754+
:param single_shot_download:
1755+
(Optional) If true, download the object in a single request.
1756+
Caution: Enabling this buffers the entire object in memory before it is written to the stream, increasing your application's memory overhead.
1757+
Enable this only when it suits your use case, e.g. for small objects.
1758+
17081759
:rtype: text
17091760
:returns: The data stored in this blob, decoded to text.
17101761
"""
@@ -1722,6 +1773,7 @@ def download_as_text(
17221773
if_metageneration_not_match=if_metageneration_not_match,
17231774
timeout=timeout,
17241775
retry=retry,
1776+
single_shot_download=single_shot_download,
17251777
)
17261778

17271779
if encoding is not None:
@@ -4019,6 +4071,7 @@ def open(
40194071
For downloads only, the following additional arguments are supported:
40204072
40214073
- ``raw_download``
4074+
- ``single_shot_download``
40224075
40234076
For uploads only, the following additional arguments are supported:
40244077
@@ -4209,6 +4262,7 @@ def _prep_and_do_download(
42094262
timeout=_DEFAULT_TIMEOUT,
42104263
checksum="auto",
42114264
retry=DEFAULT_RETRY,
4265+
single_shot_download=False,
42124266
command=None,
42134267
):
42144268
"""Download the contents of a blob object into a file-like object.
@@ -4294,6 +4348,12 @@ def _prep_and_do_download(
42944348
(google.cloud.storage.retry) for information on retry types and how
42954349
to configure them.
42964350
4351+
:type single_shot_download: bool
4352+
:param single_shot_download:
4353+
(Optional) If true, download the object in a single request.
4354+
Caution: Enabling this buffers the entire object in memory before it is written to the stream, increasing your application's memory overhead.
4355+
Enable this only when it suits your use case, e.g. for small objects.
4356+
42974357
:type command: str
42984358
:param command:
42994359
(Optional) Information about which interface for download was used,
@@ -4349,6 +4409,7 @@ def _prep_and_do_download(
43494409
timeout=timeout,
43504410
checksum=checksum,
43514411
retry=retry,
4412+
single_shot_download=single_shot_download,
43524413
)
43534414
except InvalidResponse as exc:
43544415
_raise_from_invalid_response(exc)

google/cloud/storage/client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,7 @@ def download_blob_to_file(
11431143
timeout=_DEFAULT_TIMEOUT,
11441144
checksum="auto",
11451145
retry=DEFAULT_RETRY,
1146+
single_shot_download=False,
11461147
):
11471148
"""Download the contents of a blob object or blob URI into a file-like object.
11481149
@@ -1216,6 +1217,9 @@ def download_blob_to_file(
12161217
See the retry.py source code and docstrings in this package
12171218
(google.cloud.storage.retry) for information on retry types and how
12181219
to configure them.
1220+
1221+
single_shot_download (bool):
1222+
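(Optional) If true, download the object in a single request. Caution: Enabling this buffers the entire object in memory, increasing your application's memory overhead.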
12191223
"""
12201224
with create_trace_span(name="Storage.Client.downloadBlobToFile"):
12211225
if not isinstance(blob_or_uri, Blob):
@@ -1236,6 +1240,7 @@ def download_blob_to_file(
12361240
timeout=timeout,
12371241
checksum=checksum,
12381242
retry=retry,
1243+
single_shot_download=single_shot_download,
12391244
)
12401245

12411246
def list_blobs(

google/cloud/storage/fileio.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
"timeout",
3636
"retry",
3737
"raw_download",
38+
"single_shot_download",
3839
}
3940

4041
# Valid keyword arguments for upload methods.
@@ -99,8 +100,9 @@ class BlobReader(io.BufferedIOBase):
99100
- ``if_metageneration_not_match``
100101
- ``timeout``
101102
- ``raw_download``
103+
- ``single_shot_download``
102104
103-
Note that download_kwargs (excluding ``raw_download``) are also applied to blob.reload(),
105+
Note that download_kwargs (excluding ``raw_download`` and ``single_shot_download``) are also applied to blob.reload(),
104106
if a reload is needed during seek().
105107
"""
106108

@@ -177,7 +179,9 @@ def seek(self, pos, whence=0):
177179

178180
if self._blob.size is None:
179181
reload_kwargs = {
180-
k: v for k, v in self._download_kwargs.items() if k != "raw_download"
182+
k: v
183+
for k, v in self._download_kwargs.items()
184+
if (k != "raw_download" and k != "single_shot_download")
181185
}
182186
self._blob.reload(**reload_kwargs)
183187

0 commit comments

Comments
 (0)