Skip to content

Commit 68f6774

Browse files
authored
Use HF_HUB_OFFLINE instead of HF_DATASETS_OFFLINE (#6968)
* Use HF_HUB_OFFLINE instead of HF_DATASETS_OFFLINE
* style
1 parent 574791e commit 68f6774

File tree

8 files changed

+21
-18
lines changed

8 files changed

+21
-18
lines changed

docs/source/loading.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ For more details, check out the [how to load tabular datasets from Pandas DataFr
341341

342342
Even if you don't have an internet connection, it is still possible to load a dataset. As long as you've downloaded a dataset from the Hub repository before, it should be cached. This means you can reload the dataset from the cache and use it offline.
343343

344-
If you know you won't have internet access, you can run 🤗 Datasets in full offline mode. This saves time because instead of waiting for the Dataset builder download to time out, 🤗 Datasets will look directly in the cache. Set the environment variable `HF_DATASETS_OFFLINE` to `1` to enable full offline mode.
344+
If you know you won't have internet access, you can run 🤗 Datasets in full offline mode. This saves time because instead of waiting for the Dataset builder download to time out, 🤗 Datasets will look directly in the cache. Set the environment variable `HF_HUB_OFFLINE` to `1` to enable full offline mode.
345345

346346
## Slice splits
347347

src/datasets/config.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pathlib import Path
77
from typing import Optional
88

9+
from huggingface_hub import constants
910
from packaging import version
1011

1112

@@ -215,7 +216,9 @@
215216
PARQUET_ROW_GROUP_SIZE_FOR_BINARY_DATASETS = 100
216217

217218
# Offline mode
218-
HF_DATASETS_OFFLINE = os.environ.get("HF_DATASETS_OFFLINE", "AUTO").upper() in ENV_VARS_TRUE_VALUES
219+
_offline = os.environ.get("HF_DATASETS_OFFLINE")
220+
HF_HUB_OFFLINE = constants.HF_HUB_OFFLINE if _offline is None else _offline.upper() in ENV_VARS_TRUE_VALUES
221+
HF_DATASETS_OFFLINE = HF_HUB_OFFLINE # kept for backward-compatibility
219222

220223
# Here, `True` will disable progress bars globally without possibility of enabling it
221224
# programmatically. `False` will enable them without possibility of disabling them.

src/datasets/load.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def files_to_hash(file_paths: List[str]) -> str:
285285

286286
def increase_load_count(name: str, resource_type: str):
287287
"""Update the download count of a dataset or metric."""
288-
if not config.HF_DATASETS_OFFLINE and config.HF_UPDATE_DOWNLOAD_COUNTS:
288+
if not config.HF_HUB_OFFLINE and config.HF_UPDATE_DOWNLOAD_COUNTS:
289289
try:
290290
head_hf_s3(name, filename=name + ".py", dataset=(resource_type == "dataset"))
291291
except Exception:
@@ -1595,7 +1595,7 @@ def _get_modification_time(module_hash):
15951595
f"(last modified on {time.ctime(_get_modification_time(hash))}) since it "
15961596
f"couldn't be found locally at {self.name}"
15971597
)
1598-
if not config.HF_DATASETS_OFFLINE:
1598+
if not config.HF_HUB_OFFLINE:
15991599
warning_msg += ", or remotely on the Hugging Face Hub."
16001600
logger.warning(warning_msg)
16011601
importable_file_path = _get_importable_file_path(
@@ -1632,7 +1632,7 @@ def _get_modification_time(module_hash):
16321632
"dataset_name": self.name.split("/")[-1],
16331633
}
16341634
warning_msg = f"Using the latest cached version of the dataset since {self.name} couldn't be found on the Hugging Face Hub"
1635-
if config.HF_DATASETS_OFFLINE:
1635+
if config.HF_HUB_OFFLINE:
16361636
warning_msg += " (offline mode is enabled)."
16371637
logger.warning(warning_msg)
16381638
return DatasetModule(

src/datasets/utils/file_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,8 +299,8 @@ class OfflineModeIsEnabled(ConnectionError):
299299

300300

301301
def _raise_if_offline_mode_is_enabled(msg: Optional[str] = None):
302-
"""Raise an OfflineModeIsEnabled error (subclass of ConnectionError) if HF_DATASETS_OFFLINE is True."""
303-
if config.HF_DATASETS_OFFLINE:
302+
"""Raise an OfflineModeIsEnabled error (subclass of ConnectionError) if HF_HUB_OFFLINE is True."""
303+
if config.HF_HUB_OFFLINE:
304304
raise OfflineModeIsEnabled(
305305
"Offline mode is enabled." if msg is None else "Offline mode is enabled. " + str(msg)
306306
)
@@ -317,7 +317,7 @@ def _request_with_retry(
317317
) -> requests.Response:
318318
"""Wrapper around requests to retry in case it fails with a ConnectTimeout, with exponential backoff.
319319
320-
Note that if the environment variable HF_DATASETS_OFFLINE is set to 1, then a OfflineModeIsEnabled error is raised.
320+
Note that if the environment variable HF_HUB_OFFLINE is set to 1, then an OfflineModeIsEnabled error is raised.
321321
322322
Args:
323323
method (str): HTTP method, such as 'GET' or 'HEAD'.

tests/test_file_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,13 @@ def test_get_from_cache_fsspec(tmpfs_file):
143143
assert output_file_content == FILE_CONTENT
144144

145145

146-
@patch("datasets.config.HF_DATASETS_OFFLINE", True)
146+
@patch("datasets.config.HF_HUB_OFFLINE", True)
147147
def test_cached_path_offline():
148148
with pytest.raises(OfflineModeIsEnabled):
149149
cached_path("https://huggingface.co")
150150

151151

152-
@patch("datasets.config.HF_DATASETS_OFFLINE", True)
152+
@patch("datasets.config.HF_HUB_OFFLINE", True)
153153
def test_http_offline(tmp_path_factory):
154154
filename = tmp_path_factory.mktemp("data") / "file.html"
155155
with pytest.raises(OfflineModeIsEnabled):
@@ -158,7 +158,7 @@ def test_http_offline(tmp_path_factory):
158158
http_head("https://huggingface.co")
159159

160160

161-
@patch("datasets.config.HF_DATASETS_OFFLINE", True)
161+
@patch("datasets.config.HF_HUB_OFFLINE", True)
162162
def test_ftp_offline(tmp_path_factory):
163163
filename = tmp_path_factory.mktemp("data") / "file.html"
164164
with pytest.raises(OfflineModeIsEnabled):
@@ -167,7 +167,7 @@ def test_ftp_offline(tmp_path_factory):
167167
ftp_head("ftp://huggingface.co")
168168

169169

170-
@patch("datasets.config.HF_DATASETS_OFFLINE", True)
170+
@patch("datasets.config.HF_HUB_OFFLINE", True)
171171
def test_fsspec_offline(tmp_path_factory):
172172
filename = tmp_path_factory.mktemp("data") / "file.html"
173173
with pytest.raises(OfflineModeIsEnabled):

tests/test_load.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1076,7 +1076,7 @@ def test_load_dataset_from_hub(self):
10761076
with offline(offline_simulation_mode):
10771077
with self.assertRaises(ConnectionError) as context:
10781078
datasets.load_dataset("_dummy")
1079-
if offline_simulation_mode != OfflineSimulationMode.HF_DATASETS_OFFLINE_SET_TO_1:
1079+
if offline_simulation_mode != OfflineSimulationMode.HF_HUB_OFFLINE_SET_TO_1:
10801080
self.assertIn(
10811081
"Couldn't reach '_dummy' on the Hub",
10821082
str(context.exception),

tests/test_offline_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,6 @@ def test_offline_with_connection_error():
2323

2424

2525
def test_offline_with_datasets_offline_mode_enabled():
26-
with offline(OfflineSimulationMode.HF_DATASETS_OFFLINE_SET_TO_1):
26+
with offline(OfflineSimulationMode.HF_HUB_OFFLINE_SET_TO_1):
2727
with pytest.raises(ConnectionError):
2828
http_head("https://huggingface.co")

tests/utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ class RequestWouldHangIndefinitelyError(Exception):
347347
class OfflineSimulationMode(Enum):
348348
CONNECTION_FAILS = 0
349349
CONNECTION_TIMES_OUT = 1
350-
HF_DATASETS_OFFLINE_SET_TO_1 = 2
350+
HF_HUB_OFFLINE_SET_TO_1 = 2
351351

352352

353353
@contextmanager
@@ -362,7 +362,7 @@ def offline(mode=OfflineSimulationMode.CONNECTION_FAILS, timeout=1e-16):
362362
CONNECTION_TIMES_OUT: the connection hangs until it times out.
363363
The default timeout value is low (1e-16) to speed up the tests.
364364
Timeout errors are created by mocking requests.request
365-
HF_DATASETS_OFFLINE_SET_TO_1: the HF_DATASETS_OFFLINE environment variable is set to 1.
365+
HF_HUB_OFFLINE_SET_TO_1: the HF_HUB_OFFLINE environment variable is set to 1.
366366
This makes the http/ftp calls of the library instantly fail and raise an OfflineModeIsEnabled error.
367367
"""
368368
online_request = requests.Session().request
@@ -395,8 +395,8 @@ def raise_connection_error(session, prepared_request, **kwargs):
395395
# inspired from https://stackoverflow.com/a/904609
396396
with patch("requests.Session.request", timeout_request):
397397
yield
398-
elif mode is OfflineSimulationMode.HF_DATASETS_OFFLINE_SET_TO_1:
399-
with patch("datasets.config.HF_DATASETS_OFFLINE", True):
398+
elif mode is OfflineSimulationMode.HF_HUB_OFFLINE_SET_TO_1:
399+
with patch("datasets.config.HF_HUB_OFFLINE", True):
400400
yield
401401
else:
402402
raise ValueError("Please use a value from the OfflineSimulationMode enum.")

0 commit comments

Comments
 (0)