Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/datasets/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@
)
TIME_OUT_REMOTE_CODE = 15

# Datasets-server
# Dataset viewer API
USE_PARQUET_EXPORT = True

# Batch size constants. For more info, see:
Expand Down
12 changes: 6 additions & 6 deletions src/datasets/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
_hash_python_lines,
)
from .splits import Split
from .utils import _datasets_server
from .utils import _dataset_viewer
from .utils.deprecation_utils import deprecated
from .utils.file_utils import (
OfflineModeIsEnabled,
Expand Down Expand Up @@ -1236,7 +1236,7 @@ def get_module(self) -> DatasetModule:
metadata_configs = MetadataConfigs.from_dataset_card_data(dataset_card_data)
dataset_infos = DatasetInfosDict.from_dataset_card_data(dataset_card_data)
try:
exported_dataset_infos = _datasets_server.get_exported_dataset_infos(
exported_dataset_infos = _dataset_viewer.get_exported_dataset_infos(
dataset=self.name, revision=self.revision, token=self.download_config.token
)
exported_dataset_infos = DatasetInfosDict(
Expand All @@ -1245,7 +1245,7 @@ def get_module(self) -> DatasetModule:
for config_name in exported_dataset_infos
}
)
except _datasets_server.DatasetsServerError:
except _dataset_viewer.DatasetViewerError:
exported_dataset_infos = None
if exported_dataset_infos:
exported_dataset_infos.update(dataset_infos)
Expand Down Expand Up @@ -1372,10 +1372,10 @@ def __init__(
increase_load_count(name, resource_type="dataset")

def get_module(self) -> DatasetModule:
exported_parquet_files = _datasets_server.get_exported_parquet_files(
exported_parquet_files = _dataset_viewer.get_exported_parquet_files(
dataset=self.name, revision=self.revision, token=self.download_config.token
)
exported_dataset_infos = _datasets_server.get_exported_dataset_infos(
exported_dataset_infos = _dataset_viewer.get_exported_dataset_infos(
dataset=self.name, revision=self.revision, token=self.download_config.token
)
dataset_infos = DatasetInfosDict(
Expand Down Expand Up @@ -1864,7 +1864,7 @@ def dataset_module_factory(
return HubDatasetModuleFactoryWithParquetExport(
path, download_config=download_config, revision=dataset_info.sha
).get_module()
except _datasets_server.DatasetsServerError:
except _dataset_viewer.DatasetViewerError:
pass
# Otherwise we must use the dataset script if the user trusts it
return HubDatasetModuleFactoryWithScript(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
logger = get_logger(__name__)


class DatasetsServerError(DatasetsError):
"""Dataset-server error.
class DatasetViewerError(DatasetsError):
"""Dataset viewer error.

Raised when trying to use the Datasets-server HTTP API and when trying to access:
Raised when trying to use the dataset viewer HTTP API and when trying to access:
- a missing dataset,
- a private/gated dataset when the user is not authenticated, or
- unavailable /parquet or /info responses.
Expand All @@ -27,10 +27,10 @@ def get_exported_parquet_files(dataset: str, revision: str, token: Optional[Unio
Get the dataset exported parquet files
Docs: https://huggingface.co/docs/datasets-server/parquet
"""
datasets_server_parquet_url = config.HF_ENDPOINT.replace("://", "://datasets-server.") + "/parquet?dataset="
dataset_viewer_parquet_url = config.HF_ENDPOINT.replace("://", "://datasets-server.") + "/parquet?dataset="
try:
parquet_data_files_response = http_get(
url=datasets_server_parquet_url + dataset,
url=dataset_viewer_parquet_url + dataset,
temp_file=None,
headers=get_authentication_headers_for_url(config.HF_ENDPOINT + f"datasets/{dataset}", token=token),
timeout=100.0,
Expand All @@ -53,9 +53,9 @@ def get_exported_parquet_files(dataset: str, revision: str, token: Optional[Unio
logger.debug(
f"Parquet export for {dataset} is available but outdated (revision='{parquet_data_files_response.headers['X-Revision']}')"
)
except Exception as e: # noqa catch any exception of the datasets-server and consider the parquet export doesn't exist
except Exception as e: # noqa catch any exception of the dataset viewer API and consider the parquet export doesn't exist
logger.debug(f"No parquet export for {dataset} available ({type(e).__name__}: {e})")
raise DatasetsServerError("No exported Parquet files available.")
raise DatasetViewerError("No exported Parquet files available.")


def get_exported_dataset_infos(
Expand All @@ -65,10 +65,10 @@ def get_exported_dataset_infos(
Get the dataset information. This can be useful to get, e.g., the dataset features.
Docs: https://huggingface.co/docs/datasets-server/info
"""
datasets_server_info_url = config.HF_ENDPOINT.replace("://", "://datasets-server.") + "/info?dataset="
dataset_viewer_info_url = config.HF_ENDPOINT.replace("://", "://datasets-server.") + "/info?dataset="
try:
info_response = http_get(
url=datasets_server_info_url + dataset,
url=dataset_viewer_info_url + dataset,
temp_file=None,
headers=get_authentication_headers_for_url(config.HF_ENDPOINT + f"datasets/{dataset}", token=token),
timeout=100.0,
Expand All @@ -91,6 +91,6 @@ def get_exported_dataset_infos(
logger.debug(
f"Dataset info for {dataset} is available but outdated (revision='{info_response.headers['X-Revision']}')"
)
except Exception as e: # noqa catch any exception of the datasets-server and consider the dataset info doesn't exist
except Exception as e: # noqa catch any exception of the dataset viewer API and consider the dataset info doesn't exist
logger.debug(f"No dataset info for {dataset} available ({type(e).__name__}: {e})")
raise DatasetsServerError("No exported dataset infos available.")
raise DatasetViewerError("No exported dataset infos available.")
4 changes: 2 additions & 2 deletions tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from datasets.packaged_modules.audiofolder.audiofolder import AudioFolder, AudioFolderConfig
from datasets.packaged_modules.imagefolder.imagefolder import ImageFolder, ImageFolderConfig
from datasets.packaged_modules.parquet.parquet import ParquetConfig
from datasets.utils import _datasets_server
from datasets.utils import _dataset_viewer
from datasets.utils.logging import INFO, get_logger

from .utils import (
Expand Down Expand Up @@ -862,7 +862,7 @@ def test_HubDatasetModuleFactoryWithParquetExport_errors_on_wrong_sha(self):
download_config=self.download_config,
revision="wrong_sha",
)
with self.assertRaises(_datasets_server.DatasetsServerError):
with self.assertRaises(_dataset_viewer.DatasetViewerError):
factory.get_module()

@pytest.mark.integration
Expand Down