
Commit 5cd4032

Use new huggingface_hub tools for download models (#18438)

* Draft new cached_file
* Initial draft for config and model
* Small fixes
* Fix first batch of tests
* Look in cache when internet is down
* Fix last tests
* Bad black, not fixing all quality errors
* Make diff less
* Implement change for TF and Flax models
* Add tokenizer and feature extractor
* For compatibility with main
* Add utils to move the cache and auto-do it at first use.
* Quality
* Deal with empty commit shas
* Deal with empty etag
* Address review comments

1 parent 70fa1a8 commit 5cd4032

13 files changed: 662 additions, 545 deletions
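The diffs below replace the old two-step download pattern (build a URL with hf_bucket_url, then resolve it with cached_path) with a single cached_file call that takes a repo id or local directory plus a filename and returns a local path, downloading and caching on first use. A minimal usage sketch, assuming the keyword arguments visible in the diffs; the model id and revision are illustrative values, not part of this commit:

    from transformers.utils import CONFIG_NAME, cached_file

    # Resolve config.json for a Hub repo: downloaded and cached on first use,
    # then served from the local cache. Returns the path of the cached file.
    resolved_config_file = cached_file(
        "bert-base-uncased",  # example repo id on the Hub, or a local directory
        CONFIG_NAME,          # filename inside the repo ("config.json")
        revision="main",      # example branch, tag, or commit sha
    )
    print(resolved_config_file)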

src/transformers/configuration_utils.py

Lines changed: 34 additions & 84 deletions
@@ -25,25 +25,9 @@
 
 from packaging import version
 
-from requests import HTTPError
-
 from . import __version__
 from .dynamic_module_utils import custom_object_save
-from .utils import (
-    CONFIG_NAME,
-    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
-    EntryNotFoundError,
-    PushToHubMixin,
-    RepositoryNotFoundError,
-    RevisionNotFoundError,
-    cached_path,
-    copy_func,
-    hf_bucket_url,
-    is_offline_mode,
-    is_remote_url,
-    is_torch_available,
-    logging,
-)
+from .utils import CONFIG_NAME, PushToHubMixin, cached_file, copy_func, is_torch_available, logging
 
 
 logger = logging.get_logger(__name__)
@@ -591,77 +575,43 @@ def _get_config_dict(
         if from_pipeline is not None:
             user_agent["using_pipeline"] = from_pipeline
 
-        if is_offline_mode() and not local_files_only:
-            logger.info("Offline mode: forcing local_files_only=True")
-            local_files_only = True
-
         pretrained_model_name_or_path = str(pretrained_model_name_or_path)
-        if os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)) or is_remote_url(
-            pretrained_model_name_or_path
-        ):
-            config_file = pretrained_model_name_or_path
+
+        is_local = os.path.isdir(pretrained_model_name_or_path)
+        if os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)):
+            # Special case when pretrained_model_name_or_path is a local file
+            resolved_config_file = pretrained_model_name_or_path
+            is_local = True
         else:
             configuration_file = kwargs.pop("_configuration_file", CONFIG_NAME)
 
-            if os.path.isdir(os.path.join(pretrained_model_name_or_path, subfolder)):
-                config_file = os.path.join(pretrained_model_name_or_path, subfolder, configuration_file)
-            else:
-                config_file = hf_bucket_url(
+            try:
+                # Load from local folder or from cache or download from model Hub and cache
+                resolved_config_file = cached_file(
                     pretrained_model_name_or_path,
-                    filename=configuration_file,
+                    configuration_file,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    resume_download=resume_download,
+                    local_files_only=local_files_only,
+                    use_auth_token=use_auth_token,
+                    user_agent=user_agent,
                     revision=revision,
-                    subfolder=subfolder if len(subfolder) > 0 else None,
-                    mirror=None,
+                    subfolder=subfolder,
+                )
+            except EnvironmentError:
+                # Raise any environment error raised by `cached_file`. It will have a helpful error message adapted to
+                # the original exception.
+                raise
+            except Exception:
+                # For any other exception, we throw a generic error.
+                raise EnvironmentError(
+                    f"Can't load the configuration of '{pretrained_model_name_or_path}'. If you were trying to load it"
+                    " from 'https://huggingface.co/models', make sure you don't have a local directory with the same"
+                    f" name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory"
+                    f" containing a {configuration_file} file"
                 )
-
-        try:
-            # Load from URL or cache if already cached
-            resolved_config_file = cached_path(
-                config_file,
-                cache_dir=cache_dir,
-                force_download=force_download,
-                proxies=proxies,
-                resume_download=resume_download,
-                local_files_only=local_files_only,
-                use_auth_token=use_auth_token,
-                user_agent=user_agent,
-            )
-
-        except RepositoryNotFoundError:
-            raise EnvironmentError(
-                f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed on "
-                "'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having "
-                "permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass "
-                "`use_auth_token=True`."
-            )
-        except RevisionNotFoundError:
-            raise EnvironmentError(
-                f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this "
-                f"model name. Check the model page at 'https://huggingface.co/{pretrained_model_name_or_path}' for "
-                "available revisions."
-            )
-        except EntryNotFoundError:
-            raise EnvironmentError(
-                f"{pretrained_model_name_or_path} does not appear to have a file named {configuration_file}."
-            )
-        except HTTPError as err:
-            raise EnvironmentError(
-                f"There was a specific connection error when trying to load {pretrained_model_name_or_path}:\n{err}"
-            )
-        except ValueError:
-            raise EnvironmentError(
-                f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in"
-                f" the cached files and it looks like {pretrained_model_name_or_path} is not the path to a directory"
-                f" containing a {configuration_file} file.\nCheckout your internet connection or see how to run the"
-                " library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'."
-            )
-        except EnvironmentError:
-            raise EnvironmentError(
-                f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from "
-                "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
-                f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
-                f"containing a {configuration_file} file"
-            )
 
         try:
            # Load config dict
@@ -671,10 +621,10 @@ def _get_config_dict(
                 f"It looks like the config file at '{resolved_config_file}' is not a valid JSON file."
             )
 
-        if resolved_config_file == config_file:
-            logger.info(f"loading configuration file {config_file}")
+        if is_local:
+            logger.info(f"loading configuration file {resolved_config_file}")
         else:
-            logger.info(f"loading configuration file {config_file} from cache at {resolved_config_file}")
+            logger.info(f"loading configuration file {configuration_file} from cache at {resolved_config_file}")
 
         return config_dict, kwargs
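Error handling now has the same shape at every call site: an EnvironmentError raised by cached_file (missing repo, unknown revision, missing file, connection trouble) already carries a message adapted to the failure and is re-raised untouched, while any other exception is wrapped in one generic EnvironmentError. A sketch of that pattern; resolve_hub_file is a hypothetical helper for illustration, not part of the library:

    from transformers.utils import cached_file

    def resolve_hub_file(name_or_path, filename, **download_kwargs):
        # Hypothetical wrapper showing the error-handling shape used in this commit.
        try:
            return cached_file(name_or_path, filename, **download_kwargs)
        except EnvironmentError:
            # cached_file already raised a helpful, context-specific message; let it propagate.
            raise
        except Exception:
            # Anything unexpected becomes a generic, caller-friendly error.
            raise EnvironmentError(f"Can't load {filename} for '{name_or_path}'.")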

src/transformers/feature_extraction_utils.py

Lines changed: 35 additions & 68 deletions
@@ -24,23 +24,15 @@
 
 import numpy as np
 
-from requests import HTTPError
-
 from .dynamic_module_utils import custom_object_save
 from .utils import (
     FEATURE_EXTRACTOR_NAME,
-    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
-    EntryNotFoundError,
     PushToHubMixin,
-    RepositoryNotFoundError,
-    RevisionNotFoundError,
     TensorType,
-    cached_path,
+    cached_file,
     copy_func,
-    hf_bucket_url,
     is_flax_available,
     is_offline_mode,
-    is_remote_url,
     is_tf_available,
     is_torch_available,
     logging,
@@ -388,64 +380,40 @@ def get_feature_extractor_dict(
             local_files_only = True
 
         pretrained_model_name_or_path = str(pretrained_model_name_or_path)
+        is_local = os.path.isdir(pretrained_model_name_or_path)
         if os.path.isdir(pretrained_model_name_or_path):
             feature_extractor_file = os.path.join(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME)
-        elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
-            feature_extractor_file = pretrained_model_name_or_path
+        if os.path.isfile(pretrained_model_name_or_path):
+            resolved_feature_extractor_file = pretrained_model_name_or_path
+            is_local = True
         else:
-            feature_extractor_file = hf_bucket_url(
-                pretrained_model_name_or_path, filename=FEATURE_EXTRACTOR_NAME, revision=revision, mirror=None
-            )
-
-        try:
-            # Load from URL or cache if already cached
-            resolved_feature_extractor_file = cached_path(
-                feature_extractor_file,
-                cache_dir=cache_dir,
-                force_download=force_download,
-                proxies=proxies,
-                resume_download=resume_download,
-                local_files_only=local_files_only,
-                use_auth_token=use_auth_token,
-                user_agent=user_agent,
-            )
-
-        except RepositoryNotFoundError:
-            raise EnvironmentError(
-                f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed on "
-                "'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having "
-                "permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass "
-                "`use_auth_token=True`."
-            )
-        except RevisionNotFoundError:
-            raise EnvironmentError(
-                f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this "
-                f"model name. Check the model page at 'https://huggingface.co/{pretrained_model_name_or_path}' for "
-                "available revisions."
-            )
-        except EntryNotFoundError:
-            raise EnvironmentError(
-                f"{pretrained_model_name_or_path} does not appear to have a file named {FEATURE_EXTRACTOR_NAME}."
-            )
-        except HTTPError as err:
-            raise EnvironmentError(
-                f"There was a specific connection error when trying to load {pretrained_model_name_or_path}:\n{err}"
-            )
-        except ValueError:
-            raise EnvironmentError(
-                f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in"
-                f" the cached files and it looks like {pretrained_model_name_or_path} is not the path to a directory"
-                f" containing a {FEATURE_EXTRACTOR_NAME} file.\nCheckout your internet connection or see how to run"
-                " the library in offline mode at"
-                " 'https://huggingface.co/docs/transformers/installation#offline-mode'."
-            )
-        except EnvironmentError:
-            raise EnvironmentError(
-                f"Can't load feature extractor for '{pretrained_model_name_or_path}'. If you were trying to load it "
-                "from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
-                f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
-                f"containing a {FEATURE_EXTRACTOR_NAME} file"
-            )
+            feature_extractor_file = FEATURE_EXTRACTOR_NAME
+            try:
+                # Load from local folder or from cache or download from model Hub and cache
+                resolved_feature_extractor_file = cached_file(
+                    pretrained_model_name_or_path,
+                    feature_extractor_file,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    resume_download=resume_download,
+                    local_files_only=local_files_only,
+                    use_auth_token=use_auth_token,
+                    user_agent=user_agent,
+                    revision=revision,
+                )
+            except EnvironmentError:
+                # Raise any environment error raised by `cached_file`. It will have a helpful error message adapted to
+                # the original exception.
+                raise
+            except Exception:
+                # For any other exception, we throw a generic error.
+                raise EnvironmentError(
+                    f"Can't load feature extractor for '{pretrained_model_name_or_path}'. If you were trying to load"
+                    " it from 'https://huggingface.co/models', make sure you don't have a local directory with the"
+                    f" same name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a"
+                    f" directory containing a {FEATURE_EXTRACTOR_NAME} file"
+                )
 
         try:
             # Load feature_extractor dict
@@ -458,12 +426,11 @@ def get_feature_extractor_dict(
                 f"It looks like the config file at '{resolved_feature_extractor_file}' is not a valid JSON file."
             )
 
-        if resolved_feature_extractor_file == feature_extractor_file:
-            logger.info(f"loading feature extractor configuration file {feature_extractor_file}")
+        if is_local:
+            logger.info(f"loading configuration file {resolved_feature_extractor_file}")
         else:
             logger.info(
-                f"loading feature extractor configuration file {feature_extractor_file} from cache at"
-                f" {resolved_feature_extractor_file}"
+                f"loading configuration file {feature_extractor_file} from cache at {resolved_feature_extractor_file}"
             )
 
         return feature_extractor_dict, kwargs
