Skip to content

Commit e3571cd

Browse files
committed
fix: handle missing .dir file from cache on not_in_remote checks
1 parent fade6c5 commit e3571cd

File tree

3 files changed

+62
-3
lines changed

3 files changed

+62
-3
lines changed

dvc/repo/data.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from dvc.fs.callbacks import DEFAULT_CALLBACK, Callback, TqdmCallback
88
from dvc.log import logger
99
from dvc.ui import ui
10+
from dvc_data.index import DataIndexDirError
1011

1112
if TYPE_CHECKING:
1213
from dvc.repo import Repo
@@ -328,10 +329,21 @@ def _get_entries_not_in_remote(
328329
from dvc.repo.worktree import worktree_view
329330
from dvc_data.index import StorageKeyError
330331

332+
entries: dict[DataIndexKey, DataIndexEntry] = {}
333+
334+
def _onerror(entry, exc):
335+
if not isinstance(exc, DataIndexDirError):
336+
raise exc
337+
# The .dir file is missing from the cache, so the directory's contents cannot be listed; record the directory entry itself and check only its key.
338+
entries[entry.key] = entry
339+
331340
# View into the index, with only pushable entries
332341
index = worktree_view(repo.index, push=True)
333342
data_index = index.data["repo"]
334343

344+
orig_data_index_onerror = data_index.onerror
345+
data_index.onerror = _onerror
346+
335347
view = filter_index(data_index, filter_keys=filter_keys) # type: ignore[arg-type]
336348

337349
missing_entries = []
@@ -340,7 +352,6 @@ def _get_entries_not_in_remote(
340352

341353
n = 0
342354
with TqdmCallback(size=n, desc="Checking remote", unit="entry") as cb:
343-
entries: dict[DataIndexKey, DataIndexEntry] = {}
344355
for key, entry in view.iteritems(shallow=not granular):
345356
if not (entry and entry.hash_info):
346357
continue
@@ -368,6 +379,7 @@ def _get_entries_not_in_remote(
368379
pass
369380
finally:
370381
cb.relative_update()
382+
data_index.onerror = orig_data_index_onerror
371383
return missing_entries
372384

373385

dvc/repo/fetch.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
if TYPE_CHECKING:
1212
from dvc.output import Output
13+
from dvc.repo import Repo
1314
from dvc.stage import Stage
1415

1516
logger = logger.getChild(__name__)
@@ -24,7 +25,7 @@ def _onerror(entry, exc):
2425

2526

2627
def _collect_indexes( # noqa: PLR0913
27-
repo,
28+
repo: "Repo",
2829
targets=None,
2930
remote=None,
3031
all_branches=False,
@@ -94,7 +95,7 @@ def outs_filter(out: "Output") -> bool:
9495

9596
@locked
9697
def fetch( # noqa: PLR0913
97-
self,
98+
self: "Repo",
9899
targets=None,
99100
jobs=None,
100101
remote=None,

tests/func/test_data_status.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,3 +918,49 @@ def test_compat_legacy_new_cache_types(M, tmp_dir, dvc, scm):
918918
"committed": {"added": M.unordered("foo", "bar")},
919919
"git": M.dict(),
920920
}
921+
922+
923+
def test_missing_cache_remote_check(M, tmp_dir, dvc, scm, local_remote):
924+
tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
925+
tmp_dir.dvc_gen("foobar", "foobar")
926+
remove(dvc.cache.repo.path)
927+
928+
assert dvc.data_status(untracked_files="all", not_in_remote=True) == {
929+
**EMPTY_STATUS,
930+
"untracked": M.unordered("foobar.dvc", "dir.dvc", ".gitignore"),
931+
"committed": {"added": M.unordered("foobar", join("dir", ""))},
932+
"not_in_cache": M.unordered("foobar", join("dir", "")),
933+
"git": M.dict(),
934+
"not_in_remote": M.unordered("foobar", join("dir", "")),
935+
}
936+
937+
assert dvc.data_status(
938+
granular=True, untracked_files="all", not_in_remote=True
939+
) == {
940+
**EMPTY_STATUS,
941+
"untracked": M.unordered("foobar.dvc", "dir.dvc", ".gitignore"),
942+
"committed": {"added": M.unordered("foobar", join("dir", ""))},
943+
"uncommitted": {"unknown": M.unordered(join("dir", "foo"), join("dir", "bar"))},
944+
"not_in_cache": M.unordered("foobar", join("dir", "")),
945+
"git": M.dict(),
946+
"not_in_remote": M.unordered("foobar", join("dir", "")),
947+
}
948+
949+
assert dvc.data_status(["dir"], untracked_files="all", not_in_remote=True) == {
950+
**EMPTY_STATUS,
951+
"committed": {"added": [join("dir", "")]},
952+
"not_in_cache": [join("dir", "")],
953+
"git": M.dict(),
954+
"not_in_remote": [join("dir", "")],
955+
}
956+
957+
assert dvc.data_status(
958+
["dir"], untracked_files="all", not_in_remote=True, granular=True
959+
) == {
960+
**EMPTY_STATUS,
961+
"committed": {"added": [join("dir", "")]},
962+
"uncommitted": {"unknown": M.unordered(join("dir", "foo"), join("dir", "bar"))},
963+
"not_in_cache": [join("dir", "")],
964+
"git": M.dict(),
965+
"not_in_remote": [join("dir", "")],
966+
}

0 commit comments

Comments
 (0)