Skip to content

Commit 9b5ee43

Browse files
authored
fix(diff): do not show missing cache entries on diff result (#10845)
1 parent 6e22363 commit 9b5ee43

File tree

2 files changed

+80
-52
lines changed

2 files changed

+80
-52
lines changed

dvc/repo/diff.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,11 @@ def meta_cmp_key(meta):
4545
with_renames=True,
4646
meta_cmp_key=meta_cmp_key,
4747
roots=data_keys,
48-
# we need to get unknown and unchanged entries to tell whether
49-
# the object is missing from the cache or not.
50-
with_unknown=with_missing,
48+
# Include unknown entries from missing dir entry, so that they don't
49+
# get reported as added/modified/deleted.
50+
# Also return unchanged entries so that we can check if they are missing
51+
# from cache.
52+
with_unknown=True,
5153
with_unchanged=with_missing,
5254
):
5355
if (change.old and change.old.isdir and not change.old.hash_info) or (

tests/func/test_diff.py

Lines changed: 75 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
import os
33

44
import pytest
5-
from funcy import first
65

76
from dvc.exceptions import DvcException
87
from dvc.utils.fs import remove
@@ -96,6 +95,81 @@ def test_no_cache_entry(tmp_dir, scm, dvc):
9695
}
9796

9897

98+
def test_diff_no_cache(tmp_dir, scm, dvc):
99+
(stage,) = tmp_dir.dvc_gen({"dir": {"file": "file content"}}, commit="first")
100+
scm.tag("v1")
101+
dvc.cache.local.clear()
102+
old_digest = stage.outs[0].hash_info.value
103+
dir_path = os.path.join("dir", "")
104+
105+
default_result = {
106+
"added": [],
107+
"deleted": [],
108+
"modified": [],
109+
"renamed": [],
110+
"not in cache": [],
111+
}
112+
113+
assert dvc.diff("v1") == default_result | {
114+
"not in cache": [{"path": dir_path, "hash": old_digest}],
115+
}
116+
assert dvc.diff("HEAD", "v1") == {}
117+
assert dvc.diff("v1", "HEAD") == {}
118+
119+
(stage,) = tmp_dir.dvc_gen(
120+
{"dir": {"file": "modified file content"}}, commit="first"
121+
)
122+
scm.tag("v2")
123+
new_digest = stage.outs[0].hash_info.value
124+
125+
assert dvc.diff("v2") == {}
126+
assert dvc.diff("v1") == default_result | {
127+
"modified": [
128+
{"path": dir_path, "hash": {"old": old_digest, "new": new_digest}}
129+
],
130+
"not in cache": [{"path": dir_path, "hash": old_digest}],
131+
}
132+
assert dvc.diff("v1", "v2") == default_result | {
133+
"modified": [
134+
{"path": dir_path, "hash": {"old": old_digest, "new": new_digest}}
135+
],
136+
}
137+
138+
remove(dvc.cache.local.path)
139+
# drop the cache so that we can test as if we don't know what entries are
140+
# in the missing cache entry.
141+
dvc.drop_data_index()
142+
143+
assert dvc.diff("v2") == default_result | {
144+
"not in cache": [{"path": dir_path, "hash": new_digest}],
145+
}
146+
assert dvc.diff("v1") == default_result | {
147+
"modified": [
148+
{"path": dir_path, "hash": {"old": old_digest, "new": new_digest}}
149+
],
150+
"not in cache": [{"path": dir_path, "hash": old_digest}],
151+
}
152+
assert dvc.diff("v2", "v1") == default_result | {
153+
"modified": [
154+
{"path": dir_path, "hash": {"old": new_digest, "new": old_digest}}
155+
],
156+
}
157+
assert dvc.diff("v1", "v2") == default_result | {
158+
"modified": [
159+
{"path": dir_path, "hash": {"old": old_digest, "new": new_digest}}
160+
],
161+
}
162+
assert dvc.diff() == default_result | {
163+
"not in cache": [{"path": dir_path, "hash": new_digest}],
164+
}
165+
166+
remove(str(tmp_dir / "dir"))
167+
assert dvc.diff() == default_result | {
168+
"deleted": [{"path": dir_path, "hash": new_digest}],
169+
"not in cache": [{"path": dir_path, "hash": new_digest}],
170+
}
171+
172+
99173
@pytest.mark.parametrize("delete_data", [True, False])
100174
def test_deleted(tmp_dir, scm, dvc, delete_data):
101175
tmp_dir.dvc_gen("file", "text", commit="add file")
@@ -256,54 +330,6 @@ def test_directories(tmp_dir, scm, dvc):
256330
}
257331

258332

259-
def test_diff_no_cache(tmp_dir, scm, dvc):
260-
tmp_dir.dvc_gen({"dir": {"file": "file content"}}, commit="first")
261-
scm.tag("v1")
262-
263-
tmp_dir.dvc_gen({"dir": {"file": "modified file content"}}, commit="second")
264-
scm.tag("v2")
265-
266-
remove(dvc.cache.local.path)
267-
268-
diff = dvc.diff("v1")
269-
assert diff["added"] == []
270-
assert diff["deleted"] == []
271-
assert first(diff["modified"])["path"] == os.path.join("dir", "")
272-
assert diff["renamed"] == []
273-
assert diff["not in cache"] == [
274-
{
275-
"hash": "61d9c7f006e1ae7138fec5e574676ee2.dir",
276-
"path": os.path.join("dir", ""),
277-
}
278-
]
279-
280-
diff = dvc.diff("v1", "v2")
281-
assert diff["added"] == []
282-
assert diff["deleted"] == []
283-
assert diff["renamed"] == []
284-
assert first(diff["modified"])["path"] == os.path.join("dir", "")
285-
assert diff["not in cache"] == []
286-
287-
(tmp_dir / "dir" / "file").unlink()
288-
remove(str(tmp_dir / "dir"))
289-
diff = dvc.diff()
290-
assert diff["added"] == []
291-
assert diff["deleted"] == [
292-
{
293-
"path": os.path.join("dir", ""),
294-
"hash": "f0f7a307d223921557c929f944bf5303.dir",
295-
}
296-
]
297-
assert diff["renamed"] == []
298-
assert diff["modified"] == []
299-
assert diff["not in cache"] == [
300-
{
301-
"path": os.path.join("dir", ""),
302-
"hash": "f0f7a307d223921557c929f944bf5303.dir",
303-
}
304-
]
305-
306-
307333
def test_diff_dirty(tmp_dir, scm, dvc):
308334
tmp_dir.dvc_gen(
309335
{"file": "file_content", "dir": {"dir_file1": "dir file content"}},

0 commit comments

Comments
 (0)