Skip to content

Commit ac2b091

Browse files
committed
ls-url: add support for --tree/--level
Similar to `ls` command.
1 parent a43c1b3 commit ac2b091

File tree

7 files changed

+188
-28
lines changed

7 files changed

+188
-28
lines changed

dvc/commands/ls/__init__.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,7 @@ def _build_tree_structure(
7777

7878
num_entries = len(entries)
7979
for i, (name, entry) in enumerate(entries.items()):
80-
# show full path for root, otherwise only show the name
81-
if _depth > 0:
82-
entry["path"] = name
83-
80+
entry["path"] = name
8481
is_last = i >= num_entries - 1
8582
tree_part = ""
8683
if _depth > 0:

dvc/commands/ls_url.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,45 @@
33
from dvc.cli.utils import DictAction, append_doc_link
44
from dvc.log import logger
55

6-
from .ls import show_entries
6+
from .ls import show_entries, show_tree
77

88
logger = logger.getChild(__name__)
99

1010

1111
class CmdListUrl(CmdBaseNoRepo):
12-
def run(self):
13-
from dvc.config import Config
12+
def _show_tree(self, config):
    """Print the contents of ``self.args.url`` as a tree.

    The URL is resolved to a filesystem and path with
    ``parse_external_url``, the entries are collected with ``_ls_tree``
    (limited to ``self.args.level`` levels when it is set) and rendered
    with ``show_tree``.

    Args:
        config: configuration forwarded to ``parse_external_url``.

    Returns:
        0 once the tree has been printed.

    Raises:
        URLMissingError: if the lookup raises ``FileNotFoundError``, so
            that ``--tree`` reports a missing URL the same way
            ``ls_url`` does for the plain listing.
    """
    from dvc.exceptions import URLMissingError
    from dvc.fs import parse_external_url
    from dvc.repo.ls import _ls_tree

    fs, fs_path = parse_external_url(
        self.args.url, fs_config=self.args.fs_config, config=config
    )
    try:
        entries = _ls_tree(fs, fs_path, maxdepth=self.args.level)
    except FileNotFoundError as exc:
        # `_ls_tree` stats the path first; surface a missing URL as the
        # same error the non-tree listing raises instead of a raw
        # FileNotFoundError.
        raise URLMissingError(self.args.url) from exc
    show_tree(entries, with_color=True, with_size=self.args.size)
    return 0
22+
23+
def _show_list(self, config):
1424
from dvc.repo import Repo
1525

1626
entries = Repo.ls_url(
1727
self.args.url,
1828
recursive=self.args.recursive,
29+
maxdepth=self.args.level,
1930
fs_config=self.args.fs_config,
20-
config=Config.from_cwd(),
31+
config=config,
2132
)
2233
if entries:
2334
show_entries(entries, with_color=True, with_size=self.args.size)
2435
return 0
2536

37+
def run(self):
    """Run ``ls-url``: load the config from the current directory and
    show either a tree (``--tree``) or a flat listing.

    Returns:
        The return code of the selected display method.
    """
    from dvc.config import Config

    cwd_config = Config.from_cwd()
    # only the selected method is looked up, as in an if/return chain
    display = self._show_tree if self.args.tree else self._show_list
    return display(config=cwd_config)
44+
2645

2746
def add_parser(subparsers, parent_parser):
2847
LS_HELP = "List directory contents from URL."
@@ -40,6 +59,19 @@ def add_parser(subparsers, parent_parser):
4059
lsurl_parser.add_argument(
4160
"-R", "--recursive", action="store_true", help="Recursively list files."
4261
)
62+
lsurl_parser.add_argument(
63+
"-T",
64+
"--tree",
65+
action="store_true",
66+
help="Recurse into directories as a tree.",
67+
)
68+
lsurl_parser.add_argument(
69+
"-L",
70+
"--level",
71+
metavar="depth",
72+
type=int,
73+
help="Limit the depth of recursion.",
74+
)
4375
lsurl_parser.add_argument("--size", action="store_true", help="Show sizes.")
4476
lsurl_parser.add_argument(
4577
"--fs-config",

dvc/repo/ls.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ def ls_tree(
9999
path = path or ""
100100
fs: DVCFileSystem = repo.dvcfs
101101
fs_path = fs.from_os_path(path)
102-
return _ls_tree(fs, fs_path, dvc_only, maxdepth)
102+
return _ls_tree(
103+
fs, fs_path, maxdepth=maxdepth, dvc_only=dvc_only, dvcfiles=True
104+
)
103105

104106

105107
def _ls(
@@ -145,27 +147,34 @@ def _ls(
145147
return ret_list
146148

147149

148-
def _ls_tree(
149-
fs, path, dvc_only: bool = False, maxdepth: Optional[int] = None, _info=None
150-
):
151-
ret = {}
150+
def _ls_tree(fs, path, maxdepth=None, _info=None, **fs_kwargs):
152151
info = _info or fs.info(path)
152+
if _info is None:
153+
# preserve the original path name
154+
name = path
155+
if not name:
156+
name = os.curdir if fs.protocol == "local" else fs.root_marker
157+
path = info["name"]
158+
else:
159+
name = path.rsplit(fs.sep, 1)[-1]
153160

154-
path = info["name"].rstrip(fs.sep) or os.curdir
155-
name = path.rsplit("/", 1)[-1]
161+
ret = {}
156162
ls_info = _adapt_info(info)
157163
ls_info["path"] = path
158164

159165
recurse = maxdepth is None or maxdepth > 0
160166
if recurse and info["type"] == "directory":
161-
infos = fs.ls(path, dvcfiles=True, dvc_only=dvc_only, detail=True)
167+
try:
168+
infos = fs.ls(path, detail=True, **fs_kwargs)
169+
except FileNotFoundError:
170+
# broken symlink?
171+
infos = []
172+
162173
infos.sort(key=lambda f: f["name"])
163174
maxdepth = maxdepth - 1 if maxdepth is not None else None
164175
contents = {}
165176
for info in infos:
166-
d = _ls_tree(
167-
fs, info["name"], dvc_only=dvc_only, maxdepth=maxdepth, _info=info
168-
)
177+
d = _ls_tree(fs, info["name"], maxdepth=maxdepth, _info=info, **fs_kwargs)
169178
contents.update(d)
170179
ls_info["contents"] = contents
171180

dvc/repo/ls_url.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,30 @@
1+
from fsspec.implementations.local import LocalFileSystem as _LocalFileSystem
2+
13
from dvc.exceptions import URLMissingError
2-
from dvc.fs import parse_external_url
4+
from dvc.fs import LocalFileSystem, parse_external_url
35

46

5-
def ls_url(url, *, fs_config=None, recursive=False, config=None):
7+
def ls_url(url, *, fs_config=None, recursive=False, maxdepth=None, config=None):
68
fs, fs_path = parse_external_url(url, fs_config=fs_config, config=config)
79
try:
810
info = fs.info(fs_path)
911
except FileNotFoundError as exc:
1012
raise URLMissingError(url) from exc
11-
if info["type"] != "directory":
13+
if maxdepth == 0 or info["type"] != "directory":
1214
return [{"path": info["name"], "isdir": False}]
1315

16+
if isinstance(fs, LocalFileSystem):
17+
# dvc's LocalFileSystem does not support maxdepth yet
18+
walk = _LocalFileSystem().walk
19+
else:
20+
walk = fs.walk
21+
1422
ret = []
15-
for _, dirs, files in fs.walk(fs_path, detail=True):
16-
if not recursive:
23+
for root, dirs, files in walk(fs_path, detail=True, maxdepth=maxdepth):
24+
parts = fs.relparts(root, fs_path)
25+
if parts == (".",):
26+
parts = ()
27+
if not recursive or (maxdepth and len(parts) >= maxdepth - 1):
1728
files.update(dirs)
1829

1930
for info in files.values():

dvc/testing/workspace_tests.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,39 @@ def test_recursive(self, cloud):
225225
],
226226
)
227227

228+
result = ls_url(
229+
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=0
230+
)
231+
match_files(
232+
fs,
233+
result,
234+
[{"path": str(cloud / "dir"), "isdir": False}],
235+
)
236+
237+
result = ls_url(
238+
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=1
239+
)
240+
match_files(
241+
fs,
242+
result,
243+
[
244+
{"path": "foo", "isdir": False},
245+
{"path": "subdir", "isdir": True},
246+
],
247+
)
248+
249+
result = ls_url(
250+
str(cloud / "dir"), fs_config=cloud.config, recursive=True, maxdepth=2
251+
)
252+
match_files(
253+
fs,
254+
result,
255+
[
256+
{"path": "foo", "isdir": False},
257+
{"path": "subdir/bar", "isdir": False},
258+
],
259+
)
260+
228261
def test_nonexistent(self, cloud):
229262
with pytest.raises(URLMissingError):
230263
ls_url(str(cloud / "dir"), fs_config=cloud.config)

tests/func/test_ls.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66

77
import pytest
88

9+
from dvc.fs import MemoryFileSystem
910
from dvc.repo import Repo
10-
from dvc.repo.ls import ls_tree
11+
from dvc.repo.ls import _ls_tree, ls_tree
1112
from dvc.scm import CloneError
1213

1314
FS_STRUCTURE = {
@@ -998,3 +999,64 @@ def test_ls_tree_maxdepth(M, tmp_dir, scm, dvc):
998999
"structure.xml.dvc": None,
9991000
}
10001001
}
1002+
1003+
1004+
def test_fs_ls_tree():
    """Check `_ls_tree` on a file, the root and a subdirectory of a memory fs."""
    memfs = MemoryFileSystem(global_store=False)
    memfs.pipe({name: text.encode() for name, text in FS_STRUCTURE.items()})
    root = memfs.root_marker
    model_files = {"script.py": None, "train.py": None}

    # a file path yields a single leaf entry
    tree = _ls_tree(memfs, "README.md")
    assert _simplify_tree(tree) == {"README.md": None}

    # the root entry is keyed by the filesystem's root marker
    tree = _ls_tree(memfs, root)
    assert _simplify_tree(tree) == {
        root: {
            ".gitignore": None,
            "README.md": None,
            "model": dict(model_files),
        }
    }

    # a subdirectory is keyed by the path that was passed in
    tree = _ls_tree(memfs, "model")
    assert _simplify_tree(tree) == {"model": dict(model_files)}
1031+
1032+
1033+
def test_fs_ls_tree_maxdepth():
    """Check that `maxdepth` bounds how deep `_ls_tree` descends."""
    memfs = MemoryFileSystem(global_store=False)
    memfs.pipe({name: text.encode() for name, text in FS_STRUCTURE.items()})

    cases = [
        # depth 0: only the starting entry, not expanded
        ("/", 0, {"/": None}),
        # depth 1: direct children, directories left unexpanded
        (
            "/",
            1,
            {"/": {".gitignore": None, "README.md": None, "model": None}},
        ),
        # depth 2: one more level, so `model` is expanded
        (
            "/",
            2,
            {
                "/": {
                    ".gitignore": None,
                    "README.md": None,
                    "model": {"script.py": None, "train.py": None},
                }
            },
        ),
        # a plain file stays a single leaf whatever the depth
        ("README.md", 3, {"README.md": None}),
    ]
    for path, depth, expected in cases:
        tree = _ls_tree(memfs, path, maxdepth=depth)
        assert _simplify_tree(tree) == expected

tests/unit/command/test_ls_url.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from dvc.cli import parse_args
22
from dvc.commands.ls_url import CmdListUrl
33
from dvc.config import Config
4+
from dvc.fs import LocalFileSystem
45

56

67
def test_ls_url(mocker, M):
@@ -12,18 +13,33 @@ def test_ls_url(mocker, M):
1213
assert cmd.run() == 0
1314

1415
m.assert_called_once_with(
15-
"src", recursive=False, fs_config=None, config=M.instance_of(Config)
16+
"src",
17+
recursive=False,
18+
maxdepth=None,
19+
fs_config=None,
20+
config=M.instance_of(Config),
1621
)
1722

1823

1924
def test_recursive(mocker, M):
20-
cli_args = parse_args(["ls-url", "-R", "src"])
25+
cli_args = parse_args(["ls-url", "-R", "-L", "2", "src"])
2126
assert cli_args.func == CmdListUrl
2227
cmd = cli_args.func(cli_args)
2328
m = mocker.patch("dvc.repo.Repo.ls_url", autospec=True)
2429

2530
assert cmd.run() == 0
2631

2732
m.assert_called_once_with(
28-
"src", recursive=True, fs_config=None, config=M.instance_of(Config)
33+
"src", recursive=True, maxdepth=2, fs_config=None, config=M.instance_of(Config)
2934
)
35+
36+
37+
def test_tree(mocker, M):
    """`ls-url --tree --level 2 src` calls `_ls_tree` with `maxdepth=2`."""
    args = parse_args(["ls-url", "--tree", "--level", "2", "src"])
    assert args.func == CmdListUrl

    command = args.func(args)
    ls_tree_mock = mocker.patch("dvc.repo.ls._ls_tree", autospec=True)

    assert command.run() == 0

    ls_tree_mock.assert_called_once_with(
        M.instance_of(LocalFileSystem), "src", maxdepth=2
    )

0 commit comments

Comments
 (0)