Skip to content

Commit d737266

Browse files
[SWDEV-562295] Fix Dmesg errors when using CLI (#822)
* Changes: - Replaced the permission check that attempted to open each file with a check for read access only. Do not try to open every path; doing so may cause driver issues. Read access is sufficient to check permissions. Reason: on GPUs that support partitioning (memory/compute), logical devices are not valid until they have been configured. See `sudo amd-smi set -h` or the applicable APIs to configure partitions on supported hardware. Example dmesg error output: [965358.883112] amdgpu 0000:15:00.0: amdgpu: renderD153 partition 1 not valid! [965358.883283] amdgpu 0000:15:00.0: amdgpu: renderD154 partition 2 not valid! [965358.883438] amdgpu 0000:15:00.0: amdgpu: renderD155 partition 3 not valid! [965358.883594] amdgpu 0000:15:00.0: amdgpu: renderD156 partition 4 not valid! [965358.883749] amdgpu 0000:15:00.0: amdgpu: renderD157 partition 5 not valid! [965358.883904] amdgpu 0000:15:00.0: amdgpu: renderD158 partition 6 not valid! [965358.884060] amdgpu 0000:15:00.0: amdgpu: renderD159 partition 7 not valid! --------- Signed-off-by: Charis Poag <[email protected]>
1 parent 8bdf951 commit d737266

File tree

1 file changed

+55
-5
lines changed

1 file changed

+55
-5
lines changed

amdsmi_cli/amdsmi_helpers.py

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
import glob
3333
import errno
3434
import pwd
35+
import stat
36+
from typing import Tuple, Optional, Union
3537

3638
from enum import Enum
3739
from pathlib import Path
@@ -1168,14 +1170,46 @@ def _stat_info(self, path: str) -> dict:
11681170
except Exception as e:
11691171
return {"error": str(e)}
11701172

1171-
def _try_open(self, path: str):
1173+
def _has_read_access(self, path: str) -> Tuple[bool, Optional[int], Optional[str]]:
    """
    Check whether the current effective user can read ``path`` without
    opening it.

    The path is only stat()'ed and, when the mode bits alone would deny
    access, probed with os.access(). It is never opened, so device nodes
    are not touched by this check.

    Args:
        path: Filesystem path to check.

    Returns:
        A tuple (ok, errno_or_None, message_or_None).
        (True, None, None) when read access is granted. Otherwise
        (False, errno, message), where errno/message either come from the
        failed stat() call or are errno.EACCES with a
        "Permission denied (<class>)" message naming the permission class
        (owner, group or other) that applied to the caller.
    """
    try:
        st = os.stat(path)
    except OSError as e:
        return False, e.errno, e.strerror

    euid = os.geteuid()

    # root can always read
    if euid == 0:
        return True, None, None

    egid = os.getegid()

    # Select the single permission class that applies to the caller:
    # owner first, then group (primary or supplementary), then other.
    if euid == st.st_uid:
        read_bit, who = stat.S_IRUSR, "owner"
    elif st.st_gid == egid or st.st_gid in os.getgroups():
        read_bit, who = stat.S_IRGRP, "group"
    else:
        read_bit, who = stat.S_IROTH, "other"

    if st.st_mode & read_bit:
        return True, None, None

    # The mode bits deny access, but they are not the whole story: POSIX
    # ACLs can still grant read access. Ask the kernel, which still does
    # not open the path.
    if os.getuid() == euid and os.getgid() == egid:
        # Real and effective IDs match, so plain access() answers for the
        # effective user.
        granted = os.access(path, os.R_OK)
    elif os.access in os.supports_effective_ids:
        granted = os.access(path, os.R_OK, effective_ids=True)
    else:
        # No way to query effective-ID access here; keep the mode-bit verdict.
        granted = False

    if granted:
        return True, None, None

    return False, errno.EACCES, f"Permission denied ({who})"
1212+
11791213
def check_required_groups(self, check_render=True, check_video=True):
11801214
"""
11811215
Check if the current user can access kfd and dri
@@ -1210,7 +1244,23 @@ def check_required_groups(self, check_render=True, check_video=True):
12101244
denied = []
12111245

12121246
for path in paths_to_check:
1213-
ok, err, msg = self._try_open(path)
1247+
# Do not try to open all paths, may cause driver issues.
1248+
# Read access is sufficient to check permissions.
1249+
#
1250+
# Reason: GPUs which support partitioning (memory/compute),
1251+
# logical devices will not be valid until configured.
1252+
# See `sudo amd-smi set -h` or applicable APIs
1253+
# to configure on supported hardware.
1254+
#
1255+
# Example error dmesg output:
1256+
# [965358.883112] amdgpu 0000:15:00.0: amdgpu: renderD153 partition 1 not valid!
1257+
# [965358.883283] amdgpu 0000:15:00.0: amdgpu: renderD154 partition 2 not valid!
1258+
# [965358.883438] amdgpu 0000:15:00.0: amdgpu: renderD155 partition 3 not valid!
1259+
# [965358.883594] amdgpu 0000:15:00.0: amdgpu: renderD156 partition 4 not valid!
1260+
# [965358.883749] amdgpu 0000:15:00.0: amdgpu: renderD157 partition 5 not valid!
1261+
# [965358.883904] amdgpu 0000:15:00.0: amdgpu: renderD158 partition 6 not valid!
1262+
# [965358.884060] amdgpu 0000:15:00.0: amdgpu: renderD159 partition 7 not valid!
1263+
ok, err, msg = self._has_read_access(path)
12141264
if ok:
12151265
continue
12161266
# if permission denied or operation not permitted

0 commit comments

Comments
 (0)