Skip to content

Commit ba4af7f

Browse files
authored
feat: Add sys_unshare (#1260)
* feat(namespace): 实现unshare系统调用及相关功能
  - 新增unshare.rs模块实现ksys_unshare
  - 修改nsproxy.rs添加clone_inner方法
  - 公开create_new_namespaces和create_pid_namespace方法
  - 在user_namespace.rs添加current_user_ns方法
  - 添加sys_unshare系统调用实现
  Signed-off-by: longjin <[email protected]>
* add util-linux package
* 调试unshare
  Signed-off-by: longjin <[email protected]>
* fix: 修复futex处理中的用户空间内存访问问题
  在`futex.rs`中增加了`safe_read`、`safe_read_u32`和`safe_write_u32`方法,确保在访问用户空间内存时进行安全检查,避免无效地址导致的错误。同时,优化了`handle_futex_death`方法,使用新的安全读取和写入方法,确保在进程死亡时正确处理futex。
  Signed-off-by: longjin <[email protected]>
* refactor(futex): 重构RobustListHead结构并移除调试日志
  - 将RobustListHead拆分为PosixRobustListHead和RobustListHead
  - 为RobustListHead实现Deref和DerefMut trait
  - 移除多余的调试日志输出
  - 优化robust list处理逻辑
  Signed-off-by: longjin <[email protected]>
---------
Signed-off-by: longjin <[email protected]>
1 parent 3ab5403 commit ba4af7f

File tree

12 files changed

+283
-28
lines changed

12 files changed

+283
-28
lines changed

kernel/src/libs/futex/futex.rs

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ use alloc::{
22
collections::LinkedList,
33
sync::{Arc, Weak},
44
};
5-
use core::hash::{Hash, Hasher};
5+
use core::{
6+
hash::{Hash, Hasher},
7+
ops::{Deref, DerefMut},
8+
};
69
use core::{
710
intrinsics::{likely, unlikely},
811
mem,
@@ -646,17 +649,39 @@ impl Futex {
646649
const ROBUST_LIST_LIMIT: isize = 2048;
647650

648651
#[derive(Debug, Copy, Clone)]
649-
pub struct RobustList {
652+
#[repr(C)]
653+
struct PosixRobustList {
650654
next: VirtAddr,
651655
}
652656

653657
#[derive(Debug, Copy, Clone)]
654-
pub struct RobustListHead {
655-
list: RobustList,
658+
#[repr(C)]
659+
pub struct PosixRobustListHead {
660+
list: PosixRobustList,
656661
futex_offset: isize,
657662
list_op_pending: VirtAddr,
658663
}
659664

665+
/// Kernel-side record of a registered robust list.
///
/// Pairs the `PosixRobustListHead` value with a `VirtAddr`; `set_robust_list`
/// builds it from the head it read out of user memory and the `head_uaddr`
/// that head was read from.
#[derive(Debug, Copy, Clone)]
666+
pub struct RobustListHead {
667+
/// Robust list head value (the `#[repr(C)]` struct read from user space).
pub posix: PosixRobustListHead,
668+
/// User-space address of the head; `FutexIterator` keeps walking only while `entry` differs from it.
pub uaddr: VirtAddr,
669+
}
670+
671+
/// Dereferences a `RobustListHead` to its `posix` field, so the fields and
/// methods of `PosixRobustListHead` are reachable through the wrapper.
impl Deref for RobustListHead {
672+
type Target = PosixRobustListHead;
673+
674+
/// Returns a shared reference to the wrapped `posix` head.
fn deref(&self) -> &Self::Target {
675+
&self.posix
676+
}
677+
}
678+
679+
/// Mutable counterpart of the `Deref` impl: gives mutable access to the
/// wrapped `posix` head through the `RobustListHead` wrapper.
impl DerefMut for RobustListHead {
680+
/// Returns a mutable reference to the wrapped `posix` head.
fn deref_mut(&mut self) -> &mut Self::Target {
681+
&mut self.posix
682+
}
683+
}
684+
660685
impl RobustListHead {
661686
/// # 获得futex的用户空间地址
662687
pub fn futex_uaddr(&self, entry: VirtAddr) -> VirtAddr {
@@ -677,18 +702,21 @@ impl RobustListHead {
677702
/// - head_uaddr:robust list head用户空间地址
678703
/// - len:robust list head的长度
679704
pub fn set_robust_list(head_uaddr: VirtAddr, len: usize) -> Result<usize, SystemError> {
680-
let robust_list_head_len = mem::size_of::<RobustListHead>();
705+
let robust_list_head_len = mem::size_of::<PosixRobustListHead>();
681706
if unlikely(len != robust_list_head_len) {
682707
return Err(SystemError::EINVAL);
683708
}
684709

685710
let user_buffer_reader = UserBufferReader::new(
686-
head_uaddr.as_ptr::<RobustListHead>(),
687-
mem::size_of::<RobustListHead>(),
711+
head_uaddr.as_ptr::<PosixRobustListHead>(),
712+
mem::size_of::<PosixRobustListHead>(),
688713
true,
689714
)?;
690-
let robust_list_head = *user_buffer_reader.read_one_from_user::<RobustListHead>(0)?;
691-
715+
let robust_list_head = *user_buffer_reader.read_one_from_user::<PosixRobustListHead>(0)?;
716+
let robust_list_head = RobustListHead {
717+
posix: robust_list_head,
718+
uaddr: head_uaddr,
719+
};
692720
// 向内核注册robust list
693721
ProcessManager::current_pcb().set_robust_list(Some(robust_list_head));
694722

@@ -726,11 +754,11 @@ impl RobustListHead {
726754
core::mem::size_of::<usize>(),
727755
true,
728756
)?;
729-
user_writer.copy_one_to_user(&mem::size_of::<RobustListHead>(), 0)?;
757+
user_writer.copy_one_to_user(&mem::size_of::<PosixRobustListHead>(), 0)?;
730758
// 将head拷贝到用户空间head
731759
let mut user_writer = UserBufferWriter::new(
732-
head_uaddr.as_ptr::<RobustListHead>(),
733-
mem::size_of::<RobustListHead>(),
760+
head_uaddr.as_ptr::<PosixRobustListHead>(),
761+
mem::size_of::<PosixRobustListHead>(),
734762
true,
735763
)?;
736764
user_writer.copy_one_to_user(&robust_list_head, 0)?;
@@ -750,6 +778,7 @@ impl RobustListHead {
750778
return;
751779
}
752780
};
781+
753782
// 遍历当前进程/线程的robust list
754783
for futex_uaddr in head.futexes() {
755784
let ret = Self::handle_futex_death(futex_uaddr, pcb.raw_pid().into() as u32);
@@ -879,7 +908,7 @@ impl Iterator for FutexIterator<'_> {
879908
return None;
880909
}
881910

882-
while self.entry.data() != &self.robust_list_head.list as *const RobustList as usize {
911+
while self.entry.data() != self.robust_list_head.uaddr.data() {
883912
if self.count == ROBUST_LIST_LIMIT {
884913
break;
885914
}
@@ -895,8 +924,13 @@ impl Iterator for FutexIterator<'_> {
895924
};
896925

897926
// 安全地读取下一个entry
898-
let next_entry = RobustListHead::safe_read::<RobustList>(self.entry)
899-
.and_then(|reader| reader.read_one_from_user::<RobustList>(0).ok().cloned())?;
927+
let next_entry =
928+
RobustListHead::safe_read::<PosixRobustList>(self.entry).and_then(|reader| {
929+
reader
930+
.read_one_from_user::<PosixRobustList>(0)
931+
.ok()
932+
.cloned()
933+
})?;
900934

901935
self.entry = next_entry.next;
902936

kernel/src/libs/futex/syscall.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
use system_error::SystemError;
22

33
use crate::{
4+
libs::futex::futex::RobustListHead,
45
mm::{verify_area, VirtAddr},
56
syscall::Syscall,
67
time::PosixTimeSpec,
78
};
89

9-
use super::{
10-
constant::*,
11-
futex::{Futex, RobustListHead},
12-
};
10+
use super::{constant::*, futex::Futex};
1311

1412
impl Syscall {
1513
pub fn do_futex(
@@ -117,6 +115,11 @@ impl Syscall {
117115
verify_area(head_uaddr, core::mem::size_of::<u32>())?;
118116

119117
let ret = RobustListHead::set_robust_list(head_uaddr, len);
118+
// log::debug!(
119+
// "set_robust_list: pid: {} head_uaddr={:?}",
120+
// crate::process::ProcessManager::current_pid(),
121+
// head_uaddr
122+
// );
120123
return ret;
121124
}
122125

kernel/src/process/fork.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,6 @@ impl ProcessManager {
339339
clone_args: KernelCloneArgs,
340340
current_trapframe: &TrapFrame,
341341
) -> Result<(), SystemError> {
342-
// log::debug!("fork: clone_flags: {:?}", clone_args.flags);
343342
let clone_flags = clone_args.flags;
344343
// 不允许与不同namespace的进程共享根目录
345344

kernel/src/process/mod.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -452,9 +452,6 @@ impl ProcessManager {
452452

453453
// 进行进程退出后的工作
454454
let thread = pcb.thread.write_irqsave();
455-
if let Some(addr) = thread.set_child_tid {
456-
unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
457-
}
458455

459456
if let Some(addr) = thread.clear_child_tid {
460457
if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 {
@@ -467,9 +464,9 @@ impl ProcessManager {
467464
}
468465
unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
469466
}
467+
compiler_fence(Ordering::SeqCst);
470468

471469
RobustListHead::exit_robust_list(pcb.clone());
472-
473470
// 如果是vfork出来的进程,则需要处理completion
474471
if thread.vfork_done.is_some() {
475472
thread.vfork_done.as_ref().unwrap().complete_all();

kernel/src/process/namespace/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
pub mod mnt;
22
pub mod nsproxy;
33
pub mod pid_namespace;
4+
pub mod unshare;
45
pub mod user_namespace;
56

67
use nsproxy::NsCommon;

kernel/src/process/namespace/nsproxy.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ pub struct NsProxy {
2323
pub pid_ns_for_children: Arc<PidNamespace>,
2424
/// mount namespace(挂载命名空间)
2525
pub mnt_ns: Arc<MntNamespace>,
26+
// 注意,user_ns 存储在cred,不存储在nsproxy
27+
2628
// 其他namespace(为未来扩展预留)
27-
// pub user_ns: Option<Arc<UserNamespace>>,
2829
// pub net_ns: Option<Arc<NetNamespace>>,
2930
// pub ipc_ns: Option<Arc<IpcNamespace>>,
3031
// pub uts_ns: Option<Arc<UtsNamespace>>,
@@ -58,6 +59,13 @@ impl NsProxy {
5859
pub fn mnt_namespace(&self) -> &Arc<MntNamespace> {
5960
&self.mnt_ns
6061
}
62+
63+
/// Builds a new `NsProxy` value whose fields are clones of this one's
/// `pid_ns_for_children` and `mnt_ns` handles.
///
/// Both fields are `Arc`s, so the returned proxy refers to the same
/// namespace objects as `self`; no namespace is created here.
pub fn clone_inner(&self) -> Self {
64+
Self {
65+
pid_ns_for_children: self.pid_ns_for_children.clone(),
66+
mnt_ns: self.mnt_ns.clone(),
67+
}
68+
}
6169
}
6270

6371
impl ProcessManager {
@@ -123,7 +131,7 @@ impl ProcessManager {
123131
/// 返回新创建的nsproxy。调用者需要负责正确的加锁并将其附加到进程上。
124132
///
125133
/// 参考 https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/nsproxy.c?r=&mo=3770&fi=151#67
126-
fn create_new_namespaces(
134+
pub(super) fn create_new_namespaces(
127135
clone_flags: &CloneFlags,
128136
pcb: &Arc<ProcessControlBlock>,
129137
user_ns: Arc<UserNamespace>,
@@ -133,7 +141,7 @@ fn create_new_namespaces(
133141
.pid_ns_for_children
134142
.copy_pid_ns(clone_flags, user_ns.clone())?;
135143

136-
let mnt_ns = nsproxy.mnt_ns.copy_mnt_ns(clone_flags, user_ns)?;
144+
let mnt_ns = nsproxy.mnt_ns.copy_mnt_ns(clone_flags, user_ns.clone())?;
137145
let result = NsProxy {
138146
pid_ns_for_children,
139147
mnt_ns,

kernel/src/process/namespace/pid_namespace.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,10 @@ impl PidNamespace {
124124
}
125125

126126
/// https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/pid_namespace.c#72
127-
fn create_pid_namespace(&self, user_ns: Arc<UserNamespace>) -> Result<Arc<Self>, SystemError> {
127+
pub(super) fn create_pid_namespace(
128+
&self,
129+
user_ns: Arc<UserNamespace>,
130+
) -> Result<Arc<Self>, SystemError> {
128131
let level = self.level() + 1;
129132
if !self.user_ns.is_ancestor_of(&user_ns) {
130133
return Err(SystemError::EINVAL);
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
use alloc::sync::Arc;
2+
3+
use system_error::SystemError;
4+
5+
use crate::process::{
6+
fork::CloneFlags,
7+
namespace::nsproxy::{switch_task_namespaces, NsProxy},
8+
ProcessManager,
9+
};
10+
11+
/// Kernel entry point for `unshare`: validates `flags`, builds a new
/// namespace proxy when one is required, and installs it on the current process.
///
/// Returns `Ok(())` on success; any error from `check_unshare_flags`,
/// `unshare_nsproxy_namespaces` or `switch_task_namespaces` is propagated.
///
/// Reference: https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/fork.c#3385
12+
pub fn ksys_unshare(flags: CloneFlags) -> Result<(), SystemError> {
13+
// Validate the unshare flags.
14+
check_unshare_flags(flags)?;
15+
16+
let new_nsproxy = unshare_nsproxy_namespaces(flags)?;
17+
18+
if let Some(new_nsproxy) = new_nsproxy {
19+
// Update the current process's namespace proxy.
20+
let current_pcb = ProcessManager::current_pcb();
21+
switch_task_namespaces(&current_pcb, new_nsproxy)?;
22+
}
23+
// TODO: handle the unshare operation for the other namespaces / flags:
24+
// CLONE_NEWNS, CLONE_FS, CLONE_FILES, CLONE_SIGHAND, CLONE_VM, CLONE_THREAD, CLONE_SYSVSEM,
25+
// CLONE_NEWUTS, CLONE_NEWIPC, CLONE_NEWUSER, CLONE_NEWNET, CLONE_NEWCGROUP, CLONE_NEWTIME
26+
27+
Ok(())
28+
}
29+
30+
/// Creates the namespace proxy requested by `unshare_flags`, if any.
///
/// Returns `Ok(None)` when none of the namespace flags listed in the local
/// `ALL_VALID_FLAGS` is set; otherwise returns the proxy produced by
/// `create_new_namespaces` for the current process and its user namespace,
/// propagating that function's error.
#[inline(never)]
31+
fn unshare_nsproxy_namespaces(
32+
unshare_flags: CloneFlags,
33+
) -> Result<Option<Arc<NsProxy>>, SystemError> {
34+
// Namespace flags that make a new nsproxy necessary (CLONE_NEWUSER is not in this set).
const ALL_VALID_FLAGS: CloneFlags = CloneFlags::from_bits_truncate(
35+
CloneFlags::CLONE_NEWNS.bits()
36+
| CloneFlags::CLONE_NEWUTS.bits()
37+
| CloneFlags::CLONE_NEWIPC.bits()
38+
| CloneFlags::CLONE_NEWNET.bits()
39+
| CloneFlags::CLONE_NEWPID.bits()
40+
| CloneFlags::CLONE_NEWCGROUP.bits()
41+
| CloneFlags::CLONE_NEWTIME.bits(),
42+
);
43+
if !unshare_flags.intersects(ALL_VALID_FLAGS) {
44+
return Ok(None);
45+
}
46+
47+
// Get the current process's PCB.
48+
let current_pcb = ProcessManager::current_pcb();
49+
let user_ns = ProcessManager::current_user_ns();
50+
51+
let nsproxy = super::nsproxy::create_new_namespaces(&unshare_flags, &current_pcb, user_ns)?;
52+
return Ok(Some(nsproxy));
53+
}
54+
55+
/// Validates the flags passed to `unshare`.
///
/// Returns `Err(SystemError::EINVAL)` when:
/// - any flag outside the local `ALL_VALID_FLAGS` set is present;
/// - `CLONE_THREAD`, `CLONE_SIGHAND` or `CLONE_VM` is requested while the
///   current thread group is not empty;
/// - `CLONE_SIGHAND` or `CLONE_VM` is requested while the `cnt` counter of the
///   current signal structure is greater than 1.
///
/// Returns `Ok(())` otherwise.
#[inline(never)]
56+
fn check_unshare_flags(flags: CloneFlags) -> Result<(), SystemError> {
57+
// Reject invalid flag bits.
58+
const ALL_VALID_FLAGS: CloneFlags = CloneFlags::from_bits_truncate(
59+
CloneFlags::CLONE_NEWNS.bits()
60+
| CloneFlags::CLONE_NEWCGROUP.bits()
61+
| CloneFlags::CLONE_NEWUTS.bits()
62+
| CloneFlags::CLONE_NEWIPC.bits()
63+
| CloneFlags::CLONE_NEWUSER.bits()
64+
| CloneFlags::CLONE_NEWPID.bits()
65+
| CloneFlags::CLONE_NEWNET.bits()
66+
| CloneFlags::CLONE_NEWTIME.bits()
67+
| CloneFlags::CLONE_FS.bits()
68+
| CloneFlags::CLONE_FILES.bits()
69+
| CloneFlags::CLONE_SIGHAND.bits()
70+
| CloneFlags::CLONE_VM.bits()
71+
| CloneFlags::CLONE_THREAD.bits()
72+
| CloneFlags::CLONE_SYSVSEM.bits(),
73+
);
74+
75+
if flags.intersects(!ALL_VALID_FLAGS) {
76+
return Err(SystemError::EINVAL);
77+
}
78+
79+
let current_pcb = ProcessManager::current_pcb();
80+
81+
// If unsharing CLONE_THREAD, CLONE_SIGHAND or CLONE_VM is requested,
82+
// the thread group must be empty (i.e. there is only one thread).
83+
if flags.intersects(CloneFlags::CLONE_THREAD | CloneFlags::CLONE_SIGHAND | CloneFlags::CLONE_VM)
84+
&& !current_pcb.threads_read_irqsave().thread_group_empty()
85+
{
86+
return Err(SystemError::EINVAL);
87+
}
88+
89+
// If unsharing CLONE_SIGHAND or CLONE_VM is requested,
90+
// the reference count of the signal-handling structure must be 1.
91+
if flags.intersects(CloneFlags::CLONE_SIGHAND | CloneFlags::CLONE_VM) {
92+
let sighand_count = current_pcb
93+
.sig_struct_irqsave()
94+
.cnt
95+
.load(core::sync::atomic::Ordering::SeqCst);
96+
if sighand_count > 1 {
97+
return Err(SystemError::EINVAL);
98+
}
99+
}
100+
101+
// TODO: if unsharing CLONE_VM is requested,
102+
// the current process must be a single-threaded process.
103+
// if flags.contains(CloneFlags::CLONE_VM) {
104+
// if !current_pcb.thread_group_empty() {
105+
// return Err(SystemError::EINVAL);
106+
// }
107+
// }
108+
109+
Ok(())
110+
}

kernel/src/process/namespace/user_namespace.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use core::cmp::Ordering;
33
use core::fmt::Debug;
44

55
use crate::libs::spinlock::SpinLock;
6+
use crate::process::ProcessManager;
67

78
use super::nsproxy::NsCommon;
89
use super::{NamespaceOps, NamespaceType};
@@ -86,3 +87,14 @@ impl Debug for UserNamespace {
8687
f.debug_struct("UserNamespace").finish()
8788
}
8889
}
90+
91+
impl ProcessManager {
92+
/// Get the current process's user_ns.
///
/// When `Self::initialized()` is true this clones the `user_ns` held in the
/// current PCB's `cred()`; otherwise it falls back to a clone of
/// `INIT_USER_NAMESPACE`.
93+
pub fn current_user_ns() -> Arc<UserNamespace> {
94+
if Self::initialized() {
95+
ProcessManager::current_pcb().cred().user_ns.clone()
96+
} else {
97+
INIT_USER_NAMESPACE.clone()
98+
}
99+
}
100+
}

kernel/src/process/syscall/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ mod sys_setresuid;
2525
mod sys_setsid;
2626
mod sys_setuid;
2727
mod sys_uname;
28+
mod sys_unshare;
2829
mod sys_wait4;
2930

3031
#[cfg(target_arch = "x86_64")]

0 commit comments

Comments
 (0)