Skip to content

Commit e9ec735

Browse files
authored
feat: 内存管理、mmap支持与设备驱动的综合改进 (#1455)
* refactor(mm): 重构页缓存读写以解决死锁问题并改进错误处理 - 将页缓存读写拆分为两阶段以避免用户缺页时持有锁 - 改进文件系统缺页处理,返回SIGBUS而非panic - 优化sys_read/sys_write的用户缓冲区访问检查 - 修复mprotect参数对齐检查 Signed-off-by: longjin <[email protected]> * feat(devfs): 添加随机设备支持 - 新增随机设备模块random_dev,提供随机字节生成能力 - 在DevFS中注册/dev/random设备,确保系统能够访问随机数据 - 更新相关文件以支持新设备的集成 Signed-off-by: longjin <[email protected]> * feat(filesystem): 添加mmap支持到多个文件系统节点 - 为LockedZeroInode、LockedExt4Inode、LockedFATInode和LockedRamFSInode实现mmap方法,允许内存映射操作。 - 更新相关文件以支持mmap功能,确保与用户空间的交互更加灵活。 Signed-off-by: longjin <[email protected]> * fix(mm): Improve mmap error handling and validation - Enhanced error handling in mmap implementation to return appropriate errors for unsupported operations. - Added checks for MAP_PRIVATE and MAP_SHARED flags to ensure only one is set. - Implemented page alignment validation for MAP_FIXED. - Updated tests to reflect changes in mmap behavior. Signed-off-by: longjin <[email protected]> * fix(mm): Enhance memory protection handling and validation - Updated the `init_xd_rsvd` function to ensure NX support is enabled and correctly handle hardware limitations. - Improved alignment checks in `sys_mprotect` to prevent overflow and ensure proper memory area verification. - Removed outdated tests from `mmap_test` to streamline the test suite. Signed-off-by: longjin <[email protected]> * feat(procfs): Add support for /proc/[pid]/statm file - Introduced the ProcStatm file type to the ProcFileType enum. - Implemented the open_statm function to return a placeholder response for the statm file. - Updated the ProcFS inode creation to include the statm file for each process. - Enhanced the IndexNode implementation to handle the new ProcStatm file type. 
Signed-off-by: longjin <[email protected]> * fix(mmap): 增强mmap系统调用的偏移量检查和内存分配逻辑 * fix(procfs): 优化statm文件打开逻辑,增加虚拟内存页数计算 * fix(syscall): 处理len为0的情况,确保read和write系统调用遵循POSIX标准 * refactor(mm): Optimize page reclamation process to prevent deadlocks - Separated the page reclamation into two phases to avoid holding the reclaimer lock for extended periods, reducing the risk of lock order inversion with page_manager/page_cache. - Updated the `shrink_list` method to handle victim page eviction without holding the reclaimer lock, ensuring safer memory management. - Improved the `drain_lru` method to efficiently retrieve victim pages for reclamation. Signed-off-by: longjin <[email protected]> --------- Signed-off-by: longjin <[email protected]>
1 parent 92c81de commit e9ec735

File tree

26 files changed

+882
-189
lines changed

26 files changed

+882
-189
lines changed

kernel/src/arch/x86_64/mm/fault.rs

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
use alloc::sync::Arc;
2-
use core::{
3-
intrinsics::{likely, unlikely},
4-
panic,
5-
};
2+
use core::{intrinsics::unlikely, panic};
63
use log::error;
74
use x86::{bits64::rflags::RFlags, controlregs::Cr4};
85

@@ -391,11 +388,11 @@ impl X86_64MMArch {
391388
return; // 已通过异常表修复
392389
}
393390

394-
log::error!(
395-
"vma access error, error_code: {:?}, address: {:#x}",
396-
error_code,
397-
address.data(),
398-
);
391+
// log::error!(
392+
// "vma access error, error_code: {:?}, address: {:#x}",
393+
// error_code,
394+
// address.data(),
395+
// );
399396

400397
send_segv();
401398
}
@@ -422,8 +419,41 @@ impl X86_64MMArch {
422419
| VmFaultReason::VM_FAULT_HWPOISON_LARGE
423420
| VmFaultReason::VM_FAULT_FALLBACK;
424421

425-
if likely(!fault.contains(vm_fault_error)) {
426-
panic!("fault error: {:?}", fault)
422+
if fault.intersects(vm_fault_error) {
    // A kernel-mode access to a user address (e.g. copy_from_user) must be
    // repaired through the exception table so the caller sees -EFAULT,
    // instead of sending a signal or crashing.
    if !regs.is_from_user() {
        if Self::try_fixup_exception(regs, error_code, address) {
            return;
        }
        panic!(
            "kernel access to user addr failed without fixup, fault: {:?}, addr: {:#x}, rip: {:#x}",
            fault,
            address.data(),
            regs.rip
        );
    }

    // User-mode fault: deliver the signal that matches the fault reason.
    // Everything other than VM_FAULT_SIGSEGV (SIGBUS / OOM / HWPOISON ...)
    // is reported uniformly as SIGBUS.
    let sig = if fault.contains(VmFaultReason::VM_FAULT_SIGSEGV) {
        Signal::SIGSEGV
    } else {
        Signal::SIGBUS
    };
    let mut info = SigInfo::new(
        sig,
        0,
        SigCode::User,
        SigType::Kill(ProcessManager::current_pid()),
    );
    // Send the same signal that `info` describes. Previously SIGBUS was
    // always sent here, even when `info` had been built for SIGSEGV.
    let _ = sig.send_signal_info(Some(&mut info), ProcessManager::current_pid());
    return;
}
456+
457+
panic!("fault error: {:?}", fault)
428458
}
429459
}

kernel/src/arch/x86_64/mm/mod.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -524,23 +524,25 @@ const fn protection_map() -> [EntryFlags<MMArch>; 16] {
524524

525525
impl X86_64MMArch {
526526
// Initializes XD_RESERVED from the NO_EXECUTE_ENABLE bit of the EFER register.
fn init_xd_rsvd() {
527-
// Read the value of the IA32_EFER register
528-
let efer: EferFlags = x86_64::registers::model_specific::Efer::read();
527+
// Read EFER and make sure NXE is enabled, so that user-space PROT_EXEC is correctly constrained by NX
528+
let mut efer = x86_64::registers::model_specific::Efer::read();
529529
if !efer.contains(EferFlags::NO_EXECUTE_ENABLE) {
530-
// NO_EXECUTE_ENABLE is false, so set xd_reserved to true
531-
debug!("NO_EXECUTE_ENABLE is false, set XD_RESERVED to true");
532-
XD_RESERVED.store(true, Ordering::Relaxed);
530+
debug!("Enabling EFER.NXE for NX support");
531+
efer.insert(EferFlags::NO_EXECUTE_ENABLE);
532+
unsafe { x86_64::registers::model_specific::Efer::write(efer) };
533533
}
534+
// If the hardware still does not support it (the write had no effect), mark XD as reserved; otherwise it is usable
// NOTE(review): on CPUs without NX, setting EFER.NXE may raise #GP instead of being ignored — confirm whether a CPUID check is needed before the write.
535+
let efer_after = x86_64::registers::model_specific::Efer::read();
536+
let xd_reserved = !efer_after.contains(EferFlags::NO_EXECUTE_ENABLE);
537+
XD_RESERVED.store(xd_reserved, Ordering::Relaxed);
534538
compiler_fence(Ordering::SeqCst);
535539
}
536540

537541
/// Returns whether the XD flag bit is reserved
538542
pub fn is_xd_reserved() -> bool {
539-
// return XD_RESERVED.load(Ordering::Relaxed);
540-
541-
// Execute-disable is not supported for now, so return true directly
542-
// The reason is that the page-level XD bit does not seem to be set correctly at the moment, which triggers a page fault
543-
return true;
543+
// Returns true when the hardware lacks NX/XD support, meaning the execute-disable bit is unavailable; otherwise follows the detection result
544+
// By default uses the XD_RESERVED value detected during boot
545+
return XD_RESERVED.load(Ordering::Relaxed);
544546
}
545547

546548
pub unsafe fn read_array<T>(addr: VirtAddr, count: usize) -> Vec<T> {

kernel/src/filesystem/devfs/mod.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/// 导出devfs的模块
22
pub mod null_dev;
3+
pub mod random_dev;
34
pub mod zero_dev;
45

56
use super::{
@@ -9,6 +10,9 @@ use super::{
910
FileType, FsInfo, IndexNode, InodeFlags, InodeMode, Magic, Metadata, SuperBlock,
1011
},
1112
};
13+
use crate::filesystem::devfs::zero_dev::LockedZeroInode;
14+
use crate::mm::fault::{PageFaultHandler, PageFaultMessage};
15+
use crate::mm::VmFaultReason;
1216
use crate::{
1317
driver::base::{block::gendisk::GenDisk, device::device_number::DeviceNumber},
1418
filesystem::vfs::mount::MountFlags,
@@ -39,6 +43,21 @@ pub struct DevFS {
3943
super_block: SuperBlock,
4044
}
4145

46+
fn is_zero_inode(pfm: &PageFaultMessage) -> bool {
47+
let vma = pfm.vma();
48+
let vma_guard = vma.lock_irqsave();
49+
match vma_guard.vm_file() {
50+
Some(file) => {
51+
let inode = file.inode();
52+
inode
53+
.as_any_ref()
54+
.downcast_ref::<LockedZeroInode>()
55+
.is_some()
56+
}
57+
None => false,
58+
}
59+
}
60+
4261
impl FileSystem for DevFS {
4362
fn as_any_ref(&self) -> &dyn core::any::Any {
4463
self
@@ -62,6 +81,25 @@ impl FileSystem for DevFS {
6281
fn super_block(&self) -> SuperBlock {
6382
self.super_block.clone()
6483
}
84+
85+
unsafe fn fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason {
86+
if !is_zero_inode(pfm) {
87+
return VmFaultReason::VM_FAULT_SIGBUS;
88+
}
89+
PageFaultHandler::zero_fault(pfm)
90+
}
91+
92+
unsafe fn map_pages(
93+
&self,
94+
pfm: &mut PageFaultMessage,
95+
start_pgoff: usize,
96+
end_pgoff: usize,
97+
) -> VmFaultReason {
98+
if !is_zero_inode(pfm) {
99+
return VmFaultReason::VM_FAULT_SIGBUS;
100+
}
101+
PageFaultHandler::zero_map_pages(pfm, start_pgoff, end_pgoff)
102+
}
65103
}
66104

67105
impl DevFS {
@@ -109,6 +147,7 @@ impl DevFS {
109147
/// @brief 注册系统内部自带的设备
110148
fn register_bultinin_device(&self) {
111149
use null_dev::LockedNullInode;
150+
use random_dev::LockedRandomInode;
112151
use zero_dev::LockedZeroInode;
113152
let dev_root: Arc<LockedDevFSInode> = self.root_inode.clone();
114153
dev_root
@@ -117,6 +156,9 @@ impl DevFS {
117156
dev_root
118157
.add_dev("zero", LockedZeroInode::new())
119158
.expect("DevFS: Failed to register /dev/zero");
159+
dev_root
160+
.add_dev("random", LockedRandomInode::new())
161+
.expect("DevFS: Failed to register /dev/random");
120162
}
121163

122164
/// @brief 在devfs内注册设备
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
use crate::driver::base::device::device_number::{DeviceNumber, Major};
2+
use crate::filesystem::devfs::LockedDevFSInode;
3+
use crate::filesystem::vfs::file::FileFlags;
4+
use crate::filesystem::vfs::{
5+
vcore::generate_inode_id, FilePrivateData, FileSystem, FileType, IndexNode, InodeFlags,
6+
InodeMode, Metadata,
7+
};
8+
use crate::libs::rand::rand_bytes;
9+
use crate::libs::spinlock::SpinLockGuard;
10+
use crate::{filesystem::devfs::DevFS, libs::spinlock::SpinLock, time::PosixTimeSpec};
11+
use alloc::{
12+
string::String,
13+
sync::{Arc, Weak},
14+
vec::Vec,
15+
};
16+
use core::{cmp::min, mem::size_of};
17+
use system_error::SystemError;
18+
19+
use super::DeviceINode;
20+
21+
/// Inner state of the random device inode; wrapped in a spinlock by `LockedRandomInode`.
#[derive(Debug)]
22+
pub struct RandomInode {
23+
/// Weak back-reference to the owning `LockedRandomInode`, filled in by `LockedRandomInode::new`.
self_ref: Weak<LockedRandomInode>,
24+
/// Weak handle to the owning `DevFS`, assigned through `set_fs`.
fs: Weak<DevFS>,
25+
/// Weak handle to the parent devfs inode, assigned through `set_parent`.
parent: Weak<LockedDevFSInode>,
26+
/// Inode metadata returned by `metadata` and partially updated by `set_metadata`.
metadata: Metadata,
27+
}
28+
29+
/// Spinlock-protected wrapper around `RandomInode`.
#[derive(Debug)]
30+
pub struct LockedRandomInode(SpinLock<RandomInode>);
31+
32+
impl LockedRandomInode {
33+
pub fn new() -> Arc<Self> {
34+
let inode = RandomInode {
35+
self_ref: Weak::default(),
36+
fs: Weak::default(),
37+
parent: Weak::default(),
38+
metadata: Metadata {
39+
dev_id: 1,
40+
inode_id: generate_inode_id(),
41+
size: 0,
42+
blk_size: 0,
43+
blocks: 0,
44+
atime: PosixTimeSpec::default(),
45+
mtime: PosixTimeSpec::default(),
46+
ctime: PosixTimeSpec::default(),
47+
btime: PosixTimeSpec::default(),
48+
file_type: FileType::CharDevice,
49+
mode: InodeMode::from_bits_truncate(0o666),
50+
flags: InodeFlags::empty(),
51+
nlinks: 1,
52+
uid: 0,
53+
gid: 0,
54+
raw_dev: DeviceNumber::new(Major::new(1), 8),
55+
},
56+
};
57+
58+
let result = Arc::new(LockedRandomInode(SpinLock::new(inode)));
59+
result.0.lock().self_ref = Arc::downgrade(&result);
60+
result
61+
}
62+
}
63+
64+
impl DeviceINode for LockedRandomInode {
65+
fn set_fs(&self, fs: Weak<DevFS>) {
66+
self.0.lock().fs = fs;
67+
}
68+
69+
fn set_parent(&self, parent: Weak<LockedDevFSInode>) {
70+
self.0.lock().parent = parent;
71+
}
72+
}
73+
74+
impl IndexNode for LockedRandomInode {
75+
fn as_any_ref(&self) -> &dyn core::any::Any {
76+
self
77+
}
78+
79+
fn open(
80+
&self,
81+
_data: SpinLockGuard<FilePrivateData>,
82+
_flags: &FileFlags,
83+
) -> Result<(), SystemError> {
84+
Ok(())
85+
}
86+
87+
fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<(), SystemError> {
88+
Ok(())
89+
}
90+
91+
fn metadata(&self) -> Result<Metadata, SystemError> {
92+
Ok(self.0.lock().metadata.clone())
93+
}
94+
95+
fn fs(&self) -> Arc<dyn FileSystem> {
96+
self.0.lock().fs.upgrade().unwrap()
97+
}
98+
99+
fn list(&self) -> Result<Vec<String>, SystemError> {
100+
Err(SystemError::ENOSYS)
101+
}
102+
103+
fn set_metadata(&self, metadata: &Metadata) -> Result<(), SystemError> {
104+
let mut inode = self.0.lock();
105+
inode.metadata.atime = metadata.atime;
106+
inode.metadata.mtime = metadata.mtime;
107+
inode.metadata.ctime = metadata.ctime;
108+
inode.metadata.btime = metadata.btime;
109+
inode.metadata.mode = metadata.mode;
110+
inode.metadata.uid = metadata.uid;
111+
inode.metadata.gid = metadata.gid;
112+
Ok(())
113+
}
114+
115+
fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<(), SystemError> {
116+
Err(SystemError::ENODEV)
117+
}
118+
119+
fn read_at(
120+
&self,
121+
_offset: usize,
122+
len: usize,
123+
buf: &mut [u8],
124+
_data: SpinLockGuard<FilePrivateData>,
125+
) -> Result<usize, SystemError> {
126+
if buf.len() < len {
127+
return Err(SystemError::EINVAL);
128+
}
129+
130+
let mut copied = 0;
131+
while copied < len {
132+
let chunk = rand_bytes::<{ size_of::<usize>() }>();
133+
let copy_len = min(len - copied, chunk.len());
134+
buf[copied..copied + copy_len].copy_from_slice(&chunk[..copy_len]);
135+
copied += copy_len;
136+
}
137+
138+
Ok(len)
139+
}
140+
141+
fn write_at(
142+
&self,
143+
_offset: usize,
144+
len: usize,
145+
buf: &[u8],
146+
_data: SpinLockGuard<FilePrivateData>,
147+
) -> Result<usize, SystemError> {
148+
if buf.len() < len {
149+
return Err(SystemError::EINVAL);
150+
}
151+
Ok(len)
152+
}
153+
154+
fn parent(&self) -> Result<Arc<dyn IndexNode>, SystemError> {
155+
let parent = self.0.lock().parent.upgrade();
156+
if let Some(parent) = parent {
157+
return Ok(parent);
158+
}
159+
Err(SystemError::ENOENT)
160+
}
161+
}

kernel/src/filesystem/devfs/zero_dev.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ impl IndexNode for LockedZeroInode {
153153
Ok(len)
154154
}
155155

156+
fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<(), SystemError> {
157+
// /dev/zero supports mmap; the semantics are equivalent to an anonymous zero-page mapping
158+
Ok(())
159+
}
160+
156161
fn parent(&self) -> Result<Arc<dyn IndexNode>, SystemError> {
157162
let parent = self.0.lock().parent.upgrade();
158163
if let Some(parent) = parent {

kernel/src/filesystem/ext4/inode.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ pub struct Ext4Inode {
4343
pub struct LockedExt4Inode(pub(super) SpinLock<Ext4Inode>);
4444

4545
impl IndexNode for LockedExt4Inode {
46+
// Accepts every mmap request for this inode: all arguments are ignored and Ok(()) is returned.
fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<(), SystemError> {
47+
Ok(())
48+
}
49+
4650
fn open(
4751
&self,
4852
_data: crate::libs::spinlock::SpinLockGuard<vfs::FilePrivateData>,
@@ -117,7 +121,7 @@ impl IndexNode for LockedExt4Inode {
117121
None,
118122
)
119123
.map_err(SystemError::from)?;
120-
page_cache.lock_irqsave().read(offset, buf)
124+
page_cache.read(offset, buf)
121125
} else {
122126
self.read_direct(offset, len, buf, data)
123127
}
@@ -158,7 +162,7 @@ impl IndexNode for LockedExt4Inode {
158162
let len = core::cmp::min(len, buf.len());
159163
let buf = &buf[0..len];
160164
if let Some(page_cache) = &guard.page_cache {
161-
let write_len = page_cache.lock_irqsave().write(offset, buf)?;
165+
let write_len = PageCache::write(page_cache, offset, buf)?;
162166
let old_file_size = ext4.getattr(guard.inner_inode_num)?.size;
163167
let current_file_size = core::cmp::max(old_file_size, (offset + write_len) as u64);
164168
let time = PosixTimeSpec::now().tv_sec.to_u32().unwrap_or_else(|| {

0 commit comments

Comments
 (0)