
Commit 9b1e9b0

lazy stack: ensure next stack page statically
This patch modifies all relevant places where we statically know that both
interrupts and preemption should be enabled, so that they unconditionally
pre-fault the stack. These are the places in code right before:

- a WITH_LOCK(preempt_lock) block
- a WITH_LOCK(rcu_read_lock) block
- a sched::preempt_disable() call
- a WITH_LOCK(irq_lock) block, in one case

In general, these are the places that follow the assumption that most of the
time preemption and interrupts are enabled. The functions and methods below
are called directly or indirectly by an application, and there is no other
kernel code above them in that application call stack that also disables
interrupts or preemption.

One good example is the memory allocation code in mempool.cc, which disables
preemption in quite a few places. Many of those use the
WITH_LOCK/DROP_LOCK(preempt_lock) combination, which implies they were
intended to be called with preemption enabled; otherwise the code inside
DROP_LOCK would not work. Also, all of them end up calling some form of the
thread::wait() method, which asserts that both interrupts and preemption are
enabled.

To validate this reasoning, we add the relevant invariant assertions before we
pre-fault the stack.

Signed-off-by: Waldemar Kozaczuk <[email protected]>
1 parent fabacc4 commit 9b1e9b0
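For context, the pattern this commit adds is always the same: assert the
invariants (interrupts enabled, preemption enabled), pre-fault the next stack
page, and only then enter the region that disables preemption or interrupts.
The actual implementation of arch::ensure_next_stack_page() is not part of
this diff; the sketch below is an illustration only, and assumes the lazy-stack
design in which touching one byte a page below the current stack pointer is
enough to get the next stack page mapped in while the page-fault handler can
still run safely.

// Illustrative sketch only -- not the code from this commit. Assumes a
// 4096-byte page size and a lazily-mapped stack that is populated on fault.
inline void ensure_next_stack_page_sketch()
{
    char probe;                        // approximates the current stack pointer
    // Read one byte a page below the current frame so that any fault on the
    // next stack page happens now, while interrupts and preemption are still
    // enabled, and not later inside a WITH_LOCK(preempt_lock) block.
    volatile char *next_page = &probe - 4096;
    (void)*next_page;
}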

13 files changed: 202 additions & 1 deletion

bsd/porting/uma_stub.cc

Lines changed: 12 additions & 0 deletions
@@ -34,6 +34,12 @@ void * uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 {
     void * ptr;
 
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
         ptr = (*zone->percpu_cache)->alloc();
     }
@@ -101,6 +107,12 @@ void uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
         zone->uz_dtor(item, zone->uz_size, udata);
     }
 
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
         if ((*zone->percpu_cache)->free(item)) {
             return;

bsd/sys/net/routecache.hh

Lines changed: 6 additions & 0 deletions
@@ -162,6 +162,12 @@ public:
         // route.cc).
         assert(fibnum == 0);
 
+#if CONF_lazy_stack_invariant
+        assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(osv::rcu_read_lock) {
             auto *c = cache.read();
             auto entry = c->search(dst->sin_addr.s_addr);

bsd/sys/netinet/arpcache.hh

Lines changed: 7 additions & 0 deletions
@@ -16,6 +16,7 @@
 #include <osv/rcu.hh>
 #include <osv/types.h>
 #include <osv/mutex.h>
+#include <osv/sched.hh>
 
 #include <boost/optional.hpp>
 #include <functional>
@@ -98,6 +99,12 @@ struct arp_cache {
 
     boost::optional<entry> lookup(const in_addr ip)
     {
+#if CONF_lazy_stack_invariant
+        assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(osv::rcu_read_lock) {
             auto i = _entries.reader_find(ip, std::hash<in_addr>(), entry_compare());
             return boost::optional<entry>(!!i, *i);

core/async.cc

Lines changed: 18 additions & 0 deletions
@@ -104,6 +104,12 @@ class async_worker {
 
     void insert(percpu_timer_task& task)
     {
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(preempt_lock) {
             trace_async_timer_task_insert(this, &task);
 
@@ -125,6 +131,12 @@ class async_worker {
 
     percpu_timer_task& borrow_task()
    {
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(preempt_lock) {
             auto task = released_timer_tasks.pop();
             if (task) {
@@ -142,6 +154,12 @@ class async_worker {
     {
         auto task = new one_shot_task(std::move(callback));
 
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(preempt_lock) {
             if (_queue.empty()) {
                 _thread->wake();

core/condvar.cc

Lines changed: 7 additions & 0 deletions
@@ -35,6 +35,13 @@ int condvar::wait(mutex* user_mutex, sched::timer* tmr)
     _user_mutex = user_mutex;
     // This preempt_disable() is just an optimization, to avoid context
     // switch between the two unlocks.
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     sched::preempt_disable();
     user_mutex->unlock();
     _m.unlock();

core/elf.cc

Lines changed: 12 additions & 0 deletions
@@ -1654,6 +1654,12 @@ object *program::object_containing_addr(const void *addr)
 {
     object *ret = nullptr;
     module_delete_disable();
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(osv::rcu_read_lock) {
         const auto &modules = _modules_rcu.read()->objects;
         for (object *module : modules) {
@@ -1819,6 +1825,12 @@ void program::free_dtv(object* obj)
 program::modules_list program::modules_get() const
 {
     modules_list ret;
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(osv::rcu_read_lock) {
         auto modules = _modules_rcu.read();
         auto needed = modules->objects.size();

core/mempool.cc

Lines changed: 49 additions & 1 deletion
@@ -216,6 +216,12 @@ TRACEPOINT(trace_pool_free_different_cpu, "this=%p, obj=%p, obj_cpu=%d", void*,
 void* pool::alloc()
 {
     void * ret = nullptr;
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
 
         // We enable preemption because add_page() may take a Mutex.
@@ -255,8 +261,14 @@ void pool::add_page()
 {
     // FIXME: this function allocated a page and set it up but on rare cases
     // we may add this page to the free list of a different cpu, due to the
-    // enablment of preemption
+    // enablement of preemption
     void* page = untracked_alloc_page();
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
         page_header* header = new (page) page_header;
         header->cpu_id = mempool_cpuid();
@@ -321,6 +333,12 @@ void pool::free(void* object)
 {
     trace_pool_free(this, object);
 
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
 
         free_object* obj = static_cast<free_object*>(object);
@@ -1260,6 +1278,9 @@ class l2 {
         while (!(pb = try_alloc_page_batch())) {
             WITH_LOCK(migration_lock) {
                 DROP_LOCK(preempt_lock) {
+#if CONF_lazy_stack_invariant
+                    assert(sched::preemptable());
+#endif
                     refill();
                 }
             }
@@ -1272,6 +1293,9 @@ class l2 {
         while (!try_free_page_batch(pb)) {
             WITH_LOCK(migration_lock) {
                 DROP_LOCK(preempt_lock) {
+#if CONF_lazy_stack_invariant
+                    assert(sched::preemptable());
+#endif
                     unfill();
                 }
             }
@@ -1377,6 +1401,12 @@ void l1::fill_thread()
 
 void l1::refill()
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(preempt_lock);
     auto& pbuf = get_l1();
     if (pbuf.nr + page_batch::nr_pages < pbuf.max / 2) {
@@ -1398,6 +1428,12 @@ void l1::refill()
 
 void l1::unfill()
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(preempt_lock);
     auto& pbuf = get_l1();
     if (pbuf.nr > page_batch::nr_pages + pbuf.max / 2) {
@@ -1411,6 +1447,12 @@ void l1::unfill()
 
 void* l1::alloc_page_local()
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(preempt_lock);
     auto& pbuf = get_l1();
     if (pbuf.nr < pbuf.watermark_lo) {
@@ -1424,6 +1466,12 @@ void* l1::alloc_page_local()
 
 bool l1::free_page_local(void* v)
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(preempt_lock);
     auto& pbuf = get_l1();
     if (pbuf.nr > pbuf.watermark_hi) {
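Note that the asserts added inside the DROP_LOCK(preempt_lock) blocks above
check only the invariant and do not pre-fault the stack: within a DROP_LOCK
block the lock is temporarily released, so preemption is enabled again there,
which is exactly the property the commit message relies on when it argues
these paths are meant to run with preemption enabled. A minimal sketch of the
WITH_LOCK/DROP_LOCK nesting, using std::mutex as a simplified stand-in for
OSv's preempt_lock and its macros (an assumption for illustration only, not
OSv's actual implementation):

#include <mutex>

void page_batch_path_sketch(std::mutex& preempt_lock_like)
{
    std::unique_lock<std::mutex> guard(preempt_lock_like); // WITH_LOCK(preempt_lock)
    // ... fast path that must not be preempted ...
    {
        guard.unlock();   // DROP_LOCK(preempt_lock): "preemption" enabled again,
                          // so it is safe to assert that and to sleep/allocate
        // ... slow path, e.g. refilling or unfilling a page batch ...
        guard.lock();     // end of DROP_LOCK block: lock re-acquired
    }
    // WITH_LOCK scope ends: lock released by unique_lock's destructor
}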

core/rcu.cc

Lines changed: 6 additions & 0 deletions
@@ -196,6 +196,12 @@ using namespace rcu;
 
 void rcu_defer(std::function<void ()>&& func)
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
         auto p = &*percpu_callbacks;
         while (p->ncallbacks[p->buf] == p->callbacks[p->buf].size()) {

core/sched.cc

Lines changed: 52 additions & 0 deletions
@@ -704,6 +704,12 @@ void thread::unpin()
     // to pin, unpin, or migrate the same thread, we need to run the actual
     // unpinning code on the same CPU as the target thread.
     if (this == current()) {
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(preempt_lock) {
             if (_pinned) {
                 _pinned = false;
@@ -882,6 +888,12 @@ static thread_runtime::duration total_app_time_exited(0);
 
 thread_runtime::duration thread::thread_clock() {
     if (this == current()) {
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK (preempt_lock) {
             // Inside preempt_lock, we are running and the scheduler can't
             // intervene and change _total_cpu_time or _running_since
@@ -1158,6 +1170,13 @@ void thread::prepare_wait()
 {
     // After setting the thread's status to "waiting", we must not preempt it,
     // as it is no longer in "running" state and therefore will not return.
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     preempt_disable();
     assert(_detached_state->st.load() == status::running);
     _detached_state->st.store(status::waiting);
@@ -1257,6 +1276,12 @@ void thread::wake_with_irq_disabled()
 void thread::wake_lock(mutex* mtx, wait_record* wr)
 {
     // must be called with mtx held
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(rcu_read_lock) {
         auto st = _detached_state.get();
         // We want to send_lock() to this thread, but we want to be sure we're the only
@@ -1283,6 +1308,12 @@ void thread::wake_lock(mutex* mtx, wait_record* wr)
 
 bool thread::unsafe_stop()
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(rcu_read_lock) {
         auto st = _detached_state.get();
         auto expected = status::waiting;
@@ -1338,6 +1369,13 @@ void thread::complete()
     }
     // If this thread gets preempted after changing status it will never be
     // scheduled again to set terminating_thread. So must disable preemption.
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     preempt_disable();
     _detached_state->st.store(status::terminating);
     // We want to run destroy() here, but can't because it cause the stack we're
@@ -1462,6 +1500,13 @@ void thread::sleep_impl(timer &t)
 
 void thread_handle::wake()
 {
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(rcu_read_lock) {
         thread::detached_state* ds = _t.read();
         if (ds) {
@@ -1589,6 +1634,13 @@ void timer_base::set_with_irq_disabled(osv::clock::uptime::time_point time)
 void timer_base::set(osv::clock::uptime::time_point time)
 {
     trace_timer_set(this, time.time_since_epoch().count());
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     irq_save_lock_type irq_lock;
     WITH_LOCK(irq_lock) {
         _state = state::armed;

fs/vfs/kern_descrip.cc

Lines changed: 6 additions & 0 deletions
@@ -145,6 +145,12 @@ int fget(int fd, struct file **out_fp)
     if (fd < 0 || fd >= FDMAX)
         return EBADF;
 
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(rcu_read_lock) {
         fp = gfdt[fd].read();
         if (fp == nullptr) {
