Skip to content

Commit 8f6b03d

Browse files
committed
lkl: epoll_wait demultiplex between host and lkl
This commit implements support for mixed (host and LKL) file descriptors in a single epoll_wait(2) call, so that each descriptor is properly dispatched to the required (lkl) syscall. Previously this was prohibited: when an application called epoll_ctl(2) with such a mix, an error (-LKL_EOPNOTSUPP) was returned to the caller. This restriction is now relaxed by implementing a dispatcher inside the hijack library, as NetBSD's librumphijack does. ref: https://github.com/NetBSD/src/blob/trunk/lib/librumphijack/hijack.c poll(2) and select(2) can follow this implementation, but that is left for a future patch. Signed-off-by: Hajime Tazaki <[email protected]>
1 parent c2c11e5 commit 8f6b03d

File tree

3 files changed

+237
-9
lines changed

3 files changed

+237
-9
lines changed

tools/lkl/lib/hijack/hijack.c

Lines changed: 232 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <unistd.h>
1111
#include <stdio.h>
1212
#include <stdarg.h>
13+
#include <stdlib.h>
1314
#include <sys/types.h>
1415
#include <sys/mman.h>
1516
#define __USE_GNU
@@ -24,6 +25,7 @@
2425
#include <poll.h>
2526
#include <sys/ioctl.h>
2627
#include <assert.h>
28+
#include <pthread.h>
2729
#include <lkl.h>
2830
#include <lkl_host.h>
2931

@@ -53,6 +55,8 @@ static void *resolve_sym(const char *sym)
5355
typedef long (*host_call)(long p1, long p2, long p3, long p4, long p5, long p6);
5456

5557
static host_call host_calls[__lkl__NR_syscalls];
58+
/* internally managed fd list for epoll */
59+
int dual_fds[LKL_FD_OFFSET];
5660

5761
#define HOOK_FD_CALL(name) \
5862
static void __attribute__((constructor(101))) \
@@ -137,7 +141,6 @@ static int lkl_call(int nr, int args, ...)
137141
return lkl_set_errno(lkl_syscall(nr, params));
138142
}
139143

140-
HOOK_FD_CALL(close)
141144
HOOK_FD_CALL(recvmsg)
142145
HOOK_FD_CALL(sendmsg)
143146
HOOK_FD_CALL(sendmmsg)
@@ -156,14 +159,16 @@ HOOK_FD_CALL(read)
156159
HOOK_FD_CALL(readv)
157160
HOOK_FD_CALL(recvfrom)
158161
HOOK_FD_CALL(recv)
159-
HOOK_FD_CALL(epoll_wait)
160162
HOOK_FD_CALL(splice)
161163
HOOK_FD_CALL(vmsplice)
162164
HOOK_CALL_USE_HOST_BEFORE_START(pipe);
163165

164166
HOOK_CALL_USE_HOST_BEFORE_START(accept4);
165167
HOOK_CALL_USE_HOST_BEFORE_START(pipe2);
166168

169+
HOST_CALL(write)
170+
HOST_CALL(pipe2)
171+
167172
HOST_CALL(setsockopt);
168173
int setsockopt(int fd, int level, int optname, const void *optval,
169174
socklen_t optlen)
@@ -302,21 +307,239 @@ int select(int nfds, fd_set *r, fd_set *w, fd_set *e, struct timeval *t)
302307
return lkl_call(__lkl__NR_select, 5, nfds, r, w, e, t);
303308
}
304309

305-
HOOK_CALL_USE_HOST_BEFORE_START(epoll_create);
306-
HOOK_CALL_USE_HOST_BEFORE_START(epoll_create1);
310+
HOST_CALL(close);
311+
int close(int fd)
312+
{
313+
CHECK_HOST_CALL(close);
314+
315+
if (!is_lklfd(fd)) {
316+
/* handle epoll's dual_fd */
317+
if ((dual_fds[fd] != -1) && lkl_running) {
318+
lkl_call(__lkl__NR_close, 1, dual_fds[fd]);
319+
dual_fds[fd] = -1;
320+
}
321+
322+
return host_close(fd);
323+
}
324+
325+
return lkl_call(__lkl__NR_close, 1, fd);
326+
}
327+
328+
HOST_CALL(epoll_create);
329+
int epoll_create(int size)
330+
{
331+
int host_fd;
332+
333+
CHECK_HOST_CALL(epoll_create);
334+
335+
host_fd = host_epoll_create(size);
336+
if (!host_fd) {
337+
fprintf(stderr, "%s fail (%d)\n", __func__, errno);
338+
return -1;
339+
}
340+
341+
if (!lkl_running)
342+
return host_fd;
343+
344+
dual_fds[host_fd] = lkl_call(__lkl__NR_epoll_create, 1, size);
345+
346+
/* always returns the host fd */
347+
return host_fd;
348+
}
349+
350+
HOST_CALL(epoll_create1);
351+
int epoll_create1(int flags)
352+
{
353+
int host_fd;
354+
355+
CHECK_HOST_CALL(epoll_create1);
356+
357+
host_fd = host_epoll_create1(flags);
358+
if (!host_fd) {
359+
fprintf(stderr, "%s fail (%d)\n", __func__, errno);
360+
return -1;
361+
}
362+
363+
if (!lkl_running)
364+
return host_fd;
365+
366+
dual_fds[host_fd] = lkl_call(__lkl__NR_epoll_create1, 1, flags);
367+
368+
/* always returns the host fd */
369+
return host_fd;
370+
}
371+
307372

308373
HOST_CALL(epoll_ctl);
309374
int epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event)
310375
{
311376
CHECK_HOST_CALL(epoll_ctl);
312377

313-
if (is_lklfd(epollfd) != is_lklfd(fd))
314-
return lkl_set_errno(-LKL_EOPNOTSUPP);
315-
316-
if (!is_lklfd(epollfd))
378+
if (!is_lklfd(fd))
317379
return host_epoll_ctl(epollfd, op, fd, event);
318380

319-
return lkl_call(__lkl__NR_epoll_ctl, 4, epollfd, op, fd, event);
381+
return lkl_call(__lkl__NR_epoll_ctl, 4, dual_fds[epollfd],
382+
op, fd, event);
383+
}
384+
385+
/* Arguments shared between epoll_wait() and its host-side helper thread. */
struct epollarg {
	/* epoll descriptor, buffer, capacity and timeout for host_epoll_wait */
	int epfd;
	struct epoll_event *events;
	int maxevents;
	int timeout;

	/* LKL descriptor the helper writes to once the host wait returns */
	int pipefd;

	/* errno captured by the helper when the host wait returns -1 */
	int errnum;
};
393+
394+
HOST_CALL(epoll_wait)
395+
static void *host_epollwait(void *arg)
396+
{
397+
struct epollarg *earg = arg;
398+
int ret;
399+
400+
ret = host_epoll_wait(earg->epfd, earg->events,
401+
earg->maxevents, earg->timeout);
402+
if (ret == -1)
403+
earg->errnum = errno;
404+
lkl_call(__lkl__NR_write, 3, earg->pipefd, &ret, sizeof(ret));
405+
406+
return (void *)(intptr_t)ret;
407+
}
408+
409+
int epoll_wait(int epfd, struct epoll_event *events,
410+
int maxevents, int timeout)
411+
{
412+
CHECK_HOST_CALL(epoll_wait);
413+
CHECK_HOST_CALL(pipe2);
414+
415+
int l_pipe[2] = {-1, -1}, h_pipe[2] = {-1, -1};
416+
struct epoll_event host_ev, lkl_ev;
417+
int ret_events = maxevents;
418+
struct epoll_event h_events[ret_events], l_events[ret_events];
419+
struct epollarg earg;
420+
pthread_t thread;
421+
void *trv_val;
422+
int i, ret, ret_lkl, ret_host;
423+
424+
ret = lkl_call(__lkl__NR_pipe, 1, l_pipe);
425+
if (ret == -1) {
426+
fprintf(stderr, "lkl pipe error(errno=%d)\n", errno);
427+
return -1;
428+
}
429+
430+
ret = host_pipe2(h_pipe, 0);
431+
if (ret == -1) {
432+
fprintf(stderr, "host pipe error(errno=%d)\n", errno);
433+
return -1;
434+
}
435+
436+
if (dual_fds[epfd] == -1) {
437+
fprintf(stderr, "epollfd isn't available (%d)\n", epfd);
438+
abort();
439+
}
440+
441+
/* wait pipe at host/lkl epoll_fd */
442+
memset(&lkl_ev, 0, sizeof(lkl_ev));
443+
lkl_ev.events = EPOLLIN;
444+
lkl_ev.data.fd = l_pipe[0];
445+
ret = lkl_call(__lkl__NR_epoll_ctl, 4, dual_fds[epfd], EPOLL_CTL_ADD,
446+
l_pipe[0], &lkl_ev);
447+
if (ret == -1) {
448+
fprintf(stderr, "epoll_ctl error(epfd=%d:%d, fd=%d, err=%d)\n",
449+
epfd, dual_fds[epfd], l_pipe[0], errno);
450+
return -1;
451+
}
452+
453+
memset(&host_ev, 0, sizeof(host_ev));
454+
host_ev.events = EPOLLIN;
455+
host_ev.data.fd = h_pipe[0];
456+
ret = host_epoll_ctl(epfd, EPOLL_CTL_ADD, h_pipe[0], &host_ev);
457+
if (ret == -1) {
458+
fprintf(stderr, "host epoll_ctl error(%d, %d, %d, %d)\n",
459+
epfd, h_pipe[0], h_pipe[1], errno);
460+
return -1;
461+
}
462+
463+
464+
/* now wait by epoll_wait on 2 threads */
465+
memset(h_events, 0, sizeof(struct epoll_event) * ret_events);
466+
memset(l_events, 0, sizeof(struct epoll_event) * ret_events);
467+
earg.epfd = epfd;
468+
earg.events = h_events;
469+
earg.maxevents = maxevents;
470+
earg.timeout = timeout;
471+
earg.pipefd = l_pipe[1];
472+
pthread_create(&thread, NULL, host_epollwait, &earg);
473+
474+
ret_lkl = lkl_call(__lkl__NR_epoll_wait, 4, dual_fds[epfd], l_events,
475+
maxevents, timeout);
476+
if (ret_lkl == -1) {
477+
fprintf(stderr,
478+
"lkl_%s_wait error(epfd=%d:%d, fd=%d, err=%d)\n",
479+
__func__, epfd, dual_fds[epfd], l_pipe[0], errno);
480+
return -1;
481+
}
482+
host_write(h_pipe[1], &ret, sizeof(ret));
483+
pthread_join(thread, &trv_val);
484+
ret_host = (int)(intptr_t)trv_val;
485+
if (ret_host == -1) {
486+
fprintf(stderr,
487+
"host epoll_ctl error(%d, %d, %d, %d)\n", epfd,
488+
h_pipe[0], h_pipe[1], errno);
489+
return -1;
490+
}
491+
492+
ret = lkl_call(__lkl__NR_epoll_ctl, 4, dual_fds[epfd], EPOLL_CTL_DEL,
493+
l_pipe[0], &lkl_ev);
494+
if (ret == -1) {
495+
fprintf(stderr,
496+
"lkl epoll_ctl error(epfd=%d:%d, fd=%d, err=%d)\n",
497+
epfd, dual_fds[epfd], l_pipe[0], errno);
498+
return -1;
499+
}
500+
501+
ret = host_epoll_ctl(epfd, EPOLL_CTL_DEL, h_pipe[0], &host_ev);
502+
if (ret == -1) {
503+
fprintf(stderr, "host epoll_ctl error(%d, %d, %d, %d)\n",
504+
epfd, h_pipe[0], h_pipe[1], errno);
505+
return -1;
506+
}
507+
508+
memset(events, 0, sizeof(struct epoll_event) * maxevents);
509+
ret = 0;
510+
if (ret_host > 0) {
511+
for (i = 0; i < ret_host; i++) {
512+
if (h_events[i].data.fd == h_pipe[0])
513+
continue;
514+
if (is_lklfd(h_events[i].data.fd))
515+
continue;
516+
517+
memcpy(events, &(h_events[i]),
518+
sizeof(struct epoll_event));
519+
events++;
520+
ret++;
521+
}
522+
}
523+
if (ret_lkl > 0) {
524+
for (i = 0; i < ret_lkl; i++) {
525+
if (l_events[i].data.fd == l_pipe[0])
526+
continue;
527+
if (!is_lklfd(l_events[i].data.fd))
528+
continue;
529+
530+
memcpy(events, &(l_events[i]),
531+
sizeof(struct epoll_event));
532+
events++;
533+
ret++;
534+
}
535+
}
536+
537+
lkl_call(__lkl__NR_close, 1, l_pipe[0]);
538+
lkl_call(__lkl__NR_close, 1, l_pipe[1]);
539+
host_close(h_pipe[0]);
540+
host_close(h_pipe[1]);
541+
542+
return ret;
320543
}
321544

322545
int eventfd(unsigned int count, int flags)

tools/lkl/lib/hijack/init.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <lkl_host.h>
2626

2727
#include "xlate.h"
28+
#include "init.h"
2829
#include "../config.h"
2930

3031
#define __USE_GNU
@@ -490,6 +491,9 @@ hijack_init(void)
490491

491492
lkl_running = 1;
492493

494+
/* initialize epoll manage list */
495+
memset(dual_fds, -1, sizeof(int) * LKL_FD_OFFSET);
496+
493497
/* restore cpu affinity */
494498
if (single_cpu_mode)
495499
PinToCpus(&ori_cpu);

tools/lkl/lib/hijack/init.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22
#define _LKL_HIJACK_INIT_H
33

44
extern int lkl_running;
5+
extern int dual_fds[];
56

67
#endif /*_LKL_HIJACK_INIT_H */

0 commit comments

Comments
 (0)