Skip to content

Commit a85f488

Browse files
jesus-ramos authored and avagin committed
criu/plugin: Introduce new plugin hooks PAUSE_DEVICES and CHECKPOINT_DEVICES to be used during pstree collection
PAUSE_DEVICES is called before a process is frozen and is used by the CUDA plugin to place the process in a state that's ready to be checkpointed and to quiesce any pending work.

CHECKPOINT_DEVICES is called after all processes in the tree have been frozen and PAUSE'd, and performs the actual checkpointing operation for CUDA applications.

Signed-off-by: Jesus Ramos <[email protected]>
1 parent fc65e46 commit a85f488

File tree

3 files changed

+26
-0
lines changed

3 files changed

+26
-0
lines changed

criu/include/criu-plugin.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ enum {
5656

5757
CR_PLUGIN_HOOK__RESUME_DEVICES_LATE = 9,
5858

59+
CR_PLUGIN_HOOK__PAUSE_DEVICES = 10,
60+
61+
CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11,
62+
5963
CR_PLUGIN_HOOK__MAX
6064
};
6165

@@ -72,6 +76,8 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, int fd, const struct
7276
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const uint64_t addr,
7377
const uint64_t old_pgoff, uint64_t *new_pgoff, int *plugin_fd);
7478
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
79+
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid);
80+
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid);
7581

7682
enum {
7783
CR_PLUGIN_STAGE__DUMP,

criu/plugin.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path)
5757
__assign_hook(HANDLE_DEVICE_VMA, "cr_plugin_handle_device_vma");
5858
__assign_hook(UPDATE_VMA_MAP, "cr_plugin_update_vma_map");
5959
__assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late");
60+
__assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices");
61+
__assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices");
6062

6163
#undef __assign_hook
6264

criu/seize.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "pstree.h"
1717
#include "criu-log.h"
1818
#include <compel/ptrace.h>
19+
#include "plugin.h"
1920
#include "proc_parse.h"
2021
#include "seccomp.h"
2122
#include "seize.h"
@@ -637,6 +638,11 @@ static int collect_children(struct pstree_item *item)
637638
goto free;
638639
}
639640

641+
ret = run_plugins(PAUSE_DEVICES, pid);
642+
if (ret < 0 && ret != -ENOTSUP) {
643+
goto free;
644+
}
645+
640646
if (!opts.freeze_cgroup)
641647
/* fails when meets a zombie */
642648
__ignore_value(compel_interrupt_task(pid));
@@ -966,6 +972,7 @@ int collect_pstree(void)
966972
pid_t pid = root_item->pid->real;
967973
int ret = -1;
968974
struct proc_status_creds creds;
975+
struct pstree_item *iter;
969976

970977
timing_start(TIME_FREEZING);
971978

@@ -984,6 +991,11 @@ int collect_pstree(void)
984991
if (opts.freeze_cgroup && freeze_processes())
985992
goto err;
986993

994+
ret = run_plugins(PAUSE_DEVICES, pid);
995+
if (ret < 0 && ret != -ENOTSUP) {
996+
goto err;
997+
}
998+
987999
if (!opts.freeze_cgroup && compel_interrupt_task(pid)) {
9881000
set_cr_errno(ESRCH);
9891001
goto err;
@@ -1017,6 +1029,12 @@ int collect_pstree(void)
10171029
goto err;
10181030
}
10191031

1032+
for_each_pstree_item(iter) {
1033+
ret = run_plugins(CHECKPOINT_DEVICES, iter->pid->real);
1034+
if (ret < 0 && ret != -ENOTSUP)
1035+
goto err;
1036+
}
1037+
10201038
ret = 0;
10211039
timing_stop(TIME_FREEZING);
10221040
timing_start(TIME_FROZEN);

0 commit comments

Comments
 (0)