CA-225067: tap_ctl_{spawn,create}: move tapdisk to cgroup slice

The '-c' option allows the user to specify which cgroup slice the new tapdisk
process should be put in. If the option is not given, the process is moved to
the default cgroup slice.

This avoids the need to rely on the cgrulesengd daemon, which fails to do its
job if writing to the tasks file takes longer than the lifespan of the parent
tapdisk process (i.e. the time before it forks and exits). On a 4.4 kernel, a
write to this file can take several milliseconds, so such a failure is very
likely.

Failure to move the tapdisk process to the desired cgroup is logged but ignored.

The compile-time option -DTAP_CTL_NO_DEFAULT_CGROUP_SLICE can disable the
default behaviour of moving spawned tapdisk processes to the system's default
cgroup.

Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com>
---
 control/tap-ctl-create.c |  4 ++--
 control/tap-ctl-spawn.c  | 54 +++++++++++++++++++++++++++++++++++++++++++++++-
 control/tap-ctl.c        | 21 +++++++++++++------
 include/tap-ctl.h        |  4 ++--
 4 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/control/tap-ctl-create.c b/control/tap-ctl-create.c
index 608bec1d..218f8e3c 100644
--- a/control/tap-ctl-create.c
+++ b/control/tap-ctl-create.c
@@ -43,7 +43,7 @@
 
 int
 tap_ctl_create(const char *params, char **devname, int flags, int parent_minor,
-		char *secondary, int timeout)
+		char *secondary, int timeout, const char *slice)
 {
 	int err, id, minor;
 
@@ -51,7 +51,7 @@ tap_ctl_create(const char *params, char **devname, int flags, int parent_minor,
 	if (err)
 		return err;
 
-	id = tap_ctl_spawn();
+	id = tap_ctl_spawn(slice);
 	if (id < 0) {
 		err = id;
 		goto destroy;
diff --git a/control/tap-ctl-spawn.c b/control/tap-ctl-spawn.c
index 47d915e0..316df7e2 100644
--- a/control/tap-ctl-spawn.c
+++ b/control/tap-ctl-spawn.c
@@ -172,8 +172,48 @@ tap_ctl_get_child_id(int readfd)
 	return id;
 }
 
+/* Move process 'pid' into cpu cgroup 'slice' via /sys/fs/cgroup/cpu/<slice>/tasks.
+ * Return 0 on success, -1 on error (with errno set). The write is buffered, so
+ * a rejection by the kernel only shows up when fclose() flushes the stream. */
+static int
+tap_ctl_move_to_cgroup(int pid, const char *slice)
+{
+	char path[FILENAME_MAX];
+	FILE *f;
+	int n, err;
+
+	if (!slice) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	n = snprintf(path, sizeof(path), "/sys/fs/cgroup/cpu/%s/tasks", slice);
+	if (n < 0 || (size_t)n >= sizeof(path)) {
+		EPRINTF("cgroup tasks path for slice '%s' too long\n", slice);
+		errno = ENAMETOOLONG; /* snprintf leaves errno unset on truncation */
+		return -1;
+	}
+
+	f = fopen(path, "we");
+	if (!f) {
+		err = errno; /* EPRINTF may clobber errno */
+		EPRINTF("failed to open cgroups task file '%s': %d\n", path, err);
+		errno = err;
+		return -1;
+	}
+
+	err = (fprintf(f, "%d", pid) < 0) ? errno : 0;
+	if (fclose(f) == EOF && !err)
+		err = errno;
+	if (!err)
+		return 0;
+	EPRINTF("failed to write pid to cgroups task file '%s': %d\n", path, err);
+	errno = err;
+	return -1;
+}
+
 int
-tap_ctl_spawn(void)
+tap_ctl_spawn(const char *slice)
 {
 	pid_t child;
 	int err, id, readfd;
@@ -196,5 +236,17 @@ tap_ctl_spawn(void)
 	if (id < 0)
 		EPRINTF("get_id failed, child %d err %d\n", child, errno);
 
+	/* Put the tapdisk in a cgroup slice (best-effort) */
+	if (!slice) {
+#ifndef TAP_CTL_NO_DEFAULT_CGROUP_SLICE
+		/* No option specified; move it to the default slice */
+		if (tap_ctl_move_to_cgroup(id, "/") < 0)
+			EPRINTF("failed to move tapdisk %d to default cgroup slice: %s; ignoring.\n", id, strerror(errno));
+#endif
+	} else {
+		if (tap_ctl_move_to_cgroup(id, slice) < 0)
+			EPRINTF("failed to move tapdisk %d to slice '%s': %s; ignoring.\n", id, slice, strerror(errno));
+	}
+
 	return id;
 }
diff --git a/control/tap-ctl.c b/control/tap-ctl.c
index 443b66be..7089ce02 100644
--- a/control/tap-ctl.c
+++ b/control/tap-ctl.c
@@ -270,7 +270,8 @@ tap_cli_create_usage(FILE *stream)
 		"[-r turn on read caching into leaf node] [-2 <path> "
 		"use secondary image (in mirror mode if no -s)] [-s "
 		"fail over to the secondary image on ENOSPC] "
-		"[-t request timeout in seconds] [-D no O_DIRECT]\n");
+		"[-t request timeout in seconds] [-D no O_DIRECT] "
+		"[-c <cgroup-slice>]\n");
 }
 
 static int
@@ -278,6 +279,7 @@ tap_cli_create(int argc, char **argv)
 {
 	int c, err, flags, prt_minor, timeout;
 	char *args, *devname, *secondary;
+	char *slice = NULL;
 	char d_flag = 0;
 
 	args      = NULL;
@@ -288,11 +290,14 @@ tap_cli_create(int argc, char **argv)
 	timeout   = 0;
 
 	optind = 0;
-	while ((c = getopt(argc, argv, "a:RDd:e:r2:st:h")) != -1) {
+	while ((c = getopt(argc, argv, "a:c:RDd:e:r2:st:h")) != -1) {
 		switch (c) {
 		case 'a':
 			args = optarg;
 			break;
+		case 'c':
+			slice = optarg;
+			break;
 		case 'd':
 			devname = optarg;
 			d_flag = 1;
@@ -332,7 +337,7 @@ tap_cli_create(int argc, char **argv)
 		goto usage;
 
 	err = tap_ctl_create(args, &devname, flags, prt_minor, secondary,
-			timeout);
+			timeout, slice);
 	if (!err)
 		printf("%s\n", devname);
 
@@ -412,7 +417,7 @@ tap_cli_destroy(int argc, char **argv)
 static void
 tap_cli_spawn_usage(FILE *stream)
 {
-	fprintf(stream, "usage: spawn\n");
+	fprintf(stream, "usage: spawn [ -c <cgroup-slice> ]\n");
 }
 
 static int
@@ -420,10 +425,14 @@ tap_cli_spawn(int argc, char **argv)
 {
 	int c, tty;
 	pid_t pid;
+	char *slice = NULL;
 
 	optind = 0;
-	while ((c = getopt(argc, argv, "h")) != -1) {
+	while ((c = getopt(argc, argv, "c:h")) != -1) {
 		switch (c) {
+		case 'c':
+			slice = optarg;
+			break;
 		case '?':
 			goto usage;
 		case 'h':
@@ -432,7 +441,7 @@ tap_cli_spawn(int argc, char **argv)
 		}
 	}
 
-	pid = tap_ctl_spawn();
+	pid = tap_ctl_spawn(slice);
 	if (pid < 0)
 		return pid;
 
diff --git a/include/tap-ctl.h b/include/tap-ctl.h
index d5468207..26ec3a4f 100644
--- a/include/tap-ctl.h
+++ b/include/tap-ctl.h
@@ -105,11 +105,11 @@ int tap_ctl_allocate(int *minor, char **devname);
 int tap_ctl_free(const int minor);
 
 int tap_ctl_create(const char *params, char **devname, int flags, 
-		int prt_minor, char *secondary, int timeout);
+		int prt_minor, char *secondary, int timeout, const char *slice);
 int tap_ctl_destroy(const int id, const int minor, int force,
 		    struct timeval *timeout);
 
-int tap_ctl_spawn(void);
+int tap_ctl_spawn(const char *slice);
 pid_t tap_ctl_get_pid(const int id);
 
 int tap_ctl_attach(const int id, const int minor);
