On Wed, Dec 14, 2022 at 10:54:33AM +0100, Kevin Brodsky wrote:
On 13/12/2022 12:55, Beata Michalska wrote:
Add a bunch of test-cases for clone3 syscall, focusing mainly on handling struct clone_args versioning and sizing, alongside the obvious req for respecting capabilities.
Signed-off-by: Beata Michalska beata.michalska@arm.com
.../testing/selftests/arm64/morello/Makefile | 1 + tools/testing/selftests/arm64/morello/clone.c | 228 +++++++++++++++++- 2 files changed, 228 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/arm64/morello/Makefile b/tools/testing/selftests/arm64/morello/Makefile index 7dbf5b140695..c79e8973dbbf 100644 --- a/tools/testing/selftests/arm64/morello/Makefile +++ b/tools/testing/selftests/arm64/morello/Makefile @@ -38,3 +38,4 @@ $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/freestanding_start.o $(OUTPUT)/freestanding $(CC) $^ -o $@ $(LDFLAGS) $(OUTPUT)/signal: $(OUTPUT)/signal_common.o +$(OUTPUT)/clone: $(OUTPUT)/signal_common.o diff --git a/tools/testing/selftests/arm64/morello/clone.c b/tools/testing/selftests/arm64/morello/clone.c index 8217840ba504..da3a43f00c04 100644 --- a/tools/testing/selftests/arm64/morello/clone.c +++ b/tools/testing/selftests/arm64/morello/clone.c @@ -1,15 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2021 Arm Limited +#include "freestanding.h" #include <linux/mman.h> #include <linux/resource.h> #include <linux/wait.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/errno.h> +#include <linux/signal.h> +#include <linux/types.h> +#include <limits.h> #include <cheriintrin.h> -#include "freestanding.h" +#include "signal_common.h" #define STACK_SIZE 1024*1024 +#define TLS_SIZE 4096 #define in_restricted() \ (!(cheri_perms_get(cheri_pcc_get()) & ARM_CAP_PERMISSION_EXECUTIVE)) @@ -17,6 +22,10 @@ #define MAGIC_NR 0x3562 /* whatever really ..... */ +#ifndef MAX_PID_NS_LEVEL +#define MAX_PID_NS_LEVEL 32 +#endif
/* Cloned thread result */ #define CTR_SUCCESS 1 #define CTR_FAILED -1 @@ -256,6 +265,222 @@ TEST(test_wait_rusage) RUN_WITH_FIXTURE(data, CLONE_TH_RUSAGE, wait_single); } +#define GET_ERRNO(err) (((err) + MAX_ERRNO) ^ MAX_ERRNO)
+#define CUSTOM_CLONE_STACK_INV BIT(1)
+static struct clone3_fixture {
- size_t args_size;
- struct clone_args args;
- int test_flags;
- int e_result;
+} clone3_data[] = {
- /* BEGIN_SECTION: expected failure */
- /* size of clone_args smaller than CLONE_ARGS_SIZE_VER0 */
- {
.args_size = offsetof(struct clone_args, tls),
.e_result = -EINVAL
- }, /* @{0} */
- /* invalid set_tid array size */
- {
.args_size = sizeof(struct clone_args),
A bit late to suggest this, sorry, but maybe this could be made the default value (if .args_size is 0)? That removes a few lines but more importantly makes the non-default values stand out a lot more.
I can do that though there are comments for each expected-failure case that do describe what's wrong with the setup.
.args.set_tid_size = MAX_PID_NS_LEVEL + 1,
.e_result = -EINVAL
- }, /* @{1} */
- /* Invalid combination of set_tid & set_tid_size */
- {
.args_size = sizeof(struct clone_args),
.args.set_tid_size = 1,
.e_result = -EINVAL
- }, /* @{2} */
- /* Invalid exit_signal */
- {
.args_size = sizeof(struct clone_args),
.args.exit_signal = _NSIG + 1,
.e_result = -EINVAL
- }, /* @{3} */
- /* Invalid cgroup number */
- {
.args_size = sizeof(struct clone_args),
.args.flags = CLONE_INTO_CGROUP,
.args.cgroup = (__u64)INT_MAX + 1,
.e_result = -EINVAL
- }, /* @{4} */
- /* Invalid size for clone_args with cgroup */
- {
.args_size = offsetof(struct clone_args, cgroup),
.args.flags = CLONE_INTO_CGROUP,
.args.cgroup = 1,
.e_result = -EINVAL
- }, /* @{5} */
- /* Invalid stack & stack_size combination */
That comment doesn't apply any more.
You mean the comment below (?)
- /* Use top bits of clone flags to mark those */
- {
.args_size = sizeof(struct clone_args),
.args.stack_size = STACK_SIZE,
.test_flags = CUSTOM_CLONE_STACK_INV,
.e_result = -EINVAL
- }, /* @{6} */
- {
.args_size = sizeof(struct clone_args),
.test_flags = CUSTOM_CLONE_STACK_INV,
.e_result = -EINVAL
- }, /* @{7} */
- /* Invalid set_tid entry */
- {
.args_size = sizeof(struct clone_args),
.args.set_tid = (uintptr_t)&(pid_t){1},
.args.set_tid_size = 1,
.e_result = -EEXIST
- }, /* @{8} */
- /* END_SECTION: expected failure */
- {
.args_size = sizeof(struct clone_args),
.args.flags = CLONE_PIDFD | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID,
.e_result = 0
- }, /* @{9} */
- {
.args_size = sizeof(struct clone_args),
.args.flags = CLONE_PARENT_SETTID | CLONE_CHILD_SETTID,
.e_result = 0
- }, /* @{10} */
- {
.args_size = sizeof(struct clone_args),
.args.flags = CLONE_SETTLS,
.e_result = 0
- }, /* @{11} */
+};
+static __attribute__((noinline)) void run_child(struct clone_args *args) +{
- static __thread int tls_data;
- if (args->flags & CLONE_CHILD_SETTID) {
pid_t current_pid = getpid();
ASSERT_FALSE(current_pid != *(pid_t *)args->child_tid);
Just noticed that this could be ASSERT_TRUE() to avoid the double negation, or probably better ASSERT_EQ().
OK.
- }
- if (args->flags & CLONE_SETTLS && args->tls) {
ptraddr_t base_addr = cheri_address_get(args->tls);
ptraddr_t ref_addr = cheri_address_get(&tls_data);
size_t length = cheri_length_get(args->tls);
ASSERT_TRUE(cheri_tag_get(args->tls));
ASSERT_TRUE(ref_addr >= base_addr && ref_addr < base_addr + length);
- }
- if (args->flags & CLONE_PIDFD) {
sigset_t set;
ASSERT_EQ(sigemptyset(&set), 0);
ASSERT_EQ(sigaddset(&set, SIGUSR1), 0);
ASSERT_EQ(sigprocmask(SIG_BLOCK, &set, NULL), 0);
/* Suspend utill parent kicks things back in */
ASSERT_EQ(syscall(__NR_kill, getpid(), SIGSTOP), 0);
/* Wait for a signal */
ASSERT_EQ(rt_sigtimedwait(&set, NULL, 0, sizeof(set)),
SIGUSR1);
- }
- syscall(__NR_exit, 0);
+}
+static inline __attribute__((always_inline)) +void run_clone3(struct clone3_fixture *data) +{
- struct clone_args *args = &(data->args);
- int pidfd, parent_tid = 0, child_tid = 0;
- siginfo_t wait_si;
- int result;
- pid_t pid;
- args->pidfd = (uintcap_t)&pidfd;
- args->parent_tid = (uintcap_t)&parent_tid;
- args->child_tid = (uintcap_t)&child_tid;
- if (!args->exit_signal)
args->exit_signal = SIGCHLD;
- if (!args->stack_size) {
args->stack = (uintcap_t) mmap_verified(NULL, STACK_SIZE,
PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE,
-1, 0, STACK_REQ_PERMS);
args->stack_size =
data->test_flags & CUSTOM_CLONE_STACK_INV ? 0 : STACK_SIZE;
- }
- if (data->e_result) {
result = syscall(__NR_clone3, args, data->args_size);
ASSERT_EQ(result, data->e_result) {
TH_LOG("Expected: %d while %d was received",
GET_ERRNO(data->e_result), GET_ERRNO(result));
}
if (args->stack)
munmap((void *)args->stack, STACK_SIZE);
return;
- }
- args->flags |= CLONE_VM;
- if (args->flags & CLONE_SETTLS) {
void *addr = mmap_verified(NULL, TLS_SIZE, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0,
CAP_LOAD_PERMS | CAP_STORE_PERMS);
args->tls = (uintcap_t)cheri_bounds_set(addr, TLS_SIZE);
I'm realising something a little annoying here, which is that the munmap() of args->tls will fail once PCuABI checks are in place if the page size is > 4K (because the capability bounds need to include the page(s) to be unmapped). I think you have two options, either stash the capability returned by mmap() somewhere, or make TLS_SIZE 64K to avoid having to do that.
Yeah, good point. I think I will go for stashed value which might be more descriptive.
--- BR B.
Kevin
- }
- pid = syscall(__NR_clone3, args, data->args_size);
- ASSERT_GE(pid, 0);
- if (!pid)
run_child(args);
- if (args->flags & CLONE_PIDFD) {
/* Make sure the child here gets a chance to block the signal */
result = waitid(P_PID, pid, &wait_si, WSTOPPED, NULL);
ASSERT_EQ(result, 0);
result = syscall(__NR_kill, pid, SIGCONT);
ASSERT_EQ(result, 0);
/* Signal handling is not the test target here: valid pidfd is */
result = syscall(__NR_pidfd_send_signal, pidfd, SIGUSR1, NULL, 0);
ASSERT_EQ(result, 0);
- }
- result = waitid(P_PID, pid, &wait_si, WEXITED, NULL);
- ASSERT_EQ(result, 0);
- /* child_tid set once the thread gets scheduled */
- if (args->flags & CLONE_PARENT_SETTID && args->flags & CLONE_CHILD_SETTID
&& !(args->flags & CLONE_CHILD_CLEARTID)) {
ASSERT_EQ(parent_tid, child_tid);
ASSERT_NE(parent_tid, 0);
- }
- if (args->flags & CLONE_CHILD_CLEARTID)
ASSERT_EQ(child_tid, 0);
- munmap((void *)args->stack, STACK_SIZE);
- if (args->flags & CLONE_SETTLS)
munmap((void *)args->tls, TLS_SIZE);
+}
+TEST(test_clone3) +{
- int ncount = sizeof(clone3_data)/sizeof(clone3_data[0]);
- for (int i = 0; i < ncount; ++i) {
TH_LOG("Validating clone3 @{%d}", i);
run_clone3(&clone3_data[i]);
- }
+}
void run_restricted(uintcap_t entry_point) { void *new_stack = allocate_mem_raw(STACK_SIZE); @@ -290,5 +515,6 @@ int main(void) run_restricted((uintcap_t)test_clone_tls_restricted); test_wait_raw(); test_wait_rusage();
- test_clone3(); return 0;
}