Introduce compat versions of the structs exposed in the uAPI headers that may contain pointers as members. Also, implement functions that convert the compat versions of these structs to their native versions.
A subsequent patch is going to change the io_uring structs to enable them to support new architectures. On such architectures, the current struct layout must still be supported for compat tasks, which is what these compat structs and conversion helpers provide.
Signed-off-by: Tudor Cretu tudor.cretu@arm.com --- include/linux/io_uring_types.h | 140 ++++++++++++++++++- io_uring/cancel.c | 38 ++++- io_uring/epoll.c | 2 +- io_uring/fdinfo.c | 55 +++++++- io_uring/io_uring.c | 248 +++++++++++++++++++++++---------- io_uring/io_uring.h | 122 +++++++++++++--- io_uring/kbuf.c | 108 ++++++++++++-- io_uring/kbuf.h | 6 +- io_uring/net.c | 5 +- io_uring/rsrc.c | 125 +++++++++++++++-- io_uring/tctx.c | 57 ++++++-- io_uring/uring_cmd.h | 7 + 12 files changed, 782 insertions(+), 131 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 440179029a8f0..737bc1aa67306 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -7,6 +7,130 @@ #include <linux/llist.h> #include <uapi/linux/io_uring.h>
+struct compat_io_uring_sqe { + __u8 opcode; + __u8 flags; + __u16 ioprio; + __s32 fd; + union { + __u64 off; + __u64 addr2; + struct { + __u32 cmd_op; + __u32 __pad1; + }; + }; + union { + __u64 addr; + __u64 splice_off_in; + }; + __u32 len; + union { + __kernel_rwf_t rw_flags; + __u32 fsync_flags; + __u16 poll_events; + __u32 poll32_events; + __u32 sync_range_flags; + __u32 msg_flags; + __u32 timeout_flags; + __u32 accept_flags; + __u32 cancel_flags; + __u32 open_flags; + __u32 statx_flags; + __u32 fadvise_advice; + __u32 splice_flags; + __u32 rename_flags; + __u32 unlink_flags; + __u32 hardlink_flags; + __u32 xattr_flags; + __u32 msg_ring_flags; + __u32 uring_cmd_flags; + }; + __u64 user_data; + union { + __u16 buf_index; + __u16 buf_group; + } __packed; + __u16 personality; + union { + __s32 splice_fd_in; + __u32 file_index; + struct { + __u16 addr_len; + __u16 __pad3[1]; + }; + }; + union { + struct { + __u64 addr3; + __u64 __pad2[1]; + }; + __u8 cmd[0]; + }; +}; + +struct compat_io_uring_cqe { + __u64 user_data; + __s32 res; + __u32 flags; + __u64 big_cqe[]; +}; + +struct compat_io_uring_files_update { + __u32 offset; + __u32 resv; + __aligned_u64 fds; +}; + +struct compat_io_uring_rsrc_register { + __u32 nr; + __u32 flags; + __u64 resv2; + __aligned_u64 data; + __aligned_u64 tags; +}; + +struct compat_io_uring_rsrc_update { + __u32 offset; + __u32 resv; + __aligned_u64 data; +}; + +struct compat_io_uring_rsrc_update2 { + __u32 offset; + __u32 resv; + __aligned_u64 data; + __aligned_u64 tags; + __u32 nr; + __u32 resv2; +}; + +struct compat_io_uring_buf { + __u64 addr; + __u32 len; + __u16 bid; + __u16 resv; +}; + +struct compat_io_uring_buf_ring { + union { + struct { + __u64 resv1; + __u32 resv2; + __u16 resv3; + __u16 tail; + }; + struct compat_io_uring_buf bufs[0]; + }; +}; + +struct compat_io_uring_getevents_arg { + __u64 sigmask; + __u32 sigmask_sz; + __u32 pad; + __u64 ts; +}; + struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -216,7 +340,11 
@@ struct io_ring_ctx { * array. */ u32 *sq_array; - struct io_uring_sqe *sq_sqes; + + union { + struct compat_io_uring_sqe *sq_sqes_compat; + struct io_uring_sqe *sq_sqes; + }; unsigned cached_sq_head; unsigned sq_entries;
@@ -271,7 +399,10 @@ struct io_ring_ctx { * produced, so the application is allowed to modify pending * entries. */ - struct io_uring_cqe *cqes; + union { + struct compat_io_uring_cqe *cqes_compat; + struct io_uring_cqe *cqes; + };
/* * We cache a range of free CQEs we can use, once exhausted it @@ -581,7 +712,10 @@ struct io_kiocb {
struct io_overflow_cqe { struct list_head list; - struct io_uring_cqe cqe; + union { + struct compat_io_uring_cqe compat_cqe; + struct io_uring_cqe cqe; + }; };
#endif diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 2291a53cdabd1..20cb8634e44af 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -27,6 +27,42 @@ struct io_cancel { #define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \ IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED)
+struct compat_io_uring_sync_cancel_reg { + __u64 addr; + __s32 fd; + __u32 flags; + struct __kernel_timespec timeout; + __u64 pad[4]; +}; + +#ifdef CONFIG_COMPAT64 +static int get_compat_io_uring_sync_cancel_reg(struct io_uring_sync_cancel_reg *sc, + const void __user *user_sc) +{ + struct compat_io_uring_sync_cancel_reg compat_sc; + + if (unlikely(copy_from_user(&compat_sc, user_sc, sizeof(compat_sc)))) + return -EFAULT; + sc->addr = compat_sc.addr; + sc->fd = compat_sc.fd; + sc->flags = compat_sc.flags; + sc->timeout = compat_sc.timeout; + memcpy(sc->pad, compat_sc.pad, sizeof(sc->pad)); + return 0; +} +#endif + +static int copy_io_uring_sync_cancel_reg_from_user(struct io_ring_ctx *ctx, + struct io_uring_sync_cancel_reg *sc, + const void __user *arg) +{ +#ifdef CONFIG_COMPAT64 + if (ctx->compat) + return get_compat_io_uring_sync_cancel_reg(sc, arg); +#endif + return copy_from_user(sc, arg, sizeof(*sc)); +} + static bool io_cancel_cb(struct io_wq_work *work, void *data) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); @@ -243,7 +279,7 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) DEFINE_WAIT(wait); int ret;
- if (copy_from_user(&sc, arg, sizeof(sc))) + if (copy_io_uring_sync_cancel_reg_from_user(ctx, &sc, arg)) return -EFAULT; if (sc.flags & ~CANCEL_FLAGS) return -EINVAL; diff --git a/io_uring/epoll.c b/io_uring/epoll.c index 9aa74d2c80bc4..d5580ff465c3e 100644 --- a/io_uring/epoll.c +++ b/io_uring/epoll.c @@ -40,7 +40,7 @@ int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct epoll_event __user *ev;
ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); - if (copy_from_user(&epoll->event, ev, sizeof(*ev))) + if (copy_epoll_event_from_user(&epoll->event, ev, req->ctx->compat)) return -EFAULT; }
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index bc8c9d764bc13..c5bd669081c98 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -89,12 +89,25 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, for (i = 0; i < sq_entries; i++) { unsigned int entry = i + sq_head; struct io_uring_sqe *sqe; - unsigned int sq_idx; + unsigned int sq_idx, sq_off; +#ifdef CONFIG_COMPAT64 + struct io_uring_sqe *native_sqe = NULL; +#endif
sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); if (sq_idx > sq_mask) continue; - sqe = &ctx->sq_sqes[sq_idx << sq_shift]; + sq_off = sq_idx << sq_shift; + sqe = ctx->compat ? (void *)&ctx->sq_sqes_compat[sq_off] : &ctx->sq_sqes[sq_off]; +#ifdef CONFIG_COMPAT64 + if (ctx->compat) { + native_sqe = kmalloc(sizeof(struct io_uring_sqe) << sq_shift, GFP_KERNEL); + convert_compat_io_uring_sqe(ctx, native_sqe, + (struct compat_io_uring_sqe *)sqe); + sqe = native_sqe; + } +#endif + seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, " "addr:0x%llx, rw_flags:0x%x, buf_index:%d " "user_data:%llu", @@ -104,7 +117,8 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, sqe->buf_index, sqe->user_data); if (sq_shift) { u64 *sqeb = (void *) (sqe + 1); - int size = sizeof(struct io_uring_sqe) / sizeof(u64); + int size = (ctx->compat ? sizeof(struct compat_io_uring_sqe) + : sizeof(struct io_uring_sqe)) / sizeof(u64); int j;
for (j = 0; j < size; j++) { @@ -114,12 +128,29 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, } } seq_printf(m, "\n"); +#ifdef CONFIG_COMPAT64 + kfree(native_sqe); +#endif } seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head); cq_entries = min(cq_tail - cq_head, ctx->cq_entries); for (i = 0; i < cq_entries; i++) { unsigned int entry = i + cq_head; - struct io_uring_cqe *cqe = &ctx->cqes[(entry & cq_mask) << cq_shift]; + struct io_uring_cqe *cqe; + unsigned int cq_off = (entry & cq_mask) << cq_shift; +#ifdef CONFIG_COMPAT64 + struct io_uring_cqe *native_cqe = NULL; +#endif + + cqe = ctx->compat ? (void *)&ctx->cqes_compat[cq_off] : &ctx->cqes[cq_off]; +#ifdef CONFIG_COMPAT64 + if (ctx->compat) { + native_cqe = kmalloc(sizeof(struct io_uring_cqe) << cq_shift, GFP_KERNEL); + convert_compat_io_uring_cqe(ctx, native_cqe, + (struct compat_io_uring_cqe *)cqe); + cqe = native_cqe; + } +#endif
seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x", entry & cq_mask, cqe->user_data, cqe->res, @@ -128,6 +159,9 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, seq_printf(m, ", extra1:%llu, extra2:%llu\n", cqe->big_cqe[0], cqe->big_cqe[1]); seq_printf(m, "\n"); +#ifdef CONFIG_COMPAT64 + kfree(native_cqe); +#endif }
/* @@ -189,10 +223,23 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { struct io_uring_cqe *cqe = &ocqe->cqe; +#ifdef CONFIG_COMPAT64 + struct io_uring_cqe *native_cqe = NULL; + + if (ctx->compat) { + native_cqe = kmalloc(sizeof(struct io_uring_cqe) << cq_shift, GFP_KERNEL); + convert_compat_io_uring_cqe(ctx, native_cqe, + (struct compat_io_uring_cqe *)cqe); + cqe = native_cqe; + } +#endif
seq_printf(m, " user_data=%llu, res=%d, flags=%x\n", cqe->user_data, cqe->res, cqe->flags);
+#ifdef CONFIG_COMPAT64 + kfree(native_cqe); +#endif }
spin_unlock(&ctx->completion_lock); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 707229ae04dc8..03506776af043 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -152,6 +152,43 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
static struct kmem_cache *req_cachep;
+#ifdef CONFIG_COMPAT64 +static int get_compat_io_uring_getevents_arg(struct io_uring_getevents_arg *arg, + const void __user *user_arg) +{ + struct compat_io_uring_getevents_arg compat_arg; + + if (unlikely(copy_from_user(&compat_arg, user_arg, sizeof(compat_arg)))) + return -EFAULT; + arg->sigmask = (__kernel_uintptr_t)compat_ptr(compat_arg.sigmask); + arg->sigmask_sz = compat_arg.sigmask_sz; + arg->pad = compat_arg.pad; + arg->ts = (__kernel_uintptr_t)compat_ptr(compat_arg.ts); + return 0; +} +#endif /* CONFIG_COMPAT64 */ + +static int copy_io_uring_getevents_arg_from_user(struct io_ring_ctx *ctx, + struct io_uring_getevents_arg *arg, + const void __user *argp, + size_t size) +{ +#ifdef CONFIG_COMPAT64 + if (ctx->compat) { + if (size != sizeof(struct compat_io_uring_getevents_arg)) + return -EINVAL; + if (get_compat_io_uring_getevents_arg(arg, argp)) + return -EFAULT; + return 0; + } +#endif + if (size != sizeof(*arg)) + return -EINVAL; + if (copy_from_user(arg, argp, sizeof(*arg))) + return -EFAULT; + return 0; +} + struct sock *io_uring_get_socket(struct file *file) { #if defined(CONFIG_UNIX) @@ -604,7 +641,9 @@ void io_cq_unlock_post(struct io_ring_ctx *ctx) static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) { bool all_flushed; - size_t cqe_size = sizeof(struct io_uring_cqe); + size_t cqe_size = ctx->compat ? + sizeof(struct compat_io_uring_cqe) : + sizeof(struct io_uring_cqe);
if (!force && __io_cqring_events(ctx) == ctx->cq_entries) return false; @@ -741,10 +780,11 @@ bool io_req_cqe_overflow(struct io_kiocb *req) * control dependency is enough as we're using WRITE_ONCE to * fill the cq entry */ -struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow) +void *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow) { unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1); unsigned int free, queued, len; + void *cqe;
/* * Posting into the CQ when there are pending overflowed CQEs may break @@ -767,14 +807,15 @@ struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow) len <<= 1; }
- ctx->cqe_cached = &ctx->cqes[off]; + cqe = ctx->compat ? (void *)&ctx->cqes_compat[off] : (void *)&ctx->cqes[off]; + ctx->cqe_cached = cqe; ctx->cqe_sentinel = ctx->cqe_cached + len;
ctx->cached_cq_tail++; ctx->cqe_cached++; if (ctx->flags & IORING_SETUP_CQE32) ctx->cqe_cached++; - return &ctx->cqes[off]; + return cqe; }
bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags, @@ -793,14 +834,7 @@ bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags if (likely(cqe)) { trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
- WRITE_ONCE(cqe->user_data, user_data); - WRITE_ONCE(cqe->res, res); - WRITE_ONCE(cqe->flags, cflags); - - if (ctx->flags & IORING_SETUP_CQE32) { - WRITE_ONCE(cqe->big_cqe[0], 0); - WRITE_ONCE(cqe->big_cqe[1], 0); - } + __io_fill_cqe_any(ctx, cqe, user_data, res, cflags, 0, 0); return true; }
@@ -2222,7 +2256,7 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) * used, it's important that those reads are done through READ_ONCE() to * prevent a re-load down the line. */ -static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) +static const void *io_get_sqe(struct io_ring_ctx *ctx) { unsigned head, mask = ctx->sq_entries - 1; unsigned sq_idx = ctx->cached_sq_head++ & mask; @@ -2240,7 +2274,8 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) /* double index for 128-byte SQEs, twice as long */ if (ctx->flags & IORING_SETUP_SQE128) head <<= 1; - return &ctx->sq_sqes[head]; + return ctx->compat ? (void *)&ctx->sq_sqes_compat[head] + : (void *)&ctx->sq_sqes[head]; }
/* drop invalid entries */ @@ -2265,8 +2300,11 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) io_submit_state_start(&ctx->submit_state, left);
do { - const struct io_uring_sqe *sqe; + const void *sqe; struct io_kiocb *req; +#ifdef CONFIG_COMPAT64 + struct io_uring_sqe native_sqe; +#endif
if (unlikely(!io_alloc_req_refill(ctx))) break; @@ -2276,6 +2314,12 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) io_req_add_to_cache(req, ctx); break; } +#ifdef CONFIG_COMPAT64 + if (ctx->compat) { + convert_compat_io_uring_sqe(ctx, &native_sqe, sqe); + sqe = &native_sqe; + } +#endif
/* * Continue submitting even for sqe failure if the @@ -2480,6 +2524,9 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries { struct io_rings *rings; size_t off, cq_array_size, sq_array_size; + size_t cqe_size = ctx->compat ? + sizeof(struct compat_io_uring_cqe) : + sizeof(struct io_uring_cqe);
off = sizeof(*rings);
@@ -2492,7 +2539,7 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries if (cq_offset) *cq_offset = off;
- cq_array_size = array_size(sizeof(struct io_uring_cqe), cq_entries); + cq_array_size = array_size(cqe_size, cq_entries); if (cq_array_size == SIZE_MAX) return SIZE_MAX;
@@ -3120,20 +3167,22 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
#endif /* !CONFIG_MMU */
-static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz) +static int io_validate_ext_arg(struct io_ring_ctx *ctx, unsigned int flags, + const void __user *argp, size_t argsz) { if (flags & IORING_ENTER_EXT_ARG) { struct io_uring_getevents_arg arg; + int ret;
- if (argsz != sizeof(arg)) - return -EINVAL; - if (copy_from_user(&arg, argp, sizeof(arg))) - return -EFAULT; + ret = copy_io_uring_getevents_arg_from_user(ctx, &arg, argp, argsz); + if (ret) + return ret; } return 0; }
-static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, +static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned int flags, + const void __user *argp, size_t *argsz, #ifdef CONFIG_CHERI_PURECAP_UABI struct __kernel_timespec * __capability *ts, const sigset_t * __capability *sig) @@ -3143,6 +3192,7 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz #endif { struct io_uring_getevents_arg arg; + int ret;
/* * If EXT_ARG isn't set, then we have no timespec and the argp pointer @@ -3158,10 +3208,9 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz * EXT_ARG is set - ensure we agree on the size of it and copy in our * timespec and sigset_t pointers if good. */ - if (*argsz != sizeof(arg)) - return -EINVAL; - if (copy_from_user(&arg, argp, sizeof(arg))) - return -EFAULT; + ret = copy_io_uring_getevents_arg_from_user(ctx, &arg, argp, *argsz); + if (ret) + return ret; if (arg.pad) return -EINVAL; *sig = u64_to_user_ptr(arg.sigmask); @@ -3268,7 +3317,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ mutex_lock(&ctx->uring_lock); iopoll_locked: - ret2 = io_validate_ext_arg(flags, argp, argsz); + ret2 = io_validate_ext_arg(ctx, flags, argp, argsz); if (likely(!ret2)) { min_complete = min(min_complete, ctx->cq_entries); @@ -3279,7 +3328,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, const sigset_t __user *sig; struct __kernel_timespec __user *ts;
- ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); + ret2 = io_get_ext_arg(ctx, flags, argp, &argsz, &ts, &sig); if (likely(!ret2)) { min_complete = min(min_complete, ctx->cq_entries); @@ -3329,6 +3378,9 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, { struct io_rings *rings; size_t size, cqes_offset, sq_array_offset; + size_t sqe_size = ctx->compat ? + sizeof(struct compat_io_uring_sqe) : + sizeof(struct io_uring_sqe);
/* make sure these are sane, as we already accounted them */ ctx->sq_entries = p->sq_entries; @@ -3351,9 +3403,9 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, rings->cq_ring_entries = p->cq_entries;
if (p->flags & IORING_SETUP_SQE128) - size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries); + size = array_size(2 * sqe_size, p->sq_entries); else - size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); + size = array_size(sqe_size, p->sq_entries); if (size == SIZE_MAX) { io_mem_free(ctx->rings); ctx->rings = NULL; @@ -4107,48 +4159,45 @@ static int __init io_uring_init(void) #define BUILD_BUG_SQE_ELEM_SIZE(eoffset, esize, ename) \ __BUILD_BUG_VERIFY_OFFSET_SIZE(struct io_uring_sqe, eoffset, esize, ename) BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); - BUILD_BUG_SQE_ELEM(0, __u8, opcode); - BUILD_BUG_SQE_ELEM(1, __u8, flags); - BUILD_BUG_SQE_ELEM(2, __u16, ioprio); - BUILD_BUG_SQE_ELEM(4, __s32, fd); - BUILD_BUG_SQE_ELEM(8, __u64, off); - BUILD_BUG_SQE_ELEM(8, __u64, addr2); - BUILD_BUG_SQE_ELEM(8, __u32, cmd_op); + BUILD_BUG_SQE_ELEM(0, __u8, opcode); + BUILD_BUG_SQE_ELEM(1, __u8, flags); + BUILD_BUG_SQE_ELEM(2, __u16, ioprio); + BUILD_BUG_SQE_ELEM(4, __s32, fd); + BUILD_BUG_SQE_ELEM(8, __u64, off); + BUILD_BUG_SQE_ELEM(8, __u64, addr2); + BUILD_BUG_SQE_ELEM(8, __u32, cmd_op); BUILD_BUG_SQE_ELEM(12, __u32, __pad1); - BUILD_BUG_SQE_ELEM(16, __u64, addr); - BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); - BUILD_BUG_SQE_ELEM(24, __u32, len); - BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); - BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); - BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); - BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); - BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events); - BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); - BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); - BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); - BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); - BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); - BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); - BUILD_BUG_SQE_ELEM(28, __u32, open_flags); - BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); - BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); - 
BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); - BUILD_BUG_SQE_ELEM(28, __u32, rename_flags); - BUILD_BUG_SQE_ELEM(28, __u32, unlink_flags); - BUILD_BUG_SQE_ELEM(28, __u32, hardlink_flags); - BUILD_BUG_SQE_ELEM(28, __u32, xattr_flags); - BUILD_BUG_SQE_ELEM(28, __u32, msg_ring_flags); - BUILD_BUG_SQE_ELEM(32, __u64, user_data); - BUILD_BUG_SQE_ELEM(40, __u16, buf_index); - BUILD_BUG_SQE_ELEM(40, __u16, buf_group); - BUILD_BUG_SQE_ELEM(42, __u16, personality); - BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); - BUILD_BUG_SQE_ELEM(44, __u32, file_index); - BUILD_BUG_SQE_ELEM(44, __u16, addr_len); - BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); - BUILD_BUG_SQE_ELEM(48, __u64, addr3); + BUILD_BUG_SQE_ELEM(16, __u64, addr); + BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); + BUILD_BUG_SQE_ELEM(24, __u32, len); + BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); + BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); + BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); + BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); + BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); + BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); + BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); + BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); + BUILD_BUG_SQE_ELEM(28, __u32, open_flags); + BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); + BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); + BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); + BUILD_BUG_SQE_ELEM(28, __u32, rename_flags); + BUILD_BUG_SQE_ELEM(28, __u32, unlink_flags); + BUILD_BUG_SQE_ELEM(28, __u32, hardlink_flags); + BUILD_BUG_SQE_ELEM(28, __u32, xattr_flags); + BUILD_BUG_SQE_ELEM(28, __u32, msg_ring_flags); + BUILD_BUG_SQE_ELEM(32, __u64, user_data); + BUILD_BUG_SQE_ELEM(40, __u16, buf_index); + BUILD_BUG_SQE_ELEM(40, __u16, buf_group); + BUILD_BUG_SQE_ELEM(42, __u16, personality); + BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); + BUILD_BUG_SQE_ELEM(44, __u32, file_index); + BUILD_BUG_SQE_ELEM(44, __u16, addr_len); + BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); + BUILD_BUG_SQE_ELEM(48, 
__u64, addr3); BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); - BUILD_BUG_SQE_ELEM(56, __u64, __pad2); + BUILD_BUG_SQE_ELEM(56, __u64, __pad2);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) != sizeof(struct io_uring_rsrc_update)); @@ -4160,6 +4209,65 @@ static int __init io_uring_init(void) BUILD_BUG_ON(offsetof(struct io_uring_buf, resv) != offsetof(struct io_uring_buf_ring, tail));
+#ifdef CONFIG_COMPAT64 +#define BUILD_BUG_COMPAT_SQE_ELEM(eoffset, etype, ename) \ + __BUILD_BUG_VERIFY_OFFSET_SIZE(struct compat_io_uring_sqe, eoffset, sizeof(etype), ename) +#define BUILD_BUG_COMPAT_SQE_ELEM_SIZE(eoffset, esize, ename) \ + __BUILD_BUG_VERIFY_OFFSET_SIZE(struct compat_io_uring_sqe, eoffset, esize, ename) + BUILD_BUG_ON(sizeof(struct compat_io_uring_sqe) != 64); + BUILD_BUG_COMPAT_SQE_ELEM(0, __u8, opcode); + BUILD_BUG_COMPAT_SQE_ELEM(1, __u8, flags); + BUILD_BUG_COMPAT_SQE_ELEM(2, __u16, ioprio); + BUILD_BUG_COMPAT_SQE_ELEM(4, __s32, fd); + BUILD_BUG_COMPAT_SQE_ELEM(8, __u64, off); + BUILD_BUG_COMPAT_SQE_ELEM(8, __u64, addr2); + BUILD_BUG_COMPAT_SQE_ELEM(8, __u32, cmd_op); + BUILD_BUG_COMPAT_SQE_ELEM(12, __u32, __pad1); + BUILD_BUG_COMPAT_SQE_ELEM(16, __u64, addr); + BUILD_BUG_COMPAT_SQE_ELEM(16, __u64, splice_off_in); + BUILD_BUG_COMPAT_SQE_ELEM(24, __u32, len); + BUILD_BUG_COMPAT_SQE_ELEM(28, __kernel_rwf_t, rw_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, /* compat */ int, rw_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, /* compat */ __u32, rw_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, fsync_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, /* compat */ __u16, poll_events); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, poll32_events); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, sync_range_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, msg_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, timeout_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, accept_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, cancel_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, open_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, statx_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, fadvise_advice); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, splice_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, rename_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, unlink_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, hardlink_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, xattr_flags); + BUILD_BUG_COMPAT_SQE_ELEM(28, __u32, 
msg_ring_flags); + BUILD_BUG_COMPAT_SQE_ELEM(32, __u64, user_data); + BUILD_BUG_COMPAT_SQE_ELEM(40, __u16, buf_index); + BUILD_BUG_COMPAT_SQE_ELEM(40, __u16, buf_group); + BUILD_BUG_COMPAT_SQE_ELEM(42, __u16, personality); + BUILD_BUG_COMPAT_SQE_ELEM(44, __s32, splice_fd_in); + BUILD_BUG_COMPAT_SQE_ELEM(44, __u32, file_index); + BUILD_BUG_COMPAT_SQE_ELEM(44, __u16, addr_len); + BUILD_BUG_COMPAT_SQE_ELEM(46, __u16, __pad3[0]); + BUILD_BUG_COMPAT_SQE_ELEM(48, __u64, addr3); + BUILD_BUG_COMPAT_SQE_ELEM_SIZE(48, 0, cmd); + BUILD_BUG_COMPAT_SQE_ELEM(56, __u64, __pad2); + + BUILD_BUG_ON(sizeof(struct compat_io_uring_files_update) != + sizeof(struct compat_io_uring_rsrc_update)); + BUILD_BUG_ON(sizeof(struct compat_io_uring_rsrc_update) > + sizeof(struct compat_io_uring_rsrc_update2)); + + BUILD_BUG_ON(offsetof(struct compat_io_uring_buf_ring, bufs) != 0); + BUILD_BUG_ON(offsetof(struct compat_io_uring_buf, resv) != + offsetof(struct compat_io_uring_buf_ring, tail)); +#endif /* CONFIG_COMPAT64 */ + /* should fit into one byte */ BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); BUILD_BUG_ON(SQE_COMMON_FLAGS >= (1 << 8)); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 50bc3af449534..fb2711770bfb0 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -5,6 +5,7 @@ #include <linux/lockdep.h> #include <linux/io_uring_types.h> #include "io-wq.h" +#include "uring_cmd.h" #include "slist.h" #include "filetable.h"
@@ -24,7 +25,7 @@ enum { IOU_STOP_MULTISHOT = -ECANCELED, };
-struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow); +void *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow); bool io_req_cqe_overflow(struct io_kiocb *req); int io_run_task_work_sig(struct io_ring_ctx *ctx); int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked); @@ -93,8 +94,67 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx)
void io_cq_unlock_post(struct io_ring_ctx *ctx);
-static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx, - bool overflow) +#ifdef CONFIG_COMPAT64 +static inline void convert_compat_io_uring_cqe(struct io_ring_ctx *ctx, + struct io_uring_cqe *cqe, + const struct compat_io_uring_cqe *compat_cqe) +{ + cqe->user_data = READ_ONCE(compat_cqe->user_data); + cqe->res = READ_ONCE(compat_cqe->res); + cqe->flags = READ_ONCE(compat_cqe->flags); + + if (ctx->flags & IORING_SETUP_CQE32) { + cqe->big_cqe[0] = READ_ONCE(compat_cqe->big_cqe[0]); + cqe->big_cqe[1] = READ_ONCE(compat_cqe->big_cqe[1]); + } +} + +static inline void convert_compat_io_uring_sqe(struct io_ring_ctx *ctx, + struct io_uring_sqe *sqe, + const struct compat_io_uring_sqe *compat_sqe) +{ +/* + * The struct io_uring_sqe contains anonymous unions and there is no field + * keeping track of which union's member is active. Because in all the cases, + * the unions are between integral types and the types are compatible, use the + * largest member of each union to perform the copy. Use this compile-time check + * to ensure that the union's members are not truncated during the conversion. 
+ */ +#define BUILD_BUG_COMPAT_SQE_UNION_ELEM(elem1, elem2) \ + BUILD_BUG_ON(sizeof_field(struct compat_io_uring_sqe, elem1) != \ + (offsetof(struct compat_io_uring_sqe, elem2) - \ + offsetof(struct compat_io_uring_sqe, elem1))) + + sqe->opcode = READ_ONCE(compat_sqe->opcode); + sqe->flags = READ_ONCE(compat_sqe->flags); + sqe->ioprio = READ_ONCE(compat_sqe->ioprio); + sqe->fd = READ_ONCE(compat_sqe->fd); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(addr2, addr); + sqe->addr2 = READ_ONCE(compat_sqe->addr2); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(addr, len); + sqe->addr = READ_ONCE(compat_sqe->addr); + sqe->len = READ_ONCE(compat_sqe->len); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(rw_flags, user_data); + sqe->rw_flags = READ_ONCE(compat_sqe->rw_flags); + sqe->user_data = READ_ONCE(compat_sqe->user_data); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(buf_index, personality); + sqe->buf_index = READ_ONCE(compat_sqe->buf_index); + sqe->personality = READ_ONCE(compat_sqe->personality); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(splice_fd_in, addr3); + sqe->splice_fd_in = READ_ONCE(compat_sqe->splice_fd_in); + if (sqe->opcode == IORING_OP_URING_CMD) { + size_t compat_cmd_size = compat_uring_cmd_pdu_size(ctx->flags & + IORING_SETUP_SQE128); + + memcpy(sqe->cmd, compat_sqe->cmd, compat_cmd_size); + } else + sqe->addr3 = READ_ONCE(compat_sqe->addr3); +#undef BUILD_BUG_COMPAT_SQE_UNION_ELEM +} +#endif /* CONFIG_COMPAT64 */ + +static inline void *io_get_cqe_overflow(struct io_ring_ctx *ctx, + bool overflow) { if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) { struct io_uring_cqe *cqe = ctx->cqe_cached; @@ -109,15 +169,46 @@ static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx, return __io_get_cqe(ctx, overflow); }
-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) +static inline void *io_get_cqe(struct io_ring_ctx *ctx) { return io_get_cqe_overflow(ctx, false); }
+static inline void __io_fill_cqe_any(struct io_ring_ctx *ctx, struct io_uring_cqe *cqe, + u64 user_data, s32 res, u32 cflags, + u64 extra1, u64 extra2) +{ +#ifdef CONFIG_COMPAT64 + if (ctx->compat) { + struct compat_io_uring_cqe *compat_cqe = (struct compat_io_uring_cqe *)cqe; + + WRITE_ONCE(compat_cqe->user_data, user_data); + WRITE_ONCE(compat_cqe->res, res); + WRITE_ONCE(compat_cqe->flags, cflags); + + if (ctx->flags & IORING_SETUP_CQE32) { + WRITE_ONCE(compat_cqe->big_cqe[0], extra1); + WRITE_ONCE(compat_cqe->big_cqe[1], extra2); + } + return; + } +#endif + WRITE_ONCE(cqe->user_data, user_data); + WRITE_ONCE(cqe->res, res); + WRITE_ONCE(cqe->flags, cflags); + + if (ctx->flags & IORING_SETUP_CQE32) { + WRITE_ONCE(cqe->big_cqe[0], extra1); + WRITE_ONCE(cqe->big_cqe[1], extra2); + } +} + static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx, struct io_kiocb *req) { struct io_uring_cqe *cqe; + u64 extra1 = 0; + u64 extra2 = 0;
/* * If we can't get a cq entry, userspace overflowed the @@ -128,24 +219,17 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx, if (unlikely(!cqe)) return io_req_cqe_overflow(req);
+ if (ctx->flags & IORING_SETUP_CQE32 && req->flags & REQ_F_CQE32_INIT) { + extra1 = req->extra1; + extra2 = req->extra2; + } + trace_io_uring_complete(req->ctx, req, req->cqe.user_data, req->cqe.res, req->cqe.flags, - (req->flags & REQ_F_CQE32_INIT) ? req->extra1 : 0, - (req->flags & REQ_F_CQE32_INIT) ? req->extra2 : 0); + extra1, extra2);
- memcpy(cqe, &req->cqe, sizeof(*cqe)); - - if (ctx->flags & IORING_SETUP_CQE32) { - u64 extra1 = 0, extra2 = 0; - - if (req->flags & REQ_F_CQE32_INIT) { - extra1 = req->extra1; - extra2 = req->extra2; - } - - WRITE_ONCE(cqe->big_cqe[0], extra1); - WRITE_ONCE(cqe->big_cqe[1], extra2); - } + __io_fill_cqe_any(ctx, cqe, req->cqe.user_data, req->cqe.res, + req->cqe.flags, extra1, extra2); return true; }
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index e2c46889d5fab..9f840ba93c888 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -16,6 +16,9 @@ #include "kbuf.h"
#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) +#ifdef CONFIG_COMPAT64 +#define IO_BUFFER_LIST_COMPAT_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct compat_io_uring_buf)) +#endif
#define BGID_ARRAY 64
@@ -28,6 +31,42 @@ struct io_provide_buf { __u16 bid; };
+#ifdef CONFIG_COMPAT64 +struct compat_io_uring_buf_reg { + __u64 ring_addr; + __u32 ring_entries; + __u16 bgid; + __u16 pad; + __u64 resv[3]; +}; + +static int get_compat_io_uring_buf_reg(struct io_uring_buf_reg *reg, + const void __user *user_reg) +{ + struct compat_io_uring_buf_reg compat_reg; + + if (unlikely(copy_from_user(&compat_reg, user_reg, sizeof(compat_reg)))) + return -EFAULT; + reg->ring_addr = compat_reg.ring_addr; + reg->ring_entries = compat_reg.ring_entries; + reg->bgid = compat_reg.bgid; + reg->pad = compat_reg.pad; + memcpy(reg->resv, compat_reg.resv, sizeof(reg->resv)); + return 0; +} +#endif + +static int copy_io_uring_buf_reg_from_user(struct io_ring_ctx *ctx, + struct io_uring_buf_reg *reg, + const void __user *arg) +{ +#ifdef CONFIG_COMPAT64 + if (ctx->compat) + return get_compat_io_uring_buf_reg(reg, arg); +#endif + return copy_from_user(reg, arg, sizeof(*reg)); +} + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { @@ -125,6 +164,41 @@ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, return NULL; }
+#ifdef CONFIG_COMPAT64 +static void __user *io_ring_buffer_select_compat(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl, + unsigned int issue_flags) +{ + struct compat_io_uring_buf_ring *br = bl->buf_ring_compat; + struct compat_io_uring_buf *buf; + __u16 head = bl->head; + + if (unlikely(smp_load_acquire(&br->tail) == head)) + return NULL; + + head &= bl->mask; + if (head < IO_BUFFER_LIST_COMPAT_BUF_PER_PAGE) { + buf = &br->bufs[head]; + } else { + int off = head & (IO_BUFFER_LIST_COMPAT_BUF_PER_PAGE - 1); + int index = head / IO_BUFFER_LIST_COMPAT_BUF_PER_PAGE; + buf = page_address(bl->buf_pages[index]); + buf += off; + } + if (*len == 0 || *len > buf->len) + *len = buf->len; + req->flags |= REQ_F_BUFFER_RING; + req->buf_list = bl; + req->buf_index = buf->bid; + + if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) { + req->buf_list = NULL; + bl->head++; + } + return compat_ptr(buf->addr); +} +#endif /* CONFIG_COMPAT64 */ + static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl, unsigned int issue_flags) @@ -168,6 +242,17 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, return u64_to_user_ptr(buf->addr); }
+static void __user *io_ring_buffer_select_any(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl, + unsigned int issue_flags) +{ +#ifdef CONFIG_COMPAT64 + if (req->ctx->compat) + return io_ring_buffer_select_compat(req, len, bl, issue_flags); +#endif + return io_ring_buffer_select(req, len, bl, issue_flags); +} + void __user *io_buffer_select(struct io_kiocb *req, size_t *len, unsigned int issue_flags) { @@ -180,7 +265,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, bl = io_buffer_get_list(ctx, req->buf_index); if (likely(bl)) { if (bl->buf_nr_pages) - ret = io_ring_buffer_select(req, len, bl, issue_flags); + ret = io_ring_buffer_select_any(req, len, bl, issue_flags); else ret = io_provided_buffer_select(req, len, bl); } @@ -215,9 +300,12 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, return 0;
if (bl->buf_nr_pages) { + __u16 tail = ctx->compat ? + bl->buf_ring_compat->tail : + bl->buf_ring->tail; int j;
- i = bl->buf_ring->tail - bl->head; + i = tail - bl->head; for (j = 0; j < bl->buf_nr_pages; j++) unpin_user_page(bl->buf_pages[j]); kvfree(bl->buf_pages); @@ -469,13 +557,13 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) { - struct io_uring_buf_ring *br; struct io_uring_buf_reg reg; struct io_buffer_list *bl, *free_bl = NULL; struct page **pages; + size_t pages_size; int nr_pages;
- if (copy_from_user(&reg, arg, sizeof(reg))) + if (copy_io_uring_buf_reg_from_user(ctx, &reg, arg)) return -EFAULT;
if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2]) @@ -508,19 +596,19 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -ENOMEM; }
- pages = io_pin_pages(reg.ring_addr, - struct_size(br, bufs, reg.ring_entries), - &nr_pages); + pages_size = ctx->compat ? + size_mul(sizeof(struct compat_io_uring_buf), reg.ring_entries) : + size_mul(sizeof(struct io_uring_buf), reg.ring_entries); + pages = io_pin_pages(reg.ring_addr, pages_size, &nr_pages); if (IS_ERR(pages)) { kfree(free_bl); return PTR_ERR(pages); }
- br = page_address(pages[0]); bl->buf_pages = pages; bl->buf_nr_pages = nr_pages; bl->nr_entries = reg.ring_entries; - bl->buf_ring = br; + bl->buf_ring = page_address(pages[0]); bl->mask = reg.ring_entries - 1; io_buffer_add_list(ctx, bl, reg.bgid); return 0; @@ -531,7 +619,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct io_uring_buf_reg reg; struct io_buffer_list *bl;
- if (copy_from_user(&reg, arg, sizeof(reg))) + if (copy_io_uring_buf_reg_from_user(ctx, &reg, arg)) return -EFAULT; if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2]) return -EINVAL; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index c23e15d7d3caf..1aa5bbbc5d628 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -2,6 +2,7 @@ #ifndef IOU_KBUF_H #define IOU_KBUF_H
+#include <linux/io_uring_types.h> #include <uapi/linux/io_uring.h>
struct io_buffer_list { @@ -13,7 +14,10 @@ struct io_buffer_list { struct list_head buf_list; struct { struct page **buf_pages; - struct io_uring_buf_ring *buf_ring; + union { + struct io_uring_buf_ring *buf_ring; + struct compat_io_uring_buf_ring *buf_ring_compat; + }; }; }; __u16 bgid; diff --git a/io_uring/net.c b/io_uring/net.c index c586278858e7e..4c133bc6f9d1d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -4,6 +4,7 @@ #include <linux/file.h> #include <linux/slab.h> #include <linux/net.h> +#include <linux/uio.h> #include <linux/compat.h> #include <net/compat.h> #include <linux/io_uring.h> @@ -435,7 +436,9 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req, } else if (msg.msg_iovlen > 1) { return -EINVAL; } else { - if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) + void *iov = iovec_from_user(msg.msg_iov, 1, 1, iomsg->fast_iov, + req->ctx->compat); + if (IS_ERR(iov)) return -EFAULT; sr->len = iomsg->fast_iov[0].iov_len; iomsg->free_iov = NULL; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 41e192de9e8a7..eafdb039f9e51 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -23,6 +23,106 @@ struct io_rsrc_update { u32 offset; };
+#ifdef CONFIG_COMPAT64 +static int get_compat_io_uring_rsrc_update(struct io_uring_rsrc_update2 *up2, + const void __user *user_up) +{ + struct compat_io_uring_rsrc_update compat_up; + + if (unlikely(copy_from_user(&compat_up, user_up, sizeof(compat_up)))) + return -EFAULT; + up2->offset = compat_up.offset; + up2->resv = compat_up.resv; + up2->data = compat_up.data; + return 0; +} + +static int get_compat_io_uring_rsrc_update2(struct io_uring_rsrc_update2 *up2, + const void __user *user_up2) +{ + struct compat_io_uring_rsrc_update2 compat_up2; + + if (unlikely(copy_from_user(&compat_up2, user_up2, sizeof(compat_up2)))) + return -EFAULT; + up2->offset = compat_up2.offset; + up2->resv = compat_up2.resv; + up2->data = compat_up2.data; + up2->tags = compat_up2.tags; + up2->nr = compat_up2.nr; + up2->resv2 = compat_up2.resv2; + return 0; +} + +static int get_compat_io_uring_rsrc_register(struct io_uring_rsrc_register *rr, + const void __user *user_rr) +{ + struct compat_io_uring_rsrc_register compat_rr; + + if (unlikely(copy_from_user(&compat_rr, user_rr, sizeof(compat_rr)))) + return -EFAULT; + rr->nr = compat_rr.nr; + rr->flags = compat_rr.flags; + rr->resv2 = compat_rr.resv2; + rr->data = compat_rr.data; + rr->tags = compat_rr.tags; + return 0; +} +#endif /* CONFIG_COMPAT64 */ + +static int copy_io_uring_rsrc_update_from_user(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update2 *up2, + const void __user *arg) +{ +#ifdef CONFIG_COMPAT64 + if (ctx->compat) + return get_compat_io_uring_rsrc_update(up2, arg); +#endif + return copy_from_user(up2, arg, sizeof(struct io_uring_rsrc_update)); +} + +static int copy_io_uring_rsrc_update2_from_user(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update2 *up2, + const void __user *arg, + size_t size) +{ +#ifdef CONFIG_COMPAT64 + if (ctx->compat) { + if (size != sizeof(struct compat_io_uring_rsrc_update2)) + return -EINVAL; + if (get_compat_io_uring_rsrc_update2(up2, arg)) + return -EFAULT; + return 0; + } +#endif
+ if (size != sizeof(*up2)) + return -EINVAL; + if (copy_from_user(up2, arg, sizeof(*up2))) + return -EFAULT; + return 0; +} + +static int copy_io_uring_rsrc_register_from_user(struct io_ring_ctx *ctx, + struct io_uring_rsrc_register *rr, + const void __user *arg, + size_t size) +{ +#ifdef CONFIG_COMPAT64 + if (ctx->compat) { + if (size != sizeof(struct compat_io_uring_rsrc_register)) + return -EINVAL; + if (get_compat_io_uring_rsrc_register(rr, arg)) + return -EFAULT; + return 0; + } +#endif + /* keep it extendible */ + if (size != sizeof(*rr)) + return -EINVAL; + if (copy_from_user(rr, arg, size)) + return -EFAULT; + return 0; +} + static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, struct io_mapped_ubuf **pimu, struct page **last_hpage); @@ -597,12 +697,14 @@ int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) { struct io_uring_rsrc_update2 up; + int ret;
if (!nr_args) return -EINVAL; memset(&up, 0, sizeof(up)); - if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) - return -EFAULT; + ret = copy_io_uring_rsrc_update_from_user(ctx, &up, arg); + if (ret) + return ret; if (up.resv || up.resv2) return -EINVAL; return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); @@ -612,11 +714,11 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, unsigned size, unsigned type) { struct io_uring_rsrc_update2 up; + int ret;
- if (size != sizeof(up)) - return -EINVAL; - if (copy_from_user(&up, arg, sizeof(up))) - return -EFAULT; + ret = copy_io_uring_rsrc_update2_from_user(ctx, &up, arg, size); + if (ret) + return ret; if (!up.nr || up.resv || up.resv2) return -EINVAL; return __io_register_rsrc_update(ctx, type, &up, up.nr); @@ -626,14 +728,11 @@ __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, unsigned int size, unsigned int type) { struct io_uring_rsrc_register rr; + int ret;
- /* keep it extendible */ - if (size != sizeof(rr)) - return -EINVAL; - - memset(&rr, 0, sizeof(rr)); - if (copy_from_user(&rr, arg, size)) - return -EFAULT; + ret = copy_io_uring_rsrc_register_from_user(ctx, &rr, arg, size); + if (ret) + return ret; if (!rr.nr || rr.resv2) return -EINVAL; if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE) diff --git a/io_uring/tctx.c b/io_uring/tctx.c index 96f77450cf4e2..6ab9916ed3844 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -12,6 +12,32 @@ #include "io_uring.h" #include "tctx.h"
+#ifdef CONFIG_COMPAT64 +static int get_compat_io_uring_rsrc_update(struct io_uring_rsrc_update *up, + const void __user *user_up) +{ + struct compat_io_uring_rsrc_update compat_up; + + if (unlikely(copy_from_user(&compat_up, user_up, sizeof(compat_up)))) + return -EFAULT; + up->offset = compat_up.offset; + up->resv = compat_up.resv; + up->data = compat_up.data; + return 0; +} +#endif /* CONFIG_COMPAT64 */ + +static int copy_io_uring_rsrc_update_from_user(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update *up, + const void __user *arg) +{ +#ifdef CONFIG_COMPAT64 + if (ctx->compat) + return get_compat_io_uring_rsrc_update(up, arg); +#endif + return copy_from_user(up, arg, sizeof(struct io_uring_rsrc_update)); +} + static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, struct task_struct *task) { @@ -244,8 +270,6 @@ static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd, int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, unsigned nr_args) { - struct io_uring_rsrc_update __user *arg = __arg; - struct io_uring_rsrc_update reg; struct io_uring_task *tctx; int ret, i;
@@ -260,9 +284,17 @@ int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
tctx = current->io_uring; for (i = 0; i < nr_args; i++) { + void __user *arg; + __u32 __user *arg_offset; + struct io_uring_rsrc_update reg; int start, end;
- if (copy_from_user(&reg, &arg[i], sizeof(reg))) { + if (ctx->compat) + arg = &((struct compat_io_uring_rsrc_update __user *)__arg)[i]; + else + arg = &((struct io_uring_rsrc_update __user *)__arg)[i]; + + if (copy_io_uring_rsrc_update_from_user(ctx, &reg, arg)) { ret = -EFAULT; break; } @@ -288,8 +320,10 @@ int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, if (ret < 0) break;
- reg.offset = ret; - if (put_user(reg.offset, &arg[i].offset)) { + arg_offset = ctx->compat ? + &((struct compat_io_uring_rsrc_update __user *)arg)->offset : + &((struct io_uring_rsrc_update __user *)arg)->offset; + if (put_user(reg.offset, arg_offset)) { fput(tctx->registered_rings[reg.offset]); tctx->registered_rings[reg.offset] = NULL; ret = -EFAULT; @@ -303,9 +337,7 @@ int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, unsigned nr_args) { - struct io_uring_rsrc_update __user *arg = __arg; struct io_uring_task *tctx = current->io_uring; - struct io_uring_rsrc_update reg; int ret = 0, i;
if (!nr_args || nr_args > IO_RINGFD_REG_MAX) @@ -314,10 +346,19 @@ int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, return 0;
for (i = 0; i < nr_args; i++) { - if (copy_from_user(&reg, &arg[i], sizeof(reg))) { + void __user *arg; + struct io_uring_rsrc_update reg; + + if (ctx->compat) + arg = &((struct compat_io_uring_rsrc_update __user *)__arg)[i]; + else + arg = &((struct io_uring_rsrc_update __user *)__arg)[i]; + + if (copy_io_uring_rsrc_update_from_user(ctx, &reg, arg)) { ret = -EFAULT; break; } + if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) { ret = -EINVAL; break; } diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index 7c6697d13cb2e..d67bb30ad543b 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -11,3 +11,10 @@ int io_uring_cmd_prep_async(struct io_kiocb *req); #define uring_cmd_pdu_size(is_sqe128) \ ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \ offsetof(struct io_uring_sqe, cmd)) + +#ifdef CONFIG_COMPAT64 +#define compat_uring_cmd_pdu_size(is_sqe128) \ + ((1 + !!(is_sqe128)) * sizeof(struct compat_io_uring_sqe) - \ + offsetof(struct compat_io_uring_sqe, cmd)) +#endif +