mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-11 09:00:12 +00:00
ctx->tcxt_list holds the tasks using this ring, and it's currently protected by the normal ctx->uring_lock. However, this can cause a circular locking issue, as reported by syzbot, where cancelations off exec end up needing to remove an entry from this list: ====================================================== WARNING: possible circular locking dependency detected syzkaller #0 Tainted: G L ------------------------------------------------------ syz.0.9999/12287 is trying to acquire lock: ffff88805851c0a8 (&ctx->uring_lock){+.+.}-{4:4}, at: io_uring_del_tctx_node+0xf0/0x2c0 io_uring/tctx.c:179 but task is already holding lock: ffff88802db5a2e0 (&sig->cred_guard_mutex){+.+.}-{4:4}, at: prepare_bprm_creds fs/exec.c:1360 [inline] ffff88802db5a2e0 (&sig->cred_guard_mutex){+.+.}-{4:4}, at: bprm_execve+0xb9/0x1400 fs/exec.c:1733 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&sig->cred_guard_mutex){+.+.}-{4:4}: __mutex_lock_common kernel/locking/mutex.c:614 [inline] __mutex_lock+0x187/0x1350 kernel/locking/mutex.c:776 proc_pid_attr_write+0x547/0x630 fs/proc/base.c:2837 vfs_write+0x27e/0xb30 fs/read_write.c:684 ksys_write+0x145/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #1 (sb_writers#3){.+.+}-{0:0}: percpu_down_read_internal include/linux/percpu-rwsem.h:53 [inline] percpu_down_read_freezable include/linux/percpu-rwsem.h:83 [inline] __sb_start_write include/linux/fs/super.h:19 [inline] sb_start_write+0x4d/0x1c0 include/linux/fs/super.h:125 mnt_want_write+0x41/0x90 fs/namespace.c:499 open_last_lookups fs/namei.c:4529 [inline] path_openat+0xadd/0x3dd0 fs/namei.c:4784 do_filp_open+0x1fa/0x410 fs/namei.c:4814 io_openat2+0x3e0/0x5c0 io_uring/openclose.c:143 __io_issue_sqe+0x181/0x4b0 io_uring/io_uring.c:1792 io_issue_sqe+0x165/0x1060 io_uring/io_uring.c:1815 io_queue_sqe 
io_uring/io_uring.c:2042 [inline] io_submit_sqe io_uring/io_uring.c:2320 [inline] io_submit_sqes+0xbf4/0x2140 io_uring/io_uring.c:2434 __do_sys_io_uring_enter io_uring/io_uring.c:3280 [inline] __se_sys_io_uring_enter+0x2e0/0x2b60 io_uring/io_uring.c:3219 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (&ctx->uring_lock){+.+.}-{4:4}: check_prev_add kernel/locking/lockdep.c:3165 [inline] check_prevs_add kernel/locking/lockdep.c:3284 [inline] validate_chain kernel/locking/lockdep.c:3908 [inline] __lock_acquire+0x15a6/0x2cf0 kernel/locking/lockdep.c:5237 lock_acquire+0x107/0x340 kernel/locking/lockdep.c:5868 __mutex_lock_common kernel/locking/mutex.c:614 [inline] __mutex_lock+0x187/0x1350 kernel/locking/mutex.c:776 io_uring_del_tctx_node+0xf0/0x2c0 io_uring/tctx.c:179 io_uring_clean_tctx+0xd4/0x1a0 io_uring/tctx.c:195 io_uring_cancel_generic+0x6ca/0x7d0 io_uring/cancel.c:646 io_uring_task_cancel include/linux/io_uring.h:24 [inline] begin_new_exec+0x10ed/0x2440 fs/exec.c:1131 load_elf_binary+0x9f8/0x2d70 fs/binfmt_elf.c:1010 search_binary_handler fs/exec.c:1669 [inline] exec_binprm fs/exec.c:1701 [inline] bprm_execve+0x92e/0x1400 fs/exec.c:1753 do_execveat_common+0x510/0x6a0 fs/exec.c:1859 do_execve fs/exec.c:1933 [inline] __do_sys_execve fs/exec.c:2009 [inline] __se_sys_execve fs/exec.c:2004 [inline] __x64_sys_execve+0x94/0xb0 fs/exec.c:2004 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Chain exists of: &ctx->uring_lock --> sb_writers#3 --> &sig->cred_guard_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sig->cred_guard_mutex); lock(sb_writers#3); lock(&sig->cred_guard_mutex); lock(&ctx->uring_lock); *** DEADLOCK *** 1 lock held by syz.0.9999/12287: #0: ffff88802db5a2e0 
(&sig->cred_guard_mutex){+.+.}-{4:4}, at: prepare_bprm_creds fs/exec.c:1360 [inline] #0: ffff88802db5a2e0 (&sig->cred_guard_mutex){+.+.}-{4:4}, at: bprm_execve+0xb9/0x1400 fs/exec.c:1733 stack backtrace: CPU: 0 UID: 0 PID: 12287 Comm: syz.0.9999 Tainted: G L syzkaller #0 PREEMPT(full) Tainted: [L]=SOFTLOCKUP Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025 Call Trace: <TASK> dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120 print_circular_bug+0x2e2/0x300 kernel/locking/lockdep.c:2043 check_noncircular+0x12e/0x150 kernel/locking/lockdep.c:2175 check_prev_add kernel/locking/lockdep.c:3165 [inline] check_prevs_add kernel/locking/lockdep.c:3284 [inline] validate_chain kernel/locking/lockdep.c:3908 [inline] __lock_acquire+0x15a6/0x2cf0 kernel/locking/lockdep.c:5237 lock_acquire+0x107/0x340 kernel/locking/lockdep.c:5868 __mutex_lock_common kernel/locking/mutex.c:614 [inline] __mutex_lock+0x187/0x1350 kernel/locking/mutex.c:776 io_uring_del_tctx_node+0xf0/0x2c0 io_uring/tctx.c:179 io_uring_clean_tctx+0xd4/0x1a0 io_uring/tctx.c:195 io_uring_cancel_generic+0x6ca/0x7d0 io_uring/cancel.c:646 io_uring_task_cancel include/linux/io_uring.h:24 [inline] begin_new_exec+0x10ed/0x2440 fs/exec.c:1131 load_elf_binary+0x9f8/0x2d70 fs/binfmt_elf.c:1010 search_binary_handler fs/exec.c:1669 [inline] exec_binprm fs/exec.c:1701 [inline] bprm_execve+0x92e/0x1400 fs/exec.c:1753 do_execveat_common+0x510/0x6a0 fs/exec.c:1859 do_execve fs/exec.c:1933 [inline] __do_sys_execve fs/exec.c:2009 [inline] __se_sys_execve fs/exec.c:2004 [inline] __x64_sys_execve+0x94/0xb0 fs/exec.c:2004 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff3a8b8f749 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 
002b:00007ff3a9a97038 EFLAGS: 00000246 ORIG_RAX: 000000000000003b RAX: ffffffffffffffda RBX: 00007ff3a8de5fa0 RCX: 00007ff3a8b8f749 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000200000000400 RBP: 00007ff3a8c13f91 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ff3a8de6038 R14: 00007ff3a8de5fa0 R15: 00007ff3a8f0fa28 </TASK> Add a separate lock just for the tctx_list, tctx_lock. This can nest under ->uring_lock, where necessary, and be used separately for list manipulation. For the cancelation off exec side, this removes the need to grab ->uring_lock, hence fixing the circular locking dependency. Reported-by: syzbot+b0e3b77ffaa8a4067ce5@syzkaller.appspotmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
662 lines
16 KiB
C
662 lines
16 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/file.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/nospec.h>
|
|
#include <linux/io_uring.h>
|
|
|
|
#include <uapi/linux/io_uring.h>
|
|
|
|
#include "filetable.h"
|
|
#include "io_uring.h"
|
|
#include "tctx.h"
|
|
#include "sqpoll.h"
|
|
#include "uring_cmd.h"
|
|
#include "poll.h"
|
|
#include "timeout.h"
|
|
#include "waitid.h"
|
|
#include "futex.h"
|
|
#include "cancel.h"
|
|
|
|
struct io_cancel {
|
|
struct file *file;
|
|
u64 addr;
|
|
u32 flags;
|
|
s32 fd;
|
|
u8 opcode;
|
|
};
|
|
|
|
/*
 * Mask of every cancel flag accepted from userspace. Both the SQE prep path
 * and the sync cancel path reject any bit outside of it with -EINVAL.
 */
#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL	| \
			 IORING_ASYNC_CANCEL_FD		| \
			 IORING_ASYNC_CANCEL_ANY	| \
			 IORING_ASYNC_CANCEL_FD_FIXED	| \
			 IORING_ASYNC_CANCEL_USERDATA	| \
			 IORING_ASYNC_CANCEL_OP)
|
|
|
|
/*
|
|
* Returns true if the request matches the criteria outlined by 'cd'.
|
|
*/
|
|
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
|
|
{
|
|
bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;
|
|
|
|
if (req->ctx != cd->ctx)
|
|
return false;
|
|
|
|
if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
|
|
match_user_data = true;
|
|
|
|
if (cd->flags & IORING_ASYNC_CANCEL_ANY)
|
|
goto check_seq;
|
|
if (cd->flags & IORING_ASYNC_CANCEL_FD) {
|
|
if (req->file != cd->file)
|
|
return false;
|
|
}
|
|
if (cd->flags & IORING_ASYNC_CANCEL_OP) {
|
|
if (req->opcode != cd->opcode)
|
|
return false;
|
|
}
|
|
if (match_user_data && req->cqe.user_data != cd->data)
|
|
return false;
|
|
if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
|
|
check_seq:
|
|
if (io_cancel_match_sequence(req, cd->seq))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool io_cancel_cb(struct io_wq_work *work, void *data)
|
|
{
|
|
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
|
|
struct io_cancel_data *cd = data;
|
|
|
|
return io_cancel_req_match(req, cd);
|
|
}
|
|
|
|
static int io_async_cancel_one(struct io_uring_task *tctx,
|
|
struct io_cancel_data *cd)
|
|
{
|
|
enum io_wq_cancel cancel_ret;
|
|
int ret = 0;
|
|
bool all;
|
|
|
|
if (!tctx || !tctx->io_wq)
|
|
return -ENOENT;
|
|
|
|
all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
|
|
cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
|
|
switch (cancel_ret) {
|
|
case IO_WQ_CANCEL_OK:
|
|
ret = 0;
|
|
break;
|
|
case IO_WQ_CANCEL_RUNNING:
|
|
ret = -EALREADY;
|
|
break;
|
|
case IO_WQ_CANCEL_NOTFOUND:
|
|
ret = -ENOENT;
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
|
|
unsigned issue_flags)
|
|
{
|
|
struct io_ring_ctx *ctx = cd->ctx;
|
|
int ret;
|
|
|
|
WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);
|
|
|
|
ret = io_async_cancel_one(tctx, cd);
|
|
/*
|
|
* Fall-through even for -EALREADY, as we may have poll armed
|
|
* that need unarming.
|
|
*/
|
|
if (!ret)
|
|
return 0;
|
|
|
|
ret = io_poll_cancel(ctx, cd, issue_flags);
|
|
if (ret != -ENOENT)
|
|
return ret;
|
|
|
|
ret = io_waitid_cancel(ctx, cd, issue_flags);
|
|
if (ret != -ENOENT)
|
|
return ret;
|
|
|
|
ret = io_futex_cancel(ctx, cd, issue_flags);
|
|
if (ret != -ENOENT)
|
|
return ret;
|
|
|
|
spin_lock(&ctx->completion_lock);
|
|
if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
|
|
ret = io_timeout_cancel(ctx, cd);
|
|
spin_unlock(&ctx->completion_lock);
|
|
return ret;
|
|
}
|
|
|
|
int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|
{
|
|
struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
|
|
|
|
if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
|
|
return -EINVAL;
|
|
if (sqe->off || sqe->splice_fd_in)
|
|
return -EINVAL;
|
|
|
|
cancel->addr = READ_ONCE(sqe->addr);
|
|
cancel->flags = READ_ONCE(sqe->cancel_flags);
|
|
if (cancel->flags & ~CANCEL_FLAGS)
|
|
return -EINVAL;
|
|
if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
|
|
if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
|
|
return -EINVAL;
|
|
cancel->fd = READ_ONCE(sqe->fd);
|
|
}
|
|
if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
|
|
if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
|
|
return -EINVAL;
|
|
cancel->opcode = READ_ONCE(sqe->len);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __io_async_cancel(struct io_cancel_data *cd,
|
|
struct io_uring_task *tctx,
|
|
unsigned int issue_flags)
|
|
{
|
|
bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
|
|
struct io_ring_ctx *ctx = cd->ctx;
|
|
struct io_tctx_node *node;
|
|
int ret, nr = 0;
|
|
|
|
do {
|
|
ret = io_try_cancel(tctx, cd, issue_flags);
|
|
if (ret == -ENOENT)
|
|
break;
|
|
if (!all)
|
|
return ret;
|
|
nr++;
|
|
} while (1);
|
|
|
|
/* slow path, try all io-wq's */
|
|
__set_current_state(TASK_RUNNING);
|
|
io_ring_submit_lock(ctx, issue_flags);
|
|
mutex_lock(&ctx->tctx_lock);
|
|
ret = -ENOENT;
|
|
list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
|
|
ret = io_async_cancel_one(node->task->io_uring, cd);
|
|
if (ret != -ENOENT) {
|
|
if (!all)
|
|
break;
|
|
nr++;
|
|
}
|
|
}
|
|
mutex_unlock(&ctx->tctx_lock);
|
|
io_ring_submit_unlock(ctx, issue_flags);
|
|
return all ? nr : ret;
|
|
}
|
|
|
|
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
|
|
{
|
|
struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
|
|
struct io_cancel_data cd = {
|
|
.ctx = req->ctx,
|
|
.data = cancel->addr,
|
|
.flags = cancel->flags,
|
|
.opcode = cancel->opcode,
|
|
.seq = atomic_inc_return(&req->ctx->cancel_seq),
|
|
};
|
|
struct io_uring_task *tctx = req->tctx;
|
|
int ret;
|
|
|
|
if (cd.flags & IORING_ASYNC_CANCEL_FD) {
|
|
if (req->flags & REQ_F_FIXED_FILE ||
|
|
cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
|
|
req->flags |= REQ_F_FIXED_FILE;
|
|
req->file = io_file_get_fixed(req, cancel->fd,
|
|
issue_flags);
|
|
} else {
|
|
req->file = io_file_get_normal(req, cancel->fd);
|
|
}
|
|
if (!req->file) {
|
|
ret = -EBADF;
|
|
goto done;
|
|
}
|
|
cd.file = req->file;
|
|
}
|
|
|
|
ret = __io_async_cancel(&cd, tctx, issue_flags);
|
|
done:
|
|
if (ret < 0)
|
|
req_set_fail(req);
|
|
io_req_set_res(req, ret, 0);
|
|
return IOU_COMPLETE;
|
|
}
|
|
|
|
static int __io_sync_cancel(struct io_uring_task *tctx,
|
|
struct io_cancel_data *cd, int fd)
|
|
{
|
|
struct io_ring_ctx *ctx = cd->ctx;
|
|
|
|
/* fixed must be grabbed every time since we drop the uring_lock */
|
|
if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
|
|
(cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
|
|
struct io_rsrc_node *node;
|
|
|
|
node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
|
|
if (unlikely(!node))
|
|
return -EBADF;
|
|
cd->file = io_slot_file(node);
|
|
if (!cd->file)
|
|
return -EBADF;
|
|
}
|
|
|
|
return __io_async_cancel(cd, tctx, 0);
|
|
}
|
|
|
|
/*
 * Synchronous cancelation, driven by a struct io_uring_sync_cancel_reg
 * copied in from 'arg'.
 *
 * Called with ctx->uring_lock held and returns with it held, but the lock is
 * dropped while waiting between retries.
 *
 * Returns -EFAULT if 'arg' cannot be copied, -EINVAL for unknown flags or
 * non-zero padding, -EBADF if a normal fd does not resolve. If the first
 * attempt yields anything but -EALREADY, that value is returned unchanged.
 * Otherwise the cancel is retried until it stops reporting -EALREADY;
 * -ENOENT or a positive count then becomes 0, an expired timeout becomes
 * -ETIME, and a negative value from io_run_task_work_sig() is passed on.
 */
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_cancel_data cd = {
		.ctx	= ctx,
		.seq	= atomic_inc_return(&ctx->cancel_seq),
	};
	struct io_uring_sync_cancel_reg sc;
	struct file *file = NULL;
	ktime_t deadline = KTIME_MAX;
	DEFINE_WAIT(wait);
	int ret, i;

	if (copy_from_user(&sc, arg, sizeof(sc)))
		return -EFAULT;
	if (sc.flags & ~CANCEL_FLAGS)
		return -EINVAL;

	/* both padding areas must be zeroed */
	for (i = 0; i < ARRAY_SIZE(sc.pad); i++) {
		if (sc.pad[i])
			return -EINVAL;
	}
	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++) {
		if (sc.pad2[i])
			return -EINVAL;
	}

	cd.data = sc.addr;
	cd.flags = sc.flags;
	cd.opcode = sc.opcode;

	/* a normal file descriptor can be resolved once, upfront */
	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
	    !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		file = fget(sc.fd);
		if (!file)
			return -EBADF;
		cd.file = file;
	}

	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

	/* anything but -EALREADY is final and is returned as-is */
	if (ret != -EALREADY)
		goto put_file;

	/* a timeout of { -1, -1 } means wait without a deadline */
	if (!(sc.timeout.tv_sec == -1UL && sc.timeout.tv_nsec == -1UL)) {
		struct timespec64 ts = {
			.tv_sec		= sc.timeout.tv_sec,
			.tv_nsec	= sc.timeout.tv_nsec
		};

		deadline = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
	}

	/*
	 * Keep looking until we get -ENOENT. We'll get woken every time a
	 * request completes and will retry the cancelation.
	 *
	 * Each pass starts with the uring_lock held and drops it right after
	 * the attempt, so every exit from the loop leaves the lock released.
	 */
	for (;;) {
		cd.seq = atomic_inc_return(&ctx->cancel_seq);

		prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);

		ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

		mutex_unlock(&ctx->uring_lock);
		if (ret != -EALREADY)
			break;

		ret = io_run_task_work_sig(ctx);
		if (ret < 0)
			break;

		ret = schedule_hrtimeout(&deadline, HRTIMER_MODE_ABS);
		if (!ret) {
			ret = -ETIME;
			break;
		}

		mutex_lock(&ctx->uring_lock);
	}

	finish_wait(&ctx->cq_wait, &wait);
	/* re-take the lock the caller expects us to return with */
	mutex_lock(&ctx->uring_lock);

	if (ret == -ENOENT || ret > 0)
		ret = 0;
put_file:
	if (file)
		fput(file);
	return ret;
}
|
|
|
|
/*
 * Walk 'list' and, for every request that io_match_task_safe() accepts for
 * 'tctx' / 'cancel_all', unhash it and hand it to 'cancel'.
 *
 * Must be called with ctx->uring_lock held. Returns true if 'cancel'
 * returned true for at least one request.
 */
bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			  struct hlist_head *list, bool cancel_all,
			  bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *next;
	struct io_kiocb *req;
	bool any_canceled = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, next, list, hash_node) {
		if (io_match_task_safe(req, tctx, cancel_all)) {
			hlist_del_init(&req->hash_node);
			any_canceled |= cancel(req);
		}
	}

	return any_canceled;
}
|
|
|
|
int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
|
|
unsigned int issue_flags, struct hlist_head *list,
|
|
bool (*cancel)(struct io_kiocb *))
|
|
{
|
|
struct hlist_node *tmp;
|
|
struct io_kiocb *req;
|
|
int nr = 0;
|
|
|
|
io_ring_submit_lock(ctx, issue_flags);
|
|
hlist_for_each_entry_safe(req, tmp, list, hash_node) {
|
|
if (!io_cancel_req_match(req, cd))
|
|
continue;
|
|
if (cancel(req))
|
|
nr++;
|
|
if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
|
|
break;
|
|
}
|
|
io_ring_submit_unlock(ctx, issue_flags);
|
|
return nr ?: -ENOENT;
|
|
}
|
|
|
|
static bool io_match_linked(struct io_kiocb *head)
|
|
{
|
|
struct io_kiocb *req;
|
|
|
|
io_for_each_link(req, head) {
|
|
if (req->flags & REQ_F_INFLIGHT)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* As io_match_task() but protected against racing with linked timeouts.
|
|
* User must not hold timeout_lock.
|
|
*/
|
|
bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
|
|
bool cancel_all)
|
|
{
|
|
bool matched;
|
|
|
|
if (tctx && head->tctx != tctx)
|
|
return false;
|
|
if (cancel_all)
|
|
return true;
|
|
|
|
if (head->flags & REQ_F_LINK_TIMEOUT) {
|
|
struct io_ring_ctx *ctx = head->ctx;
|
|
|
|
/* protect against races with linked timeouts */
|
|
raw_spin_lock_irq(&ctx->timeout_lock);
|
|
matched = io_match_linked(head);
|
|
raw_spin_unlock_irq(&ctx->timeout_lock);
|
|
} else {
|
|
matched = io_match_linked(head);
|
|
}
|
|
return matched;
|
|
}
|
|
|
|
/*
 * Cancelation entry point for the current task: calls
 * io_uring_unreg_ringfd() first, then runs the generic cancelation with no
 * SQPOLL data (sqd == NULL).
 */
void __io_uring_cancel(bool cancel_all)
{
	io_uring_unreg_ringfd();
	io_uring_cancel_generic(cancel_all, NULL);
}
|
|
|
|
struct io_task_cancel {
|
|
struct io_uring_task *tctx;
|
|
bool all;
|
|
};
|
|
|
|
static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
|
|
{
|
|
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
|
|
struct io_task_cancel *cancel = data;
|
|
|
|
return io_match_task_safe(req, cancel->tctx, cancel->all);
|
|
}
|
|
|
|
/*
 * Fail deferred requests on behalf of 'tctx'.
 *
 * ctx->defer_list is scanned from the tail for the last entry whose request
 * matches. That entry and everything queued before it is cut off the list,
 * and each of those requests is failed with -ECANCELED, with
 * ctx->nr_drained reduced by io_linked_nr() of the request.
 *
 * Returns true if anything was removed.
 */
static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all)
{
	struct io_defer_entry *de;
	LIST_HEAD(doomed);

	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
		if (!io_match_task_safe(de->req, tctx, cancel_all))
			continue;
		list_cut_position(&doomed, &ctx->defer_list, &de->list);
		break;
	}

	if (list_empty(&doomed))
		return false;

	/* 'doomed' is known to be non-empty at this point */
	do {
		de = list_first_entry(&doomed, struct io_defer_entry, list);
		list_del_init(&de->list);
		ctx->nr_drained -= io_linked_nr(de->req);
		io_req_task_queue_fail(de->req, -ECANCELED);
		kfree(de);
	} while (!list_empty(&doomed));

	return true;
}
|
|
|
|
/*
 * io-wq match callback: true if the work item's request belongs to the ring
 * passed in as 'data'.
 */
__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
{
	const struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	const struct io_ring_ctx *ctx = data;

	return req->ctx == ctx;
}
|
|
|
|
/*
 * Cancel all io-wq work belonging to 'ctx' on every task attached to the
 * ring. The task list is walked under ctx->tctx_lock, nested inside
 * ctx->uring_lock.
 *
 * Returns true if any io-wq reported something other than
 * IO_WQ_CANCEL_NOTFOUND.
 */
static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
{
	struct io_tctx_node *node;
	bool found = false;

	mutex_lock(&ctx->uring_lock);
	mutex_lock(&ctx->tctx_lock);
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		struct io_uring_task *tctx = node->task->io_uring;

		/*
		 * io_wq will stay alive while we hold uring_lock, because it's
		 * killed after ctx nodes, which requires to take the lock.
		 */
		if (tctx && tctx->io_wq) {
			enum io_wq_cancel cret;

			cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb,
					       ctx, true);
			if (cret != IO_WQ_CANCEL_NOTFOUND)
				found = true;
		}
	}
	mutex_unlock(&ctx->tctx_lock);
	mutex_unlock(&ctx->uring_lock);

	return found;
}
|
|
|
|
/*
 * Make one pass over everything on 'ctx' that can hold a request and try to
 * cancel what matches 'tctx' / 'cancel_all': io-wq work, iopoll requests,
 * deferred local task work, the defer list, poll, waitid, futex, uring_cmd
 * and timeouts, followed by a run of task work (or a flush of the fallback
 * work when no task context is given).
 *
 * Returns true if any step found or did something, which callers use as the
 * signal to make another pass. Returns false right away for a ring without
 * ->rings.
 */
__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all, bool is_sqpoll_thread)
{
	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
	const bool defer_tw = ctx->flags & IORING_SETUP_DEFER_TASKRUN;
	bool progress = false;

	/* set it so io_req_local_work_add() would wake us up */
	if (defer_tw) {
		atomic_set(&ctx->cq_wait_nr, 1);
		smp_mb();
	}

	/* failed during ring init, it couldn't have issued any requests */
	if (!ctx->rings)
		return false;

	if (!tctx) {
		if (io_uring_try_cancel_iowq(ctx))
			progress = true;
	} else if (tctx->io_wq) {
		enum io_wq_cancel cret;

		/*
		 * Cancels requests of all rings, not only @ctx, but
		 * it's fine as the task is in exit/exec.
		 */
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
				       &cancel, true);
		if (cret != IO_WQ_CANCEL_NOTFOUND)
			progress = true;
	}

	/* SQPOLL thread does its own polling */
	if (is_sqpoll_thread ||
	    (cancel_all && !(ctx->flags & IORING_SETUP_SQPOLL))) {
		while (!wq_list_empty(&ctx->iopoll_list)) {
			io_iopoll_try_reap_events(ctx);
			progress = true;
			cond_resched();
		}
	}

	if (defer_tw && io_allowed_defer_tw_run(ctx)) {
		if (io_run_local_work(ctx, INT_MAX, INT_MAX) > 0)
			progress = true;
	}

	mutex_lock(&ctx->uring_lock);
	progress |= io_cancel_defer_files(ctx, tctx, cancel_all);
	progress |= io_poll_remove_all(ctx, tctx, cancel_all);
	progress |= io_waitid_remove_all(ctx, tctx, cancel_all);
	progress |= io_futex_remove_all(ctx, tctx, cancel_all);
	progress |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);

	progress |= io_kill_timeouts(ctx, tctx, cancel_all);

	if (tctx)
		progress |= io_run_task_work() > 0;
	else
		progress |= flush_delayed_work(&ctx->fallback_work);

	return progress;
}
|
|
|
|
/*
 * Number of inflight requests of 'tctx': the ->inflight_tracked atomic when
 * 'tracked' is set, otherwise the sum of the ->inflight per-cpu counter.
 */
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
	s64 nr;

	if (tracked)
		nr = atomic_read(&tctx->inflight_tracked);
	else
		nr = percpu_counter_sum(&tctx->inflight);

	return nr;
}
|
|
|
|
/*
|
|
* Find any io_uring ctx that this task has registered or done IO on, and cancel
|
|
* requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
|
|
*/
|
|
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
|
|
{
|
|
struct io_uring_task *tctx = current->io_uring;
|
|
struct io_ring_ctx *ctx;
|
|
struct io_tctx_node *node;
|
|
unsigned long index;
|
|
s64 inflight;
|
|
DEFINE_WAIT(wait);
|
|
|
|
WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);
|
|
|
|
if (!current->io_uring)
|
|
return;
|
|
if (tctx->io_wq)
|
|
io_wq_exit_start(tctx->io_wq);
|
|
|
|
atomic_inc(&tctx->in_cancel);
|
|
do {
|
|
bool loop = false;
|
|
|
|
io_uring_drop_tctx_refs(current);
|
|
if (!tctx_inflight(tctx, !cancel_all))
|
|
break;
|
|
|
|
/* read completions before cancelations */
|
|
inflight = tctx_inflight(tctx, false);
|
|
if (!inflight)
|
|
break;
|
|
|
|
if (!sqd) {
|
|
xa_for_each(&tctx->xa, index, node) {
|
|
/* sqpoll task will cancel all its requests */
|
|
if (node->ctx->sq_data)
|
|
continue;
|
|
loop |= io_uring_try_cancel_requests(node->ctx,
|
|
current->io_uring,
|
|
cancel_all,
|
|
false);
|
|
}
|
|
} else {
|
|
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
|
|
loop |= io_uring_try_cancel_requests(ctx,
|
|
current->io_uring,
|
|
cancel_all,
|
|
true);
|
|
}
|
|
|
|
if (loop) {
|
|
cond_resched();
|
|
continue;
|
|
}
|
|
|
|
prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
|
|
io_run_task_work();
|
|
io_uring_drop_tctx_refs(current);
|
|
xa_for_each(&tctx->xa, index, node) {
|
|
if (io_local_work_pending(node->ctx)) {
|
|
WARN_ON_ONCE(node->ctx->submitter_task &&
|
|
node->ctx->submitter_task != current);
|
|
goto end_wait;
|
|
}
|
|
}
|
|
/*
|
|
* If we've seen completions, retry without waiting. This
|
|
* avoids a race where a completion comes in before we did
|
|
* prepare_to_wait().
|
|
*/
|
|
if (inflight == tctx_inflight(tctx, !cancel_all))
|
|
schedule();
|
|
end_wait:
|
|
finish_wait(&tctx->wait, &wait);
|
|
} while (1);
|
|
|
|
io_uring_clean_tctx(tctx);
|
|
if (cancel_all) {
|
|
/*
|
|
* We shouldn't run task_works after cancel, so just leave
|
|
* ->in_cancel set for normal exit.
|
|
*/
|
|
atomic_dec(&tctx->in_cancel);
|
|
/* for exec all current's requests should be gone, kill tctx */
|
|
__io_uring_free(current);
|
|
}
|
|
}
|