From 807e0d187da4c0b22036b5e34000f7a8c52f6e50 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 20 Nov 2025 01:45:25 +0800 Subject: [PATCH 1/3] tick/sched: Fix bogus condition in report_idle_softirq() In commit 0345691b24c0 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle") the new function report_idle_softirq() was created by breaking code out of the existing can_stop_idle_tick() for kernels v5.18 and newer. In doing so, the code essentially went from this form: if (A) { static int ratelimit; if (ratelimit < 10 && !C && A&D) { pr_warn("NOHZ tick-stop error: ..."); ratelimit++; } return false; } to a new function: static bool report_idle_softirq(void) { static int ratelimit; if (likely(!A)) return false; if (ratelimit < 10) return false; ... pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", pending); ratelimit++; return true; } commit a7e282c77785 ("tick/rcu: Fix bogus ratelimit condition") realized ratelimit was essentially set to zero instead of ten, and hence *no* softirq pending messages would ever be issued, but "fixed" it as: - if (ratelimit < 10) + if (ratelimit >= 10) return false; However, this fix introduced another issue: When ratelimit is greater than or equal 10, even if A is true, it will directly return false. While ratelimit in the original code was only used to control printing and will not affect the return value. Restore the original logic and restrict ratelimit to control the printk and not the return value. Fixes: 0345691b24c0 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle") Fixes: a7e282c77785 ("tick/rcu: Fix bogus ratelimit condition") Signed-off-by: Wen Yang Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251119174525.29470-1-wen.yang@linux.dev --- kernel/time/tick-sched.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c527b421c865..466e083c8272 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1152,16 +1152,15 @@ static bool report_idle_softirq(void) return false; } - if (ratelimit >= 10) - return false; - /* On RT, softirq handling may be waiting on some lock */ if (local_bh_blocked()) return false; - pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", - pending); - ratelimit++; + if (ratelimit < 10) { + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", + pending); + ratelimit++; + } return true; } From 7b5ab04f035f829ed6008e4685501ec00b3e73c9 Mon Sep 17 00:00:00 2001 From: Malaya Kumar Rout Date: Thu, 20 Nov 2025 20:32:13 +0530 Subject: [PATCH 2/3] timekeeping: Fix resource leak in tk_aux_sysfs_init() error paths tk_aux_sysfs_init() returns immediately on error during the auxiliary clock initialization loop without cleaning up previously allocated kobjects and sysfs groups. If kobject_create_and_add() or sysfs_create_group() fails during loop iteration, the parent kobjects (tko and auxo) and any previously created child kobjects are leaked. Fix this by adding proper error handling with goto labels to ensure all allocated resources are cleaned up on failure. kobject_put() on the parent kobjects will handle cleanup of their children. Fixes: 7b95663a3d96 ("timekeeping: Provide interface to control auxiliary clocks") Signed-off-by: Malaya Kumar Rout Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251120150213.246777-1-mrout@redhat.com --- kernel/time/timekeeping.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3a4d3b2e3f74..08e0943b54da 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -3060,29 +3060,32 @@ static const struct attribute_group aux_clock_enable_attr_group = { static int __init tk_aux_sysfs_init(void) { struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj); + int ret = -ENOMEM; if (!tko) - return -ENOMEM; + return ret; auxo = kobject_create_and_add("aux_clocks", tko); - if (!auxo) { - kobject_put(tko); - return -ENOMEM; - } + if (!auxo) + goto err_clean; for (int i = 0; i < MAX_AUX_CLOCKS; i++) { char id[2] = { [0] = '0' + i, }; struct kobject *clk = kobject_create_and_add(id, auxo); if (!clk) - return -ENOMEM; - - int ret = sysfs_create_group(clk, &aux_clock_enable_attr_group); + goto err_clean; + ret = sysfs_create_group(clk, &aux_clock_enable_attr_group); if (ret) - return ret; + goto err_clean; } return 0; + +err_clean: + kobject_put(auxo); + kobject_put(tko); + return ret; } late_initcall(tk_aux_sysfs_init); From 20739af07383e6eb1ec59dcd70b72ebfa9ac362c Mon Sep 17 00:00:00 2001 From: Yipeng Zou Date: Sat, 22 Nov 2025 09:39:42 +0000 Subject: [PATCH 3/3] timers: Fix NULL function pointer race in timer_shutdown_sync() There is a race condition between timer_shutdown_sync() and timer expiration that can lead to hitting a WARN_ON in expire_timers(). The issue occurs when timer_shutdown_sync() clears the timer function to NULL while the timer is still running on another CPU. The race scenario looks like this: CPU0 CPU1 lock_timer_base() expire_timers() base->running_timer = timer; unlock_timer_base() [call_timer_fn enter] mod_timer() ... timer_shutdown_sync() lock_timer_base() // For now, will not detach the timer but only clear its function to NULL if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); if (shutdown) timer->function = NULL; unlock_timer_base() [call_timer_fn exit] lock_timer_base() base->running_timer = NULL; unlock_timer_base() ... // Now timer is pending while its function set to NULL. // next timer trigger expire_timers() WARN_ON_ONCE(!fn) // hit ... lock_timer_base() // Now timer will detach if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); if (shutdown) timer->function = NULL; unlock_timer_base() The problem is that timer_shutdown_sync() clears the timer function regardless of whether the timer is currently running. This can leave a pending timer with a NULL function pointer, which triggers the WARN_ON_ONCE(!fn) check in expire_timers(). Fix this by only clearing the timer function when actually detaching the timer. If the timer is running, leave the function pointer intact, which is safe because the timer will be properly detached when it finishes running. Fixes: 0cc04e80458a ("timers: Add shutdown mechanism to the internal functions") Signed-off-by: Yipeng Zou Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251122093942.301559-1-zouyipeng@huawei.com --- kernel/time/timer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 553fa469d7cc..d5ebb1d927ea 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1458,10 +1458,11 @@ static int __try_to_del_timer_sync(struct timer_list *timer, bool shutdown) base = lock_timer_base(timer, &flags); - if (base->running_timer != timer) + if (base->running_timer != timer) { ret = detach_if_pending(timer, base, true); - if (shutdown) - timer->function = NULL; + if (shutdown) + timer->function = NULL; + } raw_spin_unlock_irqrestore(&base->lock, flags);