mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2026-01-12 17:41:28 +00:00)
With the RAPL PMU addition, there is recursive locking when the CPU online callback calls rapl_package_add_pmu(): cpu_hotplug_lock is already held by cpuhp_thread_fun(), and rapl_package_add_pmu() tries to acquire it again.

<4>[ 8.197433] ============================================
<4>[ 8.197437] WARNING: possible recursive locking detected
<4>[ 8.197440] 6.19.0-rc1-lgci-xe-xe-4242-05b7c58b3367dca84+ #1 Not tainted
<4>[ 8.197444] --------------------------------------------
<4>[ 8.197447] cpuhp/0/20 is trying to acquire lock:
<4>[ 8.197450] ffffffff83487870 (cpu_hotplug_lock){++++}-{0:0}, at: rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197463] but task is already holding lock:
<4>[ 8.197466] ffffffff83487870 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x6d/0x290
<4>[ 8.197477] other info that might help us debug this:
<4>[ 8.197480] Possible unsafe locking scenario:
<4>[ 8.197483]        CPU0
<4>[ 8.197485]        ----
<4>[ 8.197487]   lock(cpu_hotplug_lock);
<4>[ 8.197490]   lock(cpu_hotplug_lock);
<4>[ 8.197493] *** DEADLOCK ***
..
..
<4>[ 8.197542] __lock_acquire+0x146e/0x2790
<4>[ 8.197548] lock_acquire+0xc4/0x2c0
<4>[ 8.197550] ? rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197556] cpus_read_lock+0x41/0x110
<4>[ 8.197558] ? rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197561] rapl_package_add_pmu+0x37/0x370 [intel_rapl_common]
<4>[ 8.197565] rapl_cpu_online+0x85/0x87 [intel_rapl_msr]
<4>[ 8.197568] ? __pfx_rapl_cpu_online+0x10/0x10 [intel_rapl_msr]
<4>[ 8.197570] cpuhp_invoke_callback+0x41f/0x6c0
<4>[ 8.197573] ? cpuhp_thread_fun+0x6d/0x290
<4>[ 8.197575] cpuhp_thread_fun+0x1e2/0x290
<4>[ 8.197578] ? smpboot_thread_fn+0x26/0x290
<4>[ 8.197581] smpboot_thread_fn+0x12f/0x290
<4>[ 8.197584] ? __pfx_smpboot_thread_fn+0x10/0x10
<4>[ 8.197586] kthread+0x11f/0x250
<4>[ 8.197589] ? __pfx_kthread+0x10/0x10
<4>[ 8.197592] ret_from_fork+0x344/0x3a0
<4>[ 8.197595] ? __pfx_kthread+0x10/0x10
<4>[ 8.197597] ret_from_fork_asm+0x1a/0x30
<4>[ 8.197604] </TASK>

Fix this issue the same way the RAPL powercap package domain is added from the same CPU online callback: introduce interfaces that do not take cpus_read_lock(). Add rapl_package_add_pmu_locked() and rapl_package_remove_pmu_locked(), which skip cpus_read_lock() and can be used from contexts that already hold cpu_hotplug_lock.

Fixes: 748d6ba43afd ("powercap: intel_rapl: Enable MSR-based RAPL PMU support")
Reported-by: Borah, Chaitanya Kumar <chaitanya.kumar.borah@intel.com>
Closes: https://lore.kernel.org/linux-pm/5427ede1-57a0-43d1-99f3-8ca4b0643e82@intel.com/T/#u
Tested-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Tested-by: RavitejaX Veesam <ravitejax.veesam@intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20251217153455.3560176-1-srinivas.pandruvada@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
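As an illustration, here is a minimal sketch (not the actual intel_rapl_msr code) of a CPU online callback modeled on the rapl_cpu_online() flow in the trace above. The hotplug core already holds cpu_hotplug_lock when the callback runs, so the package and its PMU are registered through the *_cpuslocked()/*_locked() interfaces declared in the header below; example_priv and example_rapl_cpu_online are illustrative names, not part of the kernel.

#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/intel_rapl.h>

/* Illustrative interface-private data; a real driver fills this in at probe time. */
static struct rapl_if_priv example_priv;

static int example_rapl_cpu_online(unsigned int cpu)
{
	struct rapl_package *rp;

	/* cpu_hotplug_lock is already held by the hotplug thread here. */
	rp = rapl_find_package_domain_cpuslocked(cpu, &example_priv, true);
	if (!rp) {
		rp = rapl_add_package_cpuslocked(cpu, &example_priv, true);
		if (IS_ERR(rp))
			return PTR_ERR(rp);
		/*
		 * rapl_package_add_pmu() would call cpus_read_lock() and
		 * recurse on cpu_hotplug_lock; the _locked variant skips it.
		 */
		rapl_package_add_pmu_locked(rp);
	}
	cpumask_set_cpu(cpu, &rp->cpumask);
	return 0;
}

Such a callback would be registered via cpuhp_setup_state(); the matching offline/teardown path would similarly use rapl_package_remove_pmu_locked() and rapl_remove_package_cpuslocked() when the last CPU of a package goes away.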
228 lines
6.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Data types and headers for RAPL support
 *
 * Copyright (C) 2019 Intel Corporation.
 *
 * Author: Zhang Rui <rui.zhang@intel.com>
 */

#ifndef __INTEL_RAPL_H__
#define __INTEL_RAPL_H__

#include <linux/types.h>
#include <linux/powercap.h>
#include <linux/cpuhotplug.h>

enum rapl_if_type {
	RAPL_IF_MSR,	/* RAPL I/F using MSR registers */
	RAPL_IF_MMIO,	/* RAPL I/F using MMIO registers */
	RAPL_IF_TPMI,	/* RAPL I/F using TPMI registers */
};

enum rapl_domain_type {
	RAPL_DOMAIN_PACKAGE,	/* entire package/socket */
	RAPL_DOMAIN_PP0,	/* core power plane */
	RAPL_DOMAIN_PP1,	/* graphics uncore */
	RAPL_DOMAIN_DRAM,	/* DRAM control_type */
	RAPL_DOMAIN_PLATFORM,	/* PSys control_type */
	RAPL_DOMAIN_MAX,
};

enum rapl_domain_reg_id {
	RAPL_DOMAIN_REG_LIMIT,
	RAPL_DOMAIN_REG_STATUS,
	RAPL_DOMAIN_REG_PERF,
	RAPL_DOMAIN_REG_POLICY,
	RAPL_DOMAIN_REG_INFO,
	RAPL_DOMAIN_REG_PL4,
	RAPL_DOMAIN_REG_UNIT,
	RAPL_DOMAIN_REG_PL2,
	RAPL_DOMAIN_REG_MAX,
};

struct rapl_domain;

enum rapl_primitives {
	POWER_LIMIT1,
	POWER_LIMIT2,
	POWER_LIMIT4,
	ENERGY_COUNTER,
	FW_LOCK,
	FW_HIGH_LOCK,
	PL1_LOCK,
	PL2_LOCK,
	PL4_LOCK,

	PL1_ENABLE,		/* power limit 1, aka long term */
	PL1_CLAMP,		/* allow frequency to go below OS request */
	PL2_ENABLE,		/* power limit 2, aka short term, instantaneous */
	PL2_CLAMP,
	PL4_ENABLE,		/* power limit 4, aka max peak power */

	TIME_WINDOW1,		/* long term */
	TIME_WINDOW2,		/* short term */
	THERMAL_SPEC_POWER,
	MAX_POWER,

	MIN_POWER,
	MAX_TIME_WINDOW,
	THROTTLED_TIME,
	PRIORITY_LEVEL,

	PSYS_POWER_LIMIT1,
	PSYS_POWER_LIMIT2,
	PSYS_PL1_ENABLE,
	PSYS_PL2_ENABLE,
	PSYS_TIME_WINDOW1,
	PSYS_TIME_WINDOW2,
	/* below are not raw primitive data */
	AVERAGE_POWER,
	NR_RAPL_PRIMITIVES,
};

struct rapl_domain_data {
	u64 primitives[NR_RAPL_PRIMITIVES];
	unsigned long timestamp;
};

#define NR_POWER_LIMITS	(POWER_LIMIT4 + 1)

struct rapl_power_limit {
	struct powercap_zone_constraint *constraint;
	struct rapl_domain *domain;
	const char *name;
	bool locked;
	u64 last_power_limit;
};

struct rapl_package;

#define RAPL_DOMAIN_NAME_LENGTH 16

union rapl_reg {
	void __iomem *mmio;
	u32 msr;
	u64 val;
};

struct rapl_domain {
	char name[RAPL_DOMAIN_NAME_LENGTH];
	enum rapl_domain_type id;
	union rapl_reg regs[RAPL_DOMAIN_REG_MAX];
	struct powercap_zone power_zone;
	struct rapl_domain_data rdd;
	struct rapl_power_limit rpl[NR_POWER_LIMITS];
	u64 attr_map;	/* track capabilities */
	unsigned int state;
	unsigned int power_unit;
	unsigned int energy_unit;
	unsigned int time_unit;
	struct rapl_package *rp;
};

struct reg_action {
	union rapl_reg reg;
	u64 mask;
	u64 value;
	int err;
};

/**
 * struct rapl_if_priv: private data for different RAPL interfaces
 * @control_type:		Each RAPL interface must have its own powercap
 *				control type.
 * @platform_rapl_domain:	Optional. Some RAPL interface may have platform
 *				level RAPL control.
 * @pcap_rapl_online:		CPU hotplug state for each RAPL interface.
 * @reg_unit:			Register for getting energy/power/time unit.
 * @regs:			Register sets for different RAPL Domains.
 * @limits:			Number of power limits supported by each domain.
 * @read_raw:			Callback for reading RAPL interface specific
 *				registers.
 * @write_raw:			Callback for writing RAPL interface specific
 *				registers.
 * @defaults:			internal pointer to interface default settings
 * @rpi:			internal pointer to interface primitive info
 */
struct rapl_if_priv {
	enum rapl_if_type type;
	struct powercap_control_type *control_type;
	enum cpuhp_state pcap_rapl_online;
	union rapl_reg reg_unit;
	union rapl_reg regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
	int limits[RAPL_DOMAIN_MAX];
	int (*read_raw)(int id, struct reg_action *ra, bool atomic);
	int (*write_raw)(int id, struct reg_action *ra);
	void *defaults;
	void *rpi;
};

#ifdef CONFIG_PERF_EVENTS
/**
 * struct rapl_package_pmu_data: Per package data for PMU support
 * @scale:		Scale of 2^-32 Joules for each energy counter increase.
 * @lock:		Lock to protect n_active and active_list.
 * @n_active:		Number of active events.
 * @active_list:	List of active events.
 * @timer_interval:	Maximum timer expiration time before counter overflow.
 * @hrtimer:		Periodically update the counter to prevent overflow.
 */
struct rapl_package_pmu_data {
	u64 scale[RAPL_DOMAIN_MAX];
	raw_spinlock_t lock;
	int n_active;
	struct list_head active_list;
	ktime_t timer_interval;
	struct hrtimer hrtimer;
};
#endif

/* maximum rapl package domain name: package-%d-die-%d */
#define PACKAGE_DOMAIN_NAME_LENGTH 30

struct rapl_package {
	unsigned int id;	/* logical die id, equals physical 1-die systems */
	unsigned int nr_domains;
	unsigned long domain_map;	/* bit map of active domains */
	struct rapl_domain *domains;	/* array of domains, sized at runtime */
	struct powercap_zone *power_zone;	/* keep track of parent zone */
	unsigned long power_limit_irq;	/* keep track of package power limit
					 * notify interrupt enable status.
					 */
	struct list_head plist;
	int lead_cpu;	/* one active cpu per package for access */
	/* Track active cpus */
	struct cpumask cpumask;
	char name[PACKAGE_DOMAIN_NAME_LENGTH];
	struct rapl_if_priv *priv;
#ifdef CONFIG_PERF_EVENTS
	bool has_pmu;
	struct rapl_package_pmu_data pmu_data;
#endif
};

struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv,
							  bool id_is_cpu);
struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv,
						 bool id_is_cpu);
void rapl_remove_package_cpuslocked(struct rapl_package *rp);

struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu);
struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu);
void rapl_remove_package(struct rapl_package *rp);

#ifdef CONFIG_PERF_EVENTS
int rapl_package_add_pmu(struct rapl_package *rp);
int rapl_package_add_pmu_locked(struct rapl_package *rp);
void rapl_package_remove_pmu(struct rapl_package *rp);
void rapl_package_remove_pmu_locked(struct rapl_package *rp);
#else
static inline int rapl_package_add_pmu(struct rapl_package *rp) { return 0; }
static inline int rapl_package_add_pmu_locked(struct rapl_package *rp) { return 0; }
static inline void rapl_package_remove_pmu(struct rapl_package *rp) { }
static inline void rapl_package_remove_pmu_locked(struct rapl_package *rp) { }
#endif

#endif /* __INTEL_RAPL_H__ */