From f371d2afd5453f9ab9a690700bfdaa70fe1e7c2b Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 8 Dec 2025 15:07:15 +0100 Subject: [PATCH 01/32] MAINTAINERS: Update Nova GPU driver git link Nova driver development has been moved to a different git repository. Update the MAINTAINERS entry to reflect that. Reported-by: Gary Guo Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251208140713.41330-3-phasta@kernel.org Signed-off-by: Danilo Krummrich --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5b11839cba9d..852281f1cc39 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8067,7 +8067,7 @@ W: https://rust-for-linux.com/nova-gpu-driver Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau -T: git https://gitlab.freedesktop.org/drm/nova.git nova-next +T: git https://gitlab.freedesktop.org/drm/rust/kernel.git drm-rust-next F: Documentation/gpu/nova/ F: drivers/gpu/nova-core/ @@ -8079,7 +8079,7 @@ W: https://rust-for-linux.com/nova-gpu-driver Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau -T: git https://gitlab.freedesktop.org/drm/nova.git nova-next +T: git https://gitlab.freedesktop.org/drm/rust/kernel.git drm-rust-next F: Documentation/gpu/nova/ F: drivers/gpu/drm/nova/ F: include/uapi/drm/nova_drm.h From 3d3352e73a55a4ccf110f8b3419bbe2fbfd8a030 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 5 Nov 2025 09:40:09 +0900 Subject: [PATCH 02/32] gpu: nova-core: select RUST_FW_LOADER_ABSTRACTIONS RUST_FW_LOADER_ABSTRACTIONS was depended on by NOVA_CORE, but NOVA_CORE is selected by DRM_NOVA. This creates a situation where, if DRM_NOVA is selected, NOVA_CORE gets enabled but not RUST_FW_LOADER_ABSTRACTIONS, which results in a build error. Since the firmware loader is an implementation detail of the driver, it should be enabled along with it, so change the "depends on" to a "select". Fixes: 54e6baf123fd ("gpu: nova-core: add initial driver stub") Closes: https://lore.kernel.org/oe-kbuild-all/202512061721.rxKGnt5q-lkp@intel.com/ Tested-by: Alyssa Ross Acked-by: Danilo Krummrich Link: https://patch.msgid.link/20251106-b4-select-rust-fw-v3-2-771172257755@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig index 20d3e6d0d796..527920f9c4d3 100644 --- a/drivers/gpu/nova-core/Kconfig +++ b/drivers/gpu/nova-core/Kconfig @@ -3,7 +3,7 @@ config NOVA_CORE depends on 64BIT depends on PCI depends on RUST - depends on RUST_FW_LOADER_ABSTRACTIONS + select RUST_FW_LOADER_ABSTRACTIONS select AUXILIARY_BUS default n help From b6c76518233e876a0f26e66b83643891c0f341db Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 16 Dec 2025 11:57:07 +0900 Subject: [PATCH 03/32] gpu: nova-core: bindings: add missing explicit padding Explicit padding is needed in order to avoid uninitialized bytes and safely implement `AsBytes`. The `--explicit-padding` of bindgen was omitted by mistake when these bindings were generated. Fixes: 13f85988d4fa ("gpu: nova-core: gsp: Retrieve GSP static info to gather GPU information") Reviewed-by: Lyude Paul Reviewed-by: Joel Fernandes Acked-by: Danilo Krummrich Link: https://patch.msgid.link/20251216-nova-fixes-v3-1-c7469a71f7c4@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 5bcfbcd1ad22..5f0569dcc4a0 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -325,6 +325,7 @@ pub struct NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS { pub totalVFs: u32_, pub firstVfOffset: u32_, pub vfFeatureMask: u32_, + pub __bindgen_padding_0: [u8; 4usize], pub FirstVFBar0Address: u64_, pub FirstVFBar1Address: u64_, pub FirstVFBar2Address: u64_, @@ -340,6 +341,7 @@ pub struct NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS { pub bClientRmAllocatedCtxBuffer: u8_, pub bNonPowerOf2ChannelCountSupported: u8_, pub bVfResizableBAR1Supported: u8_, + pub __bindgen_padding_1: [u8; 7usize], } #[repr(C)] #[derive(Debug, Default, Copy, Clone)] @@ -347,11 +349,13 @@ pub struct NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS { pub BoardID: u32_, pub chipSKU: [ffi::c_char; 9usize], pub chipSKUMod: [ffi::c_char; 5usize], + pub __bindgen_padding_0: [u8; 2usize], pub skuConfigVersion: u32_, pub project: [ffi::c_char; 5usize], pub projectSKU: [ffi::c_char; 5usize], pub CDP: [ffi::c_char; 6usize], pub projectSKUMod: [ffi::c_char; 2usize], + pub __bindgen_padding_1: [u8; 2usize], pub businessCycle: u32_, } pub type NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG = [u8_; 17usize]; @@ -371,6 +375,7 @@ pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO { #[derive(Debug, Default, Copy, Clone)] pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS { pub numFBRegions: u32_, + pub __bindgen_padding_0: [u8; 4usize], pub fbRegion: [NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO; 16usize], } #[repr(C)] @@ -495,13 +500,16 @@ pub struct FW_WPR_LAYOUT_OFFSET { #[derive(Debug, Copy, Clone)] pub struct GspStaticConfigInfo_t { pub grCapsBits: [u8_; 23usize], + pub __bindgen_padding_0: u8, pub gidInfo: NV2080_CTRL_GPU_GET_GID_INFO_PARAMS, pub SKUInfo: NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS, + pub __bindgen_padding_1: [u8; 4usize], pub fbRegionInfoParams: NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS, pub sriovCaps: NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS, pub sriovMaxGfid: u32_, pub engineCaps: [u32_; 3usize], pub poisonFuseEnabled: u8_, + pub __bindgen_padding_2: [u8; 7usize], pub fb_length: u64_, pub fbio_mask: u64_, pub fb_bus_width: u32_, @@ -527,16 +535,20 @@ pub struct GspStaticConfigInfo_t { pub bIsMigSupported: u8_, pub RTD3GC6TotalBoardPower: u16_, pub RTD3GC6PerstDelay: u16_, + pub __bindgen_padding_3: [u8; 2usize], pub bar1PdeBase: u64_, pub bar2PdeBase: u64_, pub bVbiosValid: u8_, + pub __bindgen_padding_4: [u8; 3usize], pub vbiosSubVendor: u32_, pub vbiosSubDevice: u32_, pub bPageRetirementSupported: u8_, pub bSplitVasBetweenServerClientRm: u8_, pub bClRootportNeedsNosnoopWAR: u8_, + pub __bindgen_padding_5: u8, pub displaylessMaxHeads: VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS, pub displaylessMaxResolution: VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS, + pub __bindgen_padding_6: [u8; 4usize], pub displaylessMaxPixels: u64_, pub hInternalClient: u32_, pub hInternalDevice: u32_, From 9d250ab0cf80deb11148efe862d1184ebf5115f6 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 16 Dec 2025 11:57:08 +0900 Subject: [PATCH 04/32] gpu: nova-core: gsp: fix length of received messages The size of messages' payload is miscalculated, leading to extra data passed to the message handler. While this is not a problem with our current set of commands, others with a variable-length payload may misbehave. Fix this by introducing a method returning the payload size and using it. Fixes: 75f6b1de8133 ("gpu: nova-core: gsp: Add GSP command queue bindings and handling") Reviewed-by: Lyude Paul Reviewed-by: Joel Fernandes Reviewed-by: Alistair Popple Acked-by: Danilo Krummrich Link: https://patch.msgid.link/20251216-nova-fixes-v3-2-c7469a71f7c4@nvidia.com [acourbot@nvidia.com: update `PANIC:` comments as pointed out by Joel.] Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/gsp/cmdq.rs | 14 ++++++++------ drivers/gpu/nova-core/gsp/fw.rs | 13 +++++++++---- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index 6f946d14868a..3991ccc0c10f 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -588,21 +588,23 @@ impl Cmdq { header.length(), ); + let payload_length = header.payload_length(); + // Check that the driver read area is large enough for the message. - if slice_1.len() + slice_2.len() < header.length() { + if slice_1.len() + slice_2.len() < payload_length { return Err(EIO); } // Cut the message slices down to the actual length of the message. - let (slice_1, slice_2) = if slice_1.len() > header.length() { - // PANIC: we checked above that `slice_1` is at least as long as `msg_header.length()`. - (slice_1.split_at(header.length()).0, &slice_2[0..0]) + let (slice_1, slice_2) = if slice_1.len() > payload_length { + // PANIC: we checked above that `slice_1` is at least as long as `payload_length`. + (slice_1.split_at(payload_length).0, &slice_2[0..0]) } else { ( slice_1, // PANIC: we checked above that `slice_1.len() + slice_2.len()` is at least as - // large as `msg_header.length()`. - slice_2.split_at(header.length() - slice_1.len()).0, + // large as `payload_length`. + slice_2.split_at(payload_length - slice_1.len()).0, ) }; diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index abffd6beec65..7b8e710b33e7 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -853,11 +853,16 @@ impl GspMsgElement { self.inner.checkSum = checksum; } - /// Returns the total length of the message. + /// Returns the length of the message's payload. + pub(crate) fn payload_length(&self) -> usize { + // `rpc.length` includes the length of the RPC message header. + num::u32_as_usize(self.inner.rpc.length) + .saturating_sub(size_of::()) + } + + /// Returns the total length of the message, message and RPC headers included. pub(crate) fn length(&self) -> usize { - // `rpc.length` includes the length of the GspRpcHeader but not the message header. - size_of::() - size_of::() - + num::u32_as_usize(self.inner.rpc.length) + size_of::() + self.payload_length() } // Returns the sequence number of the message. From 523317152c4b1b3187d14c65ceacdfc0da451931 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 16 Dec 2025 11:57:09 +0900 Subject: [PATCH 05/32] gpu: nova-core: bindings: derive `MaybeZeroable` Commit 4846300ba8f9 ("rust: derive `Zeroable` for all structs & unions generated by bindgen where possible") automatically derives `MaybeZeroable` for all bindings. This is better than selectively deriving `Zeroable` as it ensures all types that can implement `Zeroable` do. Regenerate the nova-core bindings so they benefit from this, and remove a now unneeded implementation of `Zeroable`. Fixes: 75f6b1de8133 ("gpu: nova-core: gsp: Add GSP command queue bindings and handling") Reviewed-by: Lyude Paul Reviewed-by: Joel Fernandes Acked-by: Danilo Krummrich Link: https://patch.msgid.link/20251216-nova-fixes-v3-3-c7469a71f7c4@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/gsp/fw.rs | 7 -- drivers/gpu/nova-core/gsp/fw/r570_144.rs | 11 ++- .../gpu/nova-core/gsp/fw/r570_144/bindings.rs | 93 ++++++++++--------- 3 files changed, 54 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 7b8e710b33e7..b754631d2d8d 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -797,13 +797,6 @@ impl bindings::rpc_message_header_v { } } -// SAFETY: We can't derive the Zeroable trait for this binding because the -// procedural macro doesn't support the syntax used by bindgen to create the -// __IncompleteArrayField types. So instead we implement it here, which is safe -// because these are explicitly padded structures only containing types for -// which any bit pattern, including all zeros, is valid. -unsafe impl Zeroable for bindings::rpc_message_header_v {} - /// GSP Message Element. /// /// This is essentially a message header expected to be followed by the message data. diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs index 048234d1a9d1..e99d315ae74c 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -24,8 +24,11 @@ unreachable_pub, unsafe_op_in_unsafe_fn )] -use kernel::{ - ffi, - prelude::Zeroable, // -}; +use kernel::ffi; +use pin_init::MaybeZeroable; + include!("r570_144/bindings.rs"); + +// SAFETY: This type has a size of zero, so its inclusion into another type should not affect their +// ability to implement `Zeroable`. +unsafe impl kernel::prelude::Zeroable for __IncompleteArrayField {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 5f0569dcc4a0..6d25fe0bffa9 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -320,7 +320,7 @@ pub const NV_VGPU_MSG_EVENT_RECOVERY_ACTION: _bindgen_ty_3 = 4130; pub const NV_VGPU_MSG_EVENT_NUM_EVENTS: _bindgen_ty_3 = 4131; pub type _bindgen_ty_3 = ffi::c_uint; #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS { pub totalVFs: u32_, pub firstVfOffset: u32_, @@ -344,7 +344,7 @@ pub struct NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS { pub __bindgen_padding_1: [u8; 7usize], } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS { pub BoardID: u32_, pub chipSKU: [ffi::c_char; 9usize], @@ -360,7 +360,7 @@ pub struct NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS { } pub type NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG = [u8_; 17usize]; #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO { pub base: u64_, pub limit: u64_, @@ -372,14 +372,14 @@ pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO { pub blackList: NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS { pub numFBRegions: u32_, pub __bindgen_padding_0: [u8; 4usize], pub fbRegion: [NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO; 16usize], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, MaybeZeroable)] pub struct NV2080_CTRL_GPU_GET_GID_INFO_PARAMS { pub index: u32_, pub flags: u32_, @@ -396,14 +396,14 @@ impl Default for NV2080_CTRL_GPU_GET_GID_INFO_PARAMS { } } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct DOD_METHOD_DATA { pub status: u32_, pub acpiIdListLen: u32_, pub acpiIdList: [u32_; 16usize], } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct JT_METHOD_DATA { pub status: u32_, pub jtCaps: u32_, @@ -412,14 +412,14 @@ pub struct JT_METHOD_DATA { pub __bindgen_padding_0: u8, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct MUX_METHOD_DATA_ELEMENT { pub acpiId: u32_, pub mode: u32_, pub status: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct MUX_METHOD_DATA { pub tableLen: u32_, pub acpiIdMuxModeTable: [MUX_METHOD_DATA_ELEMENT; 16usize], @@ -427,13 +427,13 @@ pub struct MUX_METHOD_DATA { pub acpiIdMuxStateTable: [MUX_METHOD_DATA_ELEMENT; 16usize], } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct CAPS_METHOD_DATA { pub status: u32_, pub optimusCaps: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct ACPI_METHOD_DATA { pub bValid: u8_, pub __bindgen_padding_0: [u8; 3usize], @@ -443,20 +443,20 @@ pub struct ACPI_METHOD_DATA { pub capsMethodData: CAPS_METHOD_DATA, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS { pub headIndex: u32_, pub maxHResolution: u32_, pub maxVResolution: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS { pub numHeads: u32_, pub maxNumHeads: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct BUSINFO { pub deviceID: u16_, pub vendorID: u16_, @@ -466,7 +466,7 @@ pub struct BUSINFO { pub __bindgen_padding_0: u8, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_VF_INFO { pub totalVFs: u32_, pub firstVFOffset: u32_, @@ -479,25 +479,25 @@ pub struct GSP_VF_INFO { pub __bindgen_padding_0: [u8; 5usize], } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_PCIE_CONFIG_REG { pub linkCap: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct EcidManufacturingInfo { pub ecidLow: u32_, pub ecidHigh: u32_, pub ecidExtended: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct FW_WPR_LAYOUT_OFFSET { pub nonWprHeapOffset: u64_, pub frtsOffset: u64_, } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, MaybeZeroable)] pub struct GspStaticConfigInfo_t { pub grCapsBits: [u8_; 23usize], pub __bindgen_padding_0: u8, @@ -570,7 +570,7 @@ impl Default for GspStaticConfigInfo_t { } } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GspSystemInfo { pub gpuPhysAddr: u64_, pub gpuPhysFbAddr: u64_, @@ -627,7 +627,7 @@ pub struct GspSystemInfo { pub hostPageSize: u64_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct MESSAGE_QUEUE_INIT_ARGUMENTS { pub sharedMemPhysAddr: u64_, pub pageTableEntryCount: u32_, @@ -636,7 +636,7 @@ pub struct MESSAGE_QUEUE_INIT_ARGUMENTS { pub statQueueOffset: u64_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_SR_INIT_ARGUMENTS { pub oldLevel: u32_, pub flags: u32_, @@ -644,7 +644,7 @@ pub struct GSP_SR_INIT_ARGUMENTS { pub __bindgen_padding_0: [u8; 3usize], } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_ARGUMENTS_CACHED { pub messageQueueInitArguments: MESSAGE_QUEUE_INIT_ARGUMENTS, pub srInitArguments: GSP_SR_INIT_ARGUMENTS, @@ -654,13 +654,13 @@ pub struct GSP_ARGUMENTS_CACHED { pub profilerArgs: GSP_ARGUMENTS_CACHED__bindgen_ty_1, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_ARGUMENTS_CACHED__bindgen_ty_1 { pub pa: u64_, pub size: u64_, } #[repr(C)] -#[derive(Copy, Clone, Zeroable)] +#[derive(Copy, Clone, MaybeZeroable)] pub union rpc_message_rpc_union_field_v03_00 { pub spare: u32_, pub cpuRmGfid: u32_, @@ -676,6 +676,7 @@ impl Default for rpc_message_rpc_union_field_v03_00 { } pub type rpc_message_rpc_union_field_v = rpc_message_rpc_union_field_v03_00; #[repr(C)] +#[derive(MaybeZeroable)] pub struct rpc_message_header_v03_00 { pub header_version: u32_, pub signature: u32_, @@ -698,7 +699,7 @@ impl Default for rpc_message_header_v03_00 { } pub type rpc_message_header_v = rpc_message_header_v03_00; #[repr(C)] -#[derive(Copy, Clone, Zeroable)] +#[derive(Copy, Clone, MaybeZeroable)] pub struct GspFwWprMeta { pub magic: u64_, pub revision: u64_, @@ -733,19 +734,19 @@ pub struct GspFwWprMeta { pub verified: u64_, } #[repr(C)] -#[derive(Copy, Clone, Zeroable)] +#[derive(Copy, Clone, MaybeZeroable)] pub union GspFwWprMeta__bindgen_ty_1 { pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1__bindgen_ty_1, pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_1__bindgen_ty_2, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_1 { pub sysmemAddrOfSignature: u64_, pub sizeOfSignature: u64_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_2 { pub gspFwHeapFreeListWprOffset: u32_, pub unused0: u32_, @@ -761,13 +762,13 @@ impl Default for GspFwWprMeta__bindgen_ty_1 { } } #[repr(C)] -#[derive(Copy, Clone, Zeroable)] +#[derive(Copy, Clone, MaybeZeroable)] pub union GspFwWprMeta__bindgen_ty_2 { pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_2__bindgen_ty_1, pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2__bindgen_ty_2, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_1 { pub partitionRpcAddr: u64_, pub partitionRpcRequestOffset: u16_, @@ -779,7 +780,7 @@ pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_1 { pub lsUcodeVersion: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_2 { pub partitionRpcPadding: [u32_; 4usize], pub sysmemAddrOfCrashReportQueue: u64_, @@ -814,7 +815,7 @@ pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM: LibosMemoryRegion pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_FB: LibosMemoryRegionLoc = 2; pub type LibosMemoryRegionLoc = ffi::c_uint; #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct LibosMemoryRegionInitArgument { pub id8: LibosAddress, pub pa: LibosAddress, @@ -824,7 +825,7 @@ pub struct LibosMemoryRegionInitArgument { pub __bindgen_padding_0: [u8; 6usize], } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct PACKED_REGISTRY_ENTRY { pub nameOffset: u32_, pub type_: u8_, @@ -833,14 +834,14 @@ pub struct PACKED_REGISTRY_ENTRY { pub length: u32_, } #[repr(C)] -#[derive(Debug, Default)] +#[derive(Debug, Default, MaybeZeroable)] pub struct PACKED_REGISTRY_TABLE { pub size: u32_, pub numEntries: u32_, pub entries: __IncompleteArrayField, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct msgqTxHeader { pub version: u32_, pub size: u32_, @@ -852,13 +853,13 @@ pub struct msgqTxHeader { pub entryOff: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone, Zeroable)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct msgqRxHeader { pub readPtr: u32_, } #[repr(C)] #[repr(align(8))] -#[derive(Zeroable)] +#[derive(MaybeZeroable)] pub struct GSP_MSG_QUEUE_ELEMENT { pub authTagBuffer: [u8_; 16usize], pub aadBuffer: [u8_; 16usize], @@ -878,7 +879,7 @@ impl Default for GSP_MSG_QUEUE_ELEMENT { } } #[repr(C)] -#[derive(Debug, Default)] +#[derive(Debug, Default, MaybeZeroable)] pub struct rpc_run_cpu_sequencer_v17_00 { pub bufferSizeDWord: u32_, pub cmdIndex: u32_, @@ -896,20 +897,20 @@ pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT: GSP_SEQ_BUF_ pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME: GSP_SEQ_BUF_OPCODE = 8; pub type GSP_SEQ_BUF_OPCODE = ffi::c_uint; #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_SEQ_BUF_PAYLOAD_REG_WRITE { pub addr: u32_, pub val: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_SEQ_BUF_PAYLOAD_REG_MODIFY { pub addr: u32_, pub mask: u32_, pub val: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_SEQ_BUF_PAYLOAD_REG_POLL { pub addr: u32_, pub mask: u32_, @@ -918,24 +919,24 @@ pub struct GSP_SEQ_BUF_PAYLOAD_REG_POLL { pub error: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_SEQ_BUF_PAYLOAD_DELAY_US { pub val: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] pub struct GSP_SEQ_BUF_PAYLOAD_REG_STORE { pub addr: u32_, pub index: u32_, } #[repr(C)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, MaybeZeroable)] pub struct GSP_SEQUENCER_BUFFER_CMD { pub opCode: GSP_SEQ_BUF_OPCODE, pub payload: GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1, } #[repr(C)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, MaybeZeroable)] pub union GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 { pub regWrite: GSP_SEQ_BUF_PAYLOAD_REG_WRITE, pub regModify: GSP_SEQ_BUF_PAYLOAD_REG_MODIFY, From b58c87b0fde0ac06d550659a7e3ddbcd9183b7b9 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 16 Dec 2025 11:57:10 +0900 Subject: [PATCH 06/32] gpu: nova-core: gsp: replace firmware version with "bindings" alias We have an "bindings" alias to avoid having to mention the firmware version again and again, and limit the diff when upgrading the firmware. Use it where we neglected to. Fixes: eaf0989c77e4 ("gpu: nova-core: Add bindings required by GSP sequencer") Reviewed-by: Lyude Paul Reviewed-by: Joel Fernandes Acked-by: Danilo Krummrich Link: https://patch.msgid.link/20251216-nova-fixes-v3-4-c7469a71f7c4@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/gsp/fw.rs | 58 ++++++++++++++++----------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index b754631d2d8d..caeb0d251fe5 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -141,8 +141,8 @@ unsafe impl AsBytes for GspFwWprMeta {} // are valid. unsafe impl FromBytes for GspFwWprMeta {} -type GspFwWprMetaBootResumeInfo = r570_144::GspFwWprMeta__bindgen_ty_1; -type GspFwWprMetaBootInfo = r570_144::GspFwWprMeta__bindgen_ty_1__bindgen_ty_1; +type GspFwWprMetaBootResumeInfo = bindings::GspFwWprMeta__bindgen_ty_1; +type GspFwWprMetaBootInfo = bindings::GspFwWprMeta__bindgen_ty_1__bindgen_ty_1; impl GspFwWprMeta { /// Fill in and return a `GspFwWprMeta` suitable for booting `gsp_firmware` using the @@ -150,8 +150,8 @@ impl GspFwWprMeta { pub(crate) fn new(gsp_firmware: &GspFirmware, fb_layout: &FbLayout) -> Self { Self(bindings::GspFwWprMeta { // CAST: we want to store the bits of `GSP_FW_WPR_META_MAGIC` unmodified. - magic: r570_144::GSP_FW_WPR_META_MAGIC as u64, - revision: u64::from(r570_144::GSP_FW_WPR_META_REVISION), + magic: bindings::GSP_FW_WPR_META_MAGIC as u64, + revision: u64::from(bindings::GSP_FW_WPR_META_REVISION), sysmemAddrOfRadix3Elf: gsp_firmware.radix3_dma_handle(), sizeOfRadix3Elf: u64::from_safe_cast(gsp_firmware.size), sysmemAddrOfBootloader: gsp_firmware.bootloader.ucode.dma_handle(), @@ -315,19 +315,19 @@ impl From for u32 { #[repr(u32)] pub(crate) enum SeqBufOpcode { // Core operation opcodes - CoreReset = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET, - CoreResume = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME, - CoreStart = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START, - CoreWaitForHalt = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT, + CoreReset = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET, + CoreResume = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME, + CoreStart = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START, + CoreWaitForHalt = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT, // Delay opcode - DelayUs = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US, + DelayUs = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US, // Register operation opcodes - RegModify = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY, - RegPoll = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL, - RegStore = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE, - RegWrite = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE, + RegModify = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY, + RegPoll = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL, + RegStore = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE, + RegWrite = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE, } impl fmt::Display for SeqBufOpcode { @@ -351,25 +351,25 @@ impl TryFrom for SeqBufOpcode { fn try_from(value: u32) -> Result { match value { - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET => { + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET => { Ok(SeqBufOpcode::CoreReset) } - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME => { + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME => { Ok(SeqBufOpcode::CoreResume) } - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START => { + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START => { Ok(SeqBufOpcode::CoreStart) } - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT => { + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT => { Ok(SeqBufOpcode::CoreWaitForHalt) } - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US => Ok(SeqBufOpcode::DelayUs), - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY => { + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US => Ok(SeqBufOpcode::DelayUs), + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY => { Ok(SeqBufOpcode::RegModify) } - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL => Ok(SeqBufOpcode::RegPoll), - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE => Ok(SeqBufOpcode::RegStore), - r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE => Ok(SeqBufOpcode::RegWrite), + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL => Ok(SeqBufOpcode::RegPoll), + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE => Ok(SeqBufOpcode::RegStore), + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE => Ok(SeqBufOpcode::RegWrite), _ => Err(EINVAL), } } @@ -385,7 +385,7 @@ impl From for u32 { /// Wrapper for GSP sequencer register write payload. #[repr(transparent)] #[derive(Copy, Clone)] -pub(crate) struct RegWritePayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_WRITE); +pub(crate) struct RegWritePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_WRITE); impl RegWritePayload { /// Returns the register address. @@ -408,7 +408,7 @@ unsafe impl AsBytes for RegWritePayload {} /// Wrapper for GSP sequencer register modify payload. #[repr(transparent)] #[derive(Copy, Clone)] -pub(crate) struct RegModifyPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY); +pub(crate) struct RegModifyPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY); impl RegModifyPayload { /// Returns the register address. @@ -436,7 +436,7 @@ unsafe impl AsBytes for RegModifyPayload {} /// Wrapper for GSP sequencer register poll payload. #[repr(transparent)] #[derive(Copy, Clone)] -pub(crate) struct RegPollPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_POLL); +pub(crate) struct RegPollPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_POLL); impl RegPollPayload { /// Returns the register address. @@ -469,7 +469,7 @@ unsafe impl AsBytes for RegPollPayload {} /// Wrapper for GSP sequencer delay payload. #[repr(transparent)] #[derive(Copy, Clone)] -pub(crate) struct DelayUsPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_DELAY_US); +pub(crate) struct DelayUsPayload(bindings::GSP_SEQ_BUF_PAYLOAD_DELAY_US); impl DelayUsPayload { /// Returns the delay value in microseconds. @@ -487,7 +487,7 @@ unsafe impl AsBytes for DelayUsPayload {} /// Wrapper for GSP sequencer register store payload. #[repr(transparent)] #[derive(Copy, Clone)] -pub(crate) struct RegStorePayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_STORE); +pub(crate) struct RegStorePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_STORE); impl RegStorePayload { /// Returns the register address. @@ -510,7 +510,7 @@ unsafe impl AsBytes for RegStorePayload {} /// Wrapper for GSP sequencer buffer command. #[repr(transparent)] -pub(crate) struct SequencerBufferCmd(r570_144::GSP_SEQUENCER_BUFFER_CMD); +pub(crate) struct SequencerBufferCmd(bindings::GSP_SEQUENCER_BUFFER_CMD); impl SequencerBufferCmd { /// Returns the opcode as a `SeqBufOpcode` enum, or error if invalid. @@ -612,7 +612,7 @@ unsafe impl AsBytes for SequencerBufferCmd {} /// Wrapper for GSP run CPU sequencer RPC. #[repr(transparent)] -pub(crate) struct RunCpuSequencer(r570_144::rpc_run_cpu_sequencer_v17_00); +pub(crate) struct RunCpuSequencer(bindings::rpc_run_cpu_sequencer_v17_00); impl RunCpuSequencer { /// Returns the command index. From de0bdcaf36494c9d91653cb062766846f7c51041 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 23 Dec 2025 12:59:47 +0100 Subject: [PATCH 07/32] MAINTAINERS: fix typo in TYR DRM driver entry Fix a missing ':' in the ARM MALI TYR DRM DRIVER entry, which does prevent script/get_maintainer.pl to properly work and pick up the corresponding maintainers. Fixes: cf4fd52e3236 ("rust: drm: Introduce the Tyr driver for Arm Mali GPUs") Reported-by: Tamir Duberstein Closes: https://lore.kernel.org/lkml/CAJ-ks9mrZtnPUjp5tD03hW+TyS0M9i-KRF_ramNY-oh-0X+ayA@mail.gmail.com/ Signed-off-by: Danilo Krummrich Reviewed-by: Tamir Duberstein Link: https://patch.msgid.link/20251223115949.32531-1-dakr@kernel.org Signed-off-by: Alice Ryhl --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 852281f1cc39..bf060416c3bc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2158,7 +2158,7 @@ M: Alice Ryhl L: dri-devel@lists.freedesktop.org S: Supported W: https://rust-for-linux.com/tyr-gpu-driver -W https://drm.pages.freedesktop.org/maintainer-tools/drm-rust.html +W: https://drm.pages.freedesktop.org/maintainer-tools/drm-rust.html B: https://gitlab.freedesktop.org/panfrost/linux/-/issues T: git https://gitlab.freedesktop.org/drm/rust/kernel.git F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml From 97872fa28b33a1ca6acc0a7b260750c9a123b193 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 23 Dec 2025 13:04:34 +0100 Subject: [PATCH 08/32] MAINTAINERS: exclude the tyr driver from DRM MISC The ARM MALI TYR DRM DRIVER is already maintained through the drm-rust tree, hence exclude it from drm-misc. Fixes: cf4fd52e3236 ("rust: drm: Introduce the Tyr driver for Arm Mali GPUs") Signed-off-by: Danilo Krummrich Link: https://patch.msgid.link/20251223120436.33233-1-dakr@kernel.org Signed-off-by: Alice Ryhl --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index bf060416c3bc..684c2047e46b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8357,6 +8357,7 @@ X: drivers/gpu/drm/msm/ X: drivers/gpu/drm/nova/ X: drivers/gpu/drm/radeon/ X: drivers/gpu/drm/tegra/ +X: drivers/gpu/drm/tyr/ X: drivers/gpu/drm/xe/ DRM DRIVERS AND COMMON INFRASTRUCTURE [RUST] From 0ddd3bb4b14c9102c0267b3fd916c81fe5ab89c1 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 11 Dec 2025 16:33:44 +0400 Subject: [PATCH 09/32] drm/pl111: Fix error handling in pl111_amba_probe Jump to the existing dev_put label when devm_request_irq() fails so drm_dev_put() and of_reserved_mem_device_release() run instead of returning early and leaking resources. Found via static analysis and code review. Fixes: bed41005e617 ("drm/pl111: Initial drm/kms driver for pl111") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Reviewed-by: Javier Martinez Canillas Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20251211123345.2392065-1-linmq006@gmail.com --- drivers/gpu/drm/pl111/pl111_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c index 56ff6a3fb483..d7dc83cf7b00 100644 --- a/drivers/gpu/drm/pl111/pl111_drv.c +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -295,7 +295,7 @@ static int pl111_amba_probe(struct amba_device *amba_dev, variant->name, priv); if (ret != 0) { dev_err(dev, "%s failed irq %d\n", __func__, ret); - return ret; + goto dev_put; } ret = pl111_modeset_init(drm); From c1ef9a6cabb34dbc09e31417b0c0a672fe0de13a Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 5 Dec 2025 11:51:48 +0200 Subject: [PATCH 10/32] Revert "drm/atomic-helper: Re-order bridge chain pre-enable and post-disable" This reverts commit c9b1150a68d9362a0827609fc0dc1664c0d8bfe1. Changing the enable/disable sequence has caused regressions on multiple platforms: R-Car, MCDE, Rockchip. A series (see link below) was sent to fix these, but it was decided that it's better to revert the original patch and change the enable/disable sequence only in the tidss driver. Reverting this commit breaks tidss's DSI and OLDI outputs, which will be fixed in the following commits. Signed-off-by: Tomi Valkeinen Link: https://lore.kernel.org/all/20251202-mcde-drm-regression-thirdfix-v6-0-f1bffd4ec0fa%40kernel.org/ Fixes: c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") Cc: stable@vger.kernel.org # v6.17+ Reviewed-by: Aradhya Bhatia Reviewed-by: Maxime Ripard Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20251205-drm-seq-fix-v1-1-fda68fa1b3de@ideasonboard.com --- drivers/gpu/drm/drm_atomic_helper.c | 8 +- include/drm/drm_bridge.h | 249 ++++++++-------------------- 2 files changed, 70 insertions(+), 187 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 10adac9397cf..ef97f37560b2 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1341,9 +1341,9 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *state) { encoder_bridge_disable(dev, state); - crtc_disable(dev, state); - encoder_bridge_post_disable(dev, state); + + crtc_disable(dev, state); } /** @@ -1682,10 +1682,10 @@ encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, struct drm_atomic_state *state) { - encoder_bridge_pre_enable(dev, state); - crtc_enable(dev, state); + encoder_bridge_pre_enable(dev, state); + encoder_bridge_enable(dev, state); drm_atomic_helper_commit_writebacks(dev, state); diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 0ff7ab4aa868..dbafe136833f 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -176,33 +176,17 @@ struct drm_bridge_funcs { /** * @disable: * - * The @disable callback should disable the bridge. + * This callback should disable the bridge. It is called right before + * the preceding element in the display pipe is disabled. If the + * preceding element is a bridge this means it's called before that + * bridge's @disable vfunc. If the preceding element is a &drm_encoder + * it's called right before the &drm_encoder_helper_funcs.disable, + * &drm_encoder_helper_funcs.prepare or &drm_encoder_helper_funcs.dpms + * hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is still running when this callback is called. * - * - * If the preceding element is a &drm_bridge, then this is called before - * that bridge is disabled via one of: - * - * - &drm_bridge_funcs.disable - * - &drm_bridge_funcs.atomic_disable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called before the encoder is disabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_disable - * - &drm_encoder_helper_funcs.prepare - * - &drm_encoder_helper_funcs.disable - * - &drm_encoder_helper_funcs.dpms - * - * and the CRTC is disabled via one of: - * - * - &drm_crtc_helper_funcs.prepare - * - &drm_crtc_helper_funcs.atomic_disable - * - &drm_crtc_helper_funcs.disable - * - &drm_crtc_helper_funcs.dpms. - * * The @disable callback is optional. * * NOTE: @@ -215,34 +199,17 @@ struct drm_bridge_funcs { /** * @post_disable: * + * This callback should disable the bridge. It is called right after the + * preceding element in the display pipe is disabled. If the preceding + * element is a bridge this means it's called after that bridge's + * @post_disable function. If the preceding element is a &drm_encoder + * it's called right after the encoder's + * &drm_encoder_helper_funcs.disable, &drm_encoder_helper_funcs.prepare + * or &drm_encoder_helper_funcs.dpms hook. + * * The bridge must assume that the display pipe (i.e. clocks and timing - * signals) feeding this bridge is no longer running when the - * @post_disable is called. - * - * This callback should perform all the actions required by the hardware - * after it has stopped receiving signals from the preceding element. - * - * If the preceding element is a &drm_bridge, then this is called after - * that bridge is post-disabled (unless marked otherwise by the - * @pre_enable_prev_first flag) via one of: - * - * - &drm_bridge_funcs.post_disable - * - &drm_bridge_funcs.atomic_post_disable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called after the encoder is disabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_disable - * - &drm_encoder_helper_funcs.prepare - * - &drm_encoder_helper_funcs.disable - * - &drm_encoder_helper_funcs.dpms - * - * and the CRTC is disabled via one of: - * - * - &drm_crtc_helper_funcs.prepare - * - &drm_crtc_helper_funcs.atomic_disable - * - &drm_crtc_helper_funcs.disable - * - &drm_crtc_helper_funcs.dpms + * signals) feeding it is no longer running when this callback is + * called. * * The @post_disable callback is optional. * @@ -285,30 +252,18 @@ struct drm_bridge_funcs { /** * @pre_enable: * + * This callback should enable the bridge. It is called right before + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called before that + * bridge's @pre_enable function. If the preceding element is a + * &drm_encoder it's called right before the encoder's + * &drm_encoder_helper_funcs.enable, &drm_encoder_helper_funcs.commit or + * &drm_encoder_helper_funcs.dpms hook. + * * The display pipe (i.e. clocks and timing signals) feeding this bridge - * will not yet be running when the @pre_enable is called. - * - * This callback should perform all the necessary actions to prepare the - * bridge to accept signals from the preceding element. - * - * If the preceding element is a &drm_bridge, then this is called before - * that bridge is pre-enabled (unless marked otherwise by - * @pre_enable_prev_first flag) via one of: - * - * - &drm_bridge_funcs.pre_enable - * - &drm_bridge_funcs.atomic_pre_enable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called before the CRTC is enabled via one of: - * - * - &drm_crtc_helper_funcs.atomic_enable - * - &drm_crtc_helper_funcs.commit - * - * and the encoder is enabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_enable - * - &drm_encoder_helper_funcs.enable - * - &drm_encoder_helper_funcs.commit + * will not yet be running when this callback is called. The bridge must + * not enable the display link feeding the next bridge in the chain (if + * there is one) when this callback is called. * * The @pre_enable callback is optional. * @@ -322,31 +277,19 @@ struct drm_bridge_funcs { /** * @enable: * - * The @enable callback should enable the bridge. + * This callback should enable the bridge. It is called right after + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called after that + * bridge's @enable function. If the preceding element is a + * &drm_encoder it's called right after the encoder's + * &drm_encoder_helper_funcs.enable, &drm_encoder_helper_funcs.commit or + * &drm_encoder_helper_funcs.dpms hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is running when this callback is called. This * callback must enable the display link feeding the next bridge in the * chain if there is one. * - * If the preceding element is a &drm_bridge, then this is called after - * that bridge is enabled via one of: - * - * - &drm_bridge_funcs.enable - * - &drm_bridge_funcs.atomic_enable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called after the CRTC is enabled via one of: - * - * - &drm_crtc_helper_funcs.atomic_enable - * - &drm_crtc_helper_funcs.commit - * - * and the encoder is enabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_enable - * - &drm_encoder_helper_funcs.enable - * - drm_encoder_helper_funcs.commit - * * The @enable callback is optional. * * NOTE: @@ -359,30 +302,17 @@ struct drm_bridge_funcs { /** * @atomic_pre_enable: * + * This callback should enable the bridge. It is called right before + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called before that + * bridge's @atomic_pre_enable or @pre_enable function. If the preceding + * element is a &drm_encoder it's called right before the encoder's + * &drm_encoder_helper_funcs.atomic_enable hook. + * * The display pipe (i.e. clocks and timing signals) feeding this bridge - * will not yet be running when the @atomic_pre_enable is called. - * - * This callback should perform all the necessary actions to prepare the - * bridge to accept signals from the preceding element. - * - * If the preceding element is a &drm_bridge, then this is called before - * that bridge is pre-enabled (unless marked otherwise by - * @pre_enable_prev_first flag) via one of: - * - * - &drm_bridge_funcs.pre_enable - * - &drm_bridge_funcs.atomic_pre_enable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called before the CRTC is enabled via one of: - * - * - &drm_crtc_helper_funcs.atomic_enable - * - &drm_crtc_helper_funcs.commit - * - * and the encoder is enabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_enable - * - &drm_encoder_helper_funcs.enable - * - &drm_encoder_helper_funcs.commit + * will not yet be running when this callback is called. The bridge must + * not enable the display link feeding the next bridge in the chain (if + * there is one) when this callback is called. * * The @atomic_pre_enable callback is optional. */ @@ -392,31 +322,18 @@ struct drm_bridge_funcs { /** * @atomic_enable: * - * The @atomic_enable callback should enable the bridge. + * This callback should enable the bridge. It is called right after + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called after that + * bridge's @atomic_enable or @enable function. If the preceding element + * is a &drm_encoder it's called right after the encoder's + * &drm_encoder_helper_funcs.atomic_enable hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is running when this callback is called. This * callback must enable the display link feeding the next bridge in the * chain if there is one. * - * If the preceding element is a &drm_bridge, then this is called after - * that bridge is enabled via one of: - * - * - &drm_bridge_funcs.enable - * - &drm_bridge_funcs.atomic_enable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called after the CRTC is enabled via one of: - * - * - &drm_crtc_helper_funcs.atomic_enable - * - &drm_crtc_helper_funcs.commit - * - * and the encoder is enabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_enable - * - &drm_encoder_helper_funcs.enable - * - drm_encoder_helper_funcs.commit - * * The @atomic_enable callback is optional. */ void (*atomic_enable)(struct drm_bridge *bridge, @@ -424,32 +341,16 @@ struct drm_bridge_funcs { /** * @atomic_disable: * - * The @atomic_disable callback should disable the bridge. + * This callback should disable the bridge. It is called right before + * the preceding element in the display pipe is disabled. If the + * preceding element is a bridge this means it's called before that + * bridge's @atomic_disable or @disable vfunc. If the preceding element + * is a &drm_encoder it's called right before the + * &drm_encoder_helper_funcs.atomic_disable hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is still running when this callback is called. * - * If the preceding element is a &drm_bridge, then this is called before - * that bridge is disabled via one of: - * - * - &drm_bridge_funcs.disable - * - &drm_bridge_funcs.atomic_disable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called before the encoder is disabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_disable - * - &drm_encoder_helper_funcs.prepare - * - &drm_encoder_helper_funcs.disable - * - &drm_encoder_helper_funcs.dpms - * - * and the CRTC is disabled via one of: - * - * - &drm_crtc_helper_funcs.prepare - * - &drm_crtc_helper_funcs.atomic_disable - * - &drm_crtc_helper_funcs.disable - * - &drm_crtc_helper_funcs.dpms. - * * The @atomic_disable callback is optional. */ void (*atomic_disable)(struct drm_bridge *bridge, @@ -458,34 +359,16 @@ struct drm_bridge_funcs { /** * @atomic_post_disable: * + * This callback should disable the bridge. It is called right after the + * preceding element in the display pipe is disabled. If the preceding + * element is a bridge this means it's called after that bridge's + * @atomic_post_disable or @post_disable function. If the preceding + * element is a &drm_encoder it's called right after the encoder's + * &drm_encoder_helper_funcs.atomic_disable hook. + * * The bridge must assume that the display pipe (i.e. clocks and timing - * signals) feeding this bridge is no longer running when the - * @atomic_post_disable is called. - * - * This callback should perform all the actions required by the hardware - * after it has stopped receiving signals from the preceding element. - * - * If the preceding element is a &drm_bridge, then this is called after - * that bridge is post-disabled (unless marked otherwise by the - * @pre_enable_prev_first flag) via one of: - * - * - &drm_bridge_funcs.post_disable - * - &drm_bridge_funcs.atomic_post_disable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called after the encoder is disabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_disable - * - &drm_encoder_helper_funcs.prepare - * - &drm_encoder_helper_funcs.disable - * - &drm_encoder_helper_funcs.dpms - * - * and the CRTC is disabled via one of: - * - * - &drm_crtc_helper_funcs.prepare - * - &drm_crtc_helper_funcs.atomic_disable - * - &drm_crtc_helper_funcs.disable - * - &drm_crtc_helper_funcs.dpms + * signals) feeding it is no longer running when this callback is + * called. * * The @atomic_post_disable callback is optional. */ From 33e8150bd32d7dc25c977bb455f1f5d54bfd5241 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 5 Dec 2025 11:51:49 +0200 Subject: [PATCH 11/32] Revert "drm/mediatek: dsi: Fix DSI host and panel bridge pre-enable order" This reverts commit f5b1819193667bf62c3c99d3921b9429997a14b2. As the original commit (c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable")) causing the issue has been reverted, let's revert the fix for mediatek. Signed-off-by: Tomi Valkeinen Cc: stable@vger.kernel.org # v6.17+ Fixes: c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") Reviewed-by: Maxime Ripard Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20251205-drm-seq-fix-v1-2-fda68fa1b3de@ideasonboard.com --- drivers/gpu/drm/mediatek/mtk_dsi.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 0e2bcd5f67b7..d7726091819c 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1002,12 +1002,6 @@ static int mtk_dsi_host_attach(struct mipi_dsi_host *host, return PTR_ERR(dsi->next_bridge); } - /* - * set flag to request the DSI host bridge be pre-enabled before device bridge - * in the chain, so the DSI host is ready when the device bridge is pre-enabled - */ - dsi->next_bridge->pre_enable_prev_first = true; - drm_bridge_add(&dsi->bridge); ret = component_add(host->dev, &mtk_dsi_component_ops); From d1c7dc57ff2400b141e6582a8d2dc5170108cf81 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 5 Dec 2025 11:51:50 +0200 Subject: [PATCH 12/32] drm/atomic-helper: Export and namespace some functions Export and namespace those not prefixed with drm_* so it becomes possible to write custom commit tail functions in individual drivers using the helper infrastructure. Tested-by: Marek Vasut Reviewed-by: Maxime Ripard Signed-off-by: Tomi Valkeinen Cc: stable@vger.kernel.org # v6.17+ Fixes: c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") Reviewed-by: Aradhya Bhatia Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20251205-drm-seq-fix-v1-3-fda68fa1b3de@ideasonboard.com --- drivers/gpu/drm/drm_atomic_helper.c | 122 ++++++++++++++++++++++------ include/drm/drm_atomic_helper.h | 22 +++++ 2 files changed, 121 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index ef97f37560b2..5beea645035f 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1162,8 +1162,18 @@ crtc_needs_disable(struct drm_crtc_state *old_state, new_state->self_refresh_active; } -static void -encoder_bridge_disable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_encoder_bridge_disable - disable bridges and encoder + * @dev: DRM device + * @state: the driver state object + * + * Loops over all connectors in the current state and if the CRTC needs + * it, disables the bridge chain all the way, then disables the encoder + * afterwards. + */ +void +drm_atomic_helper_commit_encoder_bridge_disable(struct drm_device *dev, + struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *old_conn_state, *new_conn_state; @@ -1229,9 +1239,18 @@ encoder_bridge_disable(struct drm_device *dev, struct drm_atomic_state *state) } } } +EXPORT_SYMBOL(drm_atomic_helper_commit_encoder_bridge_disable); -static void -crtc_disable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_crtc_disable - disable CRTSs + * @dev: DRM device + * @state: the driver state object + * + * Loops over all CRTCs in the current state and if the CRTC needs + * it, disables it. + */ +void +drm_atomic_helper_commit_crtc_disable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; @@ -1282,9 +1301,18 @@ crtc_disable(struct drm_device *dev, struct drm_atomic_state *state) drm_crtc_vblank_put(crtc); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_crtc_disable); -static void -encoder_bridge_post_disable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_encoder_bridge_post_disable - post-disable encoder bridges + * @dev: DRM device + * @state: the driver state object + * + * Loops over all connectors in the current state and if the CRTC needs + * it, post-disables all encoder bridges. + */ +void +drm_atomic_helper_commit_encoder_bridge_post_disable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *old_conn_state, *new_conn_state; @@ -1335,15 +1363,16 @@ encoder_bridge_post_disable(struct drm_device *dev, struct drm_atomic_state *sta drm_bridge_put(bridge); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_encoder_bridge_post_disable); static void disable_outputs(struct drm_device *dev, struct drm_atomic_state *state) { - encoder_bridge_disable(dev, state); + drm_atomic_helper_commit_encoder_bridge_disable(dev, state); - encoder_bridge_post_disable(dev, state); + drm_atomic_helper_commit_encoder_bridge_post_disable(dev, state); - crtc_disable(dev, state); + drm_atomic_helper_commit_crtc_disable(dev, state); } /** @@ -1446,8 +1475,17 @@ void drm_atomic_helper_calc_timestamping_constants(struct drm_atomic_state *stat } EXPORT_SYMBOL(drm_atomic_helper_calc_timestamping_constants); -static void -crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_crtc_set_mode - set the new mode + * @dev: DRM device + * @state: the driver state object + * + * Loops over all connectors in the current state and if the mode has + * changed, change the mode of the CRTC, then call down the bridge + * chain and change the mode in all bridges as well. + */ +void +drm_atomic_helper_commit_crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *new_crtc_state; @@ -1508,6 +1546,7 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *state) drm_bridge_put(bridge); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_crtc_set_mode); /** * drm_atomic_helper_commit_modeset_disables - modeset commit to disable outputs @@ -1531,12 +1570,21 @@ void drm_atomic_helper_commit_modeset_disables(struct drm_device *dev, drm_atomic_helper_update_legacy_modeset_state(dev, state); drm_atomic_helper_calc_timestamping_constants(state); - crtc_set_mode(dev, state); + drm_atomic_helper_commit_crtc_set_mode(dev, state); } EXPORT_SYMBOL(drm_atomic_helper_commit_modeset_disables); -static void drm_atomic_helper_commit_writebacks(struct drm_device *dev, - struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_writebacks - issue writebacks + * @dev: DRM device + * @state: atomic state object being committed + * + * This loops over the connectors, checks if the new state requires + * a writeback job to be issued and in that case issues an atomic + * commit on each connector. + */ +void drm_atomic_helper_commit_writebacks(struct drm_device *dev, + struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *new_conn_state; @@ -1555,9 +1603,18 @@ static void drm_atomic_helper_commit_writebacks(struct drm_device *dev, } } } +EXPORT_SYMBOL(drm_atomic_helper_commit_writebacks); -static void -encoder_bridge_pre_enable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_encoder_bridge_pre_enable - pre-enable bridges + * @dev: DRM device + * @state: atomic state object being committed + * + * This loops over the connectors and if the CRTC needs it, pre-enables + * the entire bridge chain. + */ +void +drm_atomic_helper_commit_encoder_bridge_pre_enable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *new_conn_state; @@ -1588,9 +1645,18 @@ encoder_bridge_pre_enable(struct drm_device *dev, struct drm_atomic_state *state drm_bridge_put(bridge); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_encoder_bridge_pre_enable); -static void -crtc_enable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_crtc_enable - enables the CRTCs + * @dev: DRM device + * @state: atomic state object being committed + * + * This loops over CRTCs in the new state, and of the CRTC needs + * it, enables it. + */ +void +drm_atomic_helper_commit_crtc_enable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state; @@ -1619,9 +1685,18 @@ crtc_enable(struct drm_device *dev, struct drm_atomic_state *state) } } } +EXPORT_SYMBOL(drm_atomic_helper_commit_crtc_enable); -static void -encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_encoder_bridge_enable - enables the bridges + * @dev: DRM device + * @state: atomic state object being committed + * + * This loops over all connectors in the new state, and of the CRTC needs + * it, enables the entire bridge chain. + */ +void +drm_atomic_helper_commit_encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *new_conn_state; @@ -1664,6 +1739,7 @@ encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) drm_bridge_put(bridge); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_encoder_bridge_enable); /** * drm_atomic_helper_commit_modeset_enables - modeset commit to enable outputs @@ -1682,11 +1758,11 @@ encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, struct drm_atomic_state *state) { - crtc_enable(dev, state); + drm_atomic_helper_commit_crtc_enable(dev, state); - encoder_bridge_pre_enable(dev, state); + drm_atomic_helper_commit_encoder_bridge_pre_enable(dev, state); - encoder_bridge_enable(dev, state); + drm_atomic_helper_commit_encoder_bridge_enable(dev, state); drm_atomic_helper_commit_writebacks(dev, state); } diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 53382fe93537..e154ee4f0696 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -60,6 +60,12 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, int drm_atomic_helper_check_planes(struct drm_device *dev, struct drm_atomic_state *state); int drm_atomic_helper_check_crtc_primary_plane(struct drm_crtc_state *crtc_state); +void drm_atomic_helper_commit_encoder_bridge_disable(struct drm_device *dev, + struct drm_atomic_state *state); +void drm_atomic_helper_commit_crtc_disable(struct drm_device *dev, + struct drm_atomic_state *state); +void drm_atomic_helper_commit_encoder_bridge_post_disable(struct drm_device *dev, + struct drm_atomic_state *state); int drm_atomic_helper_check(struct drm_device *dev, struct drm_atomic_state *state); void drm_atomic_helper_commit_tail(struct drm_atomic_state *state); @@ -89,8 +95,24 @@ drm_atomic_helper_update_legacy_modeset_state(struct drm_device *dev, void drm_atomic_helper_calc_timestamping_constants(struct drm_atomic_state *state); +void drm_atomic_helper_commit_crtc_set_mode(struct drm_device *dev, + struct drm_atomic_state *state); + void drm_atomic_helper_commit_modeset_disables(struct drm_device *dev, struct drm_atomic_state *state); + +void drm_atomic_helper_commit_writebacks(struct drm_device *dev, + struct drm_atomic_state *state); + +void drm_atomic_helper_commit_encoder_bridge_pre_enable(struct drm_device *dev, + struct drm_atomic_state *state); + +void drm_atomic_helper_commit_crtc_enable(struct drm_device *dev, + struct drm_atomic_state *state); + +void drm_atomic_helper_commit_encoder_bridge_enable(struct drm_device *dev, + struct drm_atomic_state *state); + void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, struct drm_atomic_state *old_state); From 2fc04340cf30d7960eed2525d26ffb8905aca02b Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 5 Dec 2025 11:51:51 +0200 Subject: [PATCH 13/32] drm/tidss: Fix enable/disable order TI's OLDI and DSI encoders need to be set up before the crtc is enabled, but the DRM helpers will enable the crtc first. This causes various issues on TI platforms, like visual artifacts or crtc sync lost warnings. Thus drm_atomic_helper_commit_modeset_enables() and drm_atomic_helper_commit_modeset_disables() cannot be used, as they enable the crtc before bridges' pre-enable, and disable the crtc after bridges' post-disable. Open code the drm_atomic_helper_commit_modeset_enables() and drm_atomic_helper_commit_modeset_disables(), and first call the bridges' pre-enables, then crtc enable, then bridges' post-enable (and vice versa for disable). Signed-off-by: Tomi Valkeinen Cc: stable@vger.kernel.org # v6.17+ Fixes: c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") Reviewed-by: Aradhya Bhatia Reviewed-by: Maxime Ripard Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20251205-drm-seq-fix-v1-4-fda68fa1b3de@ideasonboard.com --- drivers/gpu/drm/tidss/tidss_kms.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c index 86eb5d97410b..8bb93194e5ac 100644 --- a/drivers/gpu/drm/tidss/tidss_kms.c +++ b/drivers/gpu/drm/tidss/tidss_kms.c @@ -26,9 +26,33 @@ static void tidss_atomic_commit_tail(struct drm_atomic_state *old_state) tidss_runtime_get(tidss); - drm_atomic_helper_commit_modeset_disables(ddev, old_state); - drm_atomic_helper_commit_planes(ddev, old_state, DRM_PLANE_COMMIT_ACTIVE_ONLY); - drm_atomic_helper_commit_modeset_enables(ddev, old_state); + /* + * TI's OLDI and DSI encoders need to be set up before the crtc is + * enabled. Thus drm_atomic_helper_commit_modeset_enables() and + * drm_atomic_helper_commit_modeset_disables() cannot be used here, as + * they enable the crtc before bridges' pre-enable, and disable the crtc + * after bridges' post-disable. + * + * Open code the functions here and first call the bridges' pre-enables, + * then crtc enable, then bridges' post-enable (and vice versa for + * disable). + */ + + drm_atomic_helper_commit_encoder_bridge_disable(ddev, old_state); + drm_atomic_helper_commit_crtc_disable(ddev, old_state); + drm_atomic_helper_commit_encoder_bridge_post_disable(ddev, old_state); + + drm_atomic_helper_update_legacy_modeset_state(ddev, old_state); + drm_atomic_helper_calc_timestamping_constants(old_state); + drm_atomic_helper_commit_crtc_set_mode(ddev, old_state); + + drm_atomic_helper_commit_planes(ddev, old_state, + DRM_PLANE_COMMIT_ACTIVE_ONLY); + + drm_atomic_helper_commit_encoder_bridge_pre_enable(ddev, old_state); + drm_atomic_helper_commit_crtc_enable(ddev, old_state); + drm_atomic_helper_commit_encoder_bridge_enable(ddev, old_state); + drm_atomic_helper_commit_writebacks(ddev, old_state); drm_atomic_helper_commit_hw_done(old_state); drm_atomic_helper_wait_for_flip_done(ddev, old_state); From e8b3627bec357698f2d4d6dbf27cdcfa0e9d8715 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 2 Jan 2026 14:18:29 +1000 Subject: [PATCH 14/32] nouveau: don't attempt fwsec on sb on newer platforms. The changes to always loads fwsec sb causes problems on newer GPUs which don't use this path. Add hooks and pass through the device specific layers. Fixes: da67179e5538 ("drm/nouveau/gsp: Allocate fwsec-sb at boot") Cc: # v6.16+ Cc: Lyude Paul Cc: Timur Tabi Tested-by: Matthew Schwartz Tested-by: Christopher Snowhill Reviewed-by: Lyude Paul Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20260102041829.2748009-1-airlied@gmail.com --- .../gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c | 3 +++ .../gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c | 8 +------ .../gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c | 3 +++ .../gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c | 3 +++ .../gpu/drm/nouveau/nvkm/subdev/gsp/priv.h | 23 +++++++++++++++++-- .../gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c | 15 ++++++++++++ .../gpu/drm/nouveau/nvkm/subdev/gsp/tu116.c | 3 +++ 7 files changed, 49 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c index 35d1fcef520b..c456a9626823 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c @@ -30,6 +30,9 @@ ad102_gsp = { .booter.ctor = ga102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c index 503760246660..851140e80122 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c @@ -337,18 +337,12 @@ nvkm_gsp_fwsec_sb(struct nvkm_gsp *gsp) } int -nvkm_gsp_fwsec_sb_ctor(struct nvkm_gsp *gsp) +nvkm_gsp_fwsec_sb_init(struct nvkm_gsp *gsp) { return nvkm_gsp_fwsec_init(gsp, &gsp->fws.falcon.sb, "fwsec-sb", NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB); } -void -nvkm_gsp_fwsec_sb_dtor(struct nvkm_gsp *gsp) -{ - nvkm_falcon_fw_dtor(&gsp->fws.falcon.sb); -} - int nvkm_gsp_fwsec_frts(struct nvkm_gsp *gsp) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c index d201e8697226..27a13aeccd3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c @@ -47,6 +47,9 @@ ga100_gsp = { .booter.ctor = tu102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c index 917f7e2f6c46..b6b3eb6f4c00 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c @@ -158,6 +158,9 @@ ga102_gsp_r535 = { .booter.ctor = ga102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h index 86bdd203bc10..9dd66a2e3801 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h @@ -7,9 +7,8 @@ enum nvkm_acr_lsf_id; int nvkm_gsp_fwsec_frts(struct nvkm_gsp *); -int nvkm_gsp_fwsec_sb_ctor(struct nvkm_gsp *); int nvkm_gsp_fwsec_sb(struct nvkm_gsp *); -void nvkm_gsp_fwsec_sb_dtor(struct nvkm_gsp *); +int nvkm_gsp_fwsec_sb_init(struct nvkm_gsp *gsp); struct nvkm_gsp_fwif { int version; @@ -52,6 +51,11 @@ struct nvkm_gsp_func { struct nvkm_falcon *, struct nvkm_falcon_fw *); } booter; + struct { + int (*ctor)(struct nvkm_gsp *); + void (*dtor)(struct nvkm_gsp *); + } fwsec_sb; + void (*dtor)(struct nvkm_gsp *); int (*oneinit)(struct nvkm_gsp *); int (*init)(struct nvkm_gsp *); @@ -67,6 +71,8 @@ extern const struct nvkm_falcon_func tu102_gsp_flcn; extern const struct nvkm_falcon_fw_func tu102_gsp_fwsec; int tu102_gsp_booter_ctor(struct nvkm_gsp *, const char *, const struct firmware *, struct nvkm_falcon *, struct nvkm_falcon_fw *); +int tu102_gsp_fwsec_sb_ctor(struct nvkm_gsp *); +void tu102_gsp_fwsec_sb_dtor(struct nvkm_gsp *); int tu102_gsp_oneinit(struct nvkm_gsp *); int tu102_gsp_init(struct nvkm_gsp *); int tu102_gsp_fini(struct nvkm_gsp *, bool suspend); @@ -91,5 +97,18 @@ int r535_gsp_fini(struct nvkm_gsp *, bool suspend); int nvkm_gsp_new_(const struct nvkm_gsp_fwif *, struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_gsp **); +static inline int nvkm_gsp_fwsec_sb_ctor(struct nvkm_gsp *gsp) +{ + if (gsp->func->fwsec_sb.ctor) + return gsp->func->fwsec_sb.ctor(gsp); + return 0; +} + +static inline void nvkm_gsp_fwsec_sb_dtor(struct nvkm_gsp *gsp) +{ + if (gsp->func->fwsec_sb.dtor) + gsp->func->fwsec_sb.dtor(gsp); +} + extern const struct nvkm_gsp_func gv100_gsp; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c index 81e56da0474a..04b642a1f730 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c @@ -30,6 +30,18 @@ #include #include +int +tu102_gsp_fwsec_sb_ctor(struct nvkm_gsp *gsp) +{ + return nvkm_gsp_fwsec_sb_init(gsp); +} + +void +tu102_gsp_fwsec_sb_dtor(struct nvkm_gsp *gsp) +{ + nvkm_falcon_fw_dtor(&gsp->fws.falcon.sb); +} + static int tu102_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1) { @@ -370,6 +382,9 @@ tu102_gsp = { .booter.ctor = tu102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu116.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu116.c index 97eb046c25d0..58cf25842421 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu116.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu116.c @@ -30,6 +30,9 @@ tu116_gsp = { .booter.ctor = tu102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, From 70740454377f1ba3ff32f5df4acd965db99d055b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 13 Dec 2025 15:16:43 +0900 Subject: [PATCH 15/32] drm/amd/display: Apply e4479aecf658 to dml After an innocuous optimization change in clang-22, allmodconfig (which enables CONFIG_KASAN and CONFIG_WERROR) breaks with: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1724:6: error: stack frame size (3144) exceeds limit (3072) in 'dml32_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 1724 | void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ With clang-21, this function was already pretty close to the existing limit of 3072 bytes. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1724:6: error: stack frame size (2904) exceeds limit (2048) in 'dml32_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 1724 | void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ A similar situation occurred in dml2, which was resolved by commit e4479aecf658 ("drm/amd/display: Increase sanitizer frame larger than limit when compile testing with clang") by increasing the limit for clang when compile testing with certain sanitizer enabled, so that allmodconfig (an easy testing target) continues to work. Apply that same change to the dml folder to clear up the warning for allmodconfig, unbreaking the build. Closes: https://github.com/ClangBuiltLinux/linux/issues/2135 Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher (cherry picked from commit 25314b453cf812150e9951a32007a32bba85707e) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index b357683b4255..268b5fbdb48b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -30,7 +30,11 @@ dml_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) - frame_warn_limit := 3072 + ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) + frame_warn_limit := 4096 + else + frame_warn_limit := 3072 + endif else frame_warn_limit := 2048 endif From f54a91f5337cd918eb86cf600320d25b6cfd8209 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 13 Dec 2025 19:58:10 +0900 Subject: [PATCH 16/32] drm/amd/display: Reduce number of arguments of dcn30's CalculatePrefetchSchedule() After an innocuous optimization change in clang-22, dml30_ModeSupportAndSystemConfigurationFull() is over the 2048 byte stack limit for display_mode_vba_30.c. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3529:6: warning: stack frame size (2096) exceeds limit (2048) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than] 3529 | void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ With clang-21, this function was already close to the limit: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3529:6: warning: stack frame size (1912) exceeds limit (1586) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than] 3529 | void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ CalculatePrefetchSchedule() has a large number of parameters, which must be passed on the stack. Most of the parameters between the two callsites are the same, so they can be accessed through the existing mode_lib pointer, instead of being passed as explicit arguments. Doing this reduces the stack size of dml30_ModeSupportAndSystemConfigurationFull() from 2096 bytes to 1912 bytes with clang-22. Closes: https://github.com/ClangBuiltLinux/linux/issues/2117 Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher (cherry picked from commit b20b3fc4210f83089f835cdb91deec4b0778761a) --- .../dc/dml/dcn30/display_mode_vba_30.c | 258 +++++------------- 1 file changed, 73 insertions(+), 185 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 8d24763938ea..2d19bb8de59c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -77,32 +77,14 @@ static unsigned int dscceComputeDelay( static unsigned int dscComputeDelay( enum output_format_class pixelFormat, enum output_encoder_class Output); -// Super monster function with some 45 argument static bool CalculatePrefetchSchedule( struct display_mode_lib *mode_lib, - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + unsigned int k, Pipe *myPipe, unsigned int DSCDelay, - double DPPCLKDelaySubtotalPlusCNVCFormater, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, - enum output_format_class OutputFormat, - unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, - unsigned int GPUVMPageTableLevels, - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - double HostVMMinPageSize, - bool DynamicMetadataEnable, - bool DynamicMetadataVMEnabled, - int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, @@ -116,7 +98,6 @@ static bool CalculatePrefetchSchedule( unsigned int MaxNumSwathY, double PrefetchSourceLinesC, unsigned int SwathWidthC, - int BytePerPixelC, double VInitPreFillC, unsigned int MaxNumSwathC, long swath_width_luma_ub, @@ -124,9 +105,6 @@ static bool CalculatePrefetchSchedule( unsigned int SwathHeightY, unsigned int SwathHeightC, double TWait, - bool ProgressiveToInterlaceUnitInOPP, - double *DSTXAfterScaler, - double *DSTYAfterScaler, double *DestinationLinesForPrefetch, double *PrefetchBandwidth, double *DestinationLinesToRequestVMInVBlank, @@ -135,14 +113,7 @@ static bool CalculatePrefetchSchedule( double *VRatioPrefetchC, double *RequiredPrefetchPixDataBWLuma, double *RequiredPrefetchPixDataBWChroma, - bool *NotEnoughTimeForDynamicMetadata, - double *Tno_bw, - double *prefetch_vmrow_bw, - double *Tdmdl_vm, - double *Tdmdl, - unsigned int *VUpdateOffsetPix, - double *VUpdateWidthPix, - double *VReadyOffsetPix); + bool *NotEnoughTimeForDynamicMetadata); static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); static void CalculateDCCConfiguration( @@ -810,29 +781,12 @@ static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum o static bool CalculatePrefetchSchedule( struct display_mode_lib *mode_lib, - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + unsigned int k, Pipe *myPipe, unsigned int DSCDelay, - double DPPCLKDelaySubtotalPlusCNVCFormater, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, - enum output_format_class OutputFormat, - unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, - unsigned int GPUVMPageTableLevels, - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - double HostVMMinPageSize, - bool DynamicMetadataEnable, - bool DynamicMetadataVMEnabled, - int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, @@ -846,7 +800,6 @@ static bool CalculatePrefetchSchedule( unsigned int MaxNumSwathY, double PrefetchSourceLinesC, unsigned int SwathWidthC, - int BytePerPixelC, double VInitPreFillC, unsigned int MaxNumSwathC, long swath_width_luma_ub, @@ -854,9 +807,6 @@ static bool CalculatePrefetchSchedule( unsigned int SwathHeightY, unsigned int SwathHeightC, double TWait, - bool ProgressiveToInterlaceUnitInOPP, - double *DSTXAfterScaler, - double *DSTYAfterScaler, double *DestinationLinesForPrefetch, double *PrefetchBandwidth, double *DestinationLinesToRequestVMInVBlank, @@ -865,15 +815,10 @@ static bool CalculatePrefetchSchedule( double *VRatioPrefetchC, double *RequiredPrefetchPixDataBWLuma, double *RequiredPrefetchPixDataBWChroma, - bool *NotEnoughTimeForDynamicMetadata, - double *Tno_bw, - double *prefetch_vmrow_bw, - double *Tdmdl_vm, - double *Tdmdl, - unsigned int *VUpdateOffsetPix, - double *VUpdateWidthPix, - double *VReadyOffsetPix) + bool *NotEnoughTimeForDynamicMetadata) { + struct vba_vars_st *v = &mode_lib->vba; + double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; bool MyError = false; unsigned int DPPCycles = 0, DISPCLKCycles = 0; double DSTTotalPixelsAfterScaler = 0; @@ -905,26 +850,26 @@ static bool CalculatePrefetchSchedule( double Tdmec = 0; double Tdmsks = 0; - if (GPUVMEnable == true && HostVMEnable == true) { - HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + if (v->GPUVMEnable == true && v->HostVMEnable == true) { + HostVMInefficiencyFactor = v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; } else { HostVMInefficiencyFactor = 1; HostVMDynamicLevelsTrips = 0; } CalculateDynamicMetadataParameters( - MaxInterDCNTileRepeaters, + v->MaxInterDCNTileRepeaters, myPipe->DPPCLK, myPipe->DISPCLK, myPipe->DCFCLKDeepSleep, myPipe->PixelClock, myPipe->HTotal, myPipe->VBlank, - DynamicMetadataTransmittedBytes, - DynamicMetadataLinesBeforeActiveRequired, + v->DynamicMetadataTransmittedBytes[k], + v->DynamicMetadataLinesBeforeActiveRequired[k], myPipe->InterlaceEnable, - ProgressiveToInterlaceUnitInOPP, + v->ProgressiveToInterlaceUnitInOPP, &Tsetup, &Tdmbf, &Tdmec, @@ -932,16 +877,16 @@ static bool CalculatePrefetchSchedule( LineTime = myPipe->HTotal / myPipe->PixelClock; trip_to_mem = UrgentLatency; - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); - if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { - *Tdmdl = TWait + Tvm_trips + trip_to_mem; + if (v->DynamicMetadataVMEnabled == true && v->GPUVMEnable == true) { + v->Tdmdl[k] = TWait + Tvm_trips + trip_to_mem; } else { - *Tdmdl = TWait + UrgentExtraLatency; + v->Tdmdl[k] = TWait + UrgentExtraLatency; } - if (DynamicMetadataEnable == true) { - if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { + if (v->DynamicMetadataEnable[k] == true) { + if (VStartup * LineTime < Tsetup + v->Tdmdl[k] + Tdmbf + Tdmec + Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; } else { *NotEnoughTimeForDynamicMetadata = false; @@ -949,39 +894,39 @@ static bool CalculatePrefetchSchedule( dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); - dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); + dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", v->Tdmdl[k]); } } else { *NotEnoughTimeForDynamicMetadata = false; } - *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); + v->Tdmdl_vm[k] = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && v->GPUVMEnable == true ? TWait + Tvm_trips : 0); if (myPipe->ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; else - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; - DISPCLKCycles = DISPCLKDelaySubtotal; + DISPCLKCycles = v->DISPCLKDelaySubtotal; if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) return true; - *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + v->DSTXAfterScaler[k] = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; - *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; + v->DSTXAfterScaler[k] = v->DSTXAfterScaler[k] + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; - if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) - *DSTYAfterScaler = 1; + if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && v->ProgressiveToInterlaceUnitInOPP)) + v->DSTYAfterScaler[k] = 1; else - *DSTYAfterScaler = 0; + v->DSTYAfterScaler[k] = 0; - DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + DSTTotalPixelsAfterScaler = v->DSTYAfterScaler[k] * myPipe->HTotal + v->DSTXAfterScaler[k]; + v->DSTYAfterScaler[k] = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + v->DSTXAfterScaler[k] = DSTTotalPixelsAfterScaler - ((double) (v->DSTYAfterScaler[k] * myPipe->HTotal)); MyError = false; @@ -990,33 +935,33 @@ static bool CalculatePrefetchSchedule( Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; - if (GPUVMEnable) { - if (GPUVMPageTableLevels >= 3) { - *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); + if (v->GPUVMEnable) { + if (v->GPUVMMaxPageTableLevels >= 3) { + v->Tno_bw[k] = UrgentExtraLatency + trip_to_mem * ((v->GPUVMMaxPageTableLevels - 2) - 1); } else - *Tno_bw = 0; + v->Tno_bw[k] = 0; } else if (!myPipe->DCCEnable) - *Tno_bw = LineTime; + v->Tno_bw[k] = LineTime; else - *Tno_bw = LineTime / 4; + v->Tno_bw[k] = LineTime / 4; - dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, v->Tdmdl[k])) / LineTime + - (v->DSTYAfterScaler[k] + v->DSTXAfterScaler[k] / myPipe->HTotal); dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC); Tsw_oto = Lsw_oto * LineTime; - prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto; + prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) / Tsw_oto; - if (GPUVMEnable == true) { - Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, + if (v->GPUVMEnable == true) { + Tvm_oto = dml_max3(v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); } else Tvm_oto = LineTime / 4.0; - if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { Tr0_oto = dml_max3( (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, LineTime - Tvm_oto, LineTime / 4); @@ -1042,10 +987,10 @@ static bool CalculatePrefetchSchedule( dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); - dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); - dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); - dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); - dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler); + dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", v->Tdmdl_vm[k]); + dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", v->Tdmdl[k]); + dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", v->DSTXAfterScaler[k]); + dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)v->DSTYAfterScaler[k]); *PrefetchBandwidth = 0; *DestinationLinesToRequestVMInVBlank = 0; @@ -1059,26 +1004,26 @@ static bool CalculatePrefetchSchedule( double PrefetchBandwidth3 = 0; double PrefetchBandwidth4 = 0; - if (Tpre_rounded - *Tno_bw > 0) + if (Tpre_rounded - v->Tno_bw[k] > 0) PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY - + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) - / (Tpre_rounded - *Tno_bw); + + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) + / (Tpre_rounded - v->Tno_bw[k]); else PrefetchBandwidth1 = 0; - if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) { - PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw); + if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - v->Tno_bw[k]) > 0) { + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - v->Tno_bw[k]); } - if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) + if (Tpre_rounded - v->Tno_bw[k] - 2 * Tr0_trips_rounded > 0) PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * - BytePerPixelC) / - (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); + v->BytePerPixelC[k]) / + (Tpre_rounded - v->Tno_bw[k] - 2 * Tr0_trips_rounded); else PrefetchBandwidth2 = 0; @@ -1086,7 +1031,7 @@ static bool CalculatePrefetchSchedule( PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * - swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded - + swath_width_chroma_ub * v->BytePerPixelC[k]) / (Tpre_rounded - Tvm_trips_rounded); else PrefetchBandwidth3 = 0; @@ -1096,7 +1041,7 @@ static bool CalculatePrefetchSchedule( } if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) - PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) + PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); else PrefetchBandwidth4 = 0; @@ -1107,7 +1052,7 @@ static bool CalculatePrefetchSchedule( bool Case3OK; if (PrefetchBandwidth1 > 0) { - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { Case1OK = true; } else { @@ -1118,7 +1063,7 @@ static bool CalculatePrefetchSchedule( } if (PrefetchBandwidth2 > 0) { - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { Case2OK = true; } else { @@ -1129,7 +1074,7 @@ static bool CalculatePrefetchSchedule( } if (PrefetchBandwidth3 > 0) { - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { Case3OK = true; } else { @@ -1152,13 +1097,13 @@ static bool CalculatePrefetchSchedule( dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ); if (prefetch_bw_equ > 0) { - if (GPUVMEnable) { - Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); + if (v->GPUVMEnable) { + Tvm_equ = dml_max3(v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); } else { Tvm_equ = LineTime / 4; } - if ((GPUVMEnable || myPipe->DCCEnable)) { + if ((v->GPUVMEnable || myPipe->DCCEnable)) { Tr0_equ = dml_max4( (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, @@ -1227,7 +1172,7 @@ static bool CalculatePrefetchSchedule( } *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime; - *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime; + *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * v->BytePerPixelC[k] * swath_width_chroma_ub / LineTime; } else { MyError = true; dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); @@ -1243,9 +1188,9 @@ static bool CalculatePrefetchSchedule( dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime); - dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); + dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (v->DSTYAfterScaler[k] + ((v->DSTXAfterScaler[k]) / (double) myPipe->HTotal)) * LineTime); dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n"); - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (v->DSTYAfterScaler[k] + ((v->DSTXAfterScaler[k]) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); } else { @@ -1276,7 +1221,7 @@ static bool CalculatePrefetchSchedule( dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); } - *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); + v->prefetch_vmrow_bw[k] = dml_max(prefetch_vm_bw, prefetch_row_bw); } if (MyError) { @@ -2437,30 +2382,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->ErrorResult[k] = CalculatePrefetchSchedule( mode_lib, - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + k, &myPipe, v->DSCDelay[k], - v->DPPCLKDelaySubtotal - + v->DPPCLKDelayCNVCFormater, - v->DPPCLKDelaySCL, - v->DPPCLKDelaySCLLBOnly, - v->DPPCLKDelayCNVCCursor, - v->DISPCLKDelaySubtotal, (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), - v->OutputFormat[k], - v->MaxInterDCNTileRepeaters, dml_min(v->VStartupLines, v->MaxVStartupLines[k]), v->MaxVStartupLines[k], - v->GPUVMMaxPageTableLevels, - v->GPUVMEnable, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->HostVMMinPageSize, - v->DynamicMetadataEnable[k], - v->DynamicMetadataVMEnabled, - v->DynamicMetadataLinesBeforeActiveRequired[k], - v->DynamicMetadataTransmittedBytes[k], v->UrgentLatency, v->UrgentExtraLatency, v->TCalc, @@ -2474,7 +2401,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->MaxNumSwathY[k], v->PrefetchSourceLinesC[k], v->SwathWidthC[k], - v->BytePerPixelC[k], v->VInitPreFillC[k], v->MaxNumSwathC[k], v->swath_width_luma_ub[k], @@ -2482,9 +2408,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->SwathHeightY[k], v->SwathHeightC[k], TWait, - v->ProgressiveToInterlaceUnitInOPP, - &v->DSTXAfterScaler[k], - &v->DSTYAfterScaler[k], &v->DestinationLinesForPrefetch[k], &v->PrefetchBandwidth[k], &v->DestinationLinesToRequestVMInVBlank[k], @@ -2493,14 +2416,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman &v->VRatioPrefetchC[k], &v->RequiredPrefetchPixDataBWLuma[k], &v->RequiredPrefetchPixDataBWChroma[k], - &v->NotEnoughTimeForDynamicMetadata[k], - &v->Tno_bw[k], - &v->prefetch_vmrow_bw[k], - &v->Tdmdl_vm[k], - &v->Tdmdl[k], - &v->VUpdateOffsetPix[k], - &v->VUpdateWidthPix[k], - &v->VReadyOffsetPix[k]); + &v->NotEnoughTimeForDynamicMetadata[k]); if (v->BlendingAndTiming[k] == k) { double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK); v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k]; @@ -4770,29 +4686,12 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( mode_lib, - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + k, &myPipe, v->DSCDelayPerState[i][k], - v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, - v->DPPCLKDelaySCL, - v->DPPCLKDelaySCLLBOnly, - v->DPPCLKDelayCNVCCursor, - v->DISPCLKDelaySubtotal, v->SwathWidthYThisState[k] / v->HRatio[k], - v->OutputFormat[k], - v->MaxInterDCNTileRepeaters, dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), v->MaximumVStartup[i][j][k], - v->GPUVMMaxPageTableLevels, - v->GPUVMEnable, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->HostVMMinPageSize, - v->DynamicMetadataEnable[k], - v->DynamicMetadataVMEnabled, - v->DynamicMetadataLinesBeforeActiveRequired[k], - v->DynamicMetadataTransmittedBytes[k], v->UrgLatency[i], v->ExtraLatency, v->TimeCalc, @@ -4806,7 +4705,6 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->MaxNumSwY[k], v->PrefetchLinesC[i][j][k], v->SwathWidthCThisState[k], - v->BytePerPixelC[k], v->PrefillC[k], v->MaxNumSwC[k], v->swath_width_luma_ub_this_state[k], @@ -4814,9 +4712,6 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->TWait, - v->ProgressiveToInterlaceUnitInOPP, - &v->DSTXAfterScaler[k], - &v->DSTYAfterScaler[k], &v->LineTimesForPrefetch[k], &v->PrefetchBW[k], &v->LinesForMetaPTE[k], @@ -4825,14 +4720,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &v->VRatioPreC[i][j][k], &v->RequiredPrefetchPixelDataBWLuma[i][j][k], &v->RequiredPrefetchPixelDataBWChroma[i][j][k], - &v->NoTimeForDynamicMetadata[i][j][k], - &v->Tno_bw[k], - &v->prefetch_vmrow_bw[k], - &v->Tdmdl_vm[k], - &v->Tdmdl[k], - &v->VUpdateOffsetPix[k], - &v->VUpdateWidthPix[k], - &v->VReadyOffsetPix[k]); + &v->NoTimeForDynamicMetadata[i][j][k]); } for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { From 6ce6fbfddc5b127e4f57c3b5bfdcf40239a4fc2f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 13 Dec 2025 19:58:11 +0900 Subject: [PATCH 17/32] drm/amd/display: Reduce number of arguments of dcn30's CalculateWatermarksAndDRAMSpeedChangeSupport() CalculateWatermarksAndDRAMSpeedChangeSupport() has a large number of parameters, which must be passed on the stack. Most of the parameters between the two callsites are the same, so they can be accessed through the existing mode_lib pointer, instead of being passed as explicit arguments. Doing this reduces the stack size of dml30_ModeSupportAndSystemConfigurationFull() from 1912 bytes to 1840 bytes building for x86_64 with clang-22, helping stay under the 2048 byte limit for display_mode_vba_30.c. Additionally, now that there is a pointer to mode_lib->vba available, use 'v' consistently throughout the entire function. Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher (cherry picked from commit 563dfbefdf633c8d958398ddfa3955f9f40e47d9) --- .../dc/dml/dcn30/display_mode_vba_30.c | 287 ++++-------------- 1 file changed, 66 insertions(+), 221 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 2d19bb8de59c..1df3412be346 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -265,62 +265,23 @@ static void CalculateDynamicMetadataParameters( static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, - unsigned int NumberOfActivePlanes, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int DPPOutputBufferPixels, - unsigned int DETBufferSizeInKByte, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool GPUVMEnable, - unsigned int dpte_group_bytes[], - unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, - double WritebackLatency, - double WritebackChunkSize, double SOCCLK, - double DRAMClockChangeLatency, - double SRExitTime, - double SREnterPlusExitTime, double DCFCLKDeepSleep, unsigned int DPPPerPlane[], - bool DCCEnable[], double DPPCLK[], unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int vtaps[], - unsigned int VTAPsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - double PixelClock[], - unsigned int BlendingAndTiming[], double BytePerPixelDETY[], double BytePerPixelDETC[], - double DSTXAfterScaler[], - double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], - enum clock_change_support *DRAMClockChangeSupport, - double *UrgentWatermark, - double *WritebackUrgentWatermark, - double *DRAMClockChangeWatermark, - double *WritebackDRAMClockChangeWatermark, - double *StutterExitWatermark, - double *StutterEnterPlusExitWatermark, - double *MinActiveDRAMClockChangeLatencySupported); + enum clock_change_support *DRAMClockChangeSupport); static void CalculateDCFCLKDeepSleep( struct display_mode_lib *mode_lib, unsigned int NumberOfActivePlanes, @@ -2646,62 +2607,23 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, PrefetchMode, - v->NumberOfActivePlanes, - v->MaxLineBufferLines, - v->LineBufferSize, - v->DPPOutputBufferPixels, - v->DETBufferSizeInKByte[0], - v->WritebackInterfaceBufferSize, v->DCFCLK, v->ReturnBW, - v->GPUVMEnable, - v->dpte_group_bytes, - v->MetaChunkSize, v->UrgentLatency, v->UrgentExtraLatency, - v->WritebackLatency, - v->WritebackChunkSize, v->SOCCLK, - v->FinalDRAMClockChangeLatency, - v->SRExitTime, - v->SREnterPlusExitTime, v->DCFCLKDeepSleep, v->DPPPerPlane, - v->DCCEnable, v->DPPCLK, v->DETBufferSizeY, v->DETBufferSizeC, v->SwathHeightY, v->SwathHeightC, - v->LBBitPerPixel, v->SwathWidthY, v->SwathWidthC, - v->HRatio, - v->HRatioChroma, - v->vtaps, - v->VTAPsChroma, - v->VRatio, - v->VRatioChroma, - v->HTotal, - v->PixelClock, - v->BlendingAndTiming, v->BytePerPixelDETY, v->BytePerPixelDETC, - v->DSTXAfterScaler, - v->DSTYAfterScaler, - v->WritebackEnable, - v->WritebackPixelFormat, - v->WritebackDestinationWidth, - v->WritebackDestinationHeight, - v->WritebackSourceHeight, - &DRAMClockChangeSupport, - &v->UrgentWatermark, - &v->WritebackUrgentWatermark, - &v->DRAMClockChangeWatermark, - &v->WritebackDRAMClockChangeWatermark, - &v->StutterExitWatermark, - &v->StutterEnterPlusExitWatermark, - &v->MinActiveDRAMClockChangeLatencySupported); + &DRAMClockChangeSupport); for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->WritebackEnable[k] == true) { @@ -4895,62 +4817,23 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, v->PrefetchModePerState[i][j], - v->NumberOfActivePlanes, - v->MaxLineBufferLines, - v->LineBufferSize, - v->DPPOutputBufferPixels, - v->DETBufferSizeInKByte[0], - v->WritebackInterfaceBufferSize, v->DCFCLKState[i][j], v->ReturnBWPerState[i][j], - v->GPUVMEnable, - v->dpte_group_bytes, - v->MetaChunkSize, v->UrgLatency[i], v->ExtraLatency, - v->WritebackLatency, - v->WritebackChunkSize, v->SOCCLKPerState[i], - v->FinalDRAMClockChangeLatency, - v->SRExitTime, - v->SREnterPlusExitTime, v->ProjectedDCFCLKDeepSleep[i][j], v->NoOfDPPThisState, - v->DCCEnable, v->RequiredDPPCLKThisState, v->DETBufferSizeYThisState, v->DETBufferSizeCThisState, v->SwathHeightYThisState, v->SwathHeightCThisState, - v->LBBitPerPixel, v->SwathWidthYThisState, v->SwathWidthCThisState, - v->HRatio, - v->HRatioChroma, - v->vtaps, - v->VTAPsChroma, - v->VRatio, - v->VRatioChroma, - v->HTotal, - v->PixelClock, - v->BlendingAndTiming, v->BytePerPixelInDETY, v->BytePerPixelInDETC, - v->DSTXAfterScaler, - v->DSTYAfterScaler, - v->WritebackEnable, - v->WritebackPixelFormat, - v->WritebackDestinationWidth, - v->WritebackDestinationHeight, - v->WritebackSourceHeight, - &v->DRAMClockChangeSupport[i][j], - &v->UrgentWatermark, - &v->WritebackUrgentWatermark, - &v->DRAMClockChangeWatermark, - &v->WritebackDRAMClockChangeWatermark, - &v->StutterExitWatermark, - &v->StutterEnterPlusExitWatermark, - &v->MinActiveDRAMClockChangeLatencySupported); + &v->DRAMClockChangeSupport[i][j]); } } @@ -5067,63 +4950,25 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, - unsigned int NumberOfActivePlanes, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int DPPOutputBufferPixels, - unsigned int DETBufferSizeInKByte, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool GPUVMEnable, - unsigned int dpte_group_bytes[], - unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, - double WritebackLatency, - double WritebackChunkSize, double SOCCLK, - double DRAMClockChangeLatency, - double SRExitTime, - double SREnterPlusExitTime, double DCFCLKDeepSleep, unsigned int DPPPerPlane[], - bool DCCEnable[], double DPPCLK[], unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int vtaps[], - unsigned int VTAPsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - double PixelClock[], - unsigned int BlendingAndTiming[], double BytePerPixelDETY[], double BytePerPixelDETC[], - double DSTXAfterScaler[], - double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], - enum clock_change_support *DRAMClockChangeSupport, - double *UrgentWatermark, - double *WritebackUrgentWatermark, - double *DRAMClockChangeWatermark, - double *WritebackDRAMClockChangeWatermark, - double *StutterExitWatermark, - double *StutterEnterPlusExitWatermark, - double *MinActiveDRAMClockChangeLatencySupported) + enum clock_change_support *DRAMClockChangeSupport) { + struct vba_vars_st *v = &mode_lib->vba; double EffectiveLBLatencyHidingY = 0; double EffectiveLBLatencyHidingC = 0; double LinesInDETY[DC__NUM_DPP__MAX] = { 0 }; @@ -5142,101 +4987,101 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double WritebackDRAMClockChangeLatencyHiding = 0; unsigned int k, j; - mode_lib->vba.TotalActiveDPP = 0; - mode_lib->vba.TotalDCCActiveDPP = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k]; - if (DCCEnable[k] == true) { - mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k]; + v->TotalActiveDPP = 0; + v->TotalDCCActiveDPP = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->TotalActiveDPP = v->TotalActiveDPP + DPPPerPlane[k]; + if (v->DCCEnable[k] == true) { + v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + DPPPerPlane[k]; } } - *UrgentWatermark = UrgentLatency + ExtraLatency; + v->UrgentWatermark = UrgentLatency + ExtraLatency; - *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; + v->DRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->UrgentWatermark; - mode_lib->vba.TotalActiveWriteback = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (WritebackEnable[k] == true) { - mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1; + v->TotalActiveWriteback = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k] == true) { + v->TotalActiveWriteback = v->TotalActiveWriteback + 1; } } - if (mode_lib->vba.TotalActiveWriteback <= 1) { - *WritebackUrgentWatermark = WritebackLatency; + if (v->TotalActiveWriteback <= 1) { + v->WritebackUrgentWatermark = v->WritebackLatency; } else { - *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } - if (mode_lib->vba.TotalActiveWriteback <= 1) { - *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; + if (v->TotalActiveWriteback <= 1) { + v->WritebackDRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->WritebackLatency; } else { - *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->WritebackDRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { - mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); + v->LBLatencyHidingSourceLinesY = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); - mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); + v->LBLatencyHidingSourceLinesC = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); - EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); - EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k]; LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; if (BytePerPixelDETC[k] > 0) { - LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; } else { LinesInDETC = 0; FullDETBufferingTimeC = 999999; } - ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark; + ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - v->UrgentWatermark - (v->HTotal[k] / v->PixelClock[k]) * (v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) - v->DRAMClockChangeWatermark; - if (NumberOfActivePlanes > 1) { - ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; + if (v->NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; } if (BytePerPixelDETC[k] > 0) { - ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark; + ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - v->UrgentWatermark - (v->HTotal[k] / v->PixelClock[k]) * (v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) - v->DRAMClockChangeWatermark; - if (NumberOfActivePlanes > 1) { - ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; + if (v->NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; } - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); + v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); } else { - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; + v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; } - if (WritebackEnable[k] == true) { + if (v->WritebackEnable[k] == true) { - WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); - if (WritebackPixelFormat[k] == dm_444_64) { + WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); + if (v->WritebackPixelFormat[k] == dm_444_64) { WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; } - if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) { + if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) { WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2; } - WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark; - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); + WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; + v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); } } - mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; + v->MinActiveDRAMClockChangeMargin = 999999; PlaneWithMinActiveDRAMClockChangeMargin = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) { - mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; - if (BlendingAndTiming[k] == k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { + v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; + if (v->BlendingAndTiming[k] == k) { PlaneWithMinActiveDRAMClockChangeMargin = k; } else { - for (j = 0; j < NumberOfActivePlanes; ++j) { - if (BlendingAndTiming[k] == j) { + for (j = 0; j < v->NumberOfActivePlanes; ++j) { + if (v->BlendingAndTiming[k] == j) { PlaneWithMinActiveDRAMClockChangeMargin = j; } } @@ -5244,40 +5089,40 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( } } - *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; + v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->FinalDRAMClockChangeLatency; SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { - SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; } } - mode_lib->vba.TotalNumberOfActiveOTG = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (BlendingAndTiming[k] == k) { - mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1; + v->TotalNumberOfActiveOTG = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k) { + v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; } } - if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { + if (v->MinActiveDRAMClockChangeMargin > 0) { *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) { + } else if (((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) { *DRAMClockChangeSupport = dm_dram_clock_change_vblank; } else { *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0]; - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) { FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k]; - TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k]; + TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; } } - *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; - *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane); + v->StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; + v->StutterEnterPlusExitWatermark = dml_max(v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane); } From 19158c7332468bc28572bdca428e89c7954ee1b1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 30 Jun 2025 10:47:09 -0400 Subject: [PATCH 18/32] drm/radeon: Remove __counted_by from ClockInfoArray.clockInfo[] clockInfo[] is a generic uchar pointer to variable sized structures which vary from ASIC to ASIC. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4374 Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit dc135aa73561b5acc74eadf776e48530996529a3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/pptable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/pptable.h b/drivers/gpu/drm/radeon/pptable.h index 969a8fb0ee9e..f4e71046dc91 100644 --- a/drivers/gpu/drm/radeon/pptable.h +++ b/drivers/gpu/drm/radeon/pptable.h @@ -450,7 +450,7 @@ typedef struct _ClockInfoArray{ //sizeof(ATOM_PPLIB_CLOCK_INFO) UCHAR ucEntrySize; - UCHAR clockInfo[] __counted_by(ucNumEntries); + UCHAR clockInfo[] /*__counted_by(ucNumEntries)*/; }ClockInfoArray; typedef struct _NonClockInfoArray{ From 4f74c2dd970611d3ec3bb0d58215e73af5cd7214 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 11 Dec 2025 10:47:18 +0800 Subject: [PATCH 19/32] drm/amd/pm: fix wrong pcie parameter on navi1x fix wrong pcie dpm parameter on navi1x Fixes: 1a18607c07bb ("drm/amd/pm: override pcie dpm parameters only if it is necessary") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4671 Signed-off-by: Yang Wang Co-developed-by: Kenneth Feng Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 5c5189cf4b0cc0a22bac74a40743ee711cff07f8) --- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 7c9f77124ab2..16e3cc10891d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2464,8 +2464,8 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, pptable->PcieLaneCount[i] > pcie_width_cap ? pcie_width_cap : pptable->PcieLaneCount[i]; smu_pcie_arg = i << 16; - smu_pcie_arg |= pcie_gen_cap << 8; - smu_pcie_arg |= pcie_width_cap; + smu_pcie_arg |= dpm_context->dpm_tables.pcie_table.pcie_gen[i] << 8; + smu_pcie_arg |= dpm_context->dpm_tables.pcie_table.pcie_lane[i]; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_OverridePcieParameters, smu_pcie_arg, From dc8a887de1a7d397ab4131f45676e89565417aa8 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 15 Dec 2025 17:51:11 +0800 Subject: [PATCH 20/32] drm/amd/pm: force send pcie parmater on navi1x v1: the PMFW didn't initialize the PCIe DPM parameters and requires the KMD to actively provide these parameters. v2: clean & remove unused code logic (lijo) Fixes: 1a18607c07bb ("drm/amd/pm: override pcie dpm parameters only if it is necessary") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4671 Signed-off-by: Yang Wang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit b0dbd5db7cf1f81e4aaedd25cb5e72ce369387b2) --- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 16e3cc10891d..c4966dcc6875 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2455,24 +2455,21 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, } for (i = 0; i < NUM_LINK_LEVELS; i++) { - if (pptable->PcieGenSpeed[i] > pcie_gen_cap || - pptable->PcieLaneCount[i] > pcie_width_cap) { - dpm_context->dpm_tables.pcie_table.pcie_gen[i] = - pptable->PcieGenSpeed[i] > pcie_gen_cap ? - pcie_gen_cap : pptable->PcieGenSpeed[i]; - dpm_context->dpm_tables.pcie_table.pcie_lane[i] = - pptable->PcieLaneCount[i] > pcie_width_cap ? - pcie_width_cap : pptable->PcieLaneCount[i]; - smu_pcie_arg = i << 16; - smu_pcie_arg |= dpm_context->dpm_tables.pcie_table.pcie_gen[i] << 8; - smu_pcie_arg |= dpm_context->dpm_tables.pcie_table.pcie_lane[i]; - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_OverridePcieParameters, - smu_pcie_arg, - NULL); - if (ret) - break; - } + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = + pptable->PcieGenSpeed[i] > pcie_gen_cap ? + pcie_gen_cap : pptable->PcieGenSpeed[i]; + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = + pptable->PcieLaneCount[i] > pcie_width_cap ? + pcie_width_cap : pptable->PcieLaneCount[i]; + smu_pcie_arg = i << 16; + smu_pcie_arg |= dpm_context->dpm_tables.pcie_table.pcie_gen[i] << 8; + smu_pcie_arg |= dpm_context->dpm_tables.pcie_table.pcie_lane[i]; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + return ret; } return ret; From 9fc27cbabee6d2e63b0268ca709ad3129b3ac50d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Nov 2025 13:24:10 -0500 Subject: [PATCH 21/32] drm/amdgpu: don't reemit ring contents more than once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we cancel a bad job and reemit the ring contents, and we get another timeout, cancel everything rather than reemitting. The wptr markers are only relevant for the original emit. If we reemit, the wptr markers are no longer correct. Reviewed-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit fb62a2067ca4555a6572d911e05919a311c010aa) --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 22 +++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index c7843e336310..4f74a02a9a05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -709,6 +709,7 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) struct amdgpu_ring *ring = af->ring; unsigned long flags; u32 seq, last_seq; + bool reemitted = false; last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; @@ -726,7 +727,9 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { fence = container_of(unprocessed, struct amdgpu_fence, base); - if (fence == af) + if (fence->reemitted > 1) + reemitted = true; + else if (fence == af) dma_fence_set_error(&fence->base, -ETIME); else if (fence->context == af->context) dma_fence_set_error(&fence->base, -ECANCELED); @@ -734,9 +737,16 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) rcu_read_unlock(); } while (last_seq != seq); spin_unlock_irqrestore(&ring->fence_drv.lock, flags); - /* signal the guilty fence */ - amdgpu_fence_write(ring, (u32)af->base.seqno); - amdgpu_fence_process(ring); + + if (reemitted) { + /* if we've already reemitted once then just cancel everything */ + amdgpu_fence_driver_force_completion(af->ring); + af->ring->ring_backup_entries_to_copy = 0; + } else { + /* signal the guilty fence */ + amdgpu_fence_write(ring, (u32)af->base.seqno); + amdgpu_fence_process(ring); + } } void amdgpu_fence_save_wptr(struct amdgpu_fence *af) @@ -784,10 +794,12 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, /* save everything if the ring is not guilty, otherwise * just save the content from other contexts. */ - if (!guilty_fence || (fence->context != guilty_fence->context)) + if (!fence->reemitted && + (!guilty_fence || (fence->context != guilty_fence->context))) amdgpu_ring_backup_unprocessed_command(ring, wptr, fence->wptr); wptr = fence->wptr; + fence->reemitted++; } rcu_read_unlock(); } while (last_seq != seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7a27c6c4bb44..5044cf9e45fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -148,6 +148,8 @@ struct amdgpu_fence { u64 wptr; /* fence context for resets */ u64 context; + /* has this fence been reemitted */ + unsigned int reemitted; }; extern const struct drm_sched_backend_ops amdgpu_sched_ops; From 531b43260928a53f4190f3e61de788551af6157e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Nov 2025 14:12:10 -0500 Subject: [PATCH 22/32] drm/amdgpu: always backup and reemit fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If when we backup the ring contents for reemit before a ring reset, we skip jobs associated with the bad context, however, we need to make sure the fences are reemited as unprocessed submissions may depend on them. v2: clean up fence handling, make helpers static Reviewed-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 155a748f14bc0b72783994dea7c5a12276730342) --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 24 ++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 ++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 4f74a02a9a05..06c333b2213b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -89,6 +89,16 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) return seq; } +static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af) +{ + af->fence_wptr_start = af->ring->wptr; +} + +static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af) +{ + af->fence_wptr_end = af->ring->wptr; +} + /** * amdgpu_fence_emit - emit a fence on the requested ring * @@ -116,8 +126,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, &ring->fence_drv.lock, adev->fence_context + ring->idx, seq); + amdgpu_fence_save_fence_wptr_start(af); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); + amdgpu_fence_save_fence_wptr_end(af); amdgpu_fence_save_wptr(af); pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; @@ -742,10 +754,6 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) /* if we've already reemitted once then just cancel everything */ amdgpu_fence_driver_force_completion(af->ring); af->ring->ring_backup_entries_to_copy = 0; - } else { - /* signal the guilty fence */ - amdgpu_fence_write(ring, (u32)af->base.seqno); - amdgpu_fence_process(ring); } } @@ -795,9 +803,15 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, * just save the content from other contexts. */ if (!fence->reemitted && - (!guilty_fence || (fence->context != guilty_fence->context))) + (!guilty_fence || (fence->context != guilty_fence->context))) { amdgpu_ring_backup_unprocessed_command(ring, wptr, fence->wptr); + } else if (!fence->reemitted) { + /* always save the fence */ + amdgpu_ring_backup_unprocessed_command(ring, + fence->fence_wptr_start, + fence->fence_wptr_end); + } wptr = fence->wptr; fence->reemitted++; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 5044cf9e45fb..055437d4edf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -144,12 +144,15 @@ struct amdgpu_fence { struct amdgpu_ring *ring; ktime_t start_timestamp; - /* wptr for the fence for resets */ + /* wptr for the total submission for resets */ u64 wptr; /* fence context for resets */ u64 context; /* has this fence been reemitted */ unsigned int reemitted; + /* wptr for the fence for the submission */ + u64 fence_wptr_start; + u64 fence_wptr_end; }; extern const struct drm_sched_backend_ops amdgpu_sched_ops; From 7ed51e3a1381422278933d0d3ebda0268b6825de Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Tue, 9 Dec 2025 20:22:15 -0500 Subject: [PATCH 23/32] drm/amd/amdgpu: Fix SMU warning during isp suspend-resume ISP mfd child devices are using genpd and the system suspend-resume operations between genpd and amdgpu parent device which uses only runtime suspend-resume are not in sync. Linux power manager during suspend-resume resuming the genpd devices earlier than the amdgpu parent device. This is resulting in the below warning as SMU is in suspended state when genpd attempts to resume ISP. WARNING: CPU: 13 PID: 5435 at drivers/gpu/drm/amd/amdgpu/../pm/swsmu/amdgpu_smu.c:398 smu_dpm_set_power_gate+0x36f/0x380 [amdgpu] To fix this warning isp suspend-resume is handled as part of amdgpu parent device suspend-resume instead of genpd sequence. Each ISP MFD child device is marked as dev_pm_syscore_device to skip genpd suspend-resume and use pm_runtime_force api's to suspend-resume the devices when callbacks from amdgpu are received. Co-developed-by: Gjorgji Rosikopulos Signed-off-by: Gjorgji Rosikopulos Signed-off-by: Bin Du Signed-off-by: Pratap Nirujogi Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Alex Deucher (cherry picked from commit 0288a345f19b2162546352161509bb24614729e1) --- drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c | 24 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h | 2 ++ drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c | 41 +++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 37270c4dab8d..532f83d783d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -318,12 +318,36 @@ void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr) } EXPORT_SYMBOL(isp_kernel_buffer_free); +static int isp_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_isp *isp = &adev->isp; + + if (isp->funcs->hw_resume) + return isp->funcs->hw_resume(isp); + + return -ENODEV; +} + +static int isp_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_isp *isp = &adev->isp; + + if (isp->funcs->hw_suspend) + return isp->funcs->hw_suspend(isp); + + return -ENODEV; +} + static const struct amd_ip_funcs isp_ip_funcs = { .name = "isp_ip", .early_init = isp_early_init, .hw_init = isp_hw_init, .hw_fini = isp_hw_fini, .is_idle = isp_is_idle, + .suspend = isp_suspend, + .resume = isp_resume, .set_clockgating_state = isp_set_clockgating_state, .set_powergating_state = isp_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index d6f4ffa4c97c..9a5d2b1dff9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -38,6 +38,8 @@ struct amdgpu_isp; struct isp_funcs { int (*hw_init)(struct amdgpu_isp *isp); int (*hw_fini)(struct amdgpu_isp *isp); + int (*hw_suspend)(struct amdgpu_isp *isp); + int (*hw_resume)(struct amdgpu_isp *isp); }; struct amdgpu_isp { diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index 4258d3e0b706..0002bcc6c4ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -26,6 +26,7 @@ */ #include +#include #include "amdgpu.h" #include "isp_v4_1_1.h" @@ -145,6 +146,9 @@ static int isp_genpd_add_device(struct device *dev, void *data) return -ENODEV; } + /* The devices will be managed by the pm ops from the parent */ + dev_pm_syscore_device(dev, true); + exit: /* Continue to add */ return 0; @@ -177,12 +181,47 @@ static int isp_genpd_remove_device(struct device *dev, void *data) drm_err(&adev->ddev, "Failed to remove dev from genpd %d\n", ret); return -ENODEV; } + dev_pm_syscore_device(dev, false); exit: /* Continue to remove */ return 0; } +static int isp_suspend_device(struct device *dev, void *data) +{ + return pm_runtime_force_suspend(dev); +} + +static int isp_resume_device(struct device *dev, void *data) +{ + return pm_runtime_force_resume(dev); +} + +static int isp_v4_1_1_hw_suspend(struct amdgpu_isp *isp) +{ + int r; + + r = device_for_each_child(isp->parent, NULL, + isp_suspend_device); + if (r) + dev_err(isp->parent, "failed to suspend hw devices (%d)\n", r); + + return r; +} + +static int isp_v4_1_1_hw_resume(struct amdgpu_isp *isp) +{ + int r; + + r = device_for_each_child(isp->parent, NULL, + isp_resume_device); + if (r) + dev_err(isp->parent, "failed to resume hw device (%d)\n", r); + + return r; +} + static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { const struct software_node *amd_camera_node, *isp4_node; @@ -369,6 +408,8 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) static const struct isp_funcs isp_v4_1_1_funcs = { .hw_init = isp_v4_1_1_hw_init, .hw_fini = isp_v4_1_1_hw_fini, + .hw_suspend = isp_v4_1_1_hw_suspend, + .hw_resume = isp_v4_1_1_hw_resume, }; void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp) From fd40c146c8ed7f7e8ae36922dcc583bd0ca70b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sat, 6 Dec 2025 03:31:03 +0100 Subject: [PATCH 24/32] drm/amd/display: Correct color depth for SelectCRTC_Source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the correct enum values as expected by the VBIOS. Previously the actual bit depth integer value was passed, which was a mistake. Fixes: 7fb4f254c8eb ("drm/amd/display: Add SelectCRTC_Source to BIOS parser") Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit cdf6e4c0cdab129ffc4e41a8ac53a0738f805072) --- .../drm/amd/display/dc/bios/command_table.c | 25 ++++++++++++++++- .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 28 +------------------ .../amd/display/include/bios_parser_types.h | 2 +- 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 22457f417e65..d56c0d3763dd 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -1797,7 +1797,30 @@ static enum bp_result select_crtc_source_v3( ¶ms.ucEncodeMode)) return BP_RESULT_BADINPUT; - params.ucDstBpc = bp_params->bit_depth; + switch (bp_params->color_depth) { + case COLOR_DEPTH_UNDEFINED: + params.ucDstBpc = PANEL_BPC_UNDEFINE; + break; + case COLOR_DEPTH_666: + params.ucDstBpc = PANEL_6BIT_PER_COLOR; + break; + default: + case COLOR_DEPTH_888: + params.ucDstBpc = PANEL_8BIT_PER_COLOR; + break; + case COLOR_DEPTH_101010: + params.ucDstBpc = PANEL_10BIT_PER_COLOR; + break; + case COLOR_DEPTH_121212: + params.ucDstBpc = PANEL_12BIT_PER_COLOR; + break; + case COLOR_DEPTH_141414: + dm_error("14-bit color not supported by SelectCRTC_Source v3\n"); + break; + case COLOR_DEPTH_161616: + params.ucDstBpc = PANEL_16BIT_PER_COLOR; + break; + } if (EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params)) result = BP_RESULT_OK; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 0cdd8c74abdf..ebd74b43e935 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1610,38 +1610,12 @@ dce110_select_crtc_source(struct pipe_ctx *pipe_ctx) struct dc_bios *bios = link->ctx->dc_bios; struct bp_crtc_source_select crtc_source_select = {0}; enum engine_id engine_id = link->link_enc->preferred_engine; - uint8_t bit_depth; if (dc_is_rgb_signal(pipe_ctx->stream->signal)) engine_id = link->link_enc->analog_engine; - switch (pipe_ctx->stream->timing.display_color_depth) { - case COLOR_DEPTH_UNDEFINED: - bit_depth = 0; - break; - case COLOR_DEPTH_666: - bit_depth = 6; - break; - default: - case COLOR_DEPTH_888: - bit_depth = 8; - break; - case COLOR_DEPTH_101010: - bit_depth = 10; - break; - case COLOR_DEPTH_121212: - bit_depth = 12; - break; - case COLOR_DEPTH_141414: - bit_depth = 14; - break; - case COLOR_DEPTH_161616: - bit_depth = 16; - break; - } - crtc_source_select.controller_id = CONTROLLER_ID_D0 + pipe_ctx->stream_res.tg->inst; - crtc_source_select.bit_depth = bit_depth; + crtc_source_select.color_depth = pipe_ctx->stream->timing.display_color_depth; crtc_source_select.engine_id = engine_id; crtc_source_select.sink_signal = pipe_ctx->stream->signal; diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h index 973b6bdbac63..f40dc612ec73 100644 --- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h +++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h @@ -136,7 +136,7 @@ struct bp_crtc_source_select { enum engine_id engine_id; enum controller_id controller_id; enum signal_type sink_signal; - uint8_t bit_depth; + enum dc_color_depth color_depth; }; struct bp_transmitter_control { From e0d20a7658129fc681e392ba27edea99b77c2e21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sat, 6 Dec 2025 03:31:04 +0100 Subject: [PATCH 25/32] drm/amd/display: Add missing encoder setup to DACnEncoderControl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently the DAC encoder needs to be set up before use. The BIOS parser in DC did not support this so I assumed it was not necessary, but the DAC doesn't work without it on some GPUs. Fixes: 69b29b894660 ("drm/amd/display: Hook up DAC to bios_parser_encoder_control") Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit bb5dfe2f5630ce344c654c705d28b4e20cb9d334) --- .../gpu/drm/amd/display/dc/bios/bios_parser.c | 4 ++-- .../drm/amd/display/dc/bios/command_table.c | 19 +++++++++++-------- .../drm/amd/display/dc/bios/command_table.h | 4 ++-- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index d1471f34e419..9f11e6ca4051 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -763,14 +763,14 @@ static enum bp_result bios_parser_encoder_control( return BP_RESULT_FAILURE; return bp->cmd_tbl.dac1_encoder_control( - bp, cntl->action == ENCODER_CONTROL_ENABLE, + bp, cntl->action, cntl->pixel_clock, ATOM_DAC1_PS2); } else if (cntl->engine_id == ENGINE_ID_DACB) { if (!bp->cmd_tbl.dac2_encoder_control) return BP_RESULT_FAILURE; return bp->cmd_tbl.dac2_encoder_control( - bp, cntl->action == ENCODER_CONTROL_ENABLE, + bp, cntl->action, cntl->pixel_clock, ATOM_DAC1_PS2); } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index d56c0d3763dd..76a3559f0ddc 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -1838,12 +1838,12 @@ static enum bp_result select_crtc_source_v3( static enum bp_result dac1_encoder_control_v1( struct bios_parser *bp, - bool enable, + enum bp_encoder_control_action action, uint32_t pixel_clock, uint8_t dac_standard); static enum bp_result dac2_encoder_control_v1( struct bios_parser *bp, - bool enable, + enum bp_encoder_control_action action, uint32_t pixel_clock, uint8_t dac_standard); @@ -1869,12 +1869,15 @@ static void init_dac_encoder_control(struct bios_parser *bp) static void dac_encoder_control_prepare_params( DAC_ENCODER_CONTROL_PS_ALLOCATION *params, - bool enable, + enum bp_encoder_control_action action, uint32_t pixel_clock, uint8_t dac_standard) { params->ucDacStandard = dac_standard; - if (enable) + if (action == ENCODER_CONTROL_SETUP || + action == ENCODER_CONTROL_INIT) + params->ucAction = ATOM_ENCODER_INIT; + else if (action == ENCODER_CONTROL_ENABLE) params->ucAction = ATOM_ENABLE; else params->ucAction = ATOM_DISABLE; @@ -1887,7 +1890,7 @@ static void dac_encoder_control_prepare_params( static enum bp_result dac1_encoder_control_v1( struct bios_parser *bp, - bool enable, + enum bp_encoder_control_action action, uint32_t pixel_clock, uint8_t dac_standard) { @@ -1896,7 +1899,7 @@ static enum bp_result dac1_encoder_control_v1( dac_encoder_control_prepare_params( ¶ms, - enable, + action, pixel_clock, dac_standard); @@ -1908,7 +1911,7 @@ static enum bp_result dac1_encoder_control_v1( static enum bp_result dac2_encoder_control_v1( struct bios_parser *bp, - bool enable, + enum bp_encoder_control_action action, uint32_t pixel_clock, uint8_t dac_standard) { @@ -1917,7 +1920,7 @@ static enum bp_result dac2_encoder_control_v1( dac_encoder_control_prepare_params( ¶ms, - enable, + action, pixel_clock, dac_standard); diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.h b/drivers/gpu/drm/amd/display/dc/bios/command_table.h index e89b1ba0048b..78bdbcaa61c8 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.h @@ -57,12 +57,12 @@ struct cmd_tbl { struct bp_crtc_source_select *bp_params); enum bp_result (*dac1_encoder_control)( struct bios_parser *bp, - bool enable, + enum bp_encoder_control_action action, uint32_t pixel_clock, uint8_t dac_standard); enum bp_result (*dac2_encoder_control)( struct bios_parser *bp, - bool enable, + enum bp_encoder_control_action action, uint32_t pixel_clock, uint8_t dac_standard); enum bp_result (*dac1_output_control)( From 72d7f4573660287f1b66c30319efecd6fcde92ee Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Mon, 22 Dec 2025 12:26:35 +0800 Subject: [PATCH 26/32] drm/amdgpu: Fix query for VPE block_type and ip_count [Why] Query for VPE block_type and ip_count is missing. [How] Add VPE case in ip_block_type and hw_ip_count query. Reviewed-by: Lang Yu Signed-off-by: Alan Liu Signed-off-by: Alex Deucher (cherry picked from commit a6ea0a430aca5932b9c75d8e38deeb45665dd2ae) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 6ee77f431d56..f65edd80cabf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -201,6 +201,9 @@ static enum amd_ip_block_type type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; break; + case AMDGPU_HW_IP_VPE: + type = AMD_IP_BLOCK_TYPE_VPE; + break; default: type = AMD_IP_BLOCK_TYPE_NUM; break; @@ -721,6 +724,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case AMD_IP_BLOCK_TYPE_UVD: count = adev->uvd.num_uvd_inst; break; + case AMD_IP_BLOCK_TYPE_VPE: + count = adev->vpe.num_instances; + break; /* For all other IP block types not listed in the switch statement * the ip status is valid here and the instance count is one. */ From fd390ff144513eb0310c350b1cf5fa8d6ddd0c53 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Mon, 5 Jan 2026 22:46:38 -0600 Subject: [PATCH 27/32] PCI/VGA: Don't assume the only VGA device on a system is `boot_vga` Some systems ship with multiple display class devices but not all of them are VGA devices. If the "only" VGA device on the system is not used for displaying the image on the screen marking it as `boot_vga` because nothing was found is totally wrong. This behavior actually leads to mistakes of the wrong device being advertised to userspace and then userspace can make incorrect decisions. As there is an accurate `boot_display` sysfs file stop lying about `boot_vga` by assuming if nothing is found it's the right device. Reported-by: Aaron Erhardt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220712 Tested-by: Aaron Erhardt Acked-by: Thomas Zimmermann Fixes: ad90860bd10ee ("fbcon: Use screen info to find primary device") Tested-by: Luke D. Jones Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260106044638.52906-1-superm1@kernel.org --- drivers/pci/vgaarb.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index 436fa7f4c387..baa242b14099 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -652,13 +652,6 @@ static bool vga_is_boot_device(struct vga_device *vgadev) return true; } - /* - * Vgadev has neither IO nor MEM enabled. If we haven't found any - * other VGA devices, it is the best candidate so far. - */ - if (!boot_vga) - return true; - return false; } From d5bdf88d1f9d1e4808177f03d89de3d0ba6c6e84 Mon Sep 17 00:00:00 2001 From: Chengjun Yao Date: Mon, 15 Dec 2025 16:18:21 +0800 Subject: [PATCH 28/32] drm/fb-helper: Fix vblank timeout during suspend/reset During GPU reset, VBlank interrupts are disabled which causes drm_fb_helper_fb_dirty() to wait for VBlank timeout. This will create call traces like (seen on an RX7900 series dGPU): [ 101.313646] ------------[ cut here ]------------ [ 101.313648] amdgpu 0000:03:00.0: [drm] vblank wait timed out on crtc 0 [ 101.313657] WARNING: CPU: 0 PID: 461 at drivers/gpu/drm/drm_vblank.c:1320 drm_wait_one_vblank+0x176/0x220 [ 101.313663] Modules linked in: amdgpu amdxcp drm_panel_backlight_quirks gpu_sched drm_buddy drm_ttm_helper ttm drm_exec drm_suballoc_helper drm_display_helper cec rc_core i2c_algo_bit nf_conntrack_netlink xt_nat xt_tcpudp veth xt_conntrack xt_MASQUERADE bridge stp llc xfrm_user xfrm_algo xt_set ip_set nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_addrtype nft_compat x_tables nf_tables overlay qrtr sunrpc snd_hda_codec_alc882 snd_hda_codec_realtek_lib snd_hda_codec_generic snd_hda_codec_atihdmi snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hda_core snd_intel_dspcfg snd_intel_sdw_acpi snd_hwdep snd_pcm amd_atl intel_rapl_msr snd_seq_midi intel_rapl_common asus_ec_sensors snd_seq_midi_event snd_rawmidi snd_seq eeepc_wmi snd_seq_device edac_mce_amd asus_wmi polyval_clmulni ghash_clmulni_intel snd_timer platform_profile aesni_intel wmi_bmof sparse_keymap joydev snd rapl input_leds i2c_piix4 soundcore ccp k10temp i2c_smbus gpio_amdpt mac_hid binfmt_misc sch_fq_codel msr parport_pc ppdev lp parport [ 101.313745] efi_pstore nfnetlink dmi_sysfs autofs4 hid_generic usbhid hid r8169 realtek ahci libahci video wmi [ 101.313760] CPU: 0 UID: 0 PID: 461 Comm: kworker/0:2 Not tainted 6.18.0-rc6-174403b3b920 #1 PREEMPT(voluntary) [ 101.313763] Hardware name: ASUS System Product Name/TUF GAMING X670E-PLUS, BIOS 0821 11/15/2022 [ 101.313765] Workqueue: events drm_fb_helper_damage_work [ 101.313769] RIP: 0010:drm_wait_one_vblank+0x176/0x220 [ 101.313772] Code: 7c 24 08 4c 8b 77 50 4d 85 f6 0f 84 a1 00 00 00 e8 2f 11 03 00 44 89 e9 4c 89 f2 48 c7 c7 d0 ad 0d a8 48 89 c6 e8 2a e0 4a ff <0f> 0b e9 f2 fe ff ff 48 85 ff 74 04 4c 8b 67 08 4d 8b 6c 24 50 4d [ 101.313774] RSP: 0018:ffffc99c00d47d68 EFLAGS: 00010246 [ 101.313777] RAX: 0000000000000000 RBX: 000000000200038a RCX: 0000000000000000 [ 101.313778] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 101.313779] RBP: ffffc99c00d47dc0 R08: 0000000000000000 R09: 0000000000000000 [ 101.313781] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8948c4280010 [ 101.313782] R13: 0000000000000000 R14: ffff894883263a50 R15: ffff89488c384830 [ 101.313784] FS: 0000000000000000(0000) GS:ffff895424692000(0000) knlGS:0000000000000000 [ 101.313785] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 101.313787] CR2: 00007773650ee200 CR3: 0000000588e40000 CR4: 0000000000f50ef0 [ 101.313788] PKRU: 55555554 [ 101.313790] Call Trace: [ 101.313791] [ 101.313795] ? __pfx_autoremove_wake_function+0x10/0x10 [ 101.313800] drm_crtc_wait_one_vblank+0x17/0x30 [ 101.313802] drm_client_modeset_wait_for_vblank+0x61/0x80 [ 101.313805] drm_fb_helper_damage_work+0x46/0x1a0 [ 101.313808] process_one_work+0x1a1/0x3f0 [ 101.313812] worker_thread+0x2ba/0x3d0 [ 101.313816] kthread+0x107/0x220 [ 101.313818] ? __pfx_worker_thread+0x10/0x10 [ 101.313821] ? __pfx_kthread+0x10/0x10 [ 101.313823] ret_from_fork+0x202/0x230 [ 101.313826] ? __pfx_kthread+0x10/0x10 [ 101.313828] ret_from_fork_asm+0x1a/0x30 [ 101.313834] [ 101.313835] ---[ end trace 0000000000000000 ]--- Cancel pending damage work synchronously before console_lock() to ensure any in-flight framebuffer damage operations complete before suspension. Also check for FBINFO_STATE_RUNNING in drm_fb_helper_damage_work() to avoid executing damage work if it is rescheduled while the device is suspended. Fixes: d8c4bddcd8bc ("drm/fb-helper: Synchronize dirty worker with vblank") Signed-off-by: Aurabindo Pillai Signed-off-by: Chengjun Yao Signed-off-by: Thomas Zimmermann Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/20251215081822.432005-1-Chengjun.Yao@amd.com --- drivers/gpu/drm/drm_fb_helper.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 4a7f72044ab8..4b47aa0dab35 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -366,6 +366,9 @@ static void drm_fb_helper_damage_work(struct work_struct *work) { struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper, damage_work); + if (helper->info->state != FBINFO_STATE_RUNNING) + return; + drm_fb_helper_fb_dirty(helper); } @@ -732,6 +735,13 @@ void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper, if (fb_helper->info->state != FBINFO_STATE_RUNNING) return; + /* + * Cancel pending damage work. During GPU reset, VBlank + * interrupts are disabled and drm_fb_helper_fb_dirty() + * would wait for VBlank timeout otherwise. + */ + cancel_work_sync(&fb_helper->damage_work); + console_lock(); } else { From 8e6ad0dac6266c5e13874e816b016759d7145500 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Tue, 4 Nov 2025 12:29:23 +0100 Subject: [PATCH 29/32] drm/exynos: hdmi: replace use of system_wq with system_percpu_wq Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. This patch continues the effort to refactor worqueue APIs, which has begun with the change introducing new workqueues and a new alloc_workqueue flag: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") system_wq should be the per-cpu workqueue, yet in this name nothing makes that clear, so replace system_wq with system_percpu_wq. The old wq (system_wq) will be kept for a few release cycles. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 01813e11e6c6..8e76ac8ee4e2 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1692,7 +1692,7 @@ static irqreturn_t hdmi_irq_thread(int irq, void *arg) { struct hdmi_context *hdata = arg; - mod_delayed_work(system_wq, &hdata->hotplug_work, + mod_delayed_work(system_percpu_wq, &hdata->hotplug_work, msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS)); return IRQ_HANDLED; From 0de604d0357d0d22cbf03af1077d174b641707b6 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Thu, 25 Dec 2025 16:43:49 +0800 Subject: [PATCH 30/32] drm/amd/pm: Disable MMIO access during SMU Mode 1 reset During Mode 1 reset, the ASIC undergoes a reset cycle and becomes temporarily inaccessible via PCIe. Any attempt to access MMIO registers during this window (e.g., from interrupt handlers or other driver threads) can result in uncompleted PCIe transactions, leading to NMI panics or system hangs. To prevent this, set the `no_hw_access` flag to true immediately after triggering the reset. This signals other driver components to skip register accesses while the device is offline. A memory barrier `smp_mb()` is added to ensure the flag update is globally visible to all cores before the driver enters the sleep/wait state. Signed-off-by: Perry Yuan Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher (cherry picked from commit 7edb503fe4b6d67f47d8bb0dfafb8e699bb0f8a4) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 7 ++++++- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 9 +++++++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 12201b8e99b3..25f49be4d0bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5867,6 +5867,9 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) if (ret) goto mode1_reset_failed; + /* enable mmio access after mode 1 reset completed */ + adev->no_hw_access = false; + amdgpu_device_load_pci_state(adev->pdev); ret = amdgpu_psp_wait_for_bootloader(adev); if (ret) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 677781060246..eaeff6a9bc50 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2923,8 +2923,13 @@ static int smu_v13_0_0_mode1_reset(struct smu_context *smu) break; } - if (!ret) + if (!ret) { + /* disable mmio access while doing mode 1 reset*/ + smu->adev->no_hw_access = true; + /* ensure no_hw_access is globally visible before any MMIO */ + smp_mb(); msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 2cea688c604f..33c3cd2e1e24 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -2143,10 +2143,15 @@ static int smu_v14_0_2_mode1_reset(struct smu_context *smu) ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset); if (!ret) { - if (amdgpu_emu_mode == 1) + if (amdgpu_emu_mode == 1) { msleep(50000); - else + } else { + /* disable mmio access while doing mode 1 reset*/ + smu->adev->no_hw_access = true; + /* ensure no_hw_access is globally visible before any MMIO */ + smp_mb(); msleep(1000); + } } return ret; From eb236fb911ca0f4243124d8a285ec0453a99a8b4 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 8 Dec 2025 12:11:43 -0700 Subject: [PATCH 31/32] drm/amd/display: Check NULL before calling dac_load_detection dac_load_detection can be NULL in some scenario, so checking it before calling. Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Chenyu Chen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 179176134b535246f0b368b30e8ecad50066f896) --- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 6d31f4967f1a..e1940b8e5bc3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -932,7 +932,7 @@ static bool link_detect_dac_load_detect(struct dc_link *link) struct link_encoder *link_enc = link->link_enc; enum engine_id engine_id = link_enc->preferred_engine; enum dal_device_type device_type = DEVICE_TYPE_CRT; - enum bp_result bp_result; + enum bp_result bp_result = BP_RESULT_UNSUPPORTED; uint32_t enum_id; switch (engine_id) { @@ -946,7 +946,9 @@ static bool link_detect_dac_load_detect(struct dc_link *link) break; } - bp_result = bios->funcs->dac_load_detection(bios, engine_id, device_type, enum_id); + if (bios->funcs->dac_load_detection) + bp_result = bios->funcs->dac_load_detection(bios, engine_id, device_type, enum_id); + return bp_result == BP_RESULT_OK; } From 6b2989ac5e8c496c1814d7961bee6f2d05382b3e Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Sat, 29 Nov 2025 19:46:31 -0600 Subject: [PATCH 32/32] Reapply "Revert "drm/amd: Skip power ungate during suspend for VPE"" Skipping power ungate exposed some scenarios that will fail like below: ``` amdgpu: Register(0) [regVPEC_QUEUE_RESET_REQ] failed to reach value 0x00000000 != 0x00000001n amdgpu 0000:c1:00.0: amdgpu: VPE queue reset failed ... amdgpu: [drm] *ERROR* wait_for_completion_timeout timeout! ``` The underlying s2idle issue that prompted this commit is going to be fixed in BIOS. This reverts commit 2a6c826cfeedd7714611ac115371a959ead55bda. This was lost in the 6.19 merge so reapply it. Fixes: 2a6c826cfeed ("drm/amd: Skip power ungate during suspend for VPE") Signed-off-by: Mario Limonciello (AMD) Acked-by: Alex Deucher Reported-by: Konstantin Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220812 Reported-by: Matthew Schwartz Signed-off-by: Alex Deucher (cherry picked from commit 3925683515e93844be204381d2d5a1df5de34f31) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 25f49be4d0bd..d5c44bd34d45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3445,11 +3445,10 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) continue; - /* skip CG for VCE/UVD/VPE, it's handled specially */ + /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && - adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VPE && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_powergating_state) { /* enable powergating to save power */