Jason Gunthorpe 1eb0ae6fbd iommupt/vtd: Support mgaw's less than a 4 level walk for first stage
If the IOVA is limited to less than 48 bits the page table will be constructed
with a 3 level configuration, which is unsupported by the hardware.

Like the second stage, the caller needs to pass in both the top_level and
the vasz to specify a table that has more levels than required to hold the
IOVA range.

Fixes: 6cbc09b7719e ("iommu/vt-d: Restore previous domain::aperture_end calculation")
Reported-by: Calvin Owens <calvin@wbinvd.org>
Closes: https://lore.kernel.org/r/8f257d2651eb8a4358fcbd47b0145002e5f1d638.1764237717.git.calvin@wbinvd.org
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Calvin Owens <calvin@wbinvd.org>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2025-11-28 08:43:55 +01:00


/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
*/
#ifndef __GENERIC_PT_IOMMU_H
#define __GENERIC_PT_IOMMU_H
#include <linux/generic_pt/common.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>
struct iommu_iotlb_gather;
struct pt_iommu_ops;
struct pt_iommu_driver_ops;
struct iommu_dirty_bitmap;
/**
* DOC: IOMMU Radix Page Table
*
* The IOMMU implementation of the Generic Page Table provides an ops struct
* that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and
* the generic map/unmap interface.
*
 * This interface uses a caller-provided locking approach. The caller must have
* a VA range lock concept that prevents concurrent threads from calling ops on
* the same VA. Generally the range lock must be at least as large as a single
* map call.
*/
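/*
 * As a hedged illustration of that locking contract (the lock name and the
 * choice of the amdv1 format below are assumptions for the example, not part
 * of this API): a caller serving the generic map interface serializes all
 * calls touching a given VA range before invoking the per-format entry
 * points, e.g.:
 *
 *	mutex_lock(&caller_va_range_lock);
 *	ret = pt_iommu_amdv1_map_pages(domain, iova, paddr, SZ_4K, 1,
 *				       IOMMU_READ | IOMMU_WRITE, GFP_KERNEL,
 *				       &mapped);
 *	mutex_unlock(&caller_va_range_lock);
 */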
/**
* struct pt_iommu - Base structure for IOMMU page tables
*
* The format-specific struct will include this as the first member.
*/
struct pt_iommu {
/**
* @domain: The core IOMMU domain. The driver should use a union to
* overlay this memory with its previously existing domain struct to
* create an alias.
*/
struct iommu_domain domain;
/**
* @ops: Function pointers to access the API
*/
const struct pt_iommu_ops *ops;
/**
* @driver_ops: Function pointers provided by the HW driver to help
* manage HW details like caches.
*/
const struct pt_iommu_driver_ops *driver_ops;
/**
* @nid: Node ID to use for table memory allocations. The IOMMU driver
* may want to set the NID to the device's NID, if there are multiple
* table walkers.
*/
int nid;
/**
 * @iommu_device: Device pointer used for any DMA cache flushing when
 * PT_FEAT_DMA_INCOHERENT is set. This is the iommu device that created the
 * page table, which must have dma ops that perform cache flushing.
*/
struct device *iommu_device;
};
/**
* struct pt_iommu_info - Details about the IOMMU page table
*
* Returned from pt_iommu_ops->get_info()
*/
struct pt_iommu_info {
/**
* @pgsize_bitmap: A bitmask where each set bit indicates
* a page size that can be natively stored in the page table.
*/
u64 pgsize_bitmap;
};
struct pt_iommu_ops {
/**
* @set_dirty: Make the iova write dirty
* @iommu_table: Table to manipulate
* @iova: IO virtual address to start
*
* This is only used by iommufd testing. It makes the iova dirty so that
 * read_and_clear_dirty() will see it as dirty. Unlike all the other ops,
 * this one is safe to call without holding any locks. It may return
* -EAGAIN if there is a race.
*/
int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);
/**
* @get_info: Return the pt_iommu_info structure
* @iommu_table: Table to query
*
* Return some basic static information about the page table.
*/
void (*get_info)(struct pt_iommu *iommu_table,
struct pt_iommu_info *info);
/**
* @deinit: Undo a format specific init operation
* @iommu_table: Table to destroy
*
* Release all of the memory. The caller must have already removed the
* table from all HW access and all caches.
*/
void (*deinit)(struct pt_iommu *iommu_table);
};
/**
* struct pt_iommu_driver_ops - HW IOTLB cache flushing operations
*
* The IOMMU driver should implement these using container_of(iommu_table) to
 * get to its iommu_domain derived structure. All ops can be called in atomic
* contexts as they are buried under DMA API calls.
*/
struct pt_iommu_driver_ops {
/**
* @change_top: Update the top of table pointer
* @iommu_table: Table to operate on
* @top_paddr: New CPU physical address of the top pointer
* @top_level: IOMMU PT level of the new top
*
* Called under the get_top_lock() spinlock. The driver must update all
* HW references to this domain with a new top address and
 * configuration. On return, mappings placed in the new top must be
* reachable by the HW.
*
 * top_level encodes the level in IOMMU PT format; level 0 is the
 * smallest page size, increasing from there. This has to be translated
* to any HW specific format. During this call the new top will not be
* visible to any other API.
*
* This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
* enabled.
*/
void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
unsigned int top_level);
/**
* @get_top_lock: lock to hold when changing the table top
* @iommu_table: Table to operate on
*
 * Return a lock to hold when changing the top of the page table that is
 * stored in HW. The lock will be held prior to calling
* change_top() and released once the top is fully visible.
*
* Typically this would be a lock that protects the iommu_domain's
* attachment list.
*
* This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
* enabled.
*/
spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
};
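/*
 * A minimal sketch of a get_top_lock() implementation, assuming a driver
 * whose attachments are protected by a per-domain spinlock (the structure
 * and member names here are hypothetical, not part of this API):
 *
 *	static spinlock_t *my_get_top_lock(struct pt_iommu *iommu_table)
 *	{
 *		struct my_domain *md =
 *			container_of(iommu_table, struct my_domain, xx.iommu);
 *
 *		return &md->devices_lock;
 *	}
 */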
static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
{
/*
* It is safe to call pt_iommu_deinit() before an init, or if init
* fails. The ops pointer will only become non-NULL if deinit needs to be
* run.
*/
if (iommu_table->ops)
iommu_table->ops->deinit(iommu_table);
}
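/*
 * Typical use is from the driver's domain free path, once the table is no
 * longer referenced by HW or caches. A rough sketch, assuming hypothetical
 * driver types and helpers:
 *
 *	static void my_domain_free(struct iommu_domain *domain)
 *	{
 *		struct my_domain *md = to_my_domain(domain);
 *
 *		pt_iommu_deinit(&md->xx.iommu);
 *		kfree(md);
 *	}
 */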
/**
* struct pt_iommu_cfg - Common configuration values for all formats
*/
struct pt_iommu_cfg {
/**
* @features: Features required. Only these features will be turned on.
* The feature list should reflect what the IOMMU HW is capable of.
*/
unsigned int features;
/**
* @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will
* imply the top level of the table.
*/
u8 hw_max_vasz_lg2;
/**
* @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format
* might select a lower maximum OA.
*/
u8 hw_max_oasz_lg2;
};
/* Generate the exported function signatures from iommu_pt.h */
#define IOMMU_PROTOTYPES(fmt) \
phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
dma_addr_t iova); \
int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain, \
unsigned long iova, phys_addr_t paddr, \
size_t pgsize, size_t pgcount, \
int prot, gfp_t gfp, size_t *mapped); \
size_t pt_iommu_##fmt##_unmap_pages( \
struct iommu_domain *domain, unsigned long iova, \
size_t pgsize, size_t pgcount, \
struct iommu_iotlb_gather *iotlb_gather); \
int pt_iommu_##fmt##_read_and_clear_dirty( \
struct iommu_domain *domain, unsigned long iova, size_t size, \
unsigned long flags, struct iommu_dirty_bitmap *dirty); \
int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \
const struct pt_iommu_##fmt##_cfg *cfg, \
gfp_t gfp); \
void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table, \
struct pt_iommu_##fmt##_hw_info *info)
#define IOMMU_FORMAT(fmt, member) \
struct pt_iommu_##fmt { \
struct pt_iommu iommu; \
struct pt_##fmt member; \
}; \
IOMMU_PROTOTYPES(fmt)
/*
* A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the
* iommu_pt
*/
#define IOMMU_PT_DOMAIN_OPS(fmt) \
.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
.map_pages = &pt_iommu_##fmt##_map_pages, \
.unmap_pages = &pt_iommu_##fmt##_unmap_pages
#define IOMMU_PT_DIRTY_OPS(fmt) \
.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
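/*
 * For illustration (hypothetical driver, the amdv1 format is chosen
 * arbitrarily), the macros above fill in the translation entries of the
 * driver's iommu_domain_ops alongside its own callbacks:
 *
 *	static const struct iommu_domain_ops my_domain_ops = {
 *		IOMMU_PT_DOMAIN_OPS(amdv1),
 *		.attach_dev = my_attach_dev,
 *		.free = my_domain_free,
 *	};
 */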
/*
 * The driver should set up its domain struct like
* union {
* struct iommu_domain domain;
* struct pt_iommu_xxx xx;
* };
* PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);
*
* Which creates an alias between driver_domain.domain and
* driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing
* driver_domain.domain users.
*/
#define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \
static_assert(offsetof(s, pt_iommu_memb.domain) == \
offsetof(s, domain_memb))
struct pt_iommu_amdv1_cfg {
struct pt_iommu_cfg common;
unsigned int starting_level;
};
struct pt_iommu_amdv1_hw_info {
u64 host_pt_root;
u8 mode;
};
IOMMU_FORMAT(amdv1, amdpt);
/* amdv1_mock is used by the iommufd selftest */
#define pt_iommu_amdv1_mock pt_iommu_amdv1
#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
struct pt_iommu_amdv1_mock_hw_info;
IOMMU_PROTOTYPES(amdv1_mock);
struct pt_iommu_vtdss_cfg {
struct pt_iommu_cfg common;
/* 4 is a 57 bit 5 level table */
unsigned int top_level;
};
struct pt_iommu_vtdss_hw_info {
u64 ssptptr;
u8 aw;
};
IOMMU_FORMAT(vtdss, vtdss_pt);
struct pt_iommu_x86_64_cfg {
struct pt_iommu_cfg common;
/* 4 is a 57 bit 5 level table */
unsigned int top_level;
};
struct pt_iommu_x86_64_hw_info {
u64 gcr3_pt;
u8 levels;
};
IOMMU_FORMAT(x86_64, x86_64_pt);
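/*
 * A rough configuration/init sketch (the values, the x86_64 format choice,
 * and the surrounding driver structure are illustrative assumptions; the
 * features mask is omitted): the driver fills the per-format cfg from its HW
 * capabilities, then initializes the table embedded in its domain:
 *
 *	struct pt_iommu_x86_64_cfg cfg = {
 *		.common = {
 *			.hw_max_vasz_lg2 = 48,
 *			.hw_max_oasz_lg2 = 52,
 *		},
 *		.top_level = 3,
 *	};
 *	int ret = pt_iommu_x86_64_init(&md->xx, &cfg, GFP_KERNEL);
 */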
#undef IOMMU_PROTOTYPES
#undef IOMMU_FORMAT
#endif