

Merge tag 'drm-xe-fixes-2025-05-15-1' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

Core Changes:
- Add timeslicing and allocation restriction for SVM

Driver Changes:
- Fix shrinker debugfs name
- Add HW workaround to Xe2
- Fix SVM when mixing GPU and CPU atomics
- Fix per-client engine utilization: active contexts were not saving the
  timestamp with lite restore enabled.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/qil4scyn6ucnt43u5ju64bi7r7n5r36k4pz5rsh2maz7isle6g@lac3jpsjrrvs
Commit c81dbc490b by Dave Airlie, 2025-05-16 11:21:29 +10:00
23 changed files with 391 additions and 87 deletions

View File

@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
lockdep_assert_held(&gpusvm->notifier_lock);
if (range->flags.has_dma_mapping) {
struct drm_gpusvm_range_flags flags = {
.__flags = range->flags.__flags,
};
for (i = 0, j = 0; i < npages; j++) {
struct drm_pagemap_device_addr *addr = &range->dma_addr[j];
@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
dev, *addr);
i += 1 << addr->order;
}
range->flags.has_devmem_pages = false;
range->flags.has_dma_mapping = false;
/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
flags.has_devmem_pages = false;
flags.has_dma_mapping = false;
WRITE_ONCE(range->flags.__flags, flags.__flags);
range->dpagemap = NULL;
}
}
@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
int err = 0;
struct dev_pagemap *pagemap;
struct drm_pagemap *dpagemap;
struct drm_gpusvm_range_flags flags;
retry:
hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
@ -1378,7 +1387,8 @@ map_pages:
*/
drm_gpusvm_notifier_lock(gpusvm);
if (range->flags.unmapped) {
flags.__flags = range->flags.__flags;
if (flags.unmapped) {
drm_gpusvm_notifier_unlock(gpusvm);
err = -EFAULT;
goto err_free;
@ -1454,6 +1464,11 @@ map_pages:
goto err_unmap;
}
if (ctx->devmem_only) {
err = -EFAULT;
goto err_unmap;
}
addr = dma_map_page(gpusvm->drm->dev,
page, 0,
PAGE_SIZE << order,
@ -1469,14 +1484,17 @@ map_pages:
}
i += 1 << order;
num_dma_mapped = i;
range->flags.has_dma_mapping = true;
flags.has_dma_mapping = true;
}
if (zdd) {
range->flags.has_devmem_pages = true;
flags.has_devmem_pages = true;
range->dpagemap = dpagemap;
}
/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
WRITE_ONCE(range->flags.__flags, flags.__flags);
drm_gpusvm_notifier_unlock(gpusvm);
kvfree(pfns);
set_seqno:
@ -1765,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
goto err_finalize;
/* Upon success bind devmem allocation to range and zdd */
devmem_allocation->timeslice_expiration = get_jiffies_64() +
msecs_to_jiffies(ctx->timeslice_ms);
zdd->devmem_allocation = devmem_allocation; /* Owns ref */
err_finalize:
@ -1985,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
void *buf;
int i, err = 0;
if (page) {
zdd = page->zone_device_data;
if (time_before64(get_jiffies_64(),
zdd->devmem_allocation->timeslice_expiration))
return 0;
}
start = ALIGN_DOWN(fault_addr, size);
end = ALIGN(fault_addr + 1, size);
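
The timeslice logic added above is easier to follow outside of diff context: migrating a range to device memory stamps an expiration time on the allocation, and the CPU fault path declines to migrate it back to system RAM until that time has passed. Below is a minimal standalone C sketch of that idea (not kernel code; the jiffies handling, struct, and helper names are simplified stand-ins).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's jiffies bookkeeping. */
static uint64_t now_jiffies;                  /* models get_jiffies_64() */
#define MSECS_TO_JIFFIES(ms) ((uint64_t)(ms)) /* pretend 1 jiffy == 1 ms */

struct devmem_alloc {
        uint64_t timeslice_expiration;        /* mirrors drm_gpusvm_devmem */
};

/* On migration to device memory: grant the GPU a minimum residency window. */
static void migrate_to_devmem(struct devmem_alloc *a, unsigned long timeslice_ms)
{
        a->timeslice_expiration = now_jiffies + MSECS_TO_JIFFIES(timeslice_ms);
}

/* CPU fault path: skip migration back to RAM while the timeslice is unexpired. */
static bool should_migrate_to_ram(const struct devmem_alloc *a)
{
        return now_jiffies >= a->timeslice_expiration;
}

int main(void)
{
        struct devmem_alloc a;

        migrate_to_devmem(&a, 5);             /* xe uses 5 ms for atomic faults */
        now_jiffies += 2;
        printf("t+2ms: migrate back? %d\n", should_migrate_to_ram(&a)); /* 0 */
        now_jiffies += 4;
        printf("t+6ms: migrate back? %d\n", should_migrate_to_ram(&a)); /* 1 */
        return 0;
}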

View File

@ -47,6 +47,10 @@
#define MI_LRI_FORCE_POSTED REG_BIT(12)
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4))
#define MI_SRM_USE_GGTT REG_BIT(22)
#define MI_SRM_ADD_CS_OFFSET REG_BIT(19)
#define MI_FLUSH_DW __MI_INSTR(0x26)
#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22)
#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)

View File

@ -43,6 +43,10 @@
#define XEHPC_BCS8_RING_BASE 0x3ee000
#define GSCCS_RING_BASE 0x11a000
#define ENGINE_ID(base) XE_REG((base) + 0x8c)
#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4)
#define ENGINE_CLASS_ID REG_GENMASK(2, 0)
#define RING_TAIL(base) XE_REG((base) + 0x30)
#define TAIL_ADDR REG_GENMASK(20, 3)
@ -154,6 +158,7 @@
#define STOP_RING REG_BIT(8)
#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac)
#define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc)
#define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4)

View File

@ -157,6 +157,7 @@
#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108)
#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12)
#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED)

View File

@ -11,7 +11,9 @@
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_PER_CTX_PTR (0x12 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_TIMESTAMP_UDW (0x24 + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)

View File

@ -330,6 +330,8 @@ struct xe_device {
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
/** @info.has_64bit_timestamp: Device supports 64-bit timestamps */
u8 has_64bit_timestamp:1;
/** @info.is_dgfx: is discrete device */
u8 is_dgfx:1;
/**

View File

@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_lrc *lrc;
u32 old_ts, new_ts;
u64 old_ts, new_ts;
int idx;
/*

View File

@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
return xe_sched_invalidate_job(job, 2);
}
ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
/*

View File

@ -24,6 +24,7 @@
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096
#define LRC_PPHWSP_SIZE SZ_4K
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
/* The start seqno is stored in the driver-defined portion of PPHWSP */
/* This is stored in the driver-defined portion of PPHWSP */
return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}
@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
{
return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
}
static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}
static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
{
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
}
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
/* Indirect ring state page is at the very end of LRC */
@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
DECL_MAP_ADDR_HELPERS(engine_id)
#undef DECL_MAP_ADDR_HELPERS
@ -742,19 +756,38 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}
/**
* xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp udw GGTT address
*/
u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
{
return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
}
/**
* xe_lrc_ctx_timestamp() - Read ctx timestamp value
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp value
*/
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
u32 ldw, udw = 0;
map = __xe_lrc_ctx_timestamp_map(lrc);
return xe_map_read32(xe, &map);
ldw = xe_map_read32(xe, &map);
if (xe->info.has_64bit_timestamp) {
map = __xe_lrc_ctx_timestamp_udw_map(lrc);
udw = xe_map_read32(xe, &map);
}
return (u64)udw << 32 | ldw;
}
/**
@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
xe_bo_unpin(lrc->bo);
xe_bo_unlock(lrc->bo);
xe_bo_put(lrc->bo);
xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
}
/*
* xe_lrc_setup_utilization() - Set up the WA BB to assist in calculating active
* context run ticks.
* @lrc: Pointer to the lrc.
*
* Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
* context, but only gets updated when the context switches out. In order to
* check how long a context has been active before it switches out, two things
* are required:
*
* (1) Determine if the context is running:
* To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
* the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
* initialized. During a query, we just check for this value to determine if the
* context is active. If the context switched out, it would overwrite this
* location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
* the last part of context restore, so reusing this LRC location will not
* clobber anything.
*
* (2) Calculate the time that the context has been active for:
* The CTX_TIMESTAMP ticks only when the context is active. If a context is
* active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
* While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
* engine instance. Since we do not know which instance the context is running
* on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
* store it in the PPHWSP.
*/
#define CONTEXT_ACTIVE 1ULL
static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
{
u32 *cmd;
cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
*cmd++ = ENGINE_ID(0).addr;
*cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
*cmd++ = 0;
*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
*cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
*cmd++ = 0;
*cmd++ = lower_32_bits(CONTEXT_ACTIVE);
if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
*cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
*cmd++ = 0;
*cmd++ = upper_32_bits(CONTEXT_ACTIVE);
}
*cmd++ = MI_BATCH_BUFFER_END;
xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
}
#define PVC_CTX_ASID (0x2e + 1)
@ -893,31 +985,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
void *init_data = NULL;
u32 arb_enable;
u32 lrc_size;
u32 bo_flags;
int err;
kref_init(&lrc->refcount);
lrc->gt = gt;
lrc->flags = 0;
lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE;
/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
ttm_bo_type_kernel,
bo_flags);
if (IS_ERR(lrc->bb_per_ctx_bo)) {
err = PTR_ERR(lrc->bb_per_ctx_bo);
goto err_lrc_finish;
}
lrc->size = lrc_size;
lrc->tile = gt_to_tile(hwe->gt);
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
lrc->ctx_timestamp = 0;
xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
hwe->fence_irq, hwe->name);
@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
_MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));
lrc->ctx_timestamp = 0;
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);
if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
xe_lrc_setup_utilization(lrc);
return 0;
err_lrc_finish:
@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
return __xe_lrc_parallel_map(lrc);
}
/**
* xe_lrc_engine_id() - Read engine id value
* @lrc: Pointer to the lrc.
*
* Returns: engine id value
*/
static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
map = __xe_lrc_engine_id_map(lrc);
return xe_map_read32(xe, &map);
}
static int instr_dw(u32 cmd_header)
{
/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
snapshot->lrc_snapshot = NULL;
snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
return snapshot;
}
@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
kfree(snapshot);
}
static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
{
u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
struct xe_hw_engine *hwe;
u64 val;
hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
"Unexpected engine class:instance %d:%d for context utilization\n",
class, instance))
return -1;
if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
val = xe_mmio_read64_2x32(&hwe->gt->mmio,
RING_CTX_TIMESTAMP(hwe->mmio_base));
else
val = xe_mmio_read32(&hwe->gt->mmio,
RING_CTX_TIMESTAMP(hwe->mmio_base));
*reg_ctx_ts = val;
return 0;
}
/**
* xe_lrc_update_timestamp() - Update ctx timestamp
* @lrc: Pointer to the lrc.
* @old_ts: Old timestamp value
*
* Populate @old_ts with the current saved ctx timestamp, read the new ctx timestamp and
* update saved value.
* update saved value. With support for active contexts, the calculation may be
* slightly racy, so follow a read-again logic to ensure that the context is
* still active before returning the right timestamp.
*
* Returns: New ctx timestamp value
*/
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
u64 lrc_ts, reg_ts;
u32 engine_id;
*old_ts = lrc->ctx_timestamp;
lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
lrc_ts = xe_lrc_ctx_timestamp(lrc);
/* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
lrc->ctx_timestamp = lrc_ts;
goto done;
}
if (lrc_ts == CONTEXT_ACTIVE) {
engine_id = xe_lrc_engine_id(lrc);
if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
lrc->ctx_timestamp = reg_ts;
/* read lrc again to ensure context is still active */
lrc_ts = xe_lrc_ctx_timestamp(lrc);
}
/*
* If context switched out, just use the lrc_ts. Note that this needs to
* be a separate if condition.
*/
if (lrc_ts != CONTEXT_ACTIVE)
lrc->ctx_timestamp = lrc_ts;
done:
trace_xe_lrc_update_timestamp(lrc, *old_ts);
return lrc->ctx_timestamp;
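
The utilization fix above hinges on a sentinel: the per-context WA BB seeds the LRC timestamp slot with CONTEXT_ACTIVE (1), so a sampler that reads 1 knows the context is still running, takes the live RING_CTX_TIMESTAMP from MMIO, then re-reads the LRC slot to catch a context switch that raced with the sample. A standalone C sketch of that read-again flow (the helpers and globals are hypothetical stand-ins, not the driver's API).

#include <stdint.h>
#include <stdio.h>

#define CONTEXT_ACTIVE 1ULL  /* sentinel the per-context WA BB writes into the LRC */

/* Hypothetical stand-ins for the LRC timestamp slot and the engine MMIO counter. */
static uint64_t lrc_slot = CONTEXT_ACTIVE;   /* models the CTX_TIMESTAMP slot in the LRC */
static uint64_t mmio_ticks = 12345;          /* models RING_CTX_TIMESTAMP for the engine */

static uint64_t read_lrc_ts(void)  { return lrc_slot; }
static uint64_t read_mmio_ts(void) { return mmio_ticks; }

/* Mirrors the shape of the read-again logic: sentinel check, MMIO read, re-check. */
static uint64_t sample_ctx_timestamp(uint64_t *saved)
{
        uint64_t lrc_ts = read_lrc_ts();

        if (lrc_ts == CONTEXT_ACTIVE) {
                *saved = read_mmio_ts();     /* context looks active: use the live counter */
                lrc_ts = read_lrc_ts();      /* re-read to catch a switch-out race */
        }
        if (lrc_ts != CONTEXT_ACTIVE)
                *saved = lrc_ts;             /* switched out: the saved LRC value wins */

        return *saved;
}

int main(void)
{
        uint64_t saved = 0;

        printf("while active: %llu\n", (unsigned long long)sample_ctx_timestamp(&saved));
        lrc_slot = 20000;                    /* context switches out; HW saves the real total */
        printf("after switch: %llu\n", (unsigned long long)sample_ctx_timestamp(&saved));
        return 0;
}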

View File

@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot);
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc);
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
*
* Returns the current LRC timestamp
*/
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts);
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts);
#endif

View File

@ -25,8 +25,8 @@ struct xe_lrc {
/** @size: size of lrc including any indirect ring state page */
u32 size;
/** @tile: tile which this LRC belongs to */
struct xe_tile *tile;
/** @gt: gt which this LRC belongs to */
struct xe_gt *gt;
/** @flags: LRC flags */
#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
@ -52,7 +52,10 @@ struct xe_lrc {
struct xe_hw_fence_ctx fence_ctx;
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
u32 ctx_timestamp;
u64 ctx_timestamp;
/** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
struct xe_bo *bb_per_ctx_bo;
};
struct xe_lrc_snapshot;

View File

@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = {
module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600);
MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2");
module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444);
MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault");
module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
MODULE_PARM_DESC(force_execlist, "Force Execlist submission");

View File

@ -12,7 +12,6 @@
struct xe_modparam {
bool force_execlist;
bool probe_display;
bool always_migrate_to_vram;
u32 force_vram_bar_size;
int guc_log_level;
char *guc_firmware_path;

View File

@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
.has_64bit_timestamp = 1, \
.va_bits = 48, \
.vm_max_level = 4, \
.hw_engine_mask = \
@ -668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe,
xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp;
for_each_remote_tile(tile, xe, id) {
int err;

View File

@ -21,6 +21,7 @@ struct xe_graphics_desc {
u8 has_indirect_ring_state:1;
u8 has_range_tlb_invalidation:1;
u8 has_usm:1;
u8 has_64bit_timestamp:1;
};
struct xe_media_desc {

View File

@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm,
}
case DRM_GPUVA_OP_DRIVER:
{
/* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */
if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
op->map_range.range->tile_present |= BIT(tile->id);
op->map_range.range->tile_invalidated &= ~BIT(tile->id);
WRITE_ONCE(op->map_range.range->tile_present,
op->map_range.range->tile_present |
BIT(tile->id));
WRITE_ONCE(op->map_range.range->tile_invalidated,
op->map_range.range->tile_invalidated &
~BIT(tile->id));
} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
op->unmap_range.range->tile_present &= ~BIT(tile->id);
WRITE_ONCE(op->unmap_range.range->tile_present,
op->unmap_range.range->tile_present &
~BIT(tile->id));
}
break;
}

View File

@ -234,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job)
static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
{
dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT |
MI_COPY_MEM_MEM_DST_GGTT;
dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
dw[i++] = RING_CTX_TIMESTAMP(0).addr;
dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
dw[i++] = MI_NOOP;
return i;
}
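
For reference, the MI_STORE_REGISTER_MEM packet emitted above is four dwords: a header carrying the GGTT and CS-offset flag bits, the register offset to sample, and the destination address split into low and high halves. A standalone sketch of that layout (the header encoding, register offset, and address below are placeholders for illustration, not the real driver values).

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Flag bits match the MI command header in this series; the opcode encoding is a
 * placeholder for __MI_INSTR(0x24) | XE_INSTR_NUM_DW(4). */
#define MI_STORE_REGISTER_MEM_HDR 0x24000002u /* hypothetical, for illustration only */
#define MI_SRM_USE_GGTT           (1u << 22)
#define MI_SRM_ADD_CS_OFFSET      (1u << 19)

/* Emit the 4-dword SRM packet: header, source register, destination address (lo, hi). */
static int emit_srm(uint32_t *dw, int i, uint32_t reg, uint64_t ggtt_addr)
{
        dw[i++] = MI_STORE_REGISTER_MEM_HDR | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
        dw[i++] = reg;                         /* register to sample, e.g. RING_CTX_TIMESTAMP */
        dw[i++] = (uint32_t)ggtt_addr;         /* destination GGTT address, low 32 bits */
        dw[i++] = (uint32_t)(ggtt_addr >> 32); /* high 32 bits (zero for 32-bit offsets) */
        return i;
}

int main(void)
{
        uint32_t dw[4];
        int n = emit_srm(dw, 0, 0x3a8, 0x100000); /* placeholder offset and address */

        for (int i = 0; i < n; i++)
                printf("dw[%d] = 0x%08" PRIx32 "\n", i, dw[i]);
        return 0;
}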

View File

@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe)
if (!shrinker)
return ERR_PTR(-ENOMEM);
shrinker->shrink = shrinker_alloc(0, "xe system shrinker");
shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique);
if (!shrinker->shrink) {
kfree(shrinker);
return ERR_PTR(-ENOMEM);

View File

@ -15,8 +15,17 @@
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
/* Not reliable without notifier lock */
return range->base.flags.has_devmem_pages;
/*
* Advisory only check whether the range is currently backed by VRAM
* memory.
*/
struct drm_gpusvm_range_flags flags = {
/* Pairs with WRITE_ONCE in drm_gpusvm.c */
.__flags = READ_ONCE(range->base.flags.__flags),
};
return flags.has_devmem_pages;
}
static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm)
}
static bool xe_svm_range_is_valid(struct xe_svm_range *range,
struct xe_tile *tile)
struct xe_tile *tile,
bool devmem_only)
{
return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
/*
* Advisory only check whether the range currently has a valid mapping,
* READ_ONCE pairs with WRITE_ONCE in xe_pt.c
*/
return ((READ_ONCE(range->tile_present) &
~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
(!devmem_only || xe_svm_range_in_vram(range));
}
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
@ -712,6 +728,36 @@ unlock:
return err;
}
static bool supports_4K_migration(struct xe_device *xe)
{
if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
return false;
return true;
}
static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
struct xe_vma *vma)
{
struct xe_vm *vm = range_to_vm(&range->base);
u64 range_size = xe_svm_range_size(range);
if (!range->base.flags.migrate_devmem)
return false;
if (xe_svm_range_in_vram(range)) {
drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
return false;
}
if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
return false;
}
return true;
}
/**
* xe_svm_handle_pagefault() - SVM handle page fault
* @vm: The VM.
@ -735,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.check_pages_threshold = IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
.devmem_only = atomic && IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
};
struct xe_svm_range *range;
struct drm_gpusvm_range *r;
struct drm_exec exec;
struct dma_fence *fence;
int migrate_try_count = ctx.devmem_only ? 3 : 1;
ktime_t end = 0;
int err;
@ -758,24 +809,31 @@ retry:
if (IS_ERR(r))
return PTR_ERR(r);
if (ctx.devmem_only && !r->flags.migrate_devmem)
return -EACCES;
range = to_xe_range(r);
if (xe_svm_range_is_valid(range, tile))
if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
return 0;
range_debug(range, "PAGE FAULT");
/* XXX: Add migration policy, for now migrate range once */
if (!range->skip_migrate && range->base.flags.migrate_devmem &&
xe_svm_range_size(range) >= SZ_64K) {
range->skip_migrate = true;
if (--migrate_try_count >= 0 &&
xe_svm_range_needs_migrate_to_vram(range, vma)) {
err = xe_svm_alloc_vram(vm, tile, range, &ctx);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
drm_dbg(&vm->xe->drm,
"VRAM allocation failed, falling back to "
"retrying fault, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
goto retry;
if (migrate_try_count || !ctx.devmem_only) {
drm_dbg(&vm->xe->drm,
"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
goto retry;
} else {
drm_err(&vm->xe->drm,
"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
return err;
}
}
}
@ -783,15 +841,23 @@ retry:
err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
/* Corner where CPU mappings have changed */
if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
if (err == -EOPNOTSUPP) {
range_debug(range, "PAGE FAULT - EVICT PAGES");
drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (migrate_try_count > 0 || !ctx.devmem_only) {
if (err == -EOPNOTSUPP) {
range_debug(range, "PAGE FAULT - EVICT PAGES");
drm_gpusvm_range_evict(&vm->svm.gpusvm,
&range->base);
}
drm_dbg(&vm->xe->drm,
"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
range_debug(range, "PAGE FAULT - RETRY PAGES");
goto retry;
} else {
drm_err(&vm->xe->drm,
"Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
}
drm_dbg(&vm->xe->drm,
"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
range_debug(range, "PAGE FAULT - RETRY PAGES");
goto retry;
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
@ -815,6 +881,7 @@ retry_bind:
drm_exec_fini(&exec);
err = PTR_ERR(fence);
if (err == -EAGAIN) {
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
range_debug(range, "PAGE FAULT - RETRY BIND");
goto retry;
}
@ -825,9 +892,6 @@ retry_bind:
}
drm_exec_fini(&exec);
if (xe_modparam.always_migrate_to_vram)
range->skip_migrate = false;
dma_fence_wait(fence, false);
dma_fence_put(fence);

View File

@ -36,11 +36,6 @@ struct xe_svm_range {
* range. Protected by GPU SVM notifier lock.
*/
u8 tile_invalidated;
/**
* @skip_migrate: Skip migration to VRAM, protected by GPU fault handler
* locking.
*/
u8 skip_migrate :1;
};
#if IS_ENABLED(CONFIG_DRM_GPUSVM)

View File

@ -19,12 +19,12 @@
#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev)
TRACE_EVENT(xe_lrc_update_timestamp,
TP_PROTO(struct xe_lrc *lrc, uint32_t old),
TP_PROTO(struct xe_lrc *lrc, uint64_t old),
TP_ARGS(lrc, old),
TP_STRUCT__entry(
__field(struct xe_lrc *, lrc)
__field(u32, old)
__field(u32, new)
__field(u64, old)
__field(u64, new)
__string(name, lrc->fence_ctx.name)
__string(device_id, __dev_name_lrc(lrc))
),
@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp,
__assign_str(name);
__assign_str(device_id);
),
TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s",
TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s",
__entry->lrc, __get_str(name),
__entry->old, __entry->new,
__get_str(device_id))

View File

@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
{ XE_RTP_NAME("22021007897"),
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
},
/* Xe3_LPG */
{ XE_RTP_NAME("14021490052"),

View File

@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops {
* @ops: Pointer to the operations structure for GPU SVM device memory
* @dpagemap: The struct drm_pagemap of the pages this allocation belongs to.
* @size: Size of device memory allocation
* @timeslice_expiration: Timeslice expiration in jiffies
*/
struct drm_gpusvm_devmem {
struct device *dev;
@ -97,6 +98,7 @@ struct drm_gpusvm_devmem {
const struct drm_gpusvm_devmem_ops *ops;
struct drm_pagemap *dpagemap;
size_t size;
u64 timeslice_expiration;
};
/**
@ -185,6 +187,31 @@ struct drm_gpusvm_notifier {
} flags;
};
/**
* struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags
*
* @migrate_devmem: Flag indicating whether the range can be migrated to device memory
* @unmapped: Flag indicating if the range has been unmapped
* @partial_unmap: Flag indicating if the range has been partially unmapped
* @has_devmem_pages: Flag indicating if the range has devmem pages
* @has_dma_mapping: Flag indicating if the range has a DMA mapping
* @__flags: Flags for range in u16 form (used for READ_ONCE)
*/
struct drm_gpusvm_range_flags {
union {
struct {
/* All flags below must be set upon creation */
u16 migrate_devmem : 1;
/* All flags below must be set / cleared under notifier lock */
u16 unmapped : 1;
u16 partial_unmap : 1;
u16 has_devmem_pages : 1;
u16 has_dma_mapping : 1;
};
u16 __flags;
};
};
/**
* struct drm_gpusvm_range - Structure representing a GPU SVM range
*
@ -198,11 +225,6 @@ struct drm_gpusvm_notifier {
* @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
* Note this is assuming only one drm_pagemap per range is allowed.
* @flags: Flags for range
* @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory
* @flags.unmapped: Flag indicating if the range has been unmapped
* @flags.partial_unmap: Flag indicating if the range has been partially unmapped
* @flags.has_devmem_pages: Flag indicating if the range has devmem pages
* @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping
*
* This structure represents a GPU SVM range used for tracking memory ranges
* mapped in a DRM device.
@ -216,15 +238,7 @@ struct drm_gpusvm_range {
unsigned long notifier_seq;
struct drm_pagemap_device_addr *dma_addr;
struct drm_pagemap *dpagemap;
struct {
/* All flags below must be set upon creation */
u16 migrate_devmem : 1;
/* All flags below must be set / cleared under notifier lock */
u16 unmapped : 1;
u16 partial_unmap : 1;
u16 has_devmem_pages : 1;
u16 has_dma_mapping : 1;
} flags;
struct drm_gpusvm_range_flags flags;
};
/**
@ -283,17 +297,22 @@ struct drm_gpusvm {
* @check_pages_threshold: Check CPU pages for present if chunk is less than or
* equal to threshold. If not present, reduce chunk
* size.
* @timeslice_ms: Minimum time, in ms, that a piece of memory remains with
* either exclusive GPU or CPU access.
* @in_notifier: entering from a MMU notifier
* @read_only: operating on read-only memory
* @devmem_possible: possible to use device memory
* @devmem_only: use only device memory
*
* Context that is DRM GPUSVM is operating in (i.e. user arguments).
*/
struct drm_gpusvm_ctx {
unsigned long check_pages_threshold;
unsigned long timeslice_ms;
unsigned int in_notifier :1;
unsigned int read_only :1;
unsigned int devmem_possible :1;
unsigned int devmem_only :1;
};
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
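
Wrapping the bitfield in a union with a raw u16 view is what lets the writer publish every flag bit with a single WRITE_ONCE and lets lockless readers snapshot them with a single READ_ONCE, as the drm_gpusvm.c and xe_svm.c hunks above do. A standalone C11 sketch of the same pattern, using relaxed atomics in place of the kernel macros (names are illustrative, not the DRM API).

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the shape of drm_gpusvm_range_flags: named bits plus a raw u16 view. */
struct range_flags {
        union {
                struct {
                        uint16_t migrate_devmem   : 1;
                        uint16_t unmapped         : 1;
                        uint16_t partial_unmap    : 1;
                        uint16_t has_devmem_pages : 1;
                        uint16_t has_dma_mapping  : 1;
                };
                uint16_t __flags;
        };
};

static _Atomic uint16_t published;      /* stands in for range->flags.__flags */

/* Writer (under the notifier lock): update a local copy, then publish in one store. */
static void publish(struct range_flags f)
{
        atomic_store_explicit(&published, f.__flags, memory_order_relaxed);
}

/* Reader (lockless, advisory only): snapshot all bits with a single load. */
static struct range_flags snapshot(void)
{
        struct range_flags f = {
                .__flags = atomic_load_explicit(&published, memory_order_relaxed),
        };
        return f;
}

int main(void)
{
        struct range_flags f = { 0 };

        f.has_devmem_pages = 1;
        f.has_dma_mapping = 1;
        publish(f);
        printf("devmem pages? %d\n", (int)snapshot().has_devmem_pages);
        return 0;
}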