Merge tag 'drm-xe-fixes-2025-05-15-1' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

Core Changes:
- Add timeslicing and allocation restriction for SVM

Driver Changes:
- Fix shrinker debugfs name
- Add HW workaround to Xe2
- Fix SVM when mixing GPU and CPU atomics
- Fix per client engine utilization due to active contexts not saving
  timestamp with lite restore enabled.

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/qil4scyn6ucnt43u5ju64bi7r7n5r36k4pz5rsh2maz7isle6g@lac3jpsjrrvs
This commit is contained in: commit c81dbc490b
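The core of the new SVM timeslicing is visible in the drm_gpusvm.c hunks below: migration to device memory stamps the allocation with an expiration time, and the migrate-to-RAM path bails out until that time has passed. A minimal userspace C sketch of that idea, with a simple millisecond counter standing in for the kernel's jiffies helpers and all names illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for get_jiffies_64()/msecs_to_jiffies(); here we just
 * count milliseconds directly. */
static uint64_t now_ms;

struct devmem_allocation {
	uint64_t timeslice_expiration;	/* absolute time, ms */
};

/* Called when a GPU fault migrates the range to device memory. */
static void migrate_to_devmem(struct devmem_allocation *a, uint64_t timeslice_ms)
{
	a->timeslice_expiration = now_ms + timeslice_ms;
}

/* Called when a CPU fault wants the pages back in system RAM.
 * Returns true if migration proceeds, false if it is deferred
 * because the GPU still owns the timeslice. */
static bool migrate_to_ram(const struct devmem_allocation *a)
{
	if (now_ms < a->timeslice_expiration)
		return false;	/* corresponds to the early return 0 in the patch */
	return true;
}

int main(void)
{
	struct devmem_allocation a;

	migrate_to_devmem(&a, 5);	/* 5 ms timeslice, as used for atomics in xe_svm.c */

	now_ms = 3;
	printf("CPU fault at t=3ms migrates: %d\n", migrate_to_ram(&a));	/* 0 */
	now_ms = 7;
	printf("CPU fault at t=7ms migrates: %d\n", migrate_to_ram(&a));	/* 1 */
	return 0;
}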
@@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
lockdep_assert_held(&gpusvm->notifier_lock);

if (range->flags.has_dma_mapping) {
struct drm_gpusvm_range_flags flags = {
.__flags = range->flags.__flags,
};

for (i = 0, j = 0; i < npages; j++) {
struct drm_pagemap_device_addr *addr = &range->dma_addr[j];

@@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
dev, *addr);
i += 1 << addr->order;
}
range->flags.has_devmem_pages = false;
range->flags.has_dma_mapping = false;

/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
flags.has_devmem_pages = false;
flags.has_dma_mapping = false;
WRITE_ONCE(range->flags.__flags, flags.__flags);

range->dpagemap = NULL;
}
}
@@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
int err = 0;
struct dev_pagemap *pagemap;
struct drm_pagemap *dpagemap;
struct drm_gpusvm_range_flags flags;

retry:
hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
@@ -1378,7 +1387,8 @@ map_pages:
*/
drm_gpusvm_notifier_lock(gpusvm);

if (range->flags.unmapped) {
flags.__flags = range->flags.__flags;
if (flags.unmapped) {
drm_gpusvm_notifier_unlock(gpusvm);
err = -EFAULT;
goto err_free;
@@ -1454,6 +1464,11 @@ map_pages:
goto err_unmap;
}

if (ctx->devmem_only) {
err = -EFAULT;
goto err_unmap;
}

addr = dma_map_page(gpusvm->drm->dev,
page, 0,
PAGE_SIZE << order,
@@ -1469,14 +1484,17 @@ map_pages:
}
i += 1 << order;
num_dma_mapped = i;
range->flags.has_dma_mapping = true;
flags.has_dma_mapping = true;
}

if (zdd) {
range->flags.has_devmem_pages = true;
flags.has_devmem_pages = true;
range->dpagemap = dpagemap;
}

/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
WRITE_ONCE(range->flags.__flags, flags.__flags);

drm_gpusvm_notifier_unlock(gpusvm);
kvfree(pfns);
set_seqno:
@@ -1765,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
goto err_finalize;

/* Upon success bind devmem allocation to range and zdd */
devmem_allocation->timeslice_expiration = get_jiffies_64() +
msecs_to_jiffies(ctx->timeslice_ms);
zdd->devmem_allocation = devmem_allocation; /* Owns ref */

err_finalize:
@@ -1985,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
void *buf;
int i, err = 0;

if (page) {
zdd = page->zone_device_data;
if (time_before64(get_jiffies_64(),
zdd->devmem_allocation->timeslice_expiration))
return 0;
}

start = ALIGN_DOWN(fault_addr, size);
end = ALIGN(fault_addr + 1, size);

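The flag updates above are gathered into a local struct drm_gpusvm_range_flags copy and published with a single WRITE_ONCE() of the underlying word, so lockless readers that only take an opportunistic READ_ONCE() see a consistent snapshot. A rough userspace sketch of the same pattern, using C11 atomics in place of READ_ONCE()/WRITE_ONCE() and with all names illustrative:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the shape of struct drm_gpusvm_range_flags: named bits
 * overlaid on a single 16-bit word that can be read/written whole. */
struct range_flags {
	union {
		struct {
			uint16_t migrate_devmem : 1;
			uint16_t unmapped : 1;
			uint16_t partial_unmap : 1;
			uint16_t has_devmem_pages : 1;
			uint16_t has_dma_mapping : 1;
		};
		uint16_t __flags;
	};
};

static _Atomic uint16_t published_flags;	/* the word lockless readers look at */

/* Writer side (holds the notifier lock in the kernel): update a local
 * copy, then publish the whole word in one store. */
static void unmap_publish(void)
{
	struct range_flags flags = { .__flags = atomic_load(&published_flags) };

	flags.has_devmem_pages = 0;
	flags.has_dma_mapping = 0;
	atomic_store(&published_flags, flags.__flags);	/* kernel: WRITE_ONCE() */
}

/* Reader side (opportunistic, no lock): snapshot the word once. */
static int range_in_vram(void)
{
	struct range_flags flags = { .__flags = atomic_load(&published_flags) };	/* kernel: READ_ONCE() */

	return flags.has_devmem_pages;
}

int main(void)
{
	struct range_flags init = { 0 };

	init.has_devmem_pages = 1;
	init.has_dma_mapping = 1;
	atomic_store(&published_flags, init.__flags);

	printf("in vram before unmap: %d\n", range_in_vram());
	unmap_publish();
	printf("in vram after unmap: %d\n", range_in_vram());
	return 0;
}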
@@ -47,6 +47,10 @@
#define MI_LRI_FORCE_POSTED REG_BIT(12)
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)

#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4))
#define MI_SRM_USE_GGTT REG_BIT(22)
#define MI_SRM_ADD_CS_OFFSET REG_BIT(19)

#define MI_FLUSH_DW __MI_INSTR(0x26)
#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22)
#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)

@@ -43,6 +43,10 @@
#define XEHPC_BCS8_RING_BASE 0x3ee000
#define GSCCS_RING_BASE 0x11a000

#define ENGINE_ID(base) XE_REG((base) + 0x8c)
#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4)
#define ENGINE_CLASS_ID REG_GENMASK(2, 0)

#define RING_TAIL(base) XE_REG((base) + 0x30)
#define TAIL_ADDR REG_GENMASK(20, 3)

@@ -154,6 +158,7 @@
#define STOP_RING REG_BIT(8)

#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac)
#define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc)

#define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4)

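ENGINE_ID above is a new per-engine register whose value packs the engine class into bits 2:0 and the engine instance into bits 9:4; the workaround batch buffer saves it so the utilization code can later locate the engine a context ran on. A tiny sketch of decoding such a packed word, assuming exactly that layout and using a hand-rolled equivalent of REG_FIELD_GET():

#include <stdint.h>
#include <stdio.h>

/* Assumed layout, matching ENGINE_CLASS_ID = bits 2:0 and
 * ENGINE_INSTANCE_ID = bits 9:4 from the defines above. */
#define ENGINE_CLASS_MASK    0x007u	/* GENMASK(2, 0) */
#define ENGINE_INSTANCE_MASK 0x3f0u	/* GENMASK(9, 4) */

static unsigned int field_get(uint32_t mask, uint32_t val)
{
	/* Shift by the position of the mask's lowest set bit,
	 * like the kernel's REG_FIELD_GET(). */
	return (val & mask) / (mask & -mask);
}

int main(void)
{
	uint32_t engine_id = 0x23;	/* example raw register value */

	printf("class=%u instance=%u\n",
	       field_get(ENGINE_CLASS_MASK, engine_id),
	       field_get(ENGINE_INSTANCE_MASK, engine_id));
	return 0;
}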
@@ -157,6 +157,7 @@
#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108)

#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12)
#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)

#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED)

@@ -11,7 +11,9 @@
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_PER_CTX_PTR (0x12 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_TIMESTAMP_UDW (0x24 + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)

@@ -330,6 +330,8 @@ struct xe_device {
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
/** @info.has_64bit_timestamp: Device supports 64-bit timestamps */
u8 has_64bit_timestamp:1;
/** @info.is_dgfx: is discrete device */
u8 is_dgfx:1;
/**

@@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_lrc *lrc;
u32 old_ts, new_ts;
u64 old_ts, new_ts;
int idx;

/*

@@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
return xe_sched_invalidate_job(job, 2);
}

ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

/*

@@ -24,6 +24,7 @@
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
@@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096
#define LRC_PPHWSP_SIZE SZ_4K

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
@@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
/* The start seqno is stored in the driver-defined portion of PPHWSP */
/* This is stored in the driver-defined portion of PPHWSP */
return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

@@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
{
return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
{
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
/* Indirect ring state page is at the very end of LRC */
@@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
DECL_MAP_ADDR_HELPERS(engine_id)

#undef DECL_MAP_ADDR_HELPERS

@@ -742,19 +756,38 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
* xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp udw GGTT address
*/
u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
{
return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
}

/**
* xe_lrc_ctx_timestamp() - Read ctx timestamp value
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp value
*/
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
u32 ldw, udw = 0;

map = __xe_lrc_ctx_timestamp_map(lrc);
return xe_map_read32(xe, &map);
ldw = xe_map_read32(xe, &map);

if (xe->info.has_64bit_timestamp) {
map = __xe_lrc_ctx_timestamp_udw_map(lrc);
udw = xe_map_read32(xe, &map);
}

return (u64)udw << 32 | ldw;
}

/**
@@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));

xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
@@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
xe_bo_unpin(lrc->bo);
xe_bo_unlock(lrc->bo);
xe_bo_put(lrc->bo);
xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
}

/*
* xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active
* context run ticks.
* @lrc: Pointer to the lrc.
*
* Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
* context, but only gets updated when the context switches out. In order to
* check how long a context has been active before it switches out, two things
* are required:
*
* (1) Determine if the context is running:
* To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
* the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
* initialized. During a query, we just check for this value to determine if the
* context is active. If the context switched out, it would overwrite this
* location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
* the last part of context restore, so reusing this LRC location will not
* clobber anything.
*
* (2) Calculate the time that the context has been active for:
* The CTX_TIMESTAMP ticks only when the context is active. If a context is
* active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
* While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
* engine instance. Since we do not know which instance the context is running
* on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
* store it in the PPHSWP.
*/
#define CONTEXT_ACTIVE 1ULL
static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
{
u32 *cmd;

cmd = lrc->bb_per_ctx_bo->vmap.vaddr;

*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
*cmd++ = ENGINE_ID(0).addr;
*cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
*cmd++ = 0;

*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
*cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
*cmd++ = 0;
*cmd++ = lower_32_bits(CONTEXT_ACTIVE);

if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
*cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
*cmd++ = 0;
*cmd++ = upper_32_bits(CONTEXT_ACTIVE);
}

*cmd++ = MI_BATCH_BUFFER_END;

xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);

}

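The comment and WA BB emission above implement a sentinel scheme: every context restore seeds CTX_TIMESTAMP with CONTEXT_ACTIVE, so a later query can distinguish "still running, read the live MMIO counter" from "switched out, the saved LRC value is final", with a re-read to close the race. A simplified userspace model of that query logic, with plain variables standing in for the LRC image and the MMIO register:

#include <stdint.h>
#include <stdio.h>

#define CONTEXT_ACTIVE 1ULL	/* sentinel written by the WA BB on restore */

/* Toy state standing in for the LRC image and the engine's MMIO counter. */
static uint64_t lrc_ctx_timestamp;	/* what the context image currently holds */
static uint64_t mmio_ctx_timestamp;	/* live, ticks only while the context runs */

static uint64_t query_utilization(void)
{
	uint64_t ts = lrc_ctx_timestamp;

	if (ts == CONTEXT_ACTIVE) {
		/* Context looks active: take the live counter instead. */
		ts = mmio_ctx_timestamp;
		/*
		 * Re-read the LRC afterwards; if the context switched out
		 * meanwhile, its final saved value wins (the "read again"
		 * step in xe_lrc_update_timestamp()).
		 */
		if (lrc_ctx_timestamp != CONTEXT_ACTIVE)
			ts = lrc_ctx_timestamp;
	}
	return ts;
}

int main(void)
{
	/* Context restored: WA BB wrote the sentinel, engine has run 500 ticks. */
	lrc_ctx_timestamp = CONTEXT_ACTIVE;
	mmio_ctx_timestamp = 500;
	printf("while active: %llu\n", (unsigned long long)query_utilization());

	/* Context switched out: hardware saved the accumulated ticks. */
	lrc_ctx_timestamp = 650;
	printf("after switch-out: %llu\n", (unsigned long long)query_utilization());
	return 0;
}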
#define PVC_CTX_ASID (0x2e + 1)
@@ -893,31 +985,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
void *init_data = NULL;
u32 arb_enable;
u32 lrc_size;
u32 bo_flags;
int err;

kref_init(&lrc->refcount);
lrc->gt = gt;
lrc->flags = 0;
lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE;

/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);

lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
ttm_bo_type_kernel,
bo_flags);
if (IS_ERR(lrc->bb_per_ctx_bo)) {
err = PTR_ERR(lrc->bb_per_ctx_bo);
goto err_lrc_finish;
}

lrc->size = lrc_size;
lrc->tile = gt_to_tile(hwe->gt);
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
lrc->ctx_timestamp = 0;

xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
hwe->fence_irq, hwe->name);
@@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
_MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));

lrc->ctx_timestamp = 0;
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);

if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
@@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

xe_lrc_setup_utilization(lrc);

return 0;

err_lrc_finish:
@@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
return __xe_lrc_parallel_map(lrc);
}

/**
* xe_lrc_engine_id() - Read engine id value
* @lrc: Pointer to the lrc.
*
* Returns: context id value
*/
static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;

map = __xe_lrc_engine_id_map(lrc);
return xe_map_read32(xe, &map);
}

static int instr_dw(u32 cmd_header)
{
/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
@@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
snapshot->lrc_snapshot = NULL;
snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
return snapshot;
}
@@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
kfree(snapshot);
}

static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
{
u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
struct xe_hw_engine *hwe;
u64 val;

hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
"Unexpected engine class:instance %d:%d for context utilization\n",
class, instance))
return -1;

if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
val = xe_mmio_read64_2x32(&hwe->gt->mmio,
RING_CTX_TIMESTAMP(hwe->mmio_base));
else
val = xe_mmio_read32(&hwe->gt->mmio,
RING_CTX_TIMESTAMP(hwe->mmio_base));

*reg_ctx_ts = val;

return 0;
}

/**
* xe_lrc_update_timestamp() - Update ctx timestamp
* @lrc: Pointer to the lrc.
* @old_ts: Old timestamp value
*
* Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
* update saved value.
* update saved value. With support for active contexts, the calculation may be
* slightly racy, so follow a read-again logic to ensure that the context is
* still active before returning the right timestamp.
*
* Returns: New ctx timestamp value
*/
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
u64 lrc_ts, reg_ts;
u32 engine_id;

*old_ts = lrc->ctx_timestamp;

lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
lrc_ts = xe_lrc_ctx_timestamp(lrc);
/* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
lrc->ctx_timestamp = lrc_ts;
goto done;
}

if (lrc_ts == CONTEXT_ACTIVE) {
engine_id = xe_lrc_engine_id(lrc);
if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
lrc->ctx_timestamp = reg_ts;

/* read lrc again to ensure context is still active */
lrc_ts = xe_lrc_ctx_timestamp(lrc);
}

/*
* If context switched out, just use the lrc_ts. Note that this needs to
* be a separate if condition.
*/
if (lrc_ts != CONTEXT_ACTIVE)
lrc->ctx_timestamp = lrc_ts;

done:
trace_xe_lrc_update_timestamp(lrc, *old_ts);

return lrc->ctx_timestamp;

@@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot);

u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc);
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);

@@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
*
* Returns the current LRC timestamp
*/
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts);
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts);

#endif

@@ -25,8 +25,8 @@ struct xe_lrc {
/** @size: size of lrc including any indirect ring state page */
u32 size;

/** @tile: tile which this LRC belongs to */
struct xe_tile *tile;
/** @gt: gt which this LRC belongs to */
struct xe_gt *gt;

/** @flags: LRC flags */
#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
@@ -52,7 +52,10 @@ struct xe_lrc {
struct xe_hw_fence_ctx fence_ctx;

/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
u32 ctx_timestamp;
u64 ctx_timestamp;

/** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
struct xe_bo *bb_per_ctx_bo;
};

struct xe_lrc_snapshot;

@@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = {
module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600);
MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2");

module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444);
MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault");

module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
MODULE_PARM_DESC(force_execlist, "Force Execlist submission");

@@ -12,7 +12,6 @@
struct xe_modparam {
bool force_execlist;
bool probe_display;
bool always_migrate_to_vram;
u32 force_vram_bar_size;
int guc_log_level;
char *guc_firmware_path;

@@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
.has_64bit_timestamp = 1, \
.va_bits = 48, \
.vm_max_level = 4, \
.hw_engine_mask = \
@@ -668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe,

xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp;

for_each_remote_tile(tile, xe, id) {
int err;

@@ -21,6 +21,7 @@ struct xe_graphics_desc {
u8 has_indirect_ring_state:1;
u8 has_range_tlb_invalidation:1;
u8 has_usm:1;
u8 has_64bit_timestamp:1;
};

struct xe_media_desc {

@@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm,
}
case DRM_GPUVA_OP_DRIVER:
{
/* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */

if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
op->map_range.range->tile_present |= BIT(tile->id);
op->map_range.range->tile_invalidated &= ~BIT(tile->id);
WRITE_ONCE(op->map_range.range->tile_present,
op->map_range.range->tile_present |
BIT(tile->id));
WRITE_ONCE(op->map_range.range->tile_invalidated,
op->map_range.range->tile_invalidated &
~BIT(tile->id));
} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
op->unmap_range.range->tile_present &= ~BIT(tile->id);
WRITE_ONCE(op->unmap_range.range->tile_present,
op->unmap_range.range->tile_present &
~BIT(tile->id));
}
break;
}

@@ -234,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job)

static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
{
dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT |
MI_COPY_MEM_MEM_DST_GGTT;
dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
dw[i++] = RING_CTX_TIMESTAMP(0).addr;
dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
dw[i++] = MI_NOOP;

return i;
}

@@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe)
if (!shrinker)
return ERR_PTR(-ENOMEM);

shrinker->shrink = shrinker_alloc(0, "xe system shrinker");
shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique);
if (!shrinker->shrink) {
kfree(shrinker);
return ERR_PTR(-ENOMEM);

@@ -15,8 +15,17 @@

static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
/* Not reliable without notifier lock */
return range->base.flags.has_devmem_pages;
/*
* Advisory only check whether the range is currently backed by VRAM
* memory.
*/

struct drm_gpusvm_range_flags flags = {
/* Pairs with WRITE_ONCE in drm_gpusvm.c */
.__flags = READ_ONCE(range->base.flags.__flags),
};

return flags.has_devmem_pages;
}

static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
@@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm)
}

static bool xe_svm_range_is_valid(struct xe_svm_range *range,
struct xe_tile *tile)
struct xe_tile *tile,
bool devmem_only)
{
return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
/*
* Advisory only check whether the range currently has a valid mapping,
* READ_ONCE pairs with WRITE_ONCE in xe_pt.c
*/
return ((READ_ONCE(range->tile_present) &
~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
(!devmem_only || xe_svm_range_in_vram(range));
}

static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
@@ -712,6 +728,36 @@ unlock:
return err;
}

static bool supports_4K_migration(struct xe_device *xe)
{
if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
return false;

return true;
}

static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
struct xe_vma *vma)
{
struct xe_vm *vm = range_to_vm(&range->base);
u64 range_size = xe_svm_range_size(range);

if (!range->base.flags.migrate_devmem)
return false;

if (xe_svm_range_in_vram(range)) {
drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
return false;
}

if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
return false;
}

return true;
}

/**
* xe_svm_handle_pagefault() - SVM handle page fault
* @vm: The VM.
@@ -735,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.check_pages_threshold = IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
.devmem_only = atomic && IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
};
struct xe_svm_range *range;
struct drm_gpusvm_range *r;
struct drm_exec exec;
struct dma_fence *fence;
int migrate_try_count = ctx.devmem_only ? 3 : 1;
ktime_t end = 0;
int err;

@@ -758,24 +809,31 @@ retry:
if (IS_ERR(r))
return PTR_ERR(r);

if (ctx.devmem_only && !r->flags.migrate_devmem)
return -EACCES;

range = to_xe_range(r);
if (xe_svm_range_is_valid(range, tile))
if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
return 0;

range_debug(range, "PAGE FAULT");

/* XXX: Add migration policy, for now migrate range once */
if (!range->skip_migrate && range->base.flags.migrate_devmem &&
xe_svm_range_size(range) >= SZ_64K) {
range->skip_migrate = true;

if (--migrate_try_count >= 0 &&
xe_svm_range_needs_migrate_to_vram(range, vma)) {
err = xe_svm_alloc_vram(vm, tile, range, &ctx);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
drm_dbg(&vm->xe->drm,
"VRAM allocation failed, falling back to "
"retrying fault, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
goto retry;
if (migrate_try_count || !ctx.devmem_only) {
drm_dbg(&vm->xe->drm,
"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
goto retry;
} else {
drm_err(&vm->xe->drm,
"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
return err;
}
}
}

@@ -783,15 +841,23 @@ retry:
err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
/* Corner where CPU mappings have changed */
if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
if (err == -EOPNOTSUPP) {
range_debug(range, "PAGE FAULT - EVICT PAGES");
drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (migrate_try_count > 0 || !ctx.devmem_only) {
if (err == -EOPNOTSUPP) {
range_debug(range, "PAGE FAULT - EVICT PAGES");
drm_gpusvm_range_evict(&vm->svm.gpusvm,
&range->base);
}
drm_dbg(&vm->xe->drm,
"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
range_debug(range, "PAGE FAULT - RETRY PAGES");
goto retry;
} else {
drm_err(&vm->xe->drm,
"Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
}
drm_dbg(&vm->xe->drm,
"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
range_debug(range, "PAGE FAULT - RETRY PAGES");
goto retry;
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
@@ -815,6 +881,7 @@ retry_bind:
drm_exec_fini(&exec);
err = PTR_ERR(fence);
if (err == -EAGAIN) {
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
range_debug(range, "PAGE FAULT - RETRY BIND");
goto retry;
}
@@ -825,9 +892,6 @@ retry_bind:
}
drm_exec_fini(&exec);

if (xe_modparam.always_migrate_to_vram)
range->skip_migrate = false;

dma_fence_wait(fence, false);
dma_fence_put(fence);

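For atomic faults that must be served from device memory, the xe_svm.c changes above bound the number of migration attempts (migrate_try_count) and double ctx.timeslice_ms on every retry, so a range that keeps bouncing gets a longer residency window each round. A compact sketch of that control flow, with a stubbed-out migration step and illustrative names:

#include <stdbool.h>
#include <stdio.h>

/* Stub: pretend the first attempts get raced out by the CPU. */
static bool try_migrate_to_vram(int attempt)
{
	return attempt >= 2;	/* succeeds on the third try */
}

static int handle_fault(bool devmem_only)
{
	int migrate_try_count = devmem_only ? 3 : 1;
	unsigned long timeslice_ms = devmem_only ? 5 : 0;
	int attempt = 0;

retry:
	if (--migrate_try_count >= 0) {
		bool ok = try_migrate_to_vram(attempt++);

		timeslice_ms <<= 1;	/* double the residency window for the next round */
		if (!ok) {
			if (migrate_try_count || !devmem_only)
				goto retry;	/* fall back to retrying the fault */
			return -1;		/* retry budget exhausted */
		}
	}
	printf("bound after %d attempt(s), timeslice now %lums\n",
	       attempt, timeslice_ms);
	return 0;
}

int main(void)
{
	return handle_fault(true) ? 1 : 0;
}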
@@ -36,11 +36,6 @@ struct xe_svm_range {
* range. Protected by GPU SVM notifier lock.
*/
u8 tile_invalidated;
/**
* @skip_migrate: Skip migration to VRAM, protected by GPU fault handler
* locking.
*/
u8 skip_migrate :1;
};

#if IS_ENABLED(CONFIG_DRM_GPUSVM)

@@ -19,12 +19,12 @@
#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev)

TRACE_EVENT(xe_lrc_update_timestamp,
TP_PROTO(struct xe_lrc *lrc, uint32_t old),
TP_PROTO(struct xe_lrc *lrc, uint64_t old),
TP_ARGS(lrc, old),
TP_STRUCT__entry(
__field(struct xe_lrc *, lrc)
__field(u32, old)
__field(u32, new)
__field(u64, old)
__field(u64, new)
__string(name, lrc->fence_ctx.name)
__string(device_id, __dev_name_lrc(lrc))
),
@@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp,
__assign_str(name);
__assign_str(device_id);
),
TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s",
TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s",
__entry->lrc, __get_str(name),
__entry->old, __entry->new,
__get_str(device_id))

@@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
{ XE_RTP_NAME("22021007897"),
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
},

/* Xe3_LPG */
{ XE_RTP_NAME("14021490052"),

@@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops {
* @ops: Pointer to the operations structure for GPU SVM device memory
* @dpagemap: The struct drm_pagemap of the pages this allocation belongs to.
* @size: Size of device memory allocation
* @timeslice_expiration: Timeslice expiration in jiffies
*/
struct drm_gpusvm_devmem {
struct device *dev;
@@ -97,6 +98,7 @@ struct drm_gpusvm_devmem {
const struct drm_gpusvm_devmem_ops *ops;
struct drm_pagemap *dpagemap;
size_t size;
u64 timeslice_expiration;
};

/**
@@ -185,6 +187,31 @@ struct drm_gpusvm_notifier {
} flags;
};

/**
* struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags
*
* @migrate_devmem: Flag indicating whether the range can be migrated to device memory
* @unmapped: Flag indicating if the range has been unmapped
* @partial_unmap: Flag indicating if the range has been partially unmapped
* @has_devmem_pages: Flag indicating if the range has devmem pages
* @has_dma_mapping: Flag indicating if the range has a DMA mapping
* @__flags: Flags for range in u16 form (used for READ_ONCE)
*/
struct drm_gpusvm_range_flags {
union {
struct {
/* All flags below must be set upon creation */
u16 migrate_devmem : 1;
/* All flags below must be set / cleared under notifier lock */
u16 unmapped : 1;
u16 partial_unmap : 1;
u16 has_devmem_pages : 1;
u16 has_dma_mapping : 1;
};
u16 __flags;
};
};

/**
* struct drm_gpusvm_range - Structure representing a GPU SVM range
*
@@ -198,11 +225,6 @@ struct drm_gpusvm_notifier {
* @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
* Note this is assuming only one drm_pagemap per range is allowed.
* @flags: Flags for range
* @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory
* @flags.unmapped: Flag indicating if the range has been unmapped
* @flags.partial_unmap: Flag indicating if the range has been partially unmapped
* @flags.has_devmem_pages: Flag indicating if the range has devmem pages
* @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping
*
* This structure represents a GPU SVM range used for tracking memory ranges
* mapped in a DRM device.
@@ -216,15 +238,7 @@ struct drm_gpusvm_range {
unsigned long notifier_seq;
struct drm_pagemap_device_addr *dma_addr;
struct drm_pagemap *dpagemap;
struct {
/* All flags below must be set upon creation */
u16 migrate_devmem : 1;
/* All flags below must be set / cleared under notifier lock */
u16 unmapped : 1;
u16 partial_unmap : 1;
u16 has_devmem_pages : 1;
u16 has_dma_mapping : 1;
} flags;
struct drm_gpusvm_range_flags flags;
};

/**
@@ -283,17 +297,22 @@ struct drm_gpusvm {
* @check_pages_threshold: Check CPU pages for present if chunk is less than or
* equal to threshold. If not present, reduce chunk
* size.
* @timeslice_ms: The timeslice MS which in minimum time a piece of memory
* remains with either exclusive GPU or CPU access.
* @in_notifier: entering from a MMU notifier
* @read_only: operating on read-only memory
* @devmem_possible: possible to use device memory
* @devmem_only: use only device memory
*
* Context that is DRM GPUSVM is operating in (i.e. user arguments).
*/
struct drm_gpusvm_ctx {
unsigned long check_pages_threshold;
unsigned long timeslice_ms;
unsigned int in_notifier :1;
unsigned int read_only :1;
unsigned int devmem_possible :1;
unsigned int devmem_only :1;
};

int drm_gpusvm_init(struct drm_gpusvm *gpusvm,