[PATCH] drm/xe: Replace the scratch pages with NULL PTEs

Thomas Hellström thomas.hellstrom at linux.intel.com
Thu Dec 7 17:10:56 UTC 2023


On 12/7/23 17:42, Lucas De Marchi wrote:
> On Thu, Dec 07, 2023 at 05:25:01PM +0100, Thomas Hellström wrote:
>> + intel-xe cc.
>>
>>
>> On 12/7/23 17:17, Lucas De Marchi wrote:
>>> On Thu, Dec 07, 2023 at 04:44:23PM +0100, Thomas Hellström wrote:
>>>> Replace the scratch pages with NULL PTEs that return 0 when reading 
>>>> and
>>>> do nothing when writing.
>>>
>>> but don't we want to keep scratch so we can check when things go wrong
>>> and writes go where they shouldn't?
>>
>> The argument against that was that apps that incorrectly write to 
>> scratch and then read back the same value wouldn't notice that they 
>> did something wrong, and the case you mention would to some extent be 
>> captured by not having a scratch page at all and checking for pagefaults?
>
> maybe... But if we have a null pte where we had scratch mapped, aren't we
> actually masking the pagefault? What about platforms where we don't have
> pagefault? Relying on pagefault alone I think would actually be: use a
> null pte to account for HW prefetch where applicable, but don't use it
> elsewhere, and only if we support pagefault on that platform. But I
> may be misunderstanding to what degree this patch is replacing scratch
> since I see you kept xe_vm_has_scratch().

With the current uAPI we're by default setting unmapped PTEs to 0, but 
UMD can at VM creation request scratch PTEs instead, they are then 
resolving to a single RW page.

What this patch does (if it weren't buggy) is to change the latter to 
point to the NULL PTEs that read 0 and writes nothing, mainly for the 
purpose of (buggy?) 3D workloads that expect reading 0 if they access an 
unmapped area....

But with the default (no scratch) behaviour any HW prefetch outside a 
mapped area would cause, I figure, at least an unrecoverable GPU 
pagefault or IOMMU pagefault, so UMDs are typically expected to 
over-allocate to account for that?

/Thomas


>
> Lucas De Marchi
>
>
>>
>> I admit I haven't been following the discussions around scratch pages 
>> long enough to know the historical uses for it. Cc'ing Joonas for 
>> further discussion.
>>
>> /Thomas
>>
>>
>>>
>>> Lucas De Marchi
>>>
>>>>
>>>> Cc: Brian Welty <brian.welty at intel.com>
>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>>>> ---
>>>> drivers/gpu/drm/xe/xe_pt.c       | 68 ++++----------------------------
>>>> drivers/gpu/drm/xe/xe_pt.h       |  3 --
>>>> drivers/gpu/drm/xe/xe_vm.c       | 45 +--------------------
>>>> drivers/gpu/drm/xe/xe_vm.h       |  5 +++
>>>> drivers/gpu/drm/xe/xe_vm_types.h |  2 -
>>>> 5 files changed, 14 insertions(+), 109 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>>>> index 35bd7940a571..ce2846d56b04 100644
>>>> --- a/drivers/gpu/drm/xe/xe_pt.c
>>>> +++ b/drivers/gpu/drm/xe/xe_pt.c
>>>> @@ -50,17 +50,14 @@ static struct xe_pt *xe_pt_entry(struct 
>>>> xe_pt_dir *pt_dir, unsigned int index)
>>>> static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
>>>>                  unsigned int level)
>>>> {
>>>> -    u16 pat_index = tile_to_xe(tile)->pat.idx[XE_CACHE_WB];
>>>> -    u8 id = tile->id;
>>>> +    struct xe_device *xe = tile_to_xe(tile);
>>>> +    u16 pat_index = xe->pat.idx[XE_CACHE_WB];
>>>>
>>>> -    if (!vm->scratch_bo[id])
>>>> +    if (!xe_vm_has_scratch(vm))
>>>>         return 0;
>>>>
>>>> -    if (level > 0)
>>>> -        return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level 
>>>> - 1]->bo,
>>>> -                         0, pat_index);
>>>> -
>>>> -    return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, 
>>>> pat_index, 0);
>>>> +    return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, 
>>>> IS_DGFX(xe),
>>>> +                       0) | XE_PTE_NULL;
>>>> }
>>>>
>>>> /**
>>>> @@ -135,7 +132,7 @@ void xe_pt_populate_empty(struct xe_tile *tile, 
>>>> struct xe_vm *vm,
>>>>     u64 empty;
>>>>     int i;
>>>>
>>>> -    if (!vm->scratch_bo[tile->id]) {
>>>> +    if (!xe_vm_has_scratch(vm)) {
>>>>         /*
>>>>          * FIXME: Some memory is allocated already allocated to zero?
>>>>          * Find out which memory that is and avoid this memset...
>>>> @@ -194,57 +191,6 @@ void xe_pt_destroy(struct xe_pt *pt, u32 
>>>> flags, struct llist_head *deferred)
>>>>     kfree(pt);
>>>> }
>>>>
>>>> -/**
>>>> - * xe_pt_create_scratch() - Setup a scratch memory pagetable tree 
>>>> for the
>>>> - * given tile and vm.
>>>> - * @xe: xe device.
>>>> - * @tile: tile to set up for.
>>>> - * @vm: vm to set up for.
>>>> - *
>>>> - * Sets up a pagetable tree with one page-table per level and a 
>>>> single
>>>> - * leaf bo. All pagetable entries point to the single page-table or,
>>>> - * for L0, the single bo one level below.
>>>> - *
>>>> - * Return: 0 on success, negative error code on error.
>>>> - */
>>>> -int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile,
>>>> -             struct xe_vm *vm)
>>>> -{
>>>> -    u8 id = tile->id;
>>>> -    unsigned int flags;
>>>> -    int i;
>>>> -
>>>> -    /*
>>>> -     * So we don't need to worry about 64K TLB hints when dealing 
>>>> with
>>>> -     * scratch entires, rather keep the scratch page in system 
>>>> memory on
>>>> -     * platforms where 64K pages are needed for VRAM.
>>>> -     */
>>>> -    flags = XE_BO_CREATE_PINNED_BIT;
>>>> -    if (vm->flags & XE_VM_FLAG_64K)
>>>> -        flags |= XE_BO_CREATE_SYSTEM_BIT;
>>>> -    else
>>>> -        flags |= XE_BO_CREATE_VRAM_IF_DGFX(tile);
>>>> -
>>>> -    vm->scratch_bo[id] = xe_bo_create_pin_map(xe, tile, vm, SZ_4K,
>>>> -                          ttm_bo_type_kernel,
>>>> -                          flags);
>>>> -    if (IS_ERR(vm->scratch_bo[id]))
>>>> -        return PTR_ERR(vm->scratch_bo[id]);
>>>> -
>>>> -    xe_map_memset(vm->xe, &vm->scratch_bo[id]->vmap, 0, 0,
>>>> -              vm->scratch_bo[id]->size);
>>>> -
>>>> -    for (i = 0; i < vm->pt_root[id]->level; i++) {
>>>> -        vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
>>>> -        if (IS_ERR(vm->scratch_pt[id][i]))
>>>> -            return PTR_ERR(vm->scratch_pt[id][i]);
>>>> -
>>>> -        xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
>>>> -    }
>>>> -
>>>> -    return 0;
>>>> -}
>>>> -
>>>> /**
>>>>  * DOC: Pagetable building
>>>>  *
>>>> @@ -1286,7 +1232,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct 
>>>> xe_vma *vma, struct xe_exec_queue
>>>>      * it needs to be done here.
>>>>      */
>>>>     if ((rebind && !xe_vm_in_lr_mode(vm) && 
>>>> !vm->batch_invalidate_tlb) ||
>>>> -        (!rebind && vm->scratch_bo[tile->id] && 
>>>> xe_vm_in_preempt_fence_mode(vm))) {
>>>> +        (!rebind && xe_vm_has_scratch(vm) && 
>>>> xe_vm_in_preempt_fence_mode(vm))) {
>>>>         ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
>>>>         if (!ifence)
>>>>             return ERR_PTR(-ENOMEM);
>>>> diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
>>>> index d5460e58dbbf..6f7359ac55bd 100644
>>>> --- a/drivers/gpu/drm/xe/xe_pt.h
>>>> +++ b/drivers/gpu/drm/xe/xe_pt.h
>>>> @@ -26,9 +26,6 @@ unsigned int xe_pt_shift(unsigned int level);
>>>> struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
>>>>                unsigned int level);
>>>>
>>>> -int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile,
>>>> -             struct xe_vm *vm);
>>>> -
>>>> void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
>>>>               struct xe_pt *pt);
>>>>
>>>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>>>> index e09050f16f07..868b9a2a5d02 100644
>>>> --- a/drivers/gpu/drm/xe/xe_vm.c
>>>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>>>> @@ -1332,7 +1332,7 @@ static void vm_destroy_work_func(struct 
>>>> work_struct *w);
>>>> struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>>>> {
>>>>     struct xe_vm *vm;
>>>> -    int err, i = 0, number_tiles = 0;
>>>> +    int err, number_tiles = 0;
>>>>     struct xe_tile *tile;
>>>>     u8 id;
>>>>
>>>> @@ -1397,17 +1397,8 @@ struct xe_vm *xe_vm_create(struct xe_device 
>>>> *xe, u32 flags)
>>>>         }
>>>>     }
>>>>
>>>> -    if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
>>>> -        for_each_tile(tile, xe, id) {
>>>> -            if (!vm->pt_root[id])
>>>> -                continue;
>>>> -
>>>> -            err = xe_pt_create_scratch(xe, tile, vm);
>>>> -            if (err)
>>>> -                goto err_scratch_pt;
>>>> -        }
>>>> +    if (xe_vm_has_scratch(vm))
>>>>         vm->batch_invalidate_tlb = true;
>>>> -    }
>>>>
>>>>     if (flags & XE_VM_FLAG_LR_MODE) {
>>>>         INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
>>>> @@ -1415,7 +1406,6 @@ struct xe_vm *xe_vm_create(struct xe_device 
>>>> *xe, u32 flags)
>>>>         vm->batch_invalidate_tlb = false;
>>>>     }
>>>>
>>>> -    /* Fill pt_root after allocating scratch tables */
>>>>     for_each_tile(tile, xe, id) {
>>>>         if (!vm->pt_root[id])
>>>>             continue;
>>>> @@ -1465,19 +1455,6 @@ struct xe_vm *xe_vm_create(struct xe_device 
>>>> *xe, u32 flags)
>>>>
>>>>     return vm;
>>>>
>>>> -err_scratch_pt:
>>>> -    for_each_tile(tile, xe, id) {
>>>> -        if (!vm->pt_root[id])
>>>> -            continue;
>>>> -
>>>> -        i = vm->pt_root[id]->level;
>>>> -        while (i)
>>>> -            if (vm->scratch_pt[id][--i])
>>>> -                xe_pt_destroy(vm->scratch_pt[id][i],
>>>> -                          vm->flags, NULL);
>>>> -        xe_bo_unpin(vm->scratch_bo[id]);
>>>> -        xe_bo_put(vm->scratch_bo[id]);
>>>> -    }
>>>> err_destroy_root:
>>>>     for_each_tile(tile, xe, id) {
>>>>         if (vm->pt_root[id])
>>>> @@ -1556,24 +1533,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>>>>         vma->gpuva.flags |= XE_VMA_DESTROYED;
>>>>     }
>>>>
>>>> -    /*
>>>> -     * All vm operations will add shared fences to resv.
>>>> -     * The only exception is eviction for a shared object,
>>>> -     * but even so, the unbind when evicted would still
>>>> -     * install a fence to resv. Hence it's safe to
>>>> -     * destroy the pagetables immediately.
>>>> -     */
>>>> -    for_each_tile(tile, xe, id) {
>>>> -        if (vm->scratch_bo[id]) {
>>>> -            u32 i;
>>>> -
>>>> -            xe_bo_unpin(vm->scratch_bo[id]);
>>>> -            xe_bo_put(vm->scratch_bo[id]);
>>>> -            for (i = 0; i < vm->pt_root[id]->level; i++)
>>>> -                xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
>>>> -                          NULL);
>>>> -        }
>>>> -    }
>>>>     xe_vm_unlock(vm);
>>>>
>>>>     /*
>>>> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
>>>> index 9a0ae19c47b7..b92d8f54011d 100644
>>>> --- a/drivers/gpu/drm/xe/xe_vm.h
>>>> +++ b/drivers/gpu/drm/xe/xe_vm.h
>>>> @@ -61,6 +61,11 @@ static inline bool 
>>>> xe_vm_is_closed_or_banned(struct xe_vm *vm)
>>>>     return xe_vm_is_closed(vm) || xe_vm_is_banned(vm);
>>>> }
>>>>
>>>> +static inline bool xe_vm_has_scratch(const struct xe_vm *vm)
>>>> +{
>>>> +    return vm->flags & XE_VM_FLAG_SCRATCH_PAGE;
>>>> +}
>>>> +
>>>> struct xe_vma *
>>>> xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
>>>>
>>>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h 
>>>> b/drivers/gpu/drm/xe/xe_vm_types.h
>>>> index 23abdfd8622f..a5762e99e6aa 100644
>>>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>>>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>>>> @@ -158,8 +158,6 @@ struct xe_vm {
>>>>     u64 size;
>>>>
>>>>     struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
>>>> -    struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
>>>> -    struct xe_pt 
>>>> *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
>>>>
>>>>     /**
>>>>      * @flags: flags for this VM, statically setup a creation time 
>>>> aside
>>>> -- 2.42.0
>>>>


More information about the Intel-xe mailing list