[PATCH 4/5] drm/amdgpu: Support page directory update via CPU
zhoucm1
david1.zhou at amd.com
Wed May 17 01:54:52 UTC 2017
On 2017年05月17日 05:02, Kasiviswanathan, Harish wrote:
>
>
> -----Original Message-----
> From: Zhou, David(ChunMing)
> Sent: Monday, May 15, 2017 10:50 PM
> To: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>;
> amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
>
>
>
> On 2017年05月16日 05:32, Harish Kasiviswanathan wrote:
> > If amdgpu.vm_update_context param is set to use CPU, then Page
> > Directories will be updated by CPU instead of SDMA
> >
> > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151
> ++++++++++++++++++++++++---------
> > 1 file changed, 109 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index 9c89cb2..d72a624 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct
> amdgpu_device *adev,
> > uint64_t saddr, uint64_t eaddr,
> > unsigned level)
> > {
> > + u64 flags;
> > unsigned shift = (adev->vm_manager.num_level - level) *
> > adev->vm_manager.block_size;
> > unsigned pt_idx, from, to;
> > @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct
> amdgpu_device *adev,
> > saddr = saddr & ((1 << shift) - 1);
> > eaddr = eaddr & ((1 << shift) - 1);
> >
> > + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > + AMDGPU_GEM_CREATE_VRAM_CLEARED;
> > + if (vm->use_cpu_for_update)
> > + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> I think shadow flag is need for CPU case as well, which is used to
> backup VM bo and meaningful when gpu reset.
> same comment for pd bo.
>
> [HK]: Yes support for shadow BOs are desirable and it could be
> implemented as a separate commit. For supporting shadow BOs the caller
> should explicitly add shadow BOs into ttm_eu_reserve_buffer(..) to
> remove the BO from TTM swap list or ttm_bo_kmap has to be modified.
> This implementation for CPU update of VM page tables is mainly for KFD
> usage. Graphics will use for experimental and testing purpose. From
> KFD's view point shadow BO are not useful because if GPU is reset then
> all queue information is lost (since submissions are done by user
> space) and it is not possible to recover.
Either way is fine to me.
David Zhou
>
> Regards,
> David Zhou
> > + else
> > + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > + AMDGPU_GEM_CREATE_SHADOW);
> > +
> > /* walk over the address space and allocate the page tables */
> > for (pt_idx = from; pt_idx <= to; ++pt_idx) {
> > struct reservation_object *resv = vm->root.bo->tbo.resv;
> > @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct
> amdgpu_device *adev,
> > amdgpu_vm_bo_size(adev, level),
> > AMDGPU_GPU_PAGE_SIZE, true,
> > AMDGPU_GEM_DOMAIN_VRAM,
> > - AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > - AMDGPU_GEM_CREATE_SHADOW |
> > - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > - AMDGPU_GEM_CREATE_VRAM_CLEARED,
> > + flags,
> > NULL, resv, &pt);
> > if (r)
> > return r;
> > @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const
> dma_addr_t *pages_addr, uint64_t addr)
> > return result;
> > }
> >
> > +/**
> > + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> > + *
> > + * @params: see amdgpu_pte_update_params definition
> > + * @pe: kmap addr of the page entry
> > + * @addr: dst addr to write into pe
> > + * @count: number of page entries to update
> > + * @incr: increase next addr by incr bytes
> > + * @flags: hw access flags
> > + */
> > +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params
> *params,
> > + uint64_t pe, uint64_t addr,
> > + unsigned count, uint32_t incr,
> > + uint64_t flags)
> > +{
> > + unsigned int i;
> > +
> > + for (i = 0; i < count; i++) {
> > + amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> > + i, addr, flags);
> > + addr += incr;
> > + }
> > +
> > + mb();
> > + amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> > +}
> > +
> > +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct
> amdgpu_bo *bo)
> > +{
> > + struct amdgpu_sync sync;
> > +
> > + amdgpu_sync_create(&sync);
> > + amdgpu_sync_resv(adev, &sync, bo->tbo.resv,
> AMDGPU_FENCE_OWNER_VM);
> > + amdgpu_sync_wait(&sync);
> > + amdgpu_sync_free(&sync);
> > +}
> > +
> > /*
> > * amdgpu_vm_update_level - update a single level in the hierarchy
> > *
> > @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct
> amdgpu_device *adev,
> >
> > if (!parent->entries)
> > return 0;
> > - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
> >
> > - /* padding, etc. */
> > - ndw = 64;
> > + memset(&params, 0, sizeof(params));
> > + params.adev = adev;
> > + shadow = parent->bo->shadow;
> >
> > - /* assume the worst case */
> > - ndw += parent->last_entry_used * 6;
> > + WARN_ON(vm->use_cpu_for_update && shadow);
> > + if (vm->use_cpu_for_update && !shadow) {
> > + r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> > + if (r)
> > + return r;
> > + amdgpu_vm_bo_wait(adev, parent->bo);
> > + params.func = amdgpu_vm_cpu_set_ptes;
> > + } else {
> > + if (shadow) {
> > + r = amdgpu_ttm_bind(&shadow->tbo,
> &shadow->tbo.mem);
> > + if (r)
> > + return r;
> > + }
> > + ring = container_of(vm->entity.sched, struct amdgpu_ring,
> > + sched);
> >
> > - pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> > + /* padding, etc. */
> > + ndw = 64;
> >
> > - shadow = parent->bo->shadow;
> > - if (shadow) {
> > - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> > + /* assume the worst case */
> > + ndw += parent->last_entry_used * 6;
> > +
> > + pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> > +
> > + if (shadow) {
> > + shadow_addr = amdgpu_bo_gpu_offset(shadow);
> > + ndw *= 2;
> > + } else {
> > + shadow_addr = 0;
> > + }
> > +
> > + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> > if (r)
> > return r;
> > - shadow_addr = amdgpu_bo_gpu_offset(shadow);
> > - ndw *= 2;
> > - } else {
> > - shadow_addr = 0;
> > - }
> >
> > - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> > - if (r)
> > - return r;
> > + params.ib = &job->ibs[0];
> > + params.func = amdgpu_vm_do_set_ptes;
> > + }
> >
> > - memset(&params, 0, sizeof(params));
> > - params.adev = adev;
> > - params.ib = &job->ibs[0];
> >
> > /* walk over the address space and update the directory */
> > for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> > @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct
> amdgpu_device *adev,
> > amdgpu_vm_adjust_mc_addr(adev, last_pt);
> >
> > if (shadow)
> > - amdgpu_vm_do_set_ptes(&params,
> > - last_shadow,
> > - pt_addr, count,
> > - incr,
> > - AMDGPU_PTE_VALID);
> > -
> > - amdgpu_vm_do_set_ptes(&params, last_pde,
> > - pt_addr, count, incr,
> > - AMDGPU_PTE_VALID);
> > + params.func(&params,
> > + last_shadow,
> > + pt_addr, count,
> > + incr,
> > + AMDGPU_PTE_VALID);
> > +
> > + params.func(&params, last_pde,
> > + pt_addr, count, incr,
> > + AMDGPU_PTE_VALID);
> > }
> >
> > count = 1;
> > @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct
> amdgpu_device *adev,
> > uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev,
> last_pt);
> >
> > if (vm->root.bo->shadow)
> > - amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> > - count, incr,
> AMDGPU_PTE_VALID);
> > + params.func(&params, last_shadow, pt_addr,
> > + count, incr, AMDGPU_PTE_VALID);
> >
> > - amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> > - count, incr, AMDGPU_PTE_VALID);
> > + params.func(&params, last_pde, pt_addr,
> > + count, incr, AMDGPU_PTE_VALID);
> > }
> >
> > - if (params.ib->length_dw == 0) {
> > + if (params.func == amdgpu_vm_cpu_set_ptes)
> > + amdgpu_bo_kunmap(parent->bo);
> > + else if (params.ib->length_dw == 0) {
> > amdgpu_job_free(job);
> > } else {
> > amdgpu_ring_pad_ib(ring, params.ib);
> > @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
> struct amdgpu_vm *vm,
> > struct amdgpu_ring *ring;
> > struct amd_sched_rq *rq;
> > int r, i;
> > + u64 flags;
> >
> > vm->va = RB_ROOT;
> > vm->client_id =
> atomic64_inc_return(&adev->vm_manager.client_counter);
> > @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
> > "CPU update of VM recommended only for large BAR
> system\n");
> > vm->last_dir_update = NULL;
> >
> > + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > + AMDGPU_GEM_CREATE_VRAM_CLEARED;
> > + if (vm->use_cpu_for_update)
> > + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> > + else
> > + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > + AMDGPU_GEM_CREATE_SHADOW);
> > +
> > r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align,
> true,
> > AMDGPU_GEM_DOMAIN_VRAM,
> > - AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > - AMDGPU_GEM_CREATE_SHADOW |
> > - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > - AMDGPU_GEM_CREATE_VRAM_CLEARED,
> > + flags,
> > NULL, NULL, &vm->root.bo);
> > if (r)
> > goto error_free_sched_entity;
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20170517/c3f0fc26/attachment-0001.html>
More information about the amd-gfx
mailing list