[PATCH 5/8] drm/amdgpu: new VM update backends

Mon Mar 25 11:38:16 UTC 2019

Am 20.03.19 um 12:57 schrieb Kuehling, Felix:
> As far as I can tell, the whole series is a small cleanup and big
> refactor to enable CPU clearing of PTs without a lot of ugliness or code
> duplication.

It's a bit more than that. Key point is that I can now easily add a 
parameter for direct submission during page fault handling :)

Christian.

> It looks good to me. I haven't reviewed all the moved SDMA
> update code to make sure it all works correctly, but at least the
> prepare and commit functions look sane to me.
>
> For this patch I have a suggestion (inline) to remove params->ib, which
> seems redundant with params->job. That would also ripple through the
> remaining patches.
>
> Other than that, the series is Reviewed-by: Felix Kuehling
> <Felix.Kuehling at amd.com>
>
> [+Kent], Look out for this patch series in an upcoming merge to
> amd-kfd-staging. I don't think it'll cause conflicts, but has a risk of
> regressions (like all big amdgpu_vm changes IME).
>
> Regards,
>     Felix
>
> On 3/19/2019 8:44 AM, Christian König wrote:
>> Separate out all functions for SDMA and CPU based page table
>> updates into separate backends.
>>
>> This way we can keep most of the complexity of those from the
>> core VM code.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/Makefile         |   3 +-
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      |   7 +-
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  30 ++-
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  | 116 +++++++++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 248 ++++++++++++++++++++
>>    5 files changed, 401 insertions(+), 3 deletions(-)
>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> [snip]
>> +/**
>> + * amdgpu_vm_sdma_prepare - prepare SDMA command submission
>> + *
>> + * @p: see amdgpu_vm_update_params definition
>> + * @owner: owner we need to sync to
>> + * @exclusive: exclusive move fence we need to sync to
>> + *
>> + * Returns:
>> + * Negativ errno, 0 for success.
>> + */
>> +static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
>> +				  void *owner, struct dma_fence *exclusive)
>> +{
>> +	struct amdgpu_bo *root = p->vm->root.base.bo;
>> +	unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
>> +	int r;
>> +
>> +	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
>> +	if (r)
>> +		return r;
>> +
>> +	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>> +	if (r)
>> +		return r;
>> +
>> +	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>> +			     owner, false);
>> +	if (r)
>> +		return r;
>> +
>> +	p->num_dw_left = ndw;
>> +	p->ib = &p->job->ibs[0];
> With p->job added, do we still need p->ib? We could just use
> &p->job->ibs[0] directly, which should perform the same or be more
> efficient since it's just a constant offset from p->job.
>
>
>> +	return 0;
>> +}
>> +
>> +/**
>> + * amdgpu_vm_sdma_commit - commit SDMA command submission
>> + *
>> + * @p: see amdgpu_vm_update_params definition
>> + * @fence: resulting fence
>> + *
>> + * Returns:
>> + * Negativ errno, 0 for success.
>> + */
>> +static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>> +				 struct dma_fence **fence)
>> +{
>> +	struct amdgpu_bo *root = p->vm->root.base.bo;
>> +	struct amdgpu_ring *ring;
>> +	struct dma_fence *f;
>> +	int r;
>> +
>> +	ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
>> +
>> +	WARN_ON(p->ib->length_dw == 0);
>> +	amdgpu_ring_pad_ib(ring, p->ib);
>> +	WARN_ON(p->ib->length_dw > p->num_dw_left);
>> +	r = amdgpu_job_submit(p->job, &p->vm->entity,
>> +			      AMDGPU_FENCE_OWNER_VM, &f);
>> +	if (r)
>> +		goto error;
>> +
>> +	amdgpu_bo_fence(root, f, true);
>> +	if (fence)
>> +		swap(*fence, f);
>> +	dma_fence_put(f);
>> +	return 0;
>> +
>> +error:
>> +	amdgpu_job_free(p->job);
>> +	return r;
>> +}
>> +
>> +
>> +/**
>> + * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>> + *
>> + * @p: see amdgpu_vm_update_params definition
>> + * @bo: PD/PT to update
>> + * @pe: addr of the page entry
>> + * @count: number of page entries to copy
>> + *
>> + * Traces the parameters and calls the DMA function to copy the PTEs.
>> + */
>> +static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
>> +				     struct amdgpu_bo *bo, uint64_t pe,
>> +				     unsigned count)
>> +{
>> +	uint64_t src = p->ib->gpu_addr;
>> +
>> +	src += p->num_dw_left * 4;
>> +
>> +	pe += amdgpu_bo_gpu_offset(bo);
>> +	trace_amdgpu_vm_copy_ptes(pe, src, count);
>> +
>> +	amdgpu_vm_copy_pte(p->adev, p->ib, pe, src, count);
>> +}
>> +
>> +/**
>> + * amdgpu_vm_sdma_set_ptes - helper to call the right asic function
>> + *
>> + * @p: see amdgpu_vm_update_params definition
>> + * @bo: PD/PT to update
>> + * @pe: addr of the page entry
>> + * @addr: dst addr to write into pe
>> + * @count: number of page entries to update
>> + * @incr: increase next addr by incr bytes
>> + * @flags: hw access flags
>> + *
>> + * Traces the parameters and calls the right asic functions
>> + * to setup the page table using the DMA.
>> + */
>> +static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
>> +				    struct amdgpu_bo *bo, uint64_t pe,
>> +				    uint64_t addr, unsigned count,
>> +				    uint32_t incr, uint64_t flags)
>> +{
>> +	pe += amdgpu_bo_gpu_offset(bo);
>> +	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
>> +	if (count < 3) {
>> +		amdgpu_vm_write_pte(p->adev, p->ib, pe, addr | flags,
>> +				    count, incr);
>> +	} else {
>> +		amdgpu_vm_set_pte_pde(p->adev, p->ib, pe, addr,
>> +				      count, incr, flags);
>> +	}
>> +}
>> +
>> +/**
>> + * amdgpu_vm_sdma_update - execute VM update
>> + *
>> + * @p: see amdgpu_vm_update_params definition
>> + * @bo: PD/PT to update
>> + * @pe: addr of the page entry
>> + * @addr: dst addr to write into pe
>> + * @count: number of page entries to update
>> + * @incr: increase next addr by incr bytes
>> + * @flags: hw access flags
>> + *
>> + * Reserve space in the IB, setup mapping buffer on demand and write commands to
>> + * the IB.
>> + */
>> +static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
>> +				 struct amdgpu_bo *bo, uint64_t pe,
>> +				 uint64_t addr, unsigned count, uint32_t incr,
>> +				 uint64_t flags)
>> +{
>> +	unsigned int i, ndw, nptes;
>> +	uint64_t *pte;
>> +	int r;
>> +
>> +	do {
>> +		ndw = p->num_dw_left;
>> +		ndw -= p->ib->length_dw;
>> +
>> +		if (ndw < 32) {
>> +			r = amdgpu_vm_sdma_commit(p, NULL);
>> +			if (r)
>> +				return r;
>> +
>> +			/* estimate how many dw we need */
>> +			ndw = 32;
>> +			if (p->pages_addr)
>> +				ndw += count * 2;
>> +			ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
>> +			ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
>> +
>> +			r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
>> +			if (r)
>> +				return r;
>> +
>> +			p->num_dw_left = ndw;
>> +			p->ib = &p->job->ibs[0];
>> +		}
>> +
>> +		if (!p->pages_addr) {
>> +			/* set page commands needed */
>> +			if (bo->shadow)
>> +				amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr,
>> +							count, incr, flags);
>> +			amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
>> +						incr, flags);
>> +			return 0;
>> +		}
>> +
>> +		/* copy commands needed */
>> +		ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
>> +			(bo->shadow ? 2 : 1);
>> +
>> +		/* for padding */
>> +		ndw -= 7;
>> +
>> +		nptes = min(count, ndw / 2);
>> +
>> +		/* Put the PTEs at the end of the IB. */
>> +		p->num_dw_left -= nptes * 2;
>> +		pte = (uint64_t *)&(p->ib->ptr[p->num_dw_left]);
>> +		for (i = 0; i < nptes; ++i, addr += incr) {
>> +			pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
>> +			pte[i] |= flags;
>> +		}
>> +
>> +		if (bo->shadow)
>> +			amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes);
>> +		amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
>> +
>> +		pe += nptes * 8;
>> +		count -= nptes;
>> +	} while (count);
>> +
>> +	return 0;
>> +}
>> +
>> +const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
>> +	.prepare = amdgpu_vm_sdma_prepare,
>> +	.update = amdgpu_vm_sdma_update,
>> +	.commit = amdgpu_vm_sdma_commit
>> +};