[PATCH 2/2] drm/amdgpu: bind GTT on demand
Christian König
deathsimple at vodafone.de
Wed Sep 7 08:18:54 UTC 2016
Am 07.09.2016 um 09:18 schrieb zhoucm1:
>
>
> On 2016年09月06日 18:48, Christian König wrote:
>> Am 06.09.2016 um 11:53 schrieb zhoucm1:
>>>
>>>
>>> On 2016年09月06日 17:41, Christian König wrote:
>>>> From: Christian König <christian.koenig at amd.com>
>>>>
>>>> We don't really need the GTT table any more most of the time.
>>> Why? I thought GTT bo is always needed to be bound when GPU is
>>> trying to access it, doesn't it?
>>
>> We only need it to be bound when we try to access it from the system
>> VM (UVD/VCE, ring buffers, fences etc...).
>>
>> The VMs for each process can work without it and it is just overhead
>> to bind it all the time.
> Yes, I got your mean, copying pte from GART to VM confused me, I
> thought binding GTT table is a must, that's wrong.
> Go though vm code again, pte can be created by amdgpu_vm_map_gart if
> no GTT pte source when we don't bind it at all under VM process, am I
> right now?
Yes exactly. I still need to measure the performance impact of that,
adding the copy from the GTT made quite a difference when I introduced that.
I hope that doing the write when the update the VM is still faster than
doing the GTT write.
Regards,
Christian.
>
> Regards,
> David Zhou
>
>>
>> Regards,
>> Christian.
>>
>>
>>>
>>> Regards,
>>> David Zhou
>>>> So bind it
>>>> only on demand.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 34
>>>> ++++++++++++++++++++++++++++--
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 7 ++++++
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 33
>>>> ++++++++++++++++++++++++++---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +++
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 ++++
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 6 +++++-
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++-
>>>> 8 files changed, 84 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 9d39fa8..28d4a67 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct
>>>> amdgpu_device *adev) { }
>>>> struct amdgpu_bo_va_mapping *
>>>> amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>>> uint64_t addr, struct amdgpu_bo **bo);
>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>>>> #if defined(CONFIG_DRM_AMD_DAL)
>>>> int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index 20a1962..e069978 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct
>>>> amdgpu_cs_parser *p,
>>>> }
>>>> }
>>>> - if (p->uf_entry.robj)
>>>> - p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>>>> + if (!r && p->uf_entry.robj) {
>>>> + struct amdgpu_bo *uf = p->uf_entry.robj;
>>>> +
>>>> + r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>>>> + p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>>> + }
>>>> error_validate:
>>>> if (r) {
>>>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct
>>>> amdgpu_cs_parser *parser,
>>>> return NULL;
>>>> }
>>>> +
>>>> +/**
>>>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the
>>>> system VM
>>>> + *
>>>> + * @parser: command submission parser context
>>>> + *
>>>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible
>>>> by the system VM.
>>>> + */
>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>>>> +{
>>>> + unsigned i;
>>>> + int r;
>>>> +
>>>> + if (!parser->bo_list)
>>>> + return 0;
>>>> +
>>>> + for (i = 0; i < parser->bo_list->num_entries; i++) {
>>>> + struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>>>> +
>>>> + r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>> + if (unlikely(r))
>>>> + return r;
>>>> + }
>>>> +
>>>> + return 0;
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> index b17734e..dc73f11 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo
>>>> *bo, u32 domain,
>>>> dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>>> goto error;
>>>> }
>>>> + r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>> + if (unlikely(r)) {
>>>> + dev_err(bo->adev->dev, "%p bind failed\n", bo);
>>>> + goto error;
>>>> + }
>>>> bo->pin_count = 1;
>>>> if (gpu_addr != NULL)
>>>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo,
>>>> struct fence *fence,
>>>> u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>>> {
>>>> WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>>>> + WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>>>> + !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>>> WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>>> !bo->pin_count);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> index 4337b3a..c294aa9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct
>>>> ttm_buffer_object *bo,
>>>> new_start = (u64)new_mem->start << PAGE_SHIFT;
>>>> switch (old_mem->mem_type) {
>>>> - case TTM_PL_VRAM:
>>>> case TTM_PL_TT:
>>>> + r = amdgpu_ttm_bind(bo->ttm, old_mem);
>>>> + if (r)
>>>> + return r;
>>>> +
>>>> + case TTM_PL_VRAM:
>>>> old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>>> break;
>>>> default:
>>>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct
>>>> ttm_buffer_object *bo,
>>>> return -EINVAL;
>>>> }
>>>> switch (new_mem->mem_type) {
>>>> - case TTM_PL_VRAM:
>>>> case TTM_PL_TT:
>>>> + r = amdgpu_ttm_bind(bo->ttm, new_mem);
>>>> + if (r)
>>>> + return r;
>>>> +
>>>> + case TTM_PL_VRAM:
>>>> new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>>> break;
>>>> default:
>>>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct
>>>> ttm_tt *ttm,
>>>> struct ttm_mem_reg *bo_mem)
>>>> {
>>>> struct amdgpu_ttm_tt *gtt = (void*)ttm;
>>>> - uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>> int r;
>>>> if (gtt->userptr) {
>>>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct
>>>> ttm_tt *ttm,
>>>> bo_mem->mem_type == AMDGPU_PL_OA)
>>>> return -EINVAL;
>>>> + return 0;
>>>> +}
>>>> +
>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>>>> +{
>>>> + struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>> +
>>>> + return gtt && !list_empty(>t->list);
>>>> +}
>>>> +
>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>>>> +{
>>>> + struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>> + uint32_t flags;
>>>> + int r;
>>>> +
>>>> + if (!ttm || amdgpu_ttm_is_bound(ttm))
>>>> + return 0;
>>>> +
>>>> + flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>> r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>>> ttm->pages, gtt->ttm.dma_address, flags);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> index 72f6bfc..214bae9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>> struct fence **fence);
>>>> int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>>>> +
>>>> #endif
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> index 5888e8a..fab7bb1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct
>>>> amdgpu_cs_parser *parser, uint32_t ib_idx)
>>>> return -EINVAL;
>>>> }
>>>> + r = amdgpu_cs_sysvm_access_required(parser);
>>>> + if (r)
>>>> + return r;
>>>> +
>>>> ctx.parser = parser;
>>>> ctx.buf_sizes = buf_sizes;
>>>> ctx.ib_idx = ib_idx;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>> index 9b71d6c..b0c6702 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct
>>>> amdgpu_cs_parser *p, uint32_t ib_idx)
>>>> uint32_t allocated = 0;
>>>> uint32_t tmp, handle = 0;
>>>> uint32_t *size = &tmp;
>>>> - int i, r = 0, idx = 0;
>>>> + int i, r, idx = 0;
>>>> +
>>>> + r = amdgpu_cs_sysvm_access_required(p);
>>>> + if (r)
>>>> + return r;
>>>> while (idx < ib->length_dw) {
>>>> uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 7660f82..a549abd 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>> *adev,
>>>> }
>>>> flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm,
>>>> mem);
>>>> - gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>>>> + gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>>>> + adev == bo_va->bo->adev) ? flags : 0;
>>>> spin_lock(&vm->status_lock);
>>>> if (!list_empty(&bo_va->vm_status))
>>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
More information about the amd-gfx
mailing list