[PATCH 2/2] drm/amdgpu: add full TMZ support into amdgpu_ttm_map_buffer v2
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Mar 23 12:24:03 UTC 2020
On 23.03.20 at 09:29, Huang Rui wrote:
> On Sun, Mar 22, 2020 at 04:48:35PM +0100, Christian König wrote:
>> This should allow us to also support VRAM->GTT moves.
>>
>> v2: fix missing vram_base_adjustment
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 ++++++++++++++++++++++++++-------
>> 1 file changed, 30 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 53de99dbaead..e15a343a944b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -309,21 +309,21 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>> unsigned window, struct amdgpu_ring *ring,
>> bool tmz, uint64_t *addr)
>> {
>> - struct ttm_dma_tt *dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
>> struct amdgpu_device *adev = ring->adev;
>> struct amdgpu_job *job;
>> unsigned num_dw, num_bytes;
>> - dma_addr_t *dma_address;
>> struct dma_fence *fence;
>> uint64_t src_addr, dst_addr;
>> + void *cpu_addr;
>> uint64_t flags;
>> + unsigned int i;
>> int r;
>>
>> BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
>> AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
>>
>> /* Map only what can't be accessed directly */
>> - if (mem->start != AMDGPU_BO_INVALID_OFFSET) {
>> + if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
>> *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset;
>> return 0;
>> }
>> @@ -351,15 +351,37 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>> amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>> WARN_ON(job->ibs[0].length_dw > num_dw);
>>
>> - dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
>> if (tmz)
>> flags |= AMDGPU_PTE_TMZ;
>>
>> - r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
>> - &job->ibs[0].ptr[num_dw]);
>> - if (r)
>> - goto error_free;
>> + cpu_addr = &job->ibs[0].ptr[num_dw];
>> +
>> + if (mem->mem_type == TTM_PL_TT) {
>> + struct ttm_dma_tt *dma;
>> + dma_addr_t *dma_address;
>> +
>> + dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
>> + dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
>> + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
>> + cpu_addr);
>> + if (r)
>> + goto error_free;
>> + } else {
>> + dma_addr_t dma_address;
>> +
>> + dma_address = (mm_node->start << PAGE_SHIFT) + offset;
>> + dma_address += adev->vm_manager.vram_base_offset;
>> +
>> + for (i = 0; i < num_pages; ++i) {
>> + r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
>> + &dma_address, flags, cpu_addr);
> May I ask why we need to map the pages one by one here? Is it because,
> if the placement is not PL_TT, the buffer might not be contiguous?
The problem is actually the other way around: amdgpu_gart_map() expects
an array of per-page addresses, which for PL_TT are in general not
contiguous, but here we have a single contiguous address range we want
to map.
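For illustration, a minimal, untested sketch of the one-call
alternative, at the cost of a temporary allocation; the variable names
(dma_address, num_pages, flags, cpu_addr) are borrowed from the patch
context above:

	/*
	 * Untested sketch: expand the contiguous VRAM range into the
	 * per-page address array amdgpu_gart_map() expects, so a single
	 * call covers all pages instead of looping one page at a time.
	 */
	dma_addr_t *addrs;
	unsigned int i;
	int r;

	addrs = kmalloc_array(num_pages, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	for (i = 0; i < num_pages; ++i)
		addrs[i] = dma_address + ((dma_addr_t)i << PAGE_SHIFT);

	r = amdgpu_gart_map(adev, 0, num_pages, addrs, flags, cpu_addr);
	kfree(addrs);

The loop in the patch avoids that allocation in the move path, which is
why it maps one page per call instead.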
At some point we should probably switch this over to sg_tables or some
other, better-suited structure, but for now the loop is sufficient.
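For the record, a rough, untested sketch of what that direction might
look like; note that amdgpu_gart_map_sg() is a hypothetical helper that
would walk sg_dma_address()/sg_dma_len() entries, it does not exist in
the tree:

	/*
	 * Untested sketch of the sg_table idea: describe the contiguous
	 * VRAM range as a single-entry table and hand it to a (purely
	 * hypothetical) sg-aware mapping helper.
	 */
	struct sg_table sgt;
	int r;

	r = sg_alloc_table(&sgt, 1, GFP_KERNEL);
	if (r)
		return r;

	sg_dma_address(sgt.sgl) = dma_address;
	sg_dma_len(sgt.sgl) = num_pages << PAGE_SHIFT;

	r = amdgpu_gart_map_sg(adev, 0, &sgt, flags, cpu_addr); /* hypothetical */
	sg_free_table(&sgt);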
Regards,
Christian.
>
> Thanks,
> Ray
>
>> + if (r)
>> + goto error_free;
>> +
>> + dma_address += PAGE_SIZE;
>> + }
>> + }
>>
>> r = amdgpu_job_submit(job, &adev->mman.entity,
>> AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
>> --
>> 2.14.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx