[PATCH 16/22] drm/amdgpu: cleanup SA inti and fini

Christian König christian.koenig at amd.com
Mon Feb 26 11:29:23 UTC 2018


> I cannot remind the details right now ...

Ok, well it is still a rather nice to have cleanup.

Just one comment, please keep the amdgpu_sa_bo_manager_fini function 
instead of the amdgpu_sa_bo_manager_suspend function.

We can just return void and we still want the error message in case 
somebody forgot to free something.

With that fixed the patch looks good to me,
Christian.

Am 26.02.2018 um 11:48 schrieb Liu, Monk:
>> Root cause is simple: many engine always accessing some wptr polling address before They are disabled, so SA must always be valid since it is created, so must use bo_create_kernel to make sure it is pinned already before someone use it, Ortherwise during kmd reloading test there will be lot of DMAR reading error
> Sorry, above description is wrong, I have too much patches and I mistook the root cause of them
>
> And this sentence is correct: " I judge by comparing the DMAR error page address with SA MC address, and they shared the same PAGE"
>
> I cannot remind the details right now ...
>
> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf Of Liu, Monk
> Sent: 2018年2月26日 18:41
> To: Koenig, Christian <Christian.Koenig at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: RE: [PATCH 16/22] drm/amdgpu: cleanup SA inti and fini
>
> In SRIOV, I found hypervisor continues report DMAR read error, and finally one of that error Related with SA, (I judge by comparing the DMAR error page address with SA MC address, and they shared the same PAGE!!)
>
> With this patch applied, one of the DMAR reading error gone,
>
> Root cause is simple: many engine always accessing some wptr polling address before They are disabled, so SA must always be valid since it is created, so must use bo_create_kernel to make sure it is pinned already before someone use it, Ortherwise during kmd reloading test there will be lot of DMAR reading error
>
> -----Original Message-----
> From: Christian König [mailto:ckoenig.leichtzumerken at gmail.com]
> Sent: 2018年2月26日 18:13
> To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH 16/22] drm/amdgpu: cleanup SA inti and fini
>
> Am 26.02.2018 um 06:18 schrieb Monk Liu:
>> should use bo_create_kernel instead of split to two function that
>> create and pin the SA bo
>>
>> issue:
>> before this patch, there are DMAR read error in host side when running
>> SRIOV test, the DMAR address dropped in the range of SA bo.
>>
>> fix:
>> after this cleanups of SA init and fini, above DMAR eror gone.
> Well the change looks like a valid cleanup to me, but the explanation why that should be a problem doesn't looks like it makes sense.
>
> Please explain further what this should fix?
>
> Regards,
> Christian.
>
>> Change-Id: I3f299a3342bd7263776bff69e4b31b0d3816749a
>> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  6 ----
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 61 +++-------------------------------
>>    2 files changed, 4 insertions(+), 63 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 7f2c314..4709d13 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -279,11 +279,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>>    		return r;
>>    	}
>>    
>> -	r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
>> -	if (r) {
>> -		return r;
>> -	}
>> -
>>    	adev->ib_pool_ready = true;
>>    	if (amdgpu_debugfs_sa_init(adev)) {
>>    		dev_err(adev->dev, "failed to register debugfs file for SA\n"); @@
>> -303,7 +298,6 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
>>    {
>>    	if (adev->ib_pool_ready) {
>>    		amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
>> -		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
>>    		adev->ib_pool_ready = false;
>>    	}
>>    }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>> index 5ca75a4..695c639 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>> @@ -63,80 +63,27 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>>    	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>>    		INIT_LIST_HEAD(&sa_manager->flist[i]);
>>    
>> -	r = amdgpu_bo_create(adev, size, align, true, domain,
>> -			     0, NULL, NULL, &sa_manager->bo);
>> +	r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
>> +				&sa_manager->gpu_addr, &sa_manager->cpu_ptr);
>>    	if (r) {
>>    		dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
>>    		return r;
>>    	}
>>    
>> -	return r;
>> -}
>> -
>> -void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
>> -			       struct amdgpu_sa_manager *sa_manager)
>> -{
>> -	struct amdgpu_sa_bo *sa_bo, *tmp;
>> -
>> -	if (!list_empty(&sa_manager->olist)) {
>> -		sa_manager->hole = &sa_manager->olist,
>> -		amdgpu_sa_bo_try_free(sa_manager);
>> -		if (!list_empty(&sa_manager->olist)) {
>> -			dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
>> -		}
>> -	}
>> -	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
>> -		amdgpu_sa_bo_remove_locked(sa_bo);
>> -	}
>> -	amdgpu_bo_unref(&sa_manager->bo);
>> -	sa_manager->size = 0;
>> -}
>> -
>> -int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
>> -			       struct amdgpu_sa_manager *sa_manager)
>> -{
>> -	int r;
>> -
>> -	if (sa_manager->bo == NULL) {
>> -		dev_err(adev->dev, "no bo for sa manager\n");
>> -		return -EINVAL;
>> -	}
>> -
>> -	/* map the buffer */
>> -	r = amdgpu_bo_reserve(sa_manager->bo, false);
>> -	if (r) {
>> -		dev_err(adev->dev, "(%d) failed to reserve manager bo\n", r);
>> -		return r;
>> -	}
>> -	r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr);
>> -	if (r) {
>> -		amdgpu_bo_unreserve(sa_manager->bo);
>> -		dev_err(adev->dev, "(%d) failed to pin manager bo\n", r);
>> -		return r;
>> -	}
>> -	r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
>>    	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
>> -	amdgpu_bo_unreserve(sa_manager->bo);
>>    	return r;
>>    }
>>    
>>    int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
>>    				 struct amdgpu_sa_manager *sa_manager)
>>    {
>> -	int r;
>> -
>>    	if (sa_manager->bo == NULL) {
>>    		dev_err(adev->dev, "no bo for sa manager\n");
>>    		return -EINVAL;
>>    	}
>>    
>> -	r = amdgpu_bo_reserve(sa_manager->bo, true);
>> -	if (!r) {
>> -		amdgpu_bo_kunmap(sa_manager->bo);
>> -		amdgpu_bo_unpin(sa_manager->bo);
>> -		amdgpu_bo_unreserve(sa_manager->bo);
>> -	}
>> -	return r;
>> +	amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
>> +	return 0;
>>    }
>>    
>>    static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



More information about the amd-gfx mailing list