[PATCH 1/4] drm/ttm: add a pointer to the allocating BO into ttm_resource

Christian König ckoenig.leichtzumerken at gmail.com
Fri Jun 11 11:30:38 UTC 2021



Am 11.06.21 um 07:34 schrieb Thomas Hellström (Intel):
> Hi, Christian,
>
> I know you have a lot on your plate, and that the drm community is a 
> bit lax about following the kernel patch submitting guidelines, but 
> now that we're also spinning up a number of Intel developers on TTM 
> could we please make a better effort with cover letters and commit 
> messages so that they understand what the purpose and end goal of the 
> series is. A reviewer shouldn't have to look at the last patch to try 
> to get an understanding what the series is doing and why.

Sorry, that was sent out this early unintentionally. Please see it more as an 
RFC.

>
> On 6/10/21 1:05 PM, Christian König wrote:
>> We are going to need this for the next patch
>
>
>> and it allows us to clean
>> up amdgpu as well.
>
> The amdgpu changes are not reflected in the commit title.
>
>
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 47 ++++++++-------------
>>   drivers/gpu/drm/ttm/ttm_resource.c          |  1 +
>>   include/drm/ttm/ttm_resource.h              |  1 +
>>   3 files changed, 19 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> index 194f9eecf89c..8e3f5da44e4f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> @@ -26,23 +26,12 @@
>>     #include "amdgpu.h"
>>   -struct amdgpu_gtt_node {
>> -    struct ttm_buffer_object *tbo;
>> -    struct ttm_range_mgr_node base;
>> -};
>> -
>>   static inline struct amdgpu_gtt_mgr *
>>   to_gtt_mgr(struct ttm_resource_manager *man)
>>   {
>>       return container_of(man, struct amdgpu_gtt_mgr, manager);
>>   }
>>   -static inline struct amdgpu_gtt_node *
>> -to_amdgpu_gtt_node(struct ttm_resource *res)
>> -{
>> -    return container_of(res, struct amdgpu_gtt_node, base.base);
>> -}
>> -
>>   /**
>>    * DOC: mem_info_gtt_total
>>    *
>> @@ -107,9 +96,9 @@ const struct attribute_group 
>> amdgpu_gtt_mgr_attr_group = {
>>    */
>>   bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
>>   {
>> -    struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
>> +    struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
>>   -    return drm_mm_node_allocated(&node->base.mm_nodes[0]);
>> +    return drm_mm_node_allocated(&node->mm_nodes[0]);
>>   }
>>     /**
>> @@ -129,7 +118,7 @@ static int amdgpu_gtt_mgr_new(struct 
>> ttm_resource_manager *man,
>>   {
>>       struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
>>       uint32_t num_pages = PFN_UP(tbo->base.size);
>> -    struct amdgpu_gtt_node *node;
>> +    struct ttm_range_mgr_node *node;
>>       int r;
>>         spin_lock(&mgr->lock);
>> @@ -141,19 +130,17 @@ static int amdgpu_gtt_mgr_new(struct 
>> ttm_resource_manager *man,
>>       atomic64_sub(num_pages, &mgr->available);
>>       spin_unlock(&mgr->lock);
>>   -    node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
>> +    node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
>>       if (!node) {
>>           r = -ENOMEM;
>>           goto err_out;
>>       }
>>   -    node->tbo = tbo;
>> -    ttm_resource_init(tbo, place, &node->base.base);
>> -
>> +    ttm_resource_init(tbo, place, &node->base);
>>       if (place->lpfn) {
>>           spin_lock(&mgr->lock);
>>           r = drm_mm_insert_node_in_range(&mgr->mm,
>> -                        &node->base.mm_nodes[0],
>> +                        &node->mm_nodes[0],
>>                           num_pages, tbo->page_alignment,
>>                           0, place->fpfn, place->lpfn,
>>                           DRM_MM_INSERT_BEST);
>> @@ -161,14 +148,14 @@ static int amdgpu_gtt_mgr_new(struct 
>> ttm_resource_manager *man,
>>           if (unlikely(r))
>>               goto err_free;
>>   -        node->base.base.start = node->base.mm_nodes[0].start;
>> +        node->base.start = node->mm_nodes[0].start;
>>       } else {
>> -        node->base.mm_nodes[0].start = 0;
>> -        node->base.mm_nodes[0].size = node->base.base.num_pages;
>> -        node->base.base.start = AMDGPU_BO_INVALID_OFFSET;
>> +        node->mm_nodes[0].start = 0;
>> +        node->mm_nodes[0].size = node->base.num_pages;
>> +        node->base.start = AMDGPU_BO_INVALID_OFFSET;
>>       }
>>   -    *res = &node->base.base;
>> +    *res = &node->base;
>>       return 0;
>>     err_free:
>> @@ -191,12 +178,12 @@ static int amdgpu_gtt_mgr_new(struct 
>> ttm_resource_manager *man,
>>   static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
>>                      struct ttm_resource *res)
>>   {
>> -    struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
>> +    struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
>>       struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
>>         spin_lock(&mgr->lock);
>> -    if (drm_mm_node_allocated(&node->base.mm_nodes[0]))
>> -        drm_mm_remove_node(&node->base.mm_nodes[0]);
>> +    if (drm_mm_node_allocated(&node->mm_nodes[0]))
>> +        drm_mm_remove_node(&node->mm_nodes[0]);
>>       spin_unlock(&mgr->lock);
>>       atomic64_add(res->num_pages, &mgr->available);
>>   @@ -228,14 +215,14 @@ uint64_t amdgpu_gtt_mgr_usage(struct 
>> ttm_resource_manager *man)
>>   int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
>>   {
>>       struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
>> -    struct amdgpu_gtt_node *node;
>> +    struct ttm_range_mgr_node *node;
>>       struct drm_mm_node *mm_node;
>>       int r = 0;
>>         spin_lock(&mgr->lock);
>>       drm_mm_for_each_node(mm_node, &mgr->mm) {
>> -        node = container_of(mm_node, typeof(*node), base.mm_nodes[0]);
>> -        r = amdgpu_ttm_recover_gart(node->tbo);
>> +        node = container_of(mm_node, typeof(*node), mm_nodes[0]);
>> +        r = amdgpu_ttm_recover_gart(node->base.bo);
>>           if (r)
>>               break;
>>       }
>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
>> b/drivers/gpu/drm/ttm/ttm_resource.c
>> index 2431717376e7..7ff6194154fe 100644
>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>> @@ -41,6 +41,7 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
>>       res->bus.offset = 0;
>>       res->bus.is_iomem = false;
>>       res->bus.caching = ttm_cached;
>> +    res->bo = bo;
>>   }
>>   EXPORT_SYMBOL(ttm_resource_init);
>>   diff --git a/include/drm/ttm/ttm_resource.h 
>> b/include/drm/ttm/ttm_resource.h
>> index 140b6b9a8bbe..6d0b7a6d2169 100644
>> --- a/include/drm/ttm/ttm_resource.h
>> +++ b/include/drm/ttm/ttm_resource.h
>> @@ -171,6 +171,7 @@ struct ttm_resource {
>>       uint32_t mem_type;
>>       uint32_t placement;
>>       struct ttm_bus_placement bus;
>> +    struct ttm_buffer_object *bo;
>
> Not that I'm against this change by itself, but this bo pointer is not 
> refcounted, and therefore needs a description when it's needed and 
> why. What happens, for example when the resource is moved to a ghost 
> object, or the bo is killed while the resource is remaining on a lru 
> list (which I understand was one of the main purposes with 
> free-standing resources). Weak references need a guarantee that the 
> object they pointed to is alive. What is that guarantee?

The basic idea is that we want to get rid of ghost objects and all the 
related workarounds in the mid term. But yes for the interim we probably 
need more logic here to make sure the BO is not destroyed.

>
> Also could we introduce new TTM structure members where they are first 
> used /referenced by TTM and not where they are used by amdgpu? Without 
> finding out in patch 3 that this member is needed to look up the bo 
> from a lru list the correct response to this patch would have been: 
> That bo is amdgpu-specific and needs to be in a driver private struct...

That was indeed not supposed to happen like this.

The question I rather wanted to raise is whether it's ok to make the resource 
object fully dependent on the BO for allocation/destruction?

I've seen some code in the DG1 branch where a mock BO is used to 
allocate some resource, but that approach won't work with this here.

On the other hand, this fixes a long-outstanding and very annoying 
problem we had.

Christian.

>
>
> Thanks,
>
> /Thomas
>
>
>>   };
>>     /**



More information about the dri-devel mailing list