[PATCH v6 1/2] drm/ttm: Add a flag to allow drivers to skip clear-on-free

Wed Aug 21 07:47:50 UTC 2024

Am 20.08.24 um 18:46 schrieb Nirmoy Das:
> Hi Thomas, Christian,
>
> On 8/20/2024 5:47 PM, Christian König wrote:
>> Am 20.08.24 um 17:45 schrieb Thomas Hellström:
>>> On Tue, 2024-08-20 at 17:30 +0200, Christian König wrote:
>>>> Am 20.08.24 um 15:33 schrieb Thomas Hellström:
>>>>> Hi, Nirmoy, Christian
>>>>>
>>>>> On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
>>>>>> Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
>>>>>> releasing backing stores if they want to skip clear-on-free.
>>>>>>
>>>>>> Cc: Matthew Auld <matthew.auld at intel.com>
>>>>>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>>>>>> Suggested-by: Christian König <christian.koenig at amd.com>
>>>>>> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
>>>>>> Reviewed-by: Christian König <christian.koenig at amd.com>
>>>>> What happens if two devices share the same global TTM pool
>>>>>    type and one that does its own clearing. Wouldn't there be a
>>>>> pretty
>>>>> high chance that the the device that doesn't clear its own pages
>>>>> allocate non-cleared memory from the pool?
>>>> That's completely unproblematic. The flag indicates that the released
>>>> pages are already cleared, if that isn't the case then the flag
>>>> shouldn't be set on the TT object.
>>> Yeah, this patch is OK, but the way the follow-up xe patch uses it is
>>> problematic since, AFAICT, xe dma clears on alloc, meaning the pool
>>> pages are not cleared after use.
>>
>> Yeah that is clearly invalid behavior.
>
>
> I was only thinking about one device use-case which won't leak any 
> data though I am now miss-using the flag.
>
> If I skip dma clear for pooled BO then this flag  is not really 
> needed. Shall I revert the this and usage of TTM_TT_FLAG_CLEARED_ON_FREE
>
> and re-introduce it after I get a working clear on free implementation 
> for XE?

Yes absolutely.

I though that I made it clear that the handling should be that the 
driver clears the pages and *then* sets the TTM_TT_FLAG_CLEARED_ON_FREE 
flag.

So if you don't have the handling implemented like that then that's 
clearly invalid behavior.

Regards,
Christian.

>
>
> Regards,
>
> Nirmoy
>
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> /Thomas
>>>
>>>> If one device clear it's pages and another device doesn't clear it's
>>>> pages then we would just clear the pages of the device which doesn't
>>>> do
>>>> it with a hardware DMA.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> /Thomas
>>>>>
>>>>>> ---
>>>>>>    drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
>>>>>>    include/drm/ttm/ttm_tt.h       |  6 +++++-
>>>>>>    2 files changed, 16 insertions(+), 8 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>> b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>> index 8504dbe19c1a..935ab3cfd046 100644
>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>> @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool
>>>>>> *pool, dma_addr_t dma_addr,
>>>>>>    }
>>>>>>       /* Give pages into a specific pool_type */
>>>>>> -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
>>>>>> page
>>>>>> *p)
>>>>>> +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
>>>>>> page
>>>>>> *p,
>>>>>> +                   bool cleared)
>>>>>>    {
>>>>>>        unsigned int i, num_pages = 1 << pt->order;
>>>>>>    -    for (i = 0; i < num_pages; ++i) {
>>>>>> -        if (PageHighMem(p))
>>>>>> -            clear_highpage(p + i);
>>>>>> -        else
>>>>>> -            clear_page(page_address(p + i));
>>>>>> +    if (!cleared) {
>>>>>> +        for (i = 0; i < num_pages; ++i) {
>>>>>> +            if (PageHighMem(p))
>>>>>> +                clear_highpage(p + i);
>>>>>> +            else
>>>>>> +                clear_page(page_address(p + i));
>>>>>> +        }
>>>>>>        }
>>>>>>           spin_lock(&pt->lock);
>>>>>> @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct
>>>>>> ttm_pool
>>>>>> *pool, struct ttm_tt *tt,
>>>>>>                    pgoff_t start_page, pgoff_t
>>>>>> end_page)
>>>>>>    {
>>>>>>        struct page **pages = &tt->pages[start_page];
>>>>>> +    bool cleared = tt->page_flags &
>>>>>> TTM_TT_FLAG_CLEARED_ON_FREE;
>>>>>>        unsigned int order;
>>>>>>        pgoff_t i, nr;
>>>>>>    @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct
>>>>>> ttm_pool
>>>>>> *pool, struct ttm_tt *tt,
>>>>>>               pt = ttm_pool_select_type(pool, caching, order);
>>>>>>            if (pt)
>>>>>> -            ttm_pool_type_give(pt, *pages);
>>>>>> +            ttm_pool_type_give(pt, *pages, cleared);
>>>>>>            else
>>>>>>                ttm_pool_free_page(pool, caching, order,
>>>>>> *pages);
>>>>>>        }
>>>>>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>>>>>> index 2b9d856ff388..cfaf49de2419 100644
>>>>>> --- a/include/drm/ttm/ttm_tt.h
>>>>>> +++ b/include/drm/ttm/ttm_tt.h
>>>>>> @@ -85,6 +85,9 @@ struct ttm_tt {
>>>>>>         * fault handling abuses the DMA api a bit and
>>>>>> dma_map_attrs
>>>>>> can't be
>>>>>>         * used to assure pgprot always matches.
>>>>>>         *
>>>>>> +     * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver
>>>>>> handles
>>>>>> +     * clearing backing store
>>>>>> +     *
>>>>>>         * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
>>>>>> USE. This is
>>>>>>         * set by TTM after ttm_tt_populate() has successfully
>>>>>> returned, and is
>>>>>>         * then unset when TTM calls ttm_tt_unpopulate().
>>>>>> @@ -94,8 +97,9 @@ struct ttm_tt {
>>>>>>    #define TTM_TT_FLAG_EXTERNAL        BIT(2)
>>>>>>    #define TTM_TT_FLAG_EXTERNAL_MAPPABLE    BIT(3)
>>>>>>    #define TTM_TT_FLAG_DECRYPTED        BIT(4)
>>>>>> +#define TTM_TT_FLAG_CLEARED_ON_FREE    BIT(5)
>>>>>>    -#define TTM_TT_FLAG_PRIV_POPULATED    BIT(5)
>>>>>> +#define TTM_TT_FLAG_PRIV_POPULATED    BIT(6)
>>>>>>        uint32_t page_flags;
>>>>>>        /** @num_pages: Number of pages in the page array. */
>>>>>>        uint32_t num_pages;
>>