[Mesa-dev] [PATCH 14/25] radeonsi: implement PIPE_FLUSH_{TOP,BOTTOM}_OF_PIPE
Nicolai Hähnle
nhaehnle at gmail.com
Fri Nov 3 19:42:21 UTC 2017
On 03.11.2017 19:46, Marek Olšák wrote:
> On Fri, Nov 3, 2017 at 3:48 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>> On 31.10.2017 17:21, Marek Olšák wrote:
>>>
>>> On Sun, Oct 22, 2017 at 9:07 PM, Nicolai Hähnle <nhaehnle at gmail.com>
>>> wrote:
>>>>
>>>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>>>
>>>> ---
>>>> src/gallium/drivers/radeonsi/si_fence.c | 83 ++++++++++++++++++++++++++++++++-
>>>> 1 file changed, 82 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
>>>> index b7b02b55831..bc0ae302945 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_fence.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_fence.c
>>>> @@ -22,33 +22,41 @@
>>>> *
>>>> */
>>>>
>>>> #include <libsync.h>
>>>>
>>>> #include "util/os_time.h"
>>>> #include "util/u_memory.h"
>>>> #include "util/u_queue.h"
>>>>
>>>> #include "si_pipe.h"
>>>> +#include "radeon/r600_cs.h"
>>>> +
>>>> +struct si_fine_fence {
>>>> + struct r600_resource *buf;
>>>> + unsigned offset;
>>>> +};
>>>>
>>>> struct si_multi_fence {
>>>> struct pipe_reference reference;
>>>> struct pipe_fence_handle *gfx;
>>>> struct pipe_fence_handle *sdma;
>>>> struct tc_unflushed_batch_token *tc_token;
>>>> struct util_queue_fence ready;
>>>>
>>>> /* If the context wasn't flushed at fence creation, this is non-NULL. */
>>>> struct {
>>>> struct r600_common_context *ctx;
>>>> unsigned ib_index;
>>>> } gfx_unflushed;
>>>> +
>>>> + struct si_fine_fence fine;
>>>> };
>>>>
>>>> static void si_add_fence_dependency(struct r600_common_context *rctx,
>>>> struct pipe_fence_handle *fence)
>>>> {
>>>> struct radeon_winsys *ws = rctx->ws;
>>>>
>>>> if (rctx->dma.cs)
>>>> ws->cs_add_fence_dependency(rctx->dma.cs, fence);
>>>> ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
>>>> @@ -59,20 +67,21 @@ static void si_fence_reference(struct pipe_screen *screen,
>>>> struct pipe_fence_handle *src)
>>>> {
>>>> struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws;
>>>> struct si_multi_fence **rdst = (struct si_multi_fence **)dst;
>>>> struct si_multi_fence *rsrc = (struct si_multi_fence *)src;
>>>>
>>>> if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
>>>> ws->fence_reference(&(*rdst)->gfx, NULL);
>>>> ws->fence_reference(&(*rdst)->sdma, NULL);
>>>> tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL);
>>>> + r600_resource_reference(&(*rdst)->fine.buf, NULL);
>>>> FREE(*rdst);
>>>> }
>>>> *rdst = rsrc;
>>>> }
>>>>
>>>> static struct si_multi_fence *si_create_multi_fence()
>>>> {
>>>> struct si_multi_fence *fence = CALLOC_STRUCT(si_multi_fence);
>>>> if (!fence)
>>>> return NULL;
>>>> @@ -132,20 +141,66 @@ static void si_fence_server_sync(struct pipe_context *ctx,
>>>> * this fence dependency is signalled.
>>>> *
>>>> * Should we flush the context to allow more GPU parallelism?
>>>> */
>>>> if (rfence->sdma)
>>>> si_add_fence_dependency(rctx, rfence->sdma);
>>>> if (rfence->gfx)
>>>> si_add_fence_dependency(rctx, rfence->gfx);
>>>> }
>>>>
>>>> +static bool si_fine_fence_signaled(struct radeon_winsys *rws,
>>>> + const struct si_fine_fence *fine)
>>>> +{
>>>> + char *map = rws->buffer_map(fine->buf->buf, NULL, PIPE_TRANSFER_READ |
>>>> + PIPE_TRANSFER_UNSYNCHRONIZED);
>>>> + if (!map)
>>>> + return false;
>>>> +
>>>> + uint32_t *fence = (uint32_t*)(map + fine->offset);
>>>> + return *fence != 0;
>>>
>>>
>>> I think this is not coherent on GFX9 if MTYPE != UC. MTYPE is set by the
>>> GEM_CREATE ioctl.
>>
>>
>> Hmm, this stuff tends to give me headaches.
>>
>> For both top-of-pipe and bottom-of-pipe, a direct write to memory is used
>> (WRITE_DATA with dst_sel=5 (memory); RELEASE_MEM with dst_sel=0
>> (memory_controller)), bypassing the GPU L2.
>>
>> So the only issue could be with CPU caches, and with HDP if the buffer lands
>> in VRAM for some reason. The buffer we're using here is the same as for
>> queries, and AFAIK we don't do any special flushing for those either, so I
>> don't think this is really a problem?
>
> Right. It's not a problem. In the worst case, the fence write will be
> visible after the next L2 flush on GFX9. I don't know the L2 cache
> policy for CP packets on GFX9, but I guess it's "stream" or something
> even stronger.
>
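For reference, the top-of-pipe case boils down to a single direct memory
write from the PFP, roughly like the sketch below (illustration only, not
the final patch; the PKT3/S_370_*/V_370_* names are the ones I remember
from sid.h, and the helper name is made up):

static void si_emit_top_of_pipe_fence(struct radeon_winsys_cs *cs,
                                      const struct si_fine_fence *fine)
{
        /* GPU address of the fine-fence dword. */
        uint64_t fence_va = fine->buf->gpu_address + fine->offset;

        /* WRITE_DATA with dst_sel = 5 (memory, async): the PFP writes the
         * dword straight to memory, bypassing the GPU L2, so only CPU/HDP
         * visibility matters. fine->buf must already be in the CS buffer
         * list. */
        radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
        radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
                        S_370_WR_CONFIRM(1) |
                        S_370_ENGINE_SEL(V_370_PFP));
        radeon_emit(cs, fence_va);
        radeon_emit(cs, fence_va >> 32);
        radeon_emit(cs, 0x80000000);
}
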
> Note that allocator_zeroed_memory allocates from VRAM and the buffer
> is actually never mapped (i.e. we could make it unmappable). We don't
> have any suballocator instance for cached GTT memory at the moment.
>
> Also, u_suballocator clears memory using the GPU. If the clear hasn't
> been started by the GPU, the memory contains garbage, which is not
> ideal for CPU fences.
Good points, I'll rework the allocation.
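Concretely, I'm thinking of something along these lines (rough sketch
only; the field paths and the exact upload helper are from memory): take
the fence dword from the context's stream uploader, which gives us
mappable memory, and clear it from the CPU so an early CPU-side check
never reads garbage:

static void si_fine_fence_alloc(struct si_context *ctx,
                                struct si_fine_fence *fine)
{
        uint32_t *fence_ptr;

        /* 4-byte fence dword, 4-byte aligned, from CPU-visible memory. */
        u_upload_alloc(ctx->b.b.stream_uploader, 0, 4, 4, &fine->offset,
                       (struct pipe_resource **)&fine->buf,
                       (void **)&fence_ptr);
        if (!fine->buf)
                return; /* fall back to the coarse fence only */

        /* Clear on the CPU instead of relying on the suballocator's
         * GPU-side clear. */
        *fence_ptr = 0;
}
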
Cheers,
Nicolai
>
> Marek
>
--
Learn how the world really is,
But never forget how it ought to be.