[PATCH 10/21] drm/etnaviv: use 'sync points' for performance monitor requests
Christian Gmeiner
christian.gmeiner at gmail.com
Sun Jul 2 18:53:29 UTC 2017
2017-06-26 15:41 GMT+02:00 Lucas Stach <l.stach at pengutronix.de>:
> Am Freitag, den 09.06.2017, 12:26 +0200 schrieb Christian Gmeiner:
>> With 'sync points' we can sample the reqeustes perform signals
>> before and/or after the submited command buffer.
>>
>> Signed-off-by: Christian Gmeiner <christian.gmeiner at gmail.com>
>> ---
>> drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 112
>> +++++++++++++++++++++++++++++-----
>> drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 4 ++
>> 2 files changed, 102 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> index 0766861..2e9f031 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> @@ -1313,12 +1313,47 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu
>> *gpu)
>> pm_runtime_put_autosuspend(gpu->dev);
>> }
>>
>> +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
>> + struct etnaviv_event *event, unsigned int flags)
>> +{
>> + unsigned int i;
>> +
>> + for (i = 0; i < event->nr_pmrs; i++) {
>> + const struct etnaviv_perfmon_request *pmr = event-
>> >pmrs + i;
>> +
>> + if (pmr->flags == flags)
>> + etnaviv_perfmon_process(gpu, pmr);
>> + }
>> +}
>> +
>> +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
>> + struct etnaviv_event *event)
>> +{
>> + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE);
>> +}
>> +
>> +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
>> + struct etnaviv_event *event)
>> +{
>> + unsigned int i;
>> +
>> + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
>> +
>> + for (i = 0; i < event->nr_pmrs; i++) {
>> + const struct etnaviv_perfmon_request *pmr = event-
>> >pmrs + i;
>> +
>> + *pmr->bo_vma = pmr->sequence;
>> + }
>> +}
>> +
>> +
>> /* add bo's to gpu's ring, and kick gpu: */
>> int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>> struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf
>> *cmdbuf)
>> {
>> struct dma_fence *fence;
>> unsigned int event, i;
>> + unsigned int sync[2] = { ~0U, ~0U };
>> int ret;
>>
>> ret = etnaviv_gpu_pm_get_sync(gpu);
>> @@ -1341,6 +1376,39 @@ int etnaviv_gpu_submit(struct etnaviv_gpu
>> *gpu,
>> goto out_pm_put;
>> }
>>
>> + /*
>> + * if there are performance monitor requests we need to have
>> a sync point to
>> + * re-configure gpu and process ETNA_PM_PROCESS_PRE
>> requests.
>> + */
>> + if (cmdbuf->nr_pmrs) {
>> + sync[0] = event_alloc(gpu);
>> +
>> + if (unlikely(sync[0] == ~0U)) {
>> + DRM_ERROR("no free events for sync point
>> 0\n");
>> + event_free(gpu, event);
>> + ret = -EBUSY;
>> + goto out_pm_put;
>> + }
>> + }
>> +
>> + /*
>> + * if there are performance monitor requests we need to have
>> sync point to
>> + * re-configure gpu, process ETNA_PM_PROCESS_POST requests
>> and update the
>> + * sequence number for userspace.
>> + */
>> + if (cmdbuf->nr_pmrs) {
>> + sync[1] = event_alloc(gpu);
>> +
>> + if (unlikely(sync[1] == ~0U)) {
>> + DRM_ERROR("no free events for sync point
>> 1\n");
>> + event_free(gpu, event);
>> + if (unlikely(sync[0] == ~0U))
>> + event_free(gpu, sync[0]);
>> + ret = -EBUSY;
>> + goto out_pm_put;
>> + }
>> + }
>> +
>
> This is dangerous. We aren't holding the GPU lock at this point (and we
> can't because of livelocks with the GPU hangchecker), so given enough
> parallel submits with PMRs all the submits might abort as they can't
> allocate enough events, as each one might hold one out of the available
> events.
>
> I think what we need here is to extend the event_alloc API to take the
> number of events we need and grab them all at once under the event
> spinlock.
>
That is a good idea - will change the event_alloc API in a separate patch.
>> mutex_lock(&gpu->lock);
>>
>> fence = etnaviv_gpu_fence_alloc(gpu);
>> @@ -1360,8 +1428,22 @@ int etnaviv_gpu_submit(struct etnaviv_gpu
>> *gpu,
>> gpu->lastctx = cmdbuf->ctx;
>> }
>>
>> + if (sync[0] != ~0U) {
>> + gpu->event[sync[0]].sync_point =
>> &sync_point_perfmon_sample_pre;
>> + gpu->event[sync[0]].nr_pmrs = cmdbuf->nr_pmrs;
>> + gpu->event[sync[0]].pmrs = cmdbuf->pmrs;
>> + etnaviv_sync_point_queue(gpu, sync[0]);
>> + }
>> +
>> etnaviv_buffer_queue(gpu, event, cmdbuf);
>>
>> + if (sync[1] != ~0U) {
>> + gpu->event[sync[1]].sync_point =
>> &sync_point_perfmon_sample_post;
>> + gpu->event[sync[1]].nr_pmrs = cmdbuf->nr_pmrs;
>> + gpu->event[sync[1]].pmrs = cmdbuf->pmrs;
>> + etnaviv_sync_point_queue(gpu, sync[1]);
>> + }
>> +
>> cmdbuf->fence = fence;
>> list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
>>
>> @@ -1455,20 +1537,22 @@ static irqreturn_t irq_handler(int irq, void
>> *data)
>> etnaviv_process_sync_point(gpu,
>> &gpu->event[event]);
>>
>> fence = gpu->event[event].fence;
>> - gpu->event[event].fence = NULL;
>> - dma_fence_signal(fence);
>> -
>> - /*
>> - * Events can be processed out of
>> order. Eg,
>> - * - allocate and queue event 0
>> - * - allocate event 1
>> - * - event 0 completes, we process it
>> - * - allocate and queue event 0
>> - * - event 1 and event 0 complete
>> - * we can end up processing event 0 first,
>> then 1.
>> - */
>> - if (fence_after(fence->seqno, gpu-
>> >completed_fence))
>> - gpu->completed_fence = fence->seqno;
>> + if (fence) {
>> + gpu->event[event].fence = NULL;
>> + dma_fence_signal(fence);
>> +
>> + /*
>> + * Events can be processed out of
>> order. Eg,
>> + * - allocate and queue event 0
>> + * - allocate event 1
>> + * - event 0 completes, we process
>> it
>> + * - allocate and queue event 0
>> + * - event 1 and event 0 complete
>> + * we can end up processing event 0
>> first, then 1.
>> + */
>> + if (fence_after(fence->seqno, gpu-
>> >completed_fence))
>> + gpu->completed_fence =
>> fence->seqno;
>> + }
>>
>> event_free(gpu, event);
>> }
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> index fee6ed9..71375ab 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> @@ -92,6 +92,10 @@ struct etnaviv_event {
>> struct dma_fence *fence;
>>
>> void (*sync_point)(struct etnaviv_gpu *gpu, struct
>> etnaviv_event *event);
>> +
>> + /* performance monitor requests */
>> + unsigned int nr_pmrs;
>> + struct etnaviv_perfmon_request *pmrs;
>
> This should be a pointer to the cmdbuf itself, so we don't copy the
> information to various places.
Makes sense - will be changed in v2.
>> };
>>
>> struct etnaviv_cmdbuf_suballoc;
greets
--
Christian Gmeiner, MSc
https://www.youtube.com/user/AloryOFFICIAL
https://soundcloud.com/christian-gmeiner
More information about the dri-devel
mailing list