[PATCH V2 12/23] drm/etnaviv: use 'sync points' for performance monitor requests

Tue Aug 8 10:49:58 UTC 2017

Am Samstag, den 22.07.2017, 11:53 +0200 schrieb Christian Gmeiner:
> With 'sync points' we can sample the reqeustes perform signals
> before and/or after the submited command buffer.
> 
> Signed-off-by: Christian Gmeiner <christian.gmeiner at gmail.com>
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 106 +++++++++++++++++++++++++++-------
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h |   1 +
>  2 files changed, 86 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> index 93e3f14c0599..c176781788ac 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> @@ -1318,12 +1318,48 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
>  	pm_runtime_put_autosuspend(gpu->dev);
>  }
>  
> +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
> +	struct etnaviv_event *event, unsigned int flags)
> +{
> +	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
> +	unsigned int i;
> +
> +	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
> +		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
> +
> +		if (pmr->flags == flags)
> +			etnaviv_perfmon_process(gpu, pmr);
> +	}
> +}
> +
> +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
> +	struct etnaviv_event *event)
> +{
> +	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE);
> +}
> +
> +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
> +	struct etnaviv_event *event)
> +{
> +	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
> +	unsigned int i;
> +
> +	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
> +
> +	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
> +		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
> +
> +		*pmr->bo_vma = pmr->sequence;
> +	}
> +}
> +
> +
>  /* add bo's to gpu's ring, and kick gpu: */
>  int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
>  {
>  	struct dma_fence *fence;
> -	unsigned int event, i;
> +	unsigned int i, nr_events, event[3];
>  	int ret;
>  
>  	ret = etnaviv_gpu_pm_get_sync(gpu);
> @@ -1339,9 +1375,21 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  	 *
>  	 */
>  
> -	ret = event_alloc(gpu, 1, &event);
> -	if (!ret) {
> -		DRM_ERROR("no free event\n");
> +	 /*
> + 	 * if there are performance monitor requests we need to have
> +	 * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE
> +	 *   requests.
> +	 * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests
> +	 *   and update the sequence number for userspace.
> + 	 */
> +	 if (cmdbuf->nr_pmrs)
> +	 	nr_events = 3;
> +	else
> +		nr_events = 1;

Indentation of comment and code is off here. Also I would prefer if
nr_events is just initialized to 1, so we can spare the else path.

> +
> +	ret = event_alloc(gpu, nr_events, event);
> +	if (ret < 0) {
> +		DRM_ERROR("no free events\n");
>  		goto out_pm_put;
>  	}
>  
> @@ -1349,12 +1397,14 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  
>  	fence = etnaviv_gpu_fence_alloc(gpu);
>  	if (!fence) {
> -		event_free(gpu, event);
> +		for (i = 0; i < nr_events; i++)
> +			event_free(gpu, event[i]);
> +
>  		ret = -ENOMEM;
>  		goto out_unlock;
>  	}
>  
> -	gpu->event[event].fence = fence;
> +	gpu->event[event[0]].fence = fence;
>  	submit->fence = dma_fence_get(fence);
>  	gpu->active_fence = submit->fence->seqno;
>  
> @@ -1364,7 +1414,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  		gpu->lastctx = cmdbuf->ctx;
>  	}
>  
> -	etnaviv_buffer_queue(gpu, event, cmdbuf);
> +	if (cmdbuf->nr_pmrs) {
> +		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
> +		gpu->event[event[1]].cmdbuf = cmdbuf;
> +		etnaviv_sync_point_queue(gpu, event[1]);
> +	}
> +
> +	etnaviv_buffer_queue(gpu, event[0], cmdbuf);
> +
> +	if (cmdbuf->nr_pmrs) {
> +		gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
> +		gpu->event[event[2]].cmdbuf = cmdbuf;
> +		etnaviv_sync_point_queue(gpu, event[2]);
> +	}
>  
>  	cmdbuf->fence = fence;
>  	list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
> @@ -1469,20 +1531,22 @@ static irqreturn_t irq_handler(int irq, void *data)
>  			}
>  
>  			fence = gpu->event[event].fence;
> -			gpu->event[event].fence = NULL;
> -			dma_fence_signal(fence);
> -
> -			/*
> -			 * Events can be processed out of order.  Eg,
> -			 * - allocate and queue event 0
> -			 * - allocate event 1
> -			 * - event 0 completes, we process it
> -			 * - allocate and queue event 0
> -			 * - event 1 and event 0 complete
> -			 * we can end up processing event 0 first, then 1.
> -			 */
> -			if (fence_after(fence->seqno, gpu->completed_fence))
> -				gpu->completed_fence = fence->seqno;
> +			if (fence) {
> +				gpu->event[event].fence = NULL;
> +				dma_fence_signal(fence);
> +
> +				/*
> +				 * Events can be processed out of order.  Eg,
> +				 * - allocate and queue event 0
> +				 * - allocate event 1
> +				 * - event 0 completes, we process it
> +				 * - allocate and queue event 0
> +				 * - event 1 and event 0 complete
> +				 * we can end up processing event 0 first, then 1.
> +				 */
> +				if (fence_after(fence->seqno, gpu->completed_fence))
> +					gpu->completed_fence = fence->seqno;
> +			}
>  
>  			event_free(gpu, event);
>  		}
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> index 7d5f785b0f08..c75a7b5c397c 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> @@ -89,6 +89,7 @@ struct etnaviv_chip_identity {
>  
>  struct etnaviv_event {
>  	struct dma_fence *fence;
> +	struct etnaviv_cmdbuf *cmdbuf;
>  
>  	void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
>  };