[PATCH v3 8/8] drm/etnaviv: implement per-process address spaces on MMUv2

Wed Aug 14 10:00:05 UTC 2019

Hi,
On Fri, Aug 09, 2019 at 02:04:24PM +0200, Lucas Stach wrote:
> This builds on top of the MMU contexts introduced earlier. Instead of having
> one context per GPU core, each GPU client receives its own context.
> 
> On MMUv1 this still means a single shared pagetable set is used by all
> clients, but on MMUv2 there is now a distinct set of pagetables for each
> client. As the command fetch is also translated via the MMU on MMUv2, the
> kernel command ringbuffer is mapped into each of the client pagetables.
> 
> As the MMU context switch is a bit of a heavy operation, due to the needed
> cache and TLB flushing, this patch implements a lazy way of switching the
> MMU context. The kernel does not have its own MMU context, but reuses the
> last client context for all of its operations. This has some visible impact,
> as the GPU can now only be started once a client has submitted some work and
> we got the client MMU context assigned. Also the MMU context has a different
> lifetime than the general client context, as the GPU might still execute the
> kernel command buffer in the context of a client even after the client has
> completed all GPU work and has been terminated. Only when the GPU is runtime
> suspended or switches to another client's MMU context is the old context
> freed up.
> 
> Signed-off-by: Lucas Stach <l.stach at pengutronix.de>

Reviewed-by: Guido Günther <agx at sigxcpu.org> 

> ---
> v3: Don't call etnaviv_cmdbuf_suballoc_unmap when mapping failed.
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_buffer.c     |  64 ++++++++---
>  drivers/gpu/drm/etnaviv/etnaviv_drv.c        |  38 ++++++-
>  drivers/gpu/drm/etnaviv/etnaviv_drv.h        |   6 +-
>  drivers/gpu/drm/etnaviv/etnaviv_dump.c       |   4 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem.c        |   7 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem.h        |   4 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |  11 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c        | 105 ++++++++-----------
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h        |   4 -
>  drivers/gpu/drm/etnaviv/etnaviv_iommu.c      |  10 +-
>  drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c   |  17 ++-
>  drivers/gpu/drm/etnaviv/etnaviv_mmu.c        |  42 ++++++--
>  drivers/gpu/drm/etnaviv/etnaviv_mmu.h        |  11 +-
>  13 files changed, 208 insertions(+), 115 deletions(-)
> 
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> index 4324b098689f..876a035ee1a2 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> @@ -118,7 +118,8 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
>  	u32 *ptr = buf->vaddr + off;
>  
>  	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
> -			ptr, etnaviv_cmdbuf_get_va(buf, &gpu->cmdbuf_mapping) +
> +			ptr, etnaviv_cmdbuf_get_va(buf,
> +			&gpu->mmu_context->cmdbuf_mapping) +
>  			off, size - len * 4 - off);
>  
>  	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
> @@ -152,7 +153,8 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
>  	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
>  		buffer->user_size = 0;
>  
> -	return etnaviv_cmdbuf_get_va(buffer, &gpu->cmdbuf_mapping) +
> +	return etnaviv_cmdbuf_get_va(buffer,
> +				     &gpu->mmu_context->cmdbuf_mapping) +
>  	       buffer->user_size;
>  }
>  
> @@ -166,7 +168,8 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
>  	buffer->user_size = 0;
>  
>  	CMD_WAIT(buffer);
> -	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer, &gpu->cmdbuf_mapping)
> +	CMD_LINK(buffer, 2,
> +		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
>  		 + buffer->user_size - 4);
>  
>  	return buffer->user_size / 8;
> @@ -293,7 +296,8 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
>  
>  	/* Append waitlink */
>  	CMD_WAIT(buffer);
> -	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer, &gpu->cmdbuf_mapping)
> +	CMD_LINK(buffer, 2,
> +		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
>  		 + buffer->user_size - 4);
>  
>  	/*
> @@ -308,7 +312,8 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
>  
>  /* Append a command buffer to the ring buffer. */
>  void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
> -	unsigned int event, struct etnaviv_cmdbuf *cmdbuf)
> +	struct etnaviv_iommu_context *mmu_context, unsigned int event,
> +	struct etnaviv_cmdbuf *cmdbuf)
>  {
>  	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
>  	unsigned int waitlink_offset = buffer->user_size - 16;
> @@ -317,17 +322,19 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
>  	bool switch_context = gpu->exec_state != exec_state;
>  	unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
>  	bool need_flush = gpu->flush_seq != new_flush_seq;
> +	bool switch_mmu_context = gpu->mmu_context != mmu_context;
>  
>  	lockdep_assert_held(&gpu->lock);
>  
>  	if (drm_debug & DRM_UT_DRIVER)
>  		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
>  
> -	link_target = etnaviv_cmdbuf_get_va(cmdbuf, &gpu->cmdbuf_mapping);
> +	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
> +					    &gpu->mmu_context->cmdbuf_mapping);
>  	link_dwords = cmdbuf->size / 8;
>  
>  	/*
> -	 * If we need maintanence prior to submitting this buffer, we will
> +	 * If we need maintenance prior to submitting this buffer, we will
>  	 * need to append a mmu flush load state, followed by a new
>  	 * link to this buffer - a total of four additional words.
>  	 */
> @@ -349,7 +356,24 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
>  		if (switch_context)
>  			extra_dwords += 4;
>  
> +		/* PTA load command */
> +		if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
> +			extra_dwords += 1;
> +
>  		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
> +		/*
> +		 * Switch MMU context if necessary. Must be done after the
> +		 * link target has been calculated, as the jump forward in the
> +		 * kernel ring still uses the last active MMU context before
> +		 * the switch.
> +		 */
> +		if (switch_mmu_context) {
> +			struct etnaviv_iommu_context *old_context = gpu->mmu_context;
> +
> +			etnaviv_iommu_context_get(mmu_context);
> +			gpu->mmu_context = mmu_context;
> +			etnaviv_iommu_context_put(old_context);
> +		}
>  
>  		if (need_flush) {
>  			/* Add the MMU flush */
> @@ -361,10 +385,23 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
>  					       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
>  					       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
>  			} else {
> +				u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
> +					    VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;
> +
> +				if (switch_mmu_context &&
> +				    gpu->sec_mode == ETNA_SEC_KERNEL) {
> +					unsigned short id =
> +						etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
> +					CMD_LOAD_STATE(buffer,
> +						VIVS_MMUv2_PTA_CONFIG,
> +						VIVS_MMUv2_PTA_CONFIG_INDEX(id));
> +				}
> +
> +				if (gpu->sec_mode == ETNA_SEC_NONE)
> +					flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);
> +
>  				CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
> -					VIVS_MMUv2_CONFIGURATION_MODE_MASK |
> -					VIVS_MMUv2_CONFIGURATION_ADDRESS_MASK |
> -					VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH);
> +					       flush);
>  				CMD_SEM(buffer, SYNC_RECIPIENT_FE,
>  					SYNC_RECIPIENT_PE);
>  				CMD_STALL(buffer, SYNC_RECIPIENT_FE,
> @@ -380,6 +417,8 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
>  		}
>  
>  		/* And the link to the submitted buffer */
> +		link_target = etnaviv_cmdbuf_get_va(cmdbuf,
> +					&gpu->mmu_context->cmdbuf_mapping);
>  		CMD_LINK(buffer, link_dwords, link_target);
>  
>  		/* Update the link target to point to above instructions */
> @@ -416,13 +455,14 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
>  	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
>  		       VIVS_GL_EVENT_FROM_PE);
>  	CMD_WAIT(buffer);
> -	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer, &gpu->cmdbuf_mapping)
> +	CMD_LINK(buffer, 2,
> +		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
>  		 + buffer->user_size - 4);
>  
>  	if (drm_debug & DRM_UT_DRIVER)
>  		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
>  			return_target,
> -			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->cmdbuf_mapping),
> +			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
>  			cmdbuf->vaddr);
>  
>  	if (drm_debug & DRM_UT_DRIVER) {
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> index eb0c23fe979a..80f1edcbbea0 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> @@ -50,12 +50,19 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
>  {
>  	struct etnaviv_drm_private *priv = dev->dev_private;
>  	struct etnaviv_file_private *ctx;
> -	int i;
> +	int ret, i;
>  
>  	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>  	if (!ctx)
>  		return -ENOMEM;
>  
> +	ctx->mmu = etnaviv_iommu_context_init(priv->mmu_global,
> +					      priv->cmdbuf_suballoc);
> +	if (!ctx->mmu) {
> +		ret = -ENOMEM;
> +		goto out_free;
> +	}
> +
>  	for (i = 0; i < ETNA_MAX_PIPES; i++) {
>  		struct etnaviv_gpu *gpu = priv->gpu[i];
>  		struct drm_sched_rq *rq;
> @@ -70,6 +77,10 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
>  	file->driver_priv = ctx;
>  
>  	return 0;
> +
> +out_free:
> +	kfree(ctx);
> +	return ret;
>  }
>  
>  static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
> @@ -85,6 +96,8 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
>  			drm_sched_entity_destroy(&ctx->sched_entity[i]);
>  	}
>  
> +	etnaviv_iommu_context_put(ctx->mmu);
> +
>  	kfree(ctx);
>  }
>  
> @@ -116,12 +129,29 @@ static int etnaviv_mm_show(struct drm_device *dev, struct seq_file *m)
>  static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m)
>  {
>  	struct drm_printer p = drm_seq_file_printer(m);
> +	struct etnaviv_iommu_context *mmu_context;
>  
>  	seq_printf(m, "Active Objects (%s):\n", dev_name(gpu->dev));
>  
> -	mutex_lock(&gpu->mmu_context->lock);
> -	drm_mm_print(&gpu->mmu_context->mm, &p);
> -	mutex_unlock(&gpu->mmu_context->lock);
> +	/*
> +	 * Lock the GPU to avoid a MMU context switch just now and elevate
> +	 * the refcount of the current context to avoid it disappearing from
> +	 * under our feet.
> +	 */
> +	mutex_lock(&gpu->lock);
> +	mmu_context = gpu->mmu_context;
> +	if (mmu_context)
> +		etnaviv_iommu_context_get(mmu_context);
> +	mutex_unlock(&gpu->lock);
> +
> +	if (!mmu_context)
> +		return 0;
> +
> +	mutex_lock(&mmu_context->lock);
> +	drm_mm_print(&mmu_context->mm, &p);
> +	mutex_unlock(&mmu_context->lock);
> +
> +	etnaviv_iommu_context_put(mmu_context);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> index 5f8db08f1c17..a488cfdb6bbf 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> @@ -25,10 +25,7 @@ struct etnaviv_gem_submit;
>  struct etnaviv_iommu_global;
>  
>  struct etnaviv_file_private {
> -	/*
> -	 * When per-context address spaces are supported we'd keep track of
> -	 * the context's page-tables here.
> -	 */
> +	struct etnaviv_iommu_context	*mmu;
>  	struct drm_sched_entity		sched_entity[ETNA_MAX_PIPES];
>  };
>  
> @@ -75,6 +72,7 @@ u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id);
>  void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
>  void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
>  void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
> +	struct etnaviv_iommu_context *mmu,
>  	unsigned int event, struct etnaviv_cmdbuf *cmdbuf);
>  void etnaviv_validate_init(void);
>  bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu,
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> index 7e6791517693..698db540972c 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> @@ -173,12 +173,12 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
>  	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr,
>  			      gpu->buffer.size,
>  			      etnaviv_cmdbuf_get_va(&gpu->buffer,
> -						    &gpu->cmdbuf_mapping));
> +					&gpu->mmu_context->cmdbuf_mapping));
>  
>  	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
>  			      submit->cmdbuf.vaddr, submit->cmdbuf.size,
>  			      etnaviv_cmdbuf_get_va(&submit->cmdbuf,
> -						    &gpu->cmdbuf_mapping));
> +					&gpu->mmu_context->cmdbuf_mapping));
>  
>  	/* Reserve space for the bomap */
>  	if (n_bomap_pages) {
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> index 04c8170f76cd..e79f6ef3659a 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> @@ -248,8 +248,7 @@ void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping)
>  }
>  
>  struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
> -	struct drm_gem_object *obj, struct etnaviv_gpu *gpu,
> -	struct etnaviv_iommu_context *mmu_context)
> +	struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context)
>  {
>  	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
>  	struct etnaviv_vram_mapping *mapping;
> @@ -308,8 +307,8 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
>  	mapping->context = mmu_context;
>  	mapping->use = 1;
>  
> -	ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj, gpu->memory_base,
> -				    mapping);
> +	ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj,
> +				    mmu_context->global->memory_base, mapping);
>  	if (ret < 0)
>  		kfree(mapping);
>  	else
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> index f342560b5938..1e11659a8842 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> @@ -93,6 +93,7 @@ struct etnaviv_gem_submit {
>  	struct kref refcount;
>  	struct etnaviv_file_private *ctx;
>  	struct etnaviv_gpu *gpu;
> +	struct etnaviv_iommu_context *mmu_context, *prev_mmu_context;
>  	struct dma_fence *out_fence, *in_fence;
>  	int out_fence_id;
>  	struct list_head node; /* GPU active submit list */
> @@ -119,8 +120,7 @@ struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj);
>  void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);
>  
>  struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
> -	struct drm_gem_object *obj, struct etnaviv_gpu *gpu,
> -	struct etnaviv_iommu_context *mmu_context);
> +	struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context);
>  void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping);
>  
>  #endif /* __ETNAVIV_GEM_H__ */
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index 7929d258daf8..16e7d371a7ef 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -224,8 +224,7 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
>  		struct etnaviv_vram_mapping *mapping;
>  
>  		mapping = etnaviv_gem_mapping_get(&etnaviv_obj->base,
> -						  submit->gpu,
> -						  submit->gpu->mmu_context);
> +						  submit->mmu_context);
>  		if (IS_ERR(mapping)) {
>  			ret = PTR_ERR(mapping);
>  			break;
> @@ -362,6 +361,12 @@ static void submit_cleanup(struct kref *kref)
>  	if (submit->cmdbuf.suballoc)
>  		etnaviv_cmdbuf_free(&submit->cmdbuf);
>  
> +	if (submit->mmu_context)
> +		etnaviv_iommu_context_put(submit->mmu_context);
> +
> +	if (submit->prev_mmu_context)
> +		etnaviv_iommu_context_put(submit->prev_mmu_context);
> +
>  	for (i = 0; i < submit->nr_bos; i++) {
>  		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
>  
> @@ -503,6 +508,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
>  		goto err_submit_objects;
>  
>  	submit->ctx = file->driver_priv;
> +	etnaviv_iommu_context_get(submit->ctx->mmu);
> +	submit->mmu_context = submit->ctx->mmu;
>  	submit->exec_state = args->exec_state;
>  	submit->flags = args->flags;
>  
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> index 3af72a4127b0..d8a83ebfce47 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> @@ -602,7 +602,8 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch)
>  
>  static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu)
>  {
> -	u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer, &gpu->cmdbuf_mapping);
> +	u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer,
> +				&gpu->mmu_context->cmdbuf_mapping);
>  	u16 prefetch;
>  
>  	/* setup the MMU */
> @@ -693,8 +694,6 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
>  	etnaviv_gpu_setup_pulse_eater(gpu);
>  
>  	gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
> -
> -	etnaviv_gpu_start_fe_idleloop(gpu);
>  }
>  
>  int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
> @@ -724,28 +723,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>  		goto fail;
>  	}
>  
> -	/*
> -	 * Set the GPU linear window to be at the end of the DMA window, where
> -	 * the CMA area is likely to reside. This ensures that we are able to
> -	 * map the command buffers while having the linear window overlap as
> -	 * much RAM as possible, so we can optimize mappings for other buffers.
> -	 *
> -	 * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
> -	 * to different views of the memory on the individual engines.
> -	 */
> -	if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
> -	    (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
> -		u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
> -		if (dma_mask < PHYS_OFFSET + SZ_2G)
> -			gpu->memory_base = PHYS_OFFSET;
> -		else
> -			gpu->memory_base = dma_mask - SZ_2G + 1;
> -	} else if (PHYS_OFFSET >= SZ_2G) {
> -		dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
> -		gpu->memory_base = PHYS_OFFSET;
> -		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
> -	}
> -
>  	/*
>  	 * On cores with security features supported, we claim control over the
>  	 * security states.
> @@ -764,20 +741,26 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>  	if (ret)
>  		goto fail;
>  
> -	gpu->mmu_context = etnaviv_iommu_context_init(priv->mmu_global);
> -	if (IS_ERR(gpu->mmu_context)) {
> -		dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
> -		ret = PTR_ERR(gpu->mmu_context);
> -		goto iommu_global_fini;
> -	}
> -
> -	ret = etnaviv_cmdbuf_suballoc_map(priv->cmdbuf_suballoc,
> -					  gpu->mmu_context,
> -					  &gpu->cmdbuf_mapping,
> -					  gpu->memory_base);
> -	if (ret) {
> -		dev_err(gpu->dev, "failed to map cmdbuf suballoc\n");
> -		goto destroy_iommu;
> +	/*
> +	 * Set the GPU linear window to be at the end of the DMA window, where
> +	 * the CMA area is likely to reside. This ensures that we are able to
> +	 * map the command buffers while having the linear window overlap as
> +	 * much RAM as possible, so we can optimize mappings for other buffers.
> +	 *
> +	 * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
> +	 * to different views of the memory on the individual engines.
> +	 */
> +	if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
> +	    (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
> +		u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
> +		if (dma_mask < PHYS_OFFSET + SZ_2G)
> +			priv->mmu_global->memory_base = PHYS_OFFSET;
> +		else
> +			priv->mmu_global->memory_base = dma_mask - SZ_2G + 1;
> +	} else if (PHYS_OFFSET >= SZ_2G) {
> +		dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
> +		priv->mmu_global->memory_base = PHYS_OFFSET;
> +		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
>  	}
>  
>  	/* Create buffer: */
> @@ -785,15 +768,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>  				  PAGE_SIZE);
>  	if (ret) {
>  		dev_err(gpu->dev, "could not create command buffer\n");
> -		goto unmap_suballoc;
> -	}
> -
> -	if (!(gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION) &&
> -	    etnaviv_cmdbuf_get_va(&gpu->buffer, &gpu->cmdbuf_mapping) > 0x80000000) {
> -		ret = -EINVAL;
> -		dev_err(gpu->dev,
> -			"command buffer outside valid memory window\n");
> -		goto free_buffer;
> +		goto fail;
>  	}
>  
>  	/* Setup event management */
> @@ -816,14 +791,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>  
>  	return 0;
>  
> -free_buffer:
> -	etnaviv_cmdbuf_free(&gpu->buffer);
> -unmap_suballoc:
> -	etnaviv_cmdbuf_suballoc_unmap(gpu->mmu_context, &gpu->cmdbuf_mapping);
> -destroy_iommu:
> -	etnaviv_iommu_context_put(gpu->mmu_context);
> -iommu_global_fini:
> -	etnaviv_iommu_global_fini(gpu);
>  fail:
>  	pm_runtime_mark_last_busy(gpu->dev);
>  	pm_runtime_put_autosuspend(gpu->dev);
> @@ -1017,6 +984,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
>  
>  	etnaviv_gpu_hw_init(gpu);
>  	gpu->exec_state = -1;
> +	gpu->mmu_context = NULL;
>  
>  	mutex_unlock(&gpu->lock);
>  	pm_runtime_mark_last_busy(gpu->dev);
> @@ -1323,6 +1291,15 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
>  		goto out_unlock;
>  	}
>  
> +	if (!gpu->mmu_context) {
> +		etnaviv_iommu_context_get(submit->mmu_context);
> +		gpu->mmu_context = submit->mmu_context;
> +		etnaviv_gpu_start_fe_idleloop(gpu);
> +	} else {
> +		etnaviv_iommu_context_get(gpu->mmu_context);
> +		submit->prev_mmu_context = gpu->mmu_context;
> +	}
> +
>  	if (submit->nr_pmrs) {
>  		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
>  		kref_get(&submit->refcount);
> @@ -1332,8 +1309,8 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
>  
>  	gpu->event[event[0]].fence = gpu_fence;
>  	submit->cmdbuf.user_size = submit->cmdbuf.size - 8;
> -	etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
> -			     &submit->cmdbuf);
> +	etnaviv_buffer_queue(gpu, submit->exec_state, submit->mmu_context,
> +			     event[0], &submit->cmdbuf);
>  
>  	if (submit->nr_pmrs) {
>  		gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
> @@ -1535,7 +1512,7 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms)
>  
>  static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
>  {
> -	if (gpu->initialized) {
> +	if (gpu->initialized && gpu->mmu_context) {
>  		/* Replace the last WAIT with END */
>  		mutex_lock(&gpu->lock);
>  		etnaviv_buffer_end(gpu);
> @@ -1547,8 +1524,13 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
>  		 * we fail, just warn and continue.
>  		 */
>  		etnaviv_gpu_wait_idle(gpu, 100);
> +
> +		etnaviv_iommu_context_put(gpu->mmu_context);
> +		gpu->mmu_context = NULL;
>  	}
>  
> +	gpu->exec_state = -1;
> +
>  	return etnaviv_gpu_clk_disable(gpu);
>  }
>  
> @@ -1564,8 +1546,6 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
>  	etnaviv_gpu_update_clock(gpu);
>  	etnaviv_gpu_hw_init(gpu);
>  
> -	gpu->exec_state = -1;
> -
>  	mutex_unlock(&gpu->lock);
>  
>  	return 0;
> @@ -1696,9 +1676,6 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
>  
>  	if (gpu->initialized) {
>  		etnaviv_cmdbuf_free(&gpu->buffer);
> -		etnaviv_cmdbuf_suballoc_unmap(gpu->mmu_context,
> -					      &gpu->cmdbuf_mapping);
> -		etnaviv_iommu_context_put(gpu->mmu_context);
>  		etnaviv_iommu_global_fini(gpu);
>  		gpu->initialized = false;
>  	}
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> index c0bd6018d53b..8f9bd4edc96a 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> @@ -103,13 +103,9 @@ struct etnaviv_gpu {
>  	bool initialized;
>  
>  	/* 'ring'-buffer: */
> -	struct etnaviv_vram_mapping cmdbuf_mapping;
>  	struct etnaviv_cmdbuf buffer;
>  	int exec_state;
>  
> -	/* bus base address of memory  */
> -	u32 memory_base;
> -
>  	/* event management: */
>  	DECLARE_BITMAP(event_bitmap, ETNA_NR_EVENTS);
>  	struct etnaviv_event event[ETNA_NR_EVENTS];
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
> index a2f1ff151822..aac8dbf3ea56 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
> @@ -93,11 +93,11 @@ static void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu,
>  	u32 pgtable;
>  
>  	/* set base addresses */
> -	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, gpu->memory_base);
> -	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, gpu->memory_base);
> -	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, gpu->memory_base);
> -	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, gpu->memory_base);
> -	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, gpu->memory_base);
> +	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, context->global->memory_base);
> +	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, context->global->memory_base);
> +	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, context->global->memory_base);
> +	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, context->global->memory_base);
> +	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, context->global->memory_base);
>  
>  	/* set page table address in MC */
>  	pgtable = (u32)v1_context->pgtable_dma;
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> index 5ca2077c148d..043111a1d60c 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> @@ -206,7 +206,7 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu,
>  		  VIVS_MMUv2_SAFE_ADDRESS_CONFIG_SEC_SAFE_ADDR_HIGH(
>  		  upper_32_bits(context->global->bad_page_dma)));
>  
> -	context->global->v2.pta_cpu[0] = v2_context->mtlb_dma |
> +	context->global->v2.pta_cpu[v2_context->id] = v2_context->mtlb_dma |
>  				 	 VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K;
>  
>  	/* trigger a PTA load through the FE */
> @@ -218,6 +218,19 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu,
>  	gpu_write(gpu, VIVS_MMUv2_SEC_CONTROL, VIVS_MMUv2_SEC_CONTROL_ENABLE);
>  }
>  
> +u32 etnaviv_iommuv2_get_mtlb_addr(struct etnaviv_iommu_context *context)
> +{
> +	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
> +
> +	return v2_context->mtlb_dma;
> +}
> +
> +unsigned short etnaviv_iommuv2_get_pta_id(struct etnaviv_iommu_context *context)
> +{
> +	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
> +
> +	return v2_context->id;
> +}
>  static void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu,
>  				    struct etnaviv_iommu_context *context)
>  {
> @@ -272,6 +285,8 @@ etnaviv_iommuv2_context_alloc(struct etnaviv_iommu_global *global)
>  	memset32(v2_context->mtlb_cpu, MMUv2_PTE_EXCEPTION,
>  		 MMUv2_MAX_STLB_ENTRIES);
>  
> +	global->v2.pta_cpu[v2_context->id] = v2_context->mtlb_dma;
> +
>  	context = &v2_context->base;
>  	context->global = global;
>  	kref_init(&context->refcount);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> index 2f64eef773ed..82822e30bf30 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> @@ -290,6 +290,8 @@ static void etnaviv_iommu_context_free(struct kref *kref)
>  	struct etnaviv_iommu_context *context =
>  		container_of(kref, struct etnaviv_iommu_context, refcount);
>  
> +	etnaviv_cmdbuf_suballoc_unmap(context, &context->cmdbuf_mapping);
> +
>  	context->global->ops->free(context);
>  }
>  void etnaviv_iommu_context_put(struct etnaviv_iommu_context *context)
> @@ -298,12 +300,28 @@ void etnaviv_iommu_context_put(struct etnaviv_iommu_context *context)
>  }
>  
>  struct etnaviv_iommu_context *
> -etnaviv_iommu_context_init(struct etnaviv_iommu_global *global)
> +etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
> +			   struct etnaviv_cmdbuf_suballoc *suballoc)
>  {
> +	struct etnaviv_iommu_context *ctx;
> +	int ret;
> +
>  	if (global->version == ETNAVIV_IOMMU_V1)
> -		return etnaviv_iommuv1_context_alloc(global);
> +		ctx = etnaviv_iommuv1_context_alloc(global);
>  	else
> -		return etnaviv_iommuv2_context_alloc(global);
> +		ctx = etnaviv_iommuv2_context_alloc(global);
> +
> +	if (!ctx)
> +		return NULL;
> +
> +	ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping,
> +					  global->memory_base);
> +	if (ret) {
> +		global->ops->free(ctx);
> +		return NULL;
> +	}
> +
> +	return ctx;
>  }
>  
>  void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
> @@ -319,6 +337,12 @@ int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *context,
>  {
>  	mutex_lock(&context->lock);
>  
> +	if (mapping->use > 0) {
> +		mapping->use++;
> +		mutex_unlock(&context->lock);
> +		return 0;
> +	}
> +
>  	/*
>  	 * For MMUv1 we don't add the suballoc region to the pagetables, as
>  	 * those GPUs can only work with cmdbufs accessed through the linear
> @@ -340,7 +364,6 @@ int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *context,
>  		mapping->iova = node->start;
>  		ret = etnaviv_context_map(context, node->start, paddr, size,
>  					  ETNAVIV_PROT_READ);
> -
>  		if (ret < 0) {
>  			drm_mm_remove_node(node);
>  			mutex_unlock(&context->lock);
> @@ -363,15 +386,14 @@ void etnaviv_iommu_put_suballoc_va(struct etnaviv_iommu_context *context,
>  {
>  	struct drm_mm_node *node = &mapping->vram_node;
>  
> -	if (!mapping->use)
> -		return;
> -
> -	mapping->use = 0;
> +	mutex_lock(&context->lock);
> +	mapping->use--;
>  
> -	if (context->global->version == ETNAVIV_IOMMU_V1)
> +	if (mapping->use > 0 || context->global->version == ETNAVIV_IOMMU_V1) {
> +		mutex_unlock(&context->lock);
>  		return;
> +	}
>  
> -	mutex_lock(&context->lock);
>  	etnaviv_context_unmap(context, node->start, node->size);
>  	drm_mm_remove_node(node);
>  	mutex_unlock(&context->lock);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
> index 4438d66db6ab..c01491a6c4d8 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
> @@ -47,6 +47,8 @@ struct etnaviv_iommu_global {
>  	void *bad_page_cpu;
>  	dma_addr_t bad_page_dma;
>  
> +	u32 memory_base;
> +
>  	/*
>  	 * This union holds members needed by either MMUv1 or MMUv2, which
>  	 * can not exist at the same time.
> @@ -74,6 +76,9 @@ struct etnaviv_iommu_context {
>  	struct list_head mappings;
>  	struct drm_mm mm;
>  	unsigned int flush_seq;
> +
> +	/* Not part of the context, but needs to have the same lifetime */
> +	struct etnaviv_vram_mapping cmdbuf_mapping;
>  };
>  
>  int etnaviv_iommu_global_init(struct etnaviv_gpu *gpu);
> @@ -98,7 +103,8 @@ size_t etnaviv_iommu_dump_size(struct etnaviv_iommu_context *ctx);
>  void etnaviv_iommu_dump(struct etnaviv_iommu_context *ctx, void *buf);
>  
>  struct etnaviv_iommu_context *
> -etnaviv_iommu_context_init(struct etnaviv_iommu_global *global);
> +etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
> +			   struct etnaviv_cmdbuf_suballoc *suballoc);
>  static inline void etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx)
>  {
>  	kref_get(&ctx->refcount);
> @@ -112,4 +118,7 @@ etnaviv_iommuv1_context_alloc(struct etnaviv_iommu_global *global);
>  struct etnaviv_iommu_context *
>  etnaviv_iommuv2_context_alloc(struct etnaviv_iommu_global *global);
>  
> +u32 etnaviv_iommuv2_get_mtlb_addr(struct etnaviv_iommu_context *context);
> +unsigned short etnaviv_iommuv2_get_pta_id(struct etnaviv_iommu_context *context);
> +
>  #endif /* __ETNAVIV_MMU_H__ */
> -- 
> 2.20.1
> 
> _______________________________________________
> etnaviv mailing list
> etnaviv at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/etnaviv