[Intel-gfx] [PATCH] drm/i915: Put all permanent stolen allocations together

Ville Syrjälä ville.syrjala at linux.intel.com
Tue Sep 11 16:13:18 UTC 2018


On Tue, Sep 11, 2018 at 04:14:39PM +0100, Chris Wilson wrote:
> Whilst reviewing another new user of stolen memory, Ville made the
> observation that we should try to ensure that all permanent allocations
> within stolen memory are clustered together at either end of the stolen
> region, in order to reduce fragmentation. In the depths of
> i915_gem_stolen.c it is not always clear what manner of allocation we
> need, so expose the drm_mm search parameter and push the decision to our
> callers.
> 
> Suggested-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 15 +++++++++------
>  drivers/gpu/drm/i915/i915_gem_stolen.c  | 19 ++++++++++++-------
>  drivers/gpu/drm/i915/intel_engine_cs.c  |  3 ++-
>  drivers/gpu/drm/i915/intel_fbc.c        | 13 ++++++++-----
>  drivers/gpu/drm/i915/intel_fbdev.c      |  3 ++-
>  drivers/gpu/drm/i915/intel_overlay.c    |  3 ++-
>  drivers/gpu/drm/i915/intel_pm.c         |  3 ++-
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
>  8 files changed, 38 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7ea442033a57..e68102141067 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3302,19 +3302,22 @@ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
>  
>  /* i915_gem_stolen.c */
>  int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
> -				struct drm_mm_node *node, u64 size,
> -				unsigned alignment);
> +				struct drm_mm_node *node,
> +				u64 size, unsigned int alignment,
> +				unsigned int search);
>  int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
> -					 struct drm_mm_node *node, u64 size,
> -					 unsigned alignment, u64 start,
> -					 u64 end);
> +					 struct drm_mm_node *node,
> +					 u64 size, unsigned int alignment,
> +					 u64 start, u64 end,
> +					 unsigned int search);
>  void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
>  				 struct drm_mm_node *node);
>  int i915_gem_init_stolen(struct drm_i915_private *dev_priv);
>  void i915_gem_cleanup_stolen(struct drm_device *dev);
>  struct drm_i915_gem_object *
>  i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
> -			      resource_size_t size);
> +			      resource_size_t size,
> +			      unsigned int search);
>  struct drm_i915_gem_object *
>  i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
>  					       resource_size_t stolen_offset,
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 53440bf87650..ed440e280dd0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -43,8 +43,10 @@
>   */
>  
>  int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
> -					 struct drm_mm_node *node, u64 size,
> -					 unsigned alignment, u64 start, u64 end)
> +					 struct drm_mm_node *node,
> +					 u64 size, unsigned int alignment,
> +					 u64 start, u64 end,
> +					 unsigned int search)
>  {
>  	int ret;
>  
> @@ -58,7 +60,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
>  	mutex_lock(&dev_priv->mm.stolen_lock);
>  	ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
>  					  size, alignment, 0,
> -					  start, end, DRM_MM_INSERT_BEST);
> +					  start, end, search);
>  	mutex_unlock(&dev_priv->mm.stolen_lock);
>  
>  	return ret;
> @@ -66,10 +68,12 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
>  
>  int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
>  				struct drm_mm_node *node, u64 size,
> -				unsigned alignment)
> +				unsigned int alignment,
> +				unsigned int search)
>  {
>  	return i915_gem_stolen_insert_node_in_range(dev_priv, node, size,
> -						    alignment, 0, U64_MAX);
> +						    alignment, 0, U64_MAX,
> +						    search);
>  }
>  
>  void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
> @@ -591,7 +595,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
>  
>  struct drm_i915_gem_object *
>  i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
> -			      resource_size_t size)
> +			      resource_size_t size,
> +			      unsigned int search)
>  {
>  	struct drm_i915_gem_object *obj;
>  	struct drm_mm_node *stolen;
> @@ -607,7 +612,7 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
>  	if (!stolen)
>  		return NULL;
>  
> -	ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096);
> +	ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096, search);
>  	if (ret) {
>  		kfree(stolen);
>  		return NULL;
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 10cd051ba29e..c945a9fb54ae 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -499,7 +499,8 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine,
>  
>  	WARN_ON(engine->scratch);
>  
> -	obj = i915_gem_object_create_stolen(engine->i915, size);
> +	obj = i915_gem_object_create_stolen(engine->i915,
> +					    size, DRM_MM_INSERT_LOW);
>  	if (!obj)
>  		obj = i915_gem_object_create_internal(engine->i915, size);
>  	if (IS_ERR(obj)) {
> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> index 01d1d2088f04..bf62d3d08e82 100644
> --- a/drivers/gpu/drm/i915/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> @@ -457,8 +457,9 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv,
>  	 */
>  
>  	/* Try to over-allocate to reduce reallocations and fragmentation. */
> -	ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size <<= 1,
> -						   4096, 0, end);
> +	ret = i915_gem_stolen_insert_node_in_range(dev_priv, node,
> +						   size <<= 1, 4096,
> +						   0, end, DRM_MM_INSERT_BEST);
>  	if (ret == 0)
>  		return compression_threshold;
>  
> @@ -468,8 +469,9 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv,
>  	    (fb_cpp == 2 && compression_threshold == 2))
>  		return 0;
>  
> -	ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size >>= 1,
> -						   4096, 0, end);
> +	ret = i915_gem_stolen_insert_node_in_range(dev_priv, node,
> +						   size >>= 1, 4096,
> +						   0, end, DRM_MM_INSERT_BEST);
>  	if (ret && INTEL_GEN(dev_priv) <= 4) {
>  		return 0;
>  	} else if (ret) {
> @@ -513,7 +515,8 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc)
>  			goto err_fb;
>  
>  		ret = i915_gem_stolen_insert_node(dev_priv, compressed_llb,
> -						  4096, 4096);
> +						  4096, 4096,
> +						  DRM_MM_INSERT_LOW);

We seem to allocate and free the line length buffer (compressed_llb)
alongside the CFB. So should this use DRM_MM_INSERT_BEST instead?

>  		if (ret)
>  			goto err_fb;
>  
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index fb2f9fce34cd..0e1ddbf1c5a0 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -140,7 +140,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  	 * features. */
>  	obj = NULL;
>  	if (size * 2 < dev_priv->stolen_usable_size)
> -		obj = i915_gem_object_create_stolen(dev_priv, size);
> +		obj = i915_gem_object_create_stolen(dev_priv,
> +						    size, DRM_MM_INSERT_LOW);
>  	if (obj == NULL)
>  		obj = i915_gem_object_create(dev_priv, size);
>  	if (IS_ERR(obj)) {
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 72eb7e48e8bc..b134b9cabf93 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1306,7 +1306,8 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
>  	struct i915_vma *vma;
>  	int err;
>  
> -	obj = i915_gem_object_create_stolen(overlay->i915, PAGE_SIZE);
> +	obj = i915_gem_object_create_stolen(overlay->i915,
> +					    PAGE_SIZE, DRM_MM_INSERT_LOW);
>  	if (obj == NULL)
>  		obj = i915_gem_object_create_internal(overlay->i915, PAGE_SIZE);
>  	if (IS_ERR(obj))
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index d99e5fabe93c..5d18301ba079 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7382,7 +7382,8 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
>  	 * overlap with other ranges, such as the frame buffer, protected
>  	 * memory, or any other relevant ranges.
>  	 */
> -	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
> +	pctx = i915_gem_object_create_stolen(dev_priv,
> +					     pctx_size, DRM_MM_INSERT_LOW);

I guess there was no special requirement for the placement of this.
AFAIK the BIOS always allocates it just below the WOPCM, but I suppose
it doesn't matter if we place it elsewhere.

>  	if (!pctx) {
>  		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
>  		goto out;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 472939f5c18f..e6a23a241cf3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1104,7 +1104,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
>  	struct drm_i915_gem_object *obj;
>  	struct i915_vma *vma;
>  
> -	obj = i915_gem_object_create_stolen(dev_priv, size);
> +	obj = i915_gem_object_create_stolen(dev_priv, size, DRM_MM_INSERT_BEST);

Should the ring buffers use DRM_MM_INSERT_LOW instead? We never reallocate them, right?

>  	if (!obj)
>  		obj = i915_gem_object_create_internal(dev_priv, size);
>  	if (IS_ERR(obj))
> -- 
> 2.19.0.rc2

-- 
Ville Syrjälä
Intel


More information about the Intel-gfx mailing list