[Intel-gfx] [PATCH v5 1/7] drm/i915: Implement a framework for batch buffer pools

Michael H. Nguyen michael.h.nguyen at intel.com
Mon Dec 8 10:29:27 PST 2014



On 12/08/2014 07:19 AM, Bloomfield, Jon wrote:
>> -----Original Message-----
>> From: Nguyen, Michael H
>> Sent: Wednesday, November 26, 2014 9:54 PM
>> To: intel-gfx at lists.freedesktop.org
>> Cc: Bloomfield, Jon; Volkin, Bradley D
>> Subject: [PATCH v5 1/7] drm/i915: Implement a framework for batch buffer
>> pools
>>
>> From: Brad Volkin <bradley.d.volkin at intel.com>
>>
>> This adds a small module for managing a pool of batch buffers.
>> The only current use case is for the command parser, as described in the
>> kerneldoc in the patch. The code is simple, but separating it out makes it
>> easier to change the underlying algorithms and to extend to future use cases
>> should they arise.
>>
>> The interface is simple: init to create an empty pool, fini to clean it up, get to
>> obtain a new buffer. Note that all buffers are expected to be inactive before
>> cleaning up the pool.
>>
>> Locking is currently based on the caller holding the struct_mutex.
>> We already do that in the places where we will use the batch pool for the
>> command parser.
>>
>> v2:
>> - s/BUG_ON/WARN_ON/ for locking assertions
>> - Remove the cap on pool size
>> - Switch from alloc/free to init/fini
>>
>> v3:
>> - Idiomatic looping structure in _fini
>> - Correct handling of purged objects
>> - Don't return a buffer that's too much larger than needed
>>
>> v4:
>> - Rebased to latest -nightly
>>
>> v5:
>> - Remove _put() function and clean up comments to match
>>
>> v6:
>> - Move purged check inside the loop (danvet, from v4 1/7 feedback)
>>
>> Issue: VIZ-4719
>> Signed-off-by: Brad Volkin <bradley.d.volkin at intel.com>
>> ---
>>   Documentation/DocBook/drm.tmpl             |   5 +
>>   drivers/gpu/drm/i915/Makefile              |   1 +
>>   drivers/gpu/drm/i915/i915_drv.h            |  15 +++
>>   drivers/gpu/drm/i915/i915_gem.c            |   1 +
>>   drivers/gpu/drm/i915/i915_gem_batch_pool.c | 152 +++++++++++++++++++++++++++++
>>   5 files changed, 174 insertions(+)
>>   create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>
>> diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
>> index 944e8ed..b5943d4 100644
>> --- a/Documentation/DocBook/drm.tmpl
>> +++ b/Documentation/DocBook/drm.tmpl
>> @@ -4028,6 +4028,11 @@ int num_ioctls;</synopsis>
>> !Idrivers/gpu/drm/i915/i915_cmd_parser.c
>>         </sect2>
>>         <sect2>
>> +        <title>Batchbuffer Pools</title>
>> +!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
>> +!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
>> +      </sect2>
>> +      <sect2>
>>           <title>Logical Rings, Logical Ring Contexts and Execlists</title>
>> !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and Execlists
>> !Idrivers/gpu/drm/i915/intel_lrc.c
>> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>> index e4083e4..c8dbb37d 100644
>> --- a/drivers/gpu/drm/i915/Makefile
>> +++ b/drivers/gpu/drm/i915/Makefile
>> @@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
>>
>>   # GEM code
>>   i915-y += i915_cmd_parser.o \
>> +	  i915_gem_batch_pool.o \
>>   	  i915_gem_context.o \
>>   	  i915_gem_render_state.o \
>>   	  i915_gem_debug.o \
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index c4f2cb6..b12a062 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1154,6 +1154,12 @@ struct intel_l3_parity {
>>   	int which_slice;
>>   };
>>
>> +struct i915_gem_batch_pool {
>> +	struct drm_device *dev;
>> +	struct list_head active_list;
>> +	struct list_head inactive_list;
>> +};
>> +
>>   struct i915_gem_mm {
>>   	/** Memory allocator for GTT stolen memory */
>>   	struct drm_mm stolen;
>> @@ -1885,6 +1891,8 @@ struct drm_i915_gem_object {
>>   	/** Used in execbuf to temporarily hold a ref */
>>   	struct list_head obj_exec_link;
>>
>> +	struct list_head batch_pool_list;
>> +
>>   	/**
>>   	 * This is set if the object is on the active lists (has pending
>>   	 * rendering and so a non-zero seqno), and is not set if it is on
>> @@ -2849,6 +2857,13 @@ void i915_destroy_error_state(struct drm_device *dev);
>>   void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
>>   const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
>>
>> +/* i915_gem_batch_pool.c */
>> +void i915_gem_batch_pool_init(struct drm_device *dev,
>> +			      struct i915_gem_batch_pool *pool);
>> +void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
>> +struct drm_i915_gem_object*
>> +i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
>> +
>>   /* i915_cmd_parser.c */
>>   int i915_cmd_parser_get_version(void);
>>   int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 86cf428..b19dd06 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -4433,6 +4433,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>>   	INIT_LIST_HEAD(&obj->ring_list);
>>   	INIT_LIST_HEAD(&obj->obj_exec_link);
>>   	INIT_LIST_HEAD(&obj->vma_list);
>> +	INIT_LIST_HEAD(&obj->batch_pool_list);
>>
>>   	obj->ops = ops;
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>> new file mode 100644
>> index 0000000..ccbe8f9
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>> @@ -0,0 +1,152 @@
>> +/*
>> + * Copyright © 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "i915_drv.h"
>> +
>> +/**
>> + * DOC: batch pool
>> + *
>> + * In order to submit batch buffers as 'secure', the software command parser
>> + * must ensure that a batch buffer cannot be modified after parsing. It does
>> + * this by copying the user provided batch buffer contents to a kernel owned
>> + * buffer from which the hardware will actually execute, and by carefully
>> + * managing the address space bindings for such buffers.
>> + *
>> + * The batch pool framework provides a mechanism for the driver to manage a
>> + * set of scratch buffers to use for this purpose. The framework can be
>> + * extended to support other use cases should they arise.
>> + */
>> +
>> +/**
>> + * i915_gem_batch_pool_init() - initialize a batch buffer pool
>> + * @dev: the drm device
>> + * @pool: the batch buffer pool
>> + */
>> +void i915_gem_batch_pool_init(struct drm_device *dev,
>> +			      struct i915_gem_batch_pool *pool)
>> +{
>> +	pool->dev = dev;
>> +	INIT_LIST_HEAD(&pool->active_list);
>> +	INIT_LIST_HEAD(&pool->inactive_list);
>> +}
>> +
>> +/**
>> + * i915_gem_batch_pool_fini() - clean up a batch buffer pool
>> + * @pool: the pool to clean up
>> + *
>> + * Note: Callers must hold the struct_mutex.
>> + */
>> +void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
>> +{
>> +	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
>> +
>> +	while (!list_empty(&pool->active_list)) {
>> +		struct drm_i915_gem_object *obj =
>> +			list_first_entry(&pool->active_list,
>> +					 struct drm_i915_gem_object,
>> +					 batch_pool_list);
>> +
>> +		WARN_ON(obj->active);
>> +
>> +		list_del_init(&obj->batch_pool_list);
>> +		drm_gem_object_unreference(&obj->base);
>> +	}
>> +
>> +	while (!list_empty(&pool->inactive_list)) {
>> +		struct drm_i915_gem_object *obj =
>> +			list_first_entry(&pool->inactive_list,
>> +					 struct drm_i915_gem_object,
>> +					 batch_pool_list);
>> +
>> +		list_del_init(&obj->batch_pool_list);
>> +		drm_gem_object_unreference(&obj->base);
>> +	}
>> +}
>> +
>> +/**
>> + * i915_gem_batch_pool_get() - select a buffer from the pool
>> + * @pool: the batch buffer pool
>> + * @size: the minimum desired size of the returned buffer
>> + *
>> + * Finds or allocates a batch buffer in the pool with at least the
>> +requested
>> + * size. The caller is responsible for any domain, active/inactive, or
>> + * purgeability management for the returned buffer.
>> + *
>> + * Note: Callers must hold the struct_mutex
>> + *
>> + * Return: the selected batch buffer object
>> + */
>> +struct drm_i915_gem_object *
>> +i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>> +			size_t size)
>> +{
>> +	struct drm_i915_gem_object *obj = NULL;
>> +	struct drm_i915_gem_object *tmp, *next;
>> +	bool was_purged;
>> +
>> +	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
>> +
>> +	list_for_each_entry_safe(tmp, next,
>> +				 &pool->active_list, batch_pool_list) {
>> +		if (!tmp->active)
>> +			list_move_tail(&tmp->batch_pool_list,
>> +				       &pool->inactive_list);
>> +	}
>> +
>> +	do {
>> +		was_purged = false;
>> +
>> +		list_for_each_entry_safe(tmp, next,
>> +				&pool->inactive_list, batch_pool_list) {
>> +			/*
>> +			 * Select a buffer that is at least as big as needed
>> +			 * but not 'too much' bigger. A better way to do this
>> +			 * might be to bucket the pool objects based on size.
>> +			 */
>> +			if (tmp->base.size >= size &&
>> +			    tmp->base.size <= (2 * size)) {
>> +				obj = tmp;
>> +				break;
>> +			}
>> +
>> +			if (tmp->madv == __I915_MADV_PURGED) {
>> +				was_purged = true;
>> +				list_del(&tmp->batch_pool_list);
>> +				drm_gem_object_unreference(&tmp->base);
>> +			}
>> +		}
>> +	} while (was_purged);
>
> I don't understand what the outer-loop/was_purged is trying to do here.

There was some unhappiness about this from Daniel and Chris. Taking their 
feedback, I ended up with an RFC snippet (linked below) intended to 
simplify the code and make the logic easier to follow.

http://lists.freedesktop.org/archives/intel-gfx/2014-December/056579.html

In a single loop, we accomplish two goals: (a) do some house cleaning 
(freeing purged objects) while walking the list, and (b) find a suitable 
obj from the pool (one that is not active and not purged).
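
Roughly, the get() path then looks something like the sketch below. This 
is only an illustration of the idea, not the exact code from the link; it 
assumes the active/inactive lists collapse into a single pool list, which 
I've called cache_list here purely for the example:

/*
 * Sketch of a single-pass i915_gem_batch_pool_get(): free purged
 * buffers as we walk the list, skip buffers still active on the GPU,
 * and stop at the first idle buffer of a suitable size. If nothing
 * fits, allocate a fresh object.
 */
struct drm_i915_gem_object *
i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size)
{
	struct drm_i915_gem_object *obj = NULL;
	struct drm_i915_gem_object *tmp, *next;

	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));

	list_for_each_entry_safe(tmp, next, &pool->cache_list, batch_pool_list) {
		if (tmp->active)
			continue;

		/* House cleaning: drop buffers whose pages were purged */
		if (tmp->madv == __I915_MADV_PURGED) {
			list_del(&tmp->batch_pool_list);
			drm_gem_object_unreference(&tmp->base);
			continue;
		}

		/* At least as big as needed, but not 'too much' bigger */
		if (tmp->base.size >= size && tmp->base.size <= 2 * size) {
			obj = tmp;
			break;
		}
	}

	if (!obj) {
		obj = i915_gem_alloc_object(pool->dev, size);
		if (!obj)
			return ERR_PTR(-ENOMEM);

		list_add_tail(&obj->batch_pool_list, &pool->cache_list);
	}

	return obj;
}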

They seemed happy w/ it. Will send it in the next rev of the series.

Thanks,
Mike

>
> I might be missing something subtle, but the following scenarios seem to have issues:
>
> 	1) There are no suitable objects in the pool, but there are purgeable objects
> 		We'll loop through the whole list, purging any purgeable objects.
> 		On completion of the inner loop, was_purged==true, so we'll go back through the
> 		whole list again. We still won't find any suitable objects, and won't find any more
> 		purgeable objects either, so the second pass is redundant.
>
> 	2) There is a suitable object, but there are purgeable objects before it
> 		We'll purge objects up to the first suitable object, and break out of the inner loop
> 		We'll then go back through a second time because was_purged is set, and select
> 		the exact same object as we selected last time.
> 		Any purgeable objects after the selected object will remain un-purged
>
> If the intention here is to ensure we always purge all purgeable objects, then I
> think we just need to remove the break, and the outer-loop, along with was_purged.
> This will always traverse the entire list, and end with obj set to the last suitable object
> in the list (if any).
>
> If you want to be optimal you could adjust the condition to only choose objects that are
> smaller than any previously selected object.
>
>> +
>> +	if (!obj) {
>> +		obj = i915_gem_alloc_object(pool->dev, size);
>> +		if (!obj)
>> +			return ERR_PTR(-ENOMEM);
>> +
>> +		list_add_tail(&obj->batch_pool_list, &pool->inactive_list);
>> +	}
>> +
>> +	list_move_tail(&obj->batch_pool_list, &pool->active_list);
>> +
>> +	return obj;
>> +}
>> --
>> 1.9.1
>

