[RFC PATCH v3 15/21] drm/exec: Add a snapshot capability

Thomas Hellström thomas.hellstrom at linux.intel.com
Wed May 22 14:41:04 UTC 2024


On Wed, 2024-05-22 at 15:54 +0200, Thomas Hellström wrote:
> On Wed, 2024-05-22 at 13:27 +0200, Christian König wrote:
> > Am 21.05.24 um 09:16 schrieb Thomas Hellström:
> > > When validating a buffer object for submission, we might need to
> > > lock
> > > a number of object for eviction to make room for the validation.
> > > 
> > > This makes it pretty likely that validation will eventually
> > > succeed,
> > > since eventually the validating process will hold most dma_resv
> > > locks
> > > of the buffer objects residing in the memory type being validated
> > > for.
> > > 
> > > However, once validation of a single object has succeeded it
> > > might
> > > not
> > > be beneficial to hold on to those locks anymore, and the
> > > validator
> > > would want to drop the locks of all objects taken during
> > > validation.
> > 
> > Exactly avoiding that was one of the goals of developing the
> > drm_exec
> > object.
> > 
> > When objects are unlocked after evicting them it just gives
> > concurrent 
> > operations an opportunity to lock them and re-validate them into
> > the 
> > contended domain.
> > 
> > So why should that approach here be beneficial at all?
> 
> It's a matter of being nice to the rest of the system while *still
> guaranteeing progress*. For each object we're trying to validate, we
> keep on evicting other objects until we make progress even if we lock
> all the objects in the domain.
> 
> If we were unlocking after each eviction, we can't really guarantee
> progress.
> 
> OTOH, a concurrent locker of the object may well be one with higher
> priority (lower ticket number) just wanting to perform a pagefault
> 
> So it's a tradeoff between locking just locking other processes out
> to
> allow us to make one step of progress and to in addition hit them
> with
> the big sledgehammer.

I thought I'd also mention that the ideal solution here I think would
be to have an rw_mutex per manager. Ordinary allocations take it in
read mode, evictions take it in write mode. Now the bad thing is it
sits in between ww_mutexes so it would have to be a ww_rw_mutex which
would probably be too nasty to implement.

/Thomas

> 
> /Thomas
> 
> > 
> > Regards,
> > Christian.
> > 
> > > 
> > > Introduce a drm_exec snapshot functionality that can be used to
> > > record the locks held at a certain time, and a restore
> > > functionality
> > > that restores the drm_exec state to the snapshot by dropping all
> > > locks.
> > > 
> > > Snapshots can be nested if needed.
> > > 
> > > Cc: Christian König <christian.koenig at amd.com>
> > > Cc: Somalapuram Amaranath <Amaranath.Somalapuram at amd.com>
> > > Cc: Matthew Brost <matthew.brost at intel.com>
> > > Cc: <dri-devel at lists.freedesktop.org>
> > > Signed-off-by: Thomas Hellström
> > > <thomas.hellstrom at linux.intel.com>
> > > ---
> > >   drivers/gpu/drm/drm_exec.c | 55
> > > +++++++++++++++++++++++++++++++++++++-
> > >   include/drm/drm_exec.h     | 23 +++++++++++++++-
> > >   2 files changed, 76 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/drm_exec.c
> > > b/drivers/gpu/drm/drm_exec.c
> > > index 1383680ffa4a..9eea5d0d3a98 100644
> > > --- a/drivers/gpu/drm/drm_exec.c
> > > +++ b/drivers/gpu/drm/drm_exec.c
> > > @@ -57,6 +57,7 @@ static void drm_exec_unlock_all(struct drm_exec
> > > *exec)
> > >   	struct drm_gem_object *obj;
> > >   	unsigned long index;
> > >   
> > > +	WARN_ON(exec->snap);
> > >   	drm_exec_for_each_locked_object_reverse(exec, index,
> > > obj)
> > > {
> > >   		dma_resv_unlock(obj->resv);
> > >   		drm_gem_object_put(obj);
> > > @@ -90,6 +91,7 @@ void drm_exec_init(struct drm_exec *exec, u32
> > > flags, unsigned nr)
> > >   	exec->num_objects = 0;
> > >   	exec->contended = DRM_EXEC_DUMMY;
> > >   	exec->prelocked = NULL;
> > > +	exec->snap = NULL;
> > >   }
> > >   EXPORT_SYMBOL(drm_exec_init);
> > >   
> > > @@ -301,7 +303,6 @@ int drm_exec_lock_obj(struct drm_exec *exec,
> > > struct drm_gem_object *obj)
> > >   		goto error_unlock;
> > >   
> > >   	return 0;
> > > -
> > >   error_unlock:
> > >   	dma_resv_unlock(obj->resv);
> > >   	return ret;
> > > @@ -395,5 +396,57 @@ int drm_exec_prepare_array(struct drm_exec
> > > *exec,
> > >   }
> > >   EXPORT_SYMBOL(drm_exec_prepare_array);
> > >   
> > > +/**
> > > + * drm_exec_restore() - Restore the drm_exec state to the point
> > > of
> > > a snapshot.
> > > + * @exec: The drm_exec object with the state.
> > > + * @snap: The snapshot state.
> > > + *
> > > + * Restores the drm_exec object by means of unlocking and
> > > dropping
> > > references
> > > + * to objects locked after the snapshot.
> > > + */
> > > +void drm_exec_restore(struct drm_exec *exec, struct
> > > drm_exec_snapshot *snap)
> > > +{
> > > +	struct drm_gem_object *obj;
> > > +	unsigned int index;
> > > +
> > > +	exec->snap = snap->saved_snap;
> > > +
> > > +	drm_exec_for_each_locked_object_reverse(exec, index,
> > > obj)
> > > {
> > > +		if (index + 1 == snap->num_locked)
> > > +			break;
> > > +
> > > +		dma_resv_unlock(obj->resv);
> > > +		drm_gem_object_put(obj);
> > > +		exec->objects[index] = NULL;
> > > +	}
> > > +
> > > +	exec->num_objects = snap->num_locked;
> > > +
> > > +	if (!exec->prelocked)
> > > +		exec->prelocked = snap->prelocked;
> > > +	else
> > > +		drm_gem_object_put(snap->prelocked);
> > > +}
> > > +EXPORT_SYMBOL(drm_exec_restore);
> > > +
> > > +/**
> > > + * drm_exec_snapshot() - Take a snapshot of the drm_exec state
> > > + * @exec: The drm_exec object with the state.
> > > + * @snap: The snapshot state.
> > > + *
> > > + * Records the @exec state in @snap. The @snap object is
> > > typically
> > > allocated
> > > + * in the stack of the caller.
> > > + */
> > > +void drm_exec_snapshot(struct drm_exec *exec, struct
> > > drm_exec_snapshot *snap)
> > > +{
> > > +	snap->num_locked = exec->num_objects;
> > > +	snap->prelocked = exec->prelocked;
> > > +	if (snap->prelocked)
> > > +		drm_gem_object_get(snap->prelocked);
> > > +	snap->saved_snap = exec->snap;
> > > +	exec->snap = snap;
> > > +}
> > > +EXPORT_SYMBOL(drm_exec_snapshot);
> > > +
> > >   MODULE_DESCRIPTION("DRM execution context");
> > >   MODULE_LICENSE("Dual MIT/GPL");
> > > diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
> > > index ea0f2117ee0c..0ce4d749511b 100644
> > > --- a/include/drm/drm_exec.h
> > > +++ b/include/drm/drm_exec.h
> > > @@ -19,7 +19,6 @@ struct drm_exec {
> > >   	 * @flags: Flags to control locking behavior
> > >   	 */
> > >   	u32                     flags;
> > > -
> > >   	/**
> > >   	 * @ticket: WW ticket used for acquiring locks
> > >   	 */
> > > @@ -49,6 +48,25 @@ struct drm_exec {
> > >   	 * @prelocked: already locked GEM object due to
> > > contention
> > >   	 */
> > >   	struct drm_gem_object *prelocked;
> > > +
> > > +	/**
> > > +	 * @snap: Pointer to the last snapshot taken or NULL if
> > > none.
> > > +	 */
> > > +	struct drm_exec_snapshot *snap;
> > > +};
> > > +
> > > +/**
> > > + * struct drm_exec_snapshot - drm_exec snapshot information
> > > + */
> > > +struct drm_exec_snapshot {
> > > +	/** @saved_snap: Pointer to the previous snapshot or
> > > NULL.
> > > */
> > > +	struct drm_exec_snapshot *saved_snap;
> > > +
> > > +	/** @prelocked: Refcounted pointer to the prelocked
> > > object
> > > at snapshot time. */
> > > +	struct drm_gem_object *prelocked;
> > > +
> > > +	/** @num_locked: Number of locked objects at snapshot
> > > time. */
> > > +	unsigned long num_locked;
> > >   };
> > >   
> > >   int drm_exec_handle_contended(struct drm_exec *exec);
> > > @@ -160,5 +178,8 @@ int drm_exec_prepare_array(struct drm_exec
> > > *exec,
> > >   			   struct drm_gem_object **objects,
> > >   			   unsigned int num_objects,
> > >   			   unsigned int num_fences);
> > > +void drm_exec_snapshot(struct drm_exec *exec, struct
> > > drm_exec_snapshot *snap);
> > > +void drm_exec_restore(struct drm_exec *exec, struct
> > > drm_exec_snapshot *snap);
> > > +
> > >   
> > >   #endif
> > 
> 



More information about the dri-devel mailing list