[PATCH v5 07/19] media: vsp1: dl: Support one-shot entries in the display list

Laurent Pinchart laurent.pinchart at ideasonboard.com
Thu Mar 7 13:48:23 UTC 2019


Hi Liviu,

On Thu, Mar 07, 2019 at 11:52:18AM +0000, Liviu Dudau wrote:
> On Wed, Mar 06, 2019 at 08:01:53PM +0200, Laurent Pinchart wrote:
> > On Wed, Mar 06, 2019 at 02:20:51PM +0000, Liviu Dudau wrote:
> >> On Wed, Mar 06, 2019 at 01:14:40AM +0200, Laurent Pinchart wrote:
> >>> On Fri, Feb 22, 2019 at 03:06:19PM +0000, Brian Starkey wrote:
> >>>> On Fri, Feb 22, 2019 at 04:46:29PM +0200, Laurent Pinchart wrote:
> >>>>> On Fri, Feb 22, 2019 at 02:30:03PM +0000, Brian Starkey wrote:
> >>>>>> On Thu, Feb 21, 2019 at 12:32:00PM +0200, Laurent Pinchart wrote:
> >>>>>>> One-shot entries are used as an alternative to committing a complete new
> >>>>>>> display list when a couple of registers need to be written for one frame
> >>>>>>> and then reset to another value for all subsequent frames. This will be
> >>>>>>> used to implement writeback support that will need to enable writeback
> >>>>>>> for the duration of a single frame.
> >>>>>>> 
> >>>>>>> Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
> >>>>>>> ---
> >>>>>>>  drivers/media/platform/vsp1/vsp1_dl.c | 78 +++++++++++++++++++++++++++
> >>>>>>>  drivers/media/platform/vsp1/vsp1_dl.h |  3 ++
> >>>>>>>  2 files changed, 81 insertions(+)
> >>>>>>> 
> >>>>>>> diff --git a/drivers/media/platform/vsp1/vsp1_dl.c b/drivers/media/platform/vsp1/vsp1_dl.c
> >>>>>>> index 886b3a69d329..7b4d252bfde7 100644
> >>>>>>> --- a/drivers/media/platform/vsp1/vsp1_dl.c
> >>>>>>> +++ b/drivers/media/platform/vsp1/vsp1_dl.c
> >>>>>>> @@ -115,6 +115,12 @@ struct vsp1_dl_body {
> >>>>>>>  
> >>>>>>>  	unsigned int num_entries;
> >>>>>>>  	unsigned int max_entries;
> >>>>>>> +
> >>>>>>> +	unsigned int num_patches;
> >>>>>>> +	struct {
> >>>>>>> +		struct vsp1_dl_entry *entry;
> >>>>>>> +		u32 data;
> >>>>>>> +	} patches[2];
> >>>>>>>  };
> >>>>>>>  
> >>>>>>>  /**
> >>>>>>> @@ -361,6 +367,7 @@ void vsp1_dl_body_put(struct vsp1_dl_body *dlb)
> >>>>>>>  		return;
> >>>>>>>  
> >>>>>>>  	dlb->num_entries = 0;
> >>>>>>> +	dlb->num_patches = 0;
> >>>>>>>  
> >>>>>>>  	spin_lock_irqsave(&dlb->pool->lock, flags);
> >>>>>>>  	list_add_tail(&dlb->free, &dlb->pool->free);
> >>>>>>> @@ -388,6 +395,47 @@ void vsp1_dl_body_write(struct vsp1_dl_body *dlb, u32 reg, u32 data)
> >>>>>>>  	dlb->num_entries++;
> >>>>>>>  }
> >>>>>>>  
> >>>>>>> +/**
> >>>>>>> + * vsp1_dl_body_write_oneshot - Write a register to a display list body for a
> >>>>>>> + *	single frame
> >>>>>>> + * @dlb: The body
> >>>>>>> + * @reg: The register address
> >>>>>>> + * @value: The register value
> >>>>>>> + * @reset_value: The value to reset the register to at the next vblank
> >>>>>>> + *
> >>>>>>> + * Display lists in continuous mode are re-used by the hardware for successive
> >>>>>>> + * frames until a new display list is committed. Changing the VSP configuration
> >>>>>>> + * normally requires creating and committing a new display list. This function
> >>>>>>> + * offers an alternative race-free way by writing a @value to the @register in
> >>>>>>> + * the display list body for a single frame, specifying in @reset_value the
> >>>>>>> + * value to reset the register to one vblank after the display list is
> >>>>>>> + * committed.
> >>>>>>> + *
> >>>>>>> + * The maximum number of one-shot entries is limited to 2 per display list body,
> >>>>>>> + * and one-shot entries are counted in the total number of entries specified
> >>>>>>> + * when the body is allocated by vsp1_dl_body_alloc().
> >>>>>>> + */
> >>>>>>> +void vsp1_dl_body_write_oneshot(struct vsp1_dl_body *dlb, u32 reg, u32 value,
> >>>>>>> +				u32 reset_value)
> >>>>>>> +{
> >>>>>>> +	if (WARN_ONCE(dlb->num_entries >= dlb->max_entries,
> >>>>>>> +		      "DLB size exceeded (max %u)", dlb->max_entries))
> >>>>>>> +		return;
> >>>>>>> +
> >>>>>>> +	if (WARN_ONCE(dlb->num_patches >= ARRAY_SIZE(dlb->patches),
> >>>>>>> +		      "DLB patches size exceeded (max %zu)",
> >>>>>>> +		      ARRAY_SIZE(dlb->patches)))
> >>>>>>> +		return;
> >>>>>>> +
> >>>>>>> +	dlb->patches[dlb->num_patches].entry = &dlb->entries[dlb->num_entries];
> >>>>>>> +	dlb->patches[dlb->num_patches].data = reset_value;
> >>>>>>> +	dlb->num_patches++;
> >>>>>>> +
> >>>>>>> +	dlb->entries[dlb->num_entries].addr = reg;
> >>>>>>> +	dlb->entries[dlb->num_entries].data = value;
> >>>>>>> +	dlb->num_entries++;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>  /* -----------------------------------------------------------------------------
> >>>>>>>   * Display List Extended Command Management
> >>>>>>>   */
> >>>>>>> @@ -652,6 +700,7 @@ static void __vsp1_dl_list_put(struct vsp1_dl_list *dl)
> >>>>>>>  	 * has at least one body, thus we reinitialise the entries list.
> >>>>>>>  	 */
> >>>>>>>  	dl->body0->num_entries = 0;
> >>>>>>> +	dl->body0->num_patches = 0;
> >>>>>>>  
> >>>>>>>  	list_add_tail(&dl->list, &dl->dlm->free);
> >>>>>>>  }
> >>>>>>> @@ -930,6 +979,35 @@ void vsp1_dl_list_commit(struct vsp1_dl_list *dl, unsigned int dl_flags)
> >>>>>>>   * Display List Manager
> >>>>>>>   */
> >>>>>>>  
> >>>>>>> +/**
> >>>>>>> + * vsp1_dlm_irq_display_start - Display list handler for the display start
> >>>>>>> + *	interrupt
> >>>>>>> + * @dlm: the display list manager
> >>>>>>> + *
> >>>>>>> + * Apply all one-shot patches registered for the active display list.
> >>>>>>> + */
> >>>>>>> +void vsp1_dlm_irq_display_start(struct vsp1_dl_manager *dlm)
> >>>>>>> +{
> >>>>>>> +	struct vsp1_dl_body *dlb;
> >>>>>>> +	struct vsp1_dl_list *dl;
> >>>>>>> +	unsigned int i;
> >>>>>>> +
> >>>>>>> +	spin_lock(&dlm->lock);
> >>>>>>> +
> >>>>>>> +	dl = dlm->active;
> >>>>>>> +	if (!dl)
> >>>>>>> +		goto done;
> >>>>>>> +
> >>>>>>> +	list_for_each_entry(dlb, &dl->bodies, list) {
> >>>>>>> +		for (i = 0; i < dlb->num_patches; ++i)
> >>>>>>> +			dlb->patches[i].entry->data = dlb->patches[i].data;
> >>>>>>> +		dlb->num_patches = 0;
> >>>>>>> +	}
> >>>>>>> +
> >>>>>>> +done:
> >>>>>>> +	spin_unlock(&dlm->lock);
> >>>>>>> +}
> >>>>>>> +
> >>>>>> 
> >>>>>> We've got some HW which doesn't support one-shot writeback, and use a
> >>>>>> similar trick to try and disable writeback immediately after the flip.
> >>>>>> 
> >>>>>> We ran into issues where the "start" interrupt wouldn't run in time to
> >>>>>> make sure the writeback disable was committed before the next frame.
> >>>>>> We have to keep track of whether the disable really happened in time,
> >>>>>> before we release the output buffer.
> >>>>>> 
> >>>>>> Might you have a similar problem here?
> >>>>> 
> >>>>> We may, but there's no provision at the hardware level to check if the
> >>>>> configuration updated happened in time. I could add some safety checks
> >>>>> but I believe they would be racy in the best case :-(
> >>>> 
> >>>> We managed to find (what I believe to be...) a non-racy way, but it
> >>>> will of course depend a lot on the HW behaviour, so I'll leave it to
> >>>> your best judgement.
> >>>> 
> >>>> We basically have a "configuration committed" interrupt which we can
> >>>> use to set a flag indicating writeback was disabled.
> >>> 
> >>> The way my hardware is operated is, roughly, as follows:
> >>> 
> >>> - The driver prepares a list of values to write to registers and stores
> >>>   them in DMA-able memory. This doesn't involve the hardware, and so is
> >>>   completely asynchronous to hardware operation.
> >>> 
> >>> - The driver then sets a register with the pointer to the registers
> >>>   list.
> >>> 
> >>> - At the end of the current frame, the hardware reads the memory address
> >>>   of the registers list and DMAs values to register. This operation is
> >>>   fast and occurs fully during vertical blanking.
> >>> 
> >>> - The hardware then waits for the start of the frame, and begins
> >>>   processing the framebuffers to send the frame to the display.
> >>> 
> >>> - If no new registers list is provided for the next frame, the current
> >>>   list is reused.
> >> 
> >> Does this mean the hardware goes back to step 3 and re-reads the memory
> >> using DMA?
> > 
> > That's correct. At frame end the hardware will always read and apply a
> > registers list, either a new one or the old one if no new list is
> > programmed.
> > 
> >>> Two interrupts are provided, one at the start of the frame and one at
> >>> the end of the frame. The driver uses the end of frame interrupt to
> >>> signal vertical blanking and page flip completion to DRM.
> >>> 
> >>> The end of frame interrupt is also used to schedule atomic commits. The
> >>> hardware queue depth for atomic commits is just one, so the driver has
> >>> to wait until the previous commit has been processed before writing the
> >>> new registers list address to the hardware. To solve the race between
> >>> the end of frame interrupt and the address write, the hardware provides
> >>> a status bit that is set to 1 when the address is written, and reset to
> >>> 0 when the hardware starts processing the registers list.
> >> 
> >> For my understanding, the following simplication is then true: status = 0
> >> means driver is allowed to update the pointer register, status = 1 means
> >> currently programmed pointer is used to fetch the register values, right?
> > 
> > Almost. status = 1 means that the pointer has been programmed by the
> > CPU, but not taken into account by the hardware yet. status = 0 means
> > that the pointer hasn't been written to by the CPU since the last time
> > the hardware read it.
> 
> Right, so CPU writes to the pointer register and that sets status to 1,
> and when the HW has copied that value into internal DMA registers it clears
> it, correct?

Yes, that's correct.

> >> What happens if you update the pointer while status = 1? DMA transfer gets
> >> interrupted and a new one is scheduled? Does the DMA transfer get cancelled?
> > 
> > You should not do that, as it's racy. The hardware will use either the
> > old value of the pointer or new new one, based on the timings of the
> > reprogrammation, and you can't tell which one is used. Nothing can
> > cancel the DMA, when it's time to start the DMA the hardware will
> > transfer the value of the pointer to an internal register and use that
> > for DMA. Reprogramming the pointer during the DMA will not affect the
> > DMA, the new value will be taken into account for the next DMA.
> 
> It looks to me like you can do the in-place update of the writeback
> framebuffer address at the end of the current frame (vblank time(?)). If
> you wait until status = 0 then you know the next frame parameters are
> being "internalised", so you can set in the commit you're about to queue
> the disabling of the writeback if that commit doesn't have a fresh
> writeback request (phew, that's a mouthful).

My problem is that there may not be a next commit, if userspace hasn't
queued one. Otherwise this is what I would do. My initial implementation
was queuing two commits for writeback, one with writeback enabled, and
immediately after a second one copied from the first but with writeback
disabled. This halved the frame rate, as the next commit from userspace
had to first wait for completion of the writeback disabling commit. The
real issue here is that I would need to queue the writeback disabling
commit right after the writeback enabling commit to make sure it gets
processed for the next frame, but once its queued any userspace commit
would need to wait one extra frame as a queued commit can be processed
at any time and thus can't be replaced. That's why I decided to modify
the registers list in place instead of queueing a new commit to disable
writeback.

> I don't think you need to wait until the next start of frame interrupt
> to do that. Also, vblank time is probably the time you want to signal
> the completion of the previous writeback anyway, right?

Correct, that's when I signal it.

> >>> We thus have three states for an atomic commit:
> >>> 
> >>> - active, where the corresponding registers list address has been
> >>>   written to the hardware, and processed
> >>> 
> >>> - queued, where the corresponding registers list address has been
> >>>   written to the hardware but not processed yet
> >>> 
> >>> - pending, where the corresponding registers list address hasn't been
> >>>   written to the hardware yet
> >>> 
> >>> The status bit mentioned above allows us to tell if a list exists in the
> >>> queued state.
> >>> 
> >>> At frame end time, if the status bit is set, we have potentially lost
> >>> the race between writing the new registers list and the frame end
> >>> interrupt, so we wait for one more vblank. Otherwise, if a list was
> >>> queued, we move it to the active state, and retire the active list. If a
> >>> list was pending, we write its address to the hardware, and move it to
> >>> the queued state.
> 
> It looks to me like the moving of the pending state into queued state is your
> opportunity to also in-place modify the writeback registers if the state does
> not have its own writeback request.

Moving from pending to queued means the pointer has been given to the
hardware, but not processed yet. I need to wait until the commit that
enables writeback is fully processed before modifying it in-place to
disable writeback, and that's at the frame start following the move from
the queued state to the active state.

> >>> To schedule writeback I ended up using the frame start interrupt.
> >>> Writeback has a specific need in that it requires enabling the memory
> >>> write interface for a single frame, and that is not something the
> >>> hardware supports directly. I can't either queue a new registers list
> >>> with the write interface disabled right after the list with the
> >>> writeback request became active, as any atomic commit would then be
> >>> delayed by an extra frame. I thus ended up modifying the registers list
> >>> in place at frame start time, which is right after the active register
> >>> list has been DMA'ed to hardware registers. We have the duration of a
> >>> whole frame to do so, before the hardware transfers the same list again
> >>> at the beginning of the next frame.
> >> 
> >> Yes, that is like to what we do for DP500 in mali-dp, where we have a
> >> similar situation. HW will continue to stream to the given buffer until
> >> stopped, but the writeback API is one-shot mode, so on vblank (when we
> >> know the writeback will start at the beginning of the new frame) we
> >> program the memwrite to stop, but that only takes effect when we also
> >> program the GO bit (CONFIG_VALID in our case).
> >> 
> >> One problem we had to take care for DP500 was when the next commit (the
> >> queued one in your case) also contains a writeback buffer. In that case,
> >> we don't stop the writeback but "re-start" it.
> >> 
> >>> There is a race condition there, as the in-place modification of the
> >>> active list could be done after the end of the frame if the interrupt
> >>> latency gets too large. I don't see how I could solve that, as I could
> >>> write the value after the hardware starts processing the active list at
> >>> frame end time, but before the interrupt is notified.
> >> 
> >> I think that would be a better idea if the HW finishes to transfer
> >> before the end of frame interrupt is signalled. Otherwise if your
> >> interrupt handler is scheduled too fast, you might overwrite the active
> >> frame values?
> > 
> > That can't happen, as I patch the active registers list in-place in the
> > frame start interrupt handler, and frame start occurs after the hardware
> > finishes the DMA of the registers list.
> > 
> >>>>> Note that we have the duration of a complete frame to disable writeback,
> >>>>> as we receive an interrupt when the frame starts, and have until vblank
> >>>>> to update the configuration. It's thus slightly better than having to
> >>>>> disable writeback between vblank and the start of the next frame.
> >>>> 
> >>>> Yeah... we have a whole frame too. I'm struggling to find our wiki
> >>>> page with the data, but anecdotally there's some (out-of-tree) drivers
> >>>> which keep interrupts masked for a _really long_ time. It's nice if
> >>>> you don't have to care about those :-)
> >>> 
> >>> I only provide two options: not my problem, or give me the source code
> >>> and documentation and let me upstream a clean version of the out-of-tree
> >>> drivers :-)
> >>> 
> >>>>>>>  /**
> >>>>>>>   * vsp1_dlm_irq_frame_end - Display list handler for the frame end interrupt
> >>>>>>>   * @dlm: the display list manager
> >>>>>>> diff --git a/drivers/media/platform/vsp1/vsp1_dl.h b/drivers/media/platform/vsp1/vsp1_dl.h
> >>>>>>> index e0fdb145e6ed..f845607abc4c 100644
> >>>>>>> --- a/drivers/media/platform/vsp1/vsp1_dl.h
> >>>>>>> +++ b/drivers/media/platform/vsp1/vsp1_dl.h
> >>>>>>> @@ -54,6 +54,7 @@ struct vsp1_dl_manager *vsp1_dlm_create(struct vsp1_device *vsp1,
> >>>>>>>  					unsigned int prealloc);
> >>>>>>>  void vsp1_dlm_destroy(struct vsp1_dl_manager *dlm);
> >>>>>>>  void vsp1_dlm_reset(struct vsp1_dl_manager *dlm);
> >>>>>>> +void vsp1_dlm_irq_display_start(struct vsp1_dl_manager *dlm);
> >>>>>>>  unsigned int vsp1_dlm_irq_frame_end(struct vsp1_dl_manager *dlm);
> >>>>>>>  struct vsp1_dl_body *vsp1_dlm_dl_body_get(struct vsp1_dl_manager *dlm);
> >>>>>>>  
> >>>>>>> @@ -71,6 +72,8 @@ struct vsp1_dl_body *vsp1_dl_body_get(struct vsp1_dl_body_pool *pool);
> >>>>>>>  void vsp1_dl_body_put(struct vsp1_dl_body *dlb);
> >>>>>>>  
> >>>>>>>  void vsp1_dl_body_write(struct vsp1_dl_body *dlb, u32 reg, u32 data);
> >>>>>>> +void vsp1_dl_body_write_oneshot(struct vsp1_dl_body *dlb, u32 reg, u32 value,
> >>>>>>> +				u32 reset_value);
> >>>>>>>  int vsp1_dl_list_add_body(struct vsp1_dl_list *dl, struct vsp1_dl_body *dlb);
> >>>>>>>  int vsp1_dl_list_add_chain(struct vsp1_dl_list *head, struct vsp1_dl_list *dl);
> >>>>>>>  

-- 
Regards,

Laurent Pinchart


More information about the dri-devel mailing list