[RFC PATCH 4/4] drm/panthor: Use GPU_COMMAND.FLUSH_CACHES for cache maintenance
Steven Price
steven.price at arm.com
Mon Dec 23 17:05:06 UTC 2024
On 19/12/2024 17:05, Karunika Choo wrote:
> This patch adds support for performing cache maintenance operations via
> the GPU_CONTROL.GPU_COMMAND register instead of using FLUSH_PT or
> FLUSH_MEM commands from the AS_COMMAND register. This feature is enabled
> when the HW feature bit (PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH) is
> set.
>
> Signed-off-by: Karunika Choo <karunika.choo at arm.com>
> ---
> drivers/gpu/drm/panthor/panthor_gpu.c | 2 +-
> drivers/gpu/drm/panthor/panthor_hw.c | 3 ++
> drivers/gpu/drm/panthor/panthor_hw.h | 4 +++
> drivers/gpu/drm/panthor/panthor_mmu.c | 46 +++++++++++++++++++++++++--
> 4 files changed, 52 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 9dadcea67a39..30dcb50409dd 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -276,7 +276,7 @@ int panthor_gpu_flush_caches(struct panthor_device *ptdev,
>
> if (!wait_event_timeout(ptdev->gpu->reqs_acked,
> !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED),
> - msecs_to_jiffies(100))) {
> + msecs_to_jiffies(1000))) {
Unrelated change (or at least not mentioned in the commit message).
> spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
> if ((ptdev->gpu->pending_reqs &
> GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 &&
> diff --git a/drivers/gpu/drm/panthor/panthor_hw.c b/drivers/gpu/drm/panthor/panthor_hw.c
> index 0fb3adc093bc..3738f7fd106e 100644
> --- a/drivers/gpu/drm/panthor/panthor_hw.c
> +++ b/drivers/gpu/drm/panthor/panthor_hw.c
> @@ -20,6 +20,9 @@ static struct panthor_hw panthor_hw_devices[] = {
> [PANTHOR_ARCH_10_8] = {
> .arch_id = GPU_ARCH_ID_MAKE(10, 8, 0),
> .arch_mask = GPU_ARCH_ID_MAKE(0xFF, 0, 0),
> + .features = {
> + BIT(PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH)
> + },
It's a little odd to have a 'feature bit' to declare something that (so
far) every GPU supported by the driver does. Do we really need this
feature bit?
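(Illustrative only, not a request for this exact code: if every GPU
handled by the panthor_hw table can do cache maintenance through
GPU_CONTROL, the MMU code could key off the architecture major version
instead. GPU_ARCH_MAJOR() and ptdev->gpu_info.gpu_id already exist in
the driver; the helper name below is made up:

static bool gpu_ctrl_cache_flush_supported(struct panthor_device *ptdev)
{
	/* All CSF GPUs (arch major >= 10) take GPU_COMMAND cache flushes. */
	return GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id) >= 10;
}

Or the GPU_CONTROL flush path could just be used unconditionally.)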
> .map = {
> .mmu_as_base = ARCH_10_8_MMU_AS_BASE,
> .mmu_as_stride = ARCH_10_8_MMU_AS_STRIDE,
> diff --git a/drivers/gpu/drm/panthor/panthor_hw.h b/drivers/gpu/drm/panthor/panthor_hw.h
> index 3409083d09d0..69fa8f51a8c9 100644
> --- a/drivers/gpu/drm/panthor/panthor_hw.h
> +++ b/drivers/gpu/drm/panthor/panthor_hw.h
> @@ -13,6 +13,10 @@ struct panthor_device;
> * New feature flags will be added with support for newer GPU architectures.
> */
> enum panthor_hw_feature {
> + /** @PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH: Cache maintenance via GPU_CONTROL*/
> + PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH,
> +
> + /** @PANTHOR_HW_FEATURES_END: Number of HW feature bits */
> PANTHOR_HW_FEATURES_END
> };
>
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 8a190dd2e06c..91c420538e02 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -29,6 +29,7 @@
>
> #include "panthor_device.h"
> #include "panthor_gem.h"
> +#include "panthor_gpu.h"
> #include "panthor_heap.h"
> #include "panthor_hw.h"
> #include "panthor_mmu.h"
> @@ -533,12 +534,19 @@ static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd)
>
> /* write AS_COMMAND when MMU is ready to accept another command */
> status = wait_ready(ptdev, as_nr);
> - if (!status)
> - gpu_write(ptdev, MMU_AS(ptdev, as_nr) + AS_COMMAND, cmd);
> + if (status)
> + return status;
> +
> + gpu_write(ptdev, MMU_AS(ptdev, as_nr) + AS_COMMAND, cmd);
Please try to put simple cleanups like this in a separate patch -
there's no functional change here.
Steve
>
> return status;
> }
>
> +static int unlock_region(struct panthor_device *ptdev, u32 as_nr)
> +{
> + return write_cmd(ptdev, as_nr, AS_COMMAND_UNLOCK);
> +}
> +
> static void lock_region(struct panthor_device *ptdev, u32 as_nr,
> u64 region_start, u64 size)
> {
> @@ -573,6 +581,36 @@ static void lock_region(struct panthor_device *ptdev, u32 as_nr,
> write_cmd(ptdev, as_nr, AS_COMMAND_LOCK);
> }
>
> +static int mmu_hw_do_flush_on_gpu_ctrl(struct panthor_device *ptdev, int as_nr,
> + u32 op)
> +{
> + const u32 l2_flush = CACHE_CLEAN | CACHE_INV;
> + u32 lsc_flush = 0;
> + int ret;
> +
> + if (op == AS_COMMAND_FLUSH_MEM)
> + lsc_flush = CACHE_CLEAN | CACHE_INV;
> +
> + ret = wait_ready(ptdev, as_nr);
> + if (ret)
> + return ret;
> +
> + ret = panthor_gpu_flush_caches(ptdev, l2_flush, lsc_flush, 0);
> + if (ret)
> + return ret;
> +
> + /*
> + * Explicitly unlock the region as the AS is not unlocked
> + * automatically at the end of the operation, unlike FLUSH_MEM
> + * or FLUSH_PT.
> + */
> + ret = unlock_region(ptdev, as_nr);
> + if (ret)
> + return ret;
> +
> + return wait_ready(ptdev, as_nr);
> +}
> +
> static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
> u64 iova, u64 size, u32 op)
> {
> @@ -590,6 +628,10 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
> if (op != AS_COMMAND_UNLOCK)
> lock_region(ptdev, as_nr, iova, size);
>
> + if (panthor_hw_supports(ptdev, PANTHOR_HW_FEATURE_GPU_CTRL_CACHE_FLUSH) &&
> + (op == AS_COMMAND_FLUSH_PT || op == AS_COMMAND_FLUSH_MEM))
> + return mmu_hw_do_flush_on_gpu_ctrl(ptdev, as_nr, op);
> +
> /* Run the MMU operation */
> write_cmd(ptdev, as_nr, op);
>