[Intel-xe] [PATCH v3 2/9] drm/xe/rtp: Split rtp process initialization
Matt Roper
matthew.d.roper at intel.com
Wed May 17 17:11:26 UTC 2023
On Tue, May 16, 2023 at 03:19:43PM -0700, Lucas De Marchi wrote:
> The selection between hwe and gt is exposed to the outside of rtp, by
> the xe_rtp_process() function. It doesn't make sense to pass a hwe and a
> gt as argument since the gt should always be the one containing the hwe.
>
> This clarifies the interface by separating the context creation into an
> initializer. The initializer then passes the correct value and there
> should never be a case with hwe and gt set. Internally the functions
> continue receiving the argument separately.
>
> Also, since most of the rtp rules match against device characteristics,
> add support for processing rules based solely on the device. This will
> be used by follow up commits to process some workarounds.
>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
> drivers/gpu/drm/xe/tests/xe_rtp_test.c | 3 +-
> drivers/gpu/drm/xe/xe_hw_engine.c | 8 +--
> drivers/gpu/drm/xe/xe_reg_whitelist.c | 4 +-
> drivers/gpu/drm/xe/xe_rtp.c | 71 ++++++++++++++++++++------
> drivers/gpu/drm/xe/xe_rtp.h | 10 +++-
> drivers/gpu/drm/xe/xe_rtp_types.h | 15 ++++++
> drivers/gpu/drm/xe/xe_tuning.c | 8 ++-
> drivers/gpu/drm/xe/xe_wa.c | 12 +++--
> 8 files changed, 102 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
> index 4b2aac5ccf28..f96ef1987719 100644
> --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
> +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
> @@ -238,10 +238,11 @@ static void xe_rtp_process_tests(struct kunit *test)
> struct xe_device *xe = test->priv;
> struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr;
> const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(&xe->gt[0]);
> unsigned long idx, count = 0;
>
> xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
> - xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL);
> + xe_rtp_process(&ctx, param->entries, reg_sr);
>
>  	xa_for_each(&reg_sr->xa, idx, sre) {
> if (idx == param->expected_reg.addr)
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 751f6c3bba17..1bf28b1ad319 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -281,6 +281,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
> const u8 mocs_read_idx = gt->mocs.uc_index;
> u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
> REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
> const struct xe_rtp_entry lrc_was[] = {
> /*
> * Some blitter commands do not have a field for MOCS, those
> @@ -299,7 +300,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
> {}
> };
>
> - xe_rtp_process(lrc_was, &hwe->reg_lrc, gt, hwe);
> + xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc);
> }
>
> static void
> @@ -311,7 +312,8 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
> const u8 mocs_read_idx = gt->mocs.uc_index;
> u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
> REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
> - const struct xe_rtp_entry engine_was[] = {
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
> + const struct xe_rtp_entry engine_entries[] = {
> /*
> * RING_CMD_CCTL specifies the default MOCS entry that will be
> * used by the command streamer when executing commands that
> @@ -332,7 +334,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
> {}
> };
>
> - xe_rtp_process(engine_was, &hwe->reg_sr, gt, hwe);
> + xe_rtp_process(&ctx, engine_entries, &hwe->reg_sr);
> }
>
> static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
> diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
> index 7a2bb60ebd85..98f678d74445 100644
> --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
> +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
> @@ -63,7 +63,9 @@ static const struct xe_rtp_entry register_whitelist[] = {
> */
> void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
> {
> - xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe);
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
> +
> + xe_rtp_process(&ctx, register_whitelist, &hwe->reg_whitelist);
> }
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
> index 0c6a23e14a71..648a38bf8f0c 100644
> --- a/drivers/gpu/drm/xe/xe_rtp.c
> +++ b/drivers/gpu/drm/xe/xe_rtp.c
> @@ -23,11 +23,11 @@
> * the values to the registers that have matching rules.
> */
>
> -static bool rule_matches(struct xe_gt *gt,
> +static bool rule_matches(const struct xe_device *xe,
> + struct xe_gt *gt,
> struct xe_hw_engine *hwe,
> const struct xe_rtp_entry *entry)
> {
> - const struct xe_device *xe = gt_to_xe(gt);
> const struct xe_rtp_rule *r;
> unsigned int i;
> bool match;
> @@ -62,22 +62,30 @@ static bool rule_matches(struct xe_gt *gt,
> match = xe->info.step.graphics >= r->step_start &&
> xe->info.step.graphics < r->step_end;
> break;
> + case XE_RTP_MATCH_INTEGRATED:
> + match = !xe->info.is_dgfx;
> + break;
> + case XE_RTP_MATCH_DISCRETE:
> + match = xe->info.is_dgfx;
> + break;
> case XE_RTP_MATCH_ENGINE_CLASS:
> + if (drm_WARN_ON(&xe->drm, !hwe))
> + return false;
> +
> match = hwe->class == r->engine_class;
> break;
> case XE_RTP_MATCH_NOT_ENGINE_CLASS:
> + if (drm_WARN_ON(&xe->drm, !hwe))
> + return false;
> +
> match = hwe->class != r->engine_class;
> break;
> case XE_RTP_MATCH_FUNC:
> + if (drm_WARN_ON(&xe->drm, !gt))
> + return false;
> +
> match = r->match_func(gt, hwe);
Would it make more sense to adjust the signature of match_func to take
xe as an additional parameter to allow general matching without a
gt/hwe?
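I.e., something roughly like this (just an untested sketch to illustrate
what I mean; the helper name and body are made up):

	static bool device_match_example(const struct xe_device *xe,
					 struct xe_gt *gt,
					 struct xe_hw_engine *hwe)
	{
		/* gt/hwe may be NULL when processing a device-only table */
		return !xe->info.is_dgfx;
	}

with rule_matches() calling "r->match_func(xe, gt, hwe)" so device-only
tables wouldn't need the drm_WARN_ON(!gt) here.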
Actually I wonder if we should just split out the addition of
device-only support into its own commit later in the series closer to
where it gets used.
Anyway, not a big deal either way; up to you whether to make any changes
or not. Either way,
Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
Matt
> break;
> - case XE_RTP_MATCH_INTEGRATED:
> - match = !xe->info.is_dgfx;
> - break;
> - case XE_RTP_MATCH_DISCRETE:
> - match = xe->info.is_dgfx;
> - break;
> -
> default:
> XE_WARN_ON(r->match_type);
> }
> @@ -105,14 +113,15 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action,
> xe_reg_sr_add(sr, &sr_entry);
> }
>
> -static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt,
> +static void rtp_process_one(const struct xe_rtp_entry *entry,
> + struct xe_device *xe, struct xe_gt *gt,
> struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
> {
> const struct xe_rtp_action *action;
> u32 mmio_base;
> unsigned int i;
>
> - if (!rule_matches(gt, hwe, entry))
> + if (!rule_matches(xe, gt, hwe, entry))
> return;
>
> for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
> @@ -126,23 +135,51 @@ static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt,
> }
> }
>
> +static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
> + struct xe_hw_engine **hwe,
> + struct xe_gt **gt,
> + struct xe_device **xe)
> +{
> + switch (ctx->type) {
> + case XE_RTP_PROCESS_TYPE_DEVICE:
> + *hwe = NULL;
> + *gt = NULL;
> + *xe = ctx->xe;
> + break;
> + case XE_RTP_PROCESS_TYPE_GT:
> + *hwe = NULL;
> + *gt = ctx->gt;
> + *xe = gt_to_xe(*gt);
> + break;
> + case XE_RTP_PROCESS_TYPE_ENGINE:
> + *hwe = ctx->hwe;
> + *gt = (*hwe)->gt;
> + *xe = gt_to_xe(*gt);
> + break;
> + };
> +}
> +
> /**
> * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr
> + * @ctx: The context for processing the table, with one of device, gt or hwe
> * @entries: Table with RTP definitions
> * @sr: Where to add an entry to with the values for matching. This can be
>  *     viewed as the "coalesced view" of the multiple tables. The bits for each
> * register set are expected not to collide with previously added entries
> - * @gt: The GT to be used for matching rules
> - * @hwe: Engine instance to use for matching rules and as mmio base
> *
> * Walk the table pointed by @entries (with an empty sentinel) and add all
> * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is
> * used to calculate the right register offset
> */
> -void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
> - struct xe_gt *gt, struct xe_hw_engine *hwe)
> +void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
> + const struct xe_rtp_entry *entries, struct xe_reg_sr *sr)
> {
> const struct xe_rtp_entry *entry;
> + struct xe_hw_engine *hwe;
> + struct xe_gt *gt;
> + struct xe_device *xe;
> +
> +	rtp_get_context(ctx, &hwe, &gt, &xe);
>
> for (entry = entries; entry && entry->name; entry++) {
> if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {
> @@ -150,9 +187,9 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
> enum xe_hw_engine_id id;
>
> for_each_hw_engine(each_hwe, gt, id)
> - rtp_process_one(entry, gt, each_hwe, sr);
> + rtp_process_one(entry, xe, gt, each_hwe, sr);
> } else {
> - rtp_process_one(entry, gt, hwe, sr);
> + rtp_process_one(entry, xe, gt, hwe, sr);
> }
> }
> }
> diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
> index 8bc946694bfc..e7f254fb1b31 100644
> --- a/drivers/gpu/drm/xe/xe_rtp.h
> +++ b/drivers/gpu/drm/xe/xe_rtp.h
> @@ -358,8 +358,14 @@ struct xe_reg_sr;
> XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \
> }
>
> -void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
> - struct xe_gt *gt, struct xe_hw_engine *hwe);
> +#define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \
> + struct xe_hw_engine *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \
> + struct xe_gt *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \
> + struct xe_device *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE })
> +
> +void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
> + const struct xe_rtp_entry *entries,
> + struct xe_reg_sr *sr);
>
> /* Match functions to be used with XE_RTP_MATCH_FUNC */
>
> diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
> index 12df8a9e9c45..6d9ab9eb91ba 100644
> --- a/drivers/gpu/drm/xe/xe_rtp_types.h
> +++ b/drivers/gpu/drm/xe/xe_rtp_types.h
> @@ -95,4 +95,19 @@ struct xe_rtp_entry {
> u8 flags;
> };
>
> +enum xe_rtp_process_type {
> + XE_RTP_PROCESS_TYPE_DEVICE,
> + XE_RTP_PROCESS_TYPE_GT,
> + XE_RTP_PROCESS_TYPE_ENGINE,
> +};
> +
> +struct xe_rtp_process_ctx {
> + union {
> + struct xe_device *xe;
> + struct xe_gt *gt;
> + struct xe_hw_engine *hwe;
> + };
> + enum xe_rtp_process_type type;
> +};
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
> index 5fc6a408429b..c2810ede3a65 100644
> --- a/drivers/gpu/drm/xe/xe_tuning.c
> +++ b/drivers/gpu/drm/xe/xe_tuning.c
> @@ -59,7 +59,9 @@ static const struct xe_rtp_entry lrc_tunings[] = {
>
> void xe_tuning_process_gt(struct xe_gt *gt)
> {
> -	xe_rtp_process(gt_tunings, &gt->reg_sr, gt, NULL);
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
> +
> +	xe_rtp_process(&ctx, gt_tunings, &gt->reg_sr);
> }
> EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
>
> @@ -73,5 +75,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
> */
> void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
> {
> - xe_rtp_process(lrc_tunings, &hwe->reg_lrc, hwe->gt, hwe);
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
> +
> + xe_rtp_process(&ctx, lrc_tunings, &hwe->reg_lrc);
> }
> diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
> index b0bb2f4438f4..4b236b6f4c8e 100644
> --- a/drivers/gpu/drm/xe/xe_wa.c
> +++ b/drivers/gpu/drm/xe/xe_wa.c
> @@ -579,7 +579,9 @@ __diag_pop();
> */
> void xe_wa_process_gt(struct xe_gt *gt)
> {
> -	xe_rtp_process(gt_was, &gt->reg_sr, gt, NULL);
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
> +
> +	xe_rtp_process(&ctx, gt_was, &gt->reg_sr);
> }
> EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
>
> @@ -593,7 +595,9 @@ EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
> */
> void xe_wa_process_engine(struct xe_hw_engine *hwe)
> {
> - xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe);
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
> +
> + xe_rtp_process(&ctx, engine_was, &hwe->reg_sr);
> }
>
> /**
> @@ -606,5 +610,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe)
> */
> void xe_wa_process_lrc(struct xe_hw_engine *hwe)
> {
> - xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe);
> + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
> +
> + xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc);
> }
> --
> 2.40.1
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation