[PATCH] drm/xe: Add xe mocs suspend resume kunit

Matt Roper matthew.d.roper at intel.com
Thu Jan 25 23:23:28 UTC 2024


On Thu, Jan 25, 2024 at 04:46:37PM +0530, Janga Rahul Kumar wrote:
> This kunit verifies the mocs register content with
> the KMD programmed values before and after suspend resume.
> Add helper function to check mocs values and use it.

Before we add more tests here, I think we need to fix
https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1166 otherwise
we'll just be adding more failing tests to the test suite.  Basically
check_mocs_table() needs to be updated to not try to confirm the final
table entry on platforms where table entry 63 is readonly and can't be
updated by the driver.  It should be a simple fix, so you can provide
that as the first patch of a series, and then add this new test as the
second patch.

More comments below.

> 
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Cc: Badal Nilawar <badal.nilawar at intel.com>
> Signed-off-by: Janga Rahul Kumar <janga.rahul.kumar at intel.com>
> ---
>  drivers/gpu/drm/xe/tests/xe_mocs.c      | 53 +++++++++++++++++++------
>  drivers/gpu/drm/xe/tests/xe_mocs_test.c |  1 +
>  drivers/gpu/drm/xe/tests/xe_mocs_test.h |  1 +
>  3 files changed, 43 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
> index df5c36b70ab4..a36f804e5ce0 100644
> --- a/drivers/gpu/drm/xe/tests/xe_mocs.c
> +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
> @@ -103,6 +103,14 @@ static void read_mocs_table(struct xe_gt *gt,
>  	xe_device_mem_access_put(gt_to_xe(gt));
>  }
>  
> +static void check_mocs_table(struct xe_gt *gt, struct live_mocs *mocs, unsigned int flags)
> +{
> +	if (flags & HAS_GLOBAL_MOCS)
> +		read_mocs_table(gt, &(mocs->table));
> +	if (flags & HAS_LNCF_MOCS)
> +		read_l3cc_table(gt, &(mocs->table));

You don't need the extra parentheses on these.

> +}
> +
>  static int mocs_kernel_test_run_device(struct xe_device *xe)
>  {
>  	/* Basic check the system is configured with the expected mocs table */
> @@ -115,10 +123,7 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
>  
>  	for_each_gt(gt, xe, id) {
>  		flags = live_mocs_init(&mocs, gt);
> -		if (flags & HAS_GLOBAL_MOCS)
> -			read_mocs_table(gt, &mocs.table);
> -		if (flags & HAS_LNCF_MOCS)
> -			read_l3cc_table(gt, &mocs.table);
> +		check_mocs_table(gt, &mocs, flags);
>  	}
>  	return 0;
>  }
> @@ -142,19 +147,13 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
>  	for_each_gt(gt, xe, id) {
>  		flags = live_mocs_init(&mocs, gt);
>  		kunit_info(test, "mocs_reset_test before reset\n");
> -		if (flags & HAS_GLOBAL_MOCS)
> -			read_mocs_table(gt, &mocs.table);
> -		if (flags & HAS_LNCF_MOCS)
> -			read_l3cc_table(gt, &mocs.table);
> +		check_mocs_table(gt, &mocs, flags);
>  
>  		xe_gt_reset_async(gt);
>  		flush_work(&gt->reset.worker);
>  
>  		kunit_info(test, "mocs_reset_test after reset\n");
> -		if (flags & HAS_GLOBAL_MOCS)
> -			read_mocs_table(gt, &mocs.table);
> -		if (flags & HAS_LNCF_MOCS)
> -			read_l3cc_table(gt, &mocs.table);
> +		check_mocs_table(gt, &mocs, flags);
>  	}
>  	return 0;
>  }
> @@ -164,3 +163,33 @@ void xe_live_mocs_reset_kunit(struct kunit *test)
>  	xe_call_for_each_device(mocs_reset_test_run_device);
>  }
>  EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_reset_kunit);
> +
> +static int mocs_suspend_resume_test_run_device(struct xe_device *xe)
> +{
> +	/* Check the mocs setup is retained over suspend resume */
> +
> +	struct live_mocs mocs;
> +	struct xe_gt *gt;
> +	unsigned int flags;
> +	int id;
> +	struct kunit *test = xe_cur_kunit();
> +
> +	for_each_gt(gt, xe, id) {
> +		flags = live_mocs_init(&mocs, gt);
> +		kunit_info(test, "mocs_suspend_resume_test before suspend resume\n");
> +		check_mocs_table(gt, &mocs, flags);
> +
> +		xe_gt_suspend(gt);
> +		xe_gt_resume(gt);

Hmm, usually our suspend-based tests do a true suspend (e.g., entering
S3 or S0ix or whatever) to make sure nothing important gets lost during
a platform-level suspend.  Here we're just calling the functions that
are invoked to prepare for a suspend and recover after a resume, without
actually doing the actual suspend/resume itself.  So this isn't really
testing the same thing, although it probably could still catch certain
types of driver implementation bugs.

Part of the problem with doing this testing in kunit rather than a
userspace IGT test is that it isn't really possible to do a true a true
suspend/resume in a kunit test the same way it is from a userspace IGT.

I'm not really sure what the best path forward is.  We could keep these
tests in kunit (and probably rename this new one slightly to make it
clear that it isn't a "real" suspend test) and just live with not having
full coverage.  Or we could rewrite these as IGT tests instead, assisted
by some debugfs entries.  E.g., if we added something like expected_mocs
and current_mocs entries under debugfs (one which dumps the software
MOCS table, one which dumps the current values in the registers), then
the various MOCS kunit tests could be re-written in IGT by doing
something like

   general mocs test:
      read expected_mocs
      read current_mocs
      assert(expected matches current)

   engine reset mocs test:
      read current_mocs
      trigger an engine reset by submitting a spinner
      read current_mocs
      assert(first read matches second)

   gt reset mocs test:
      read current_mocs
      trigger an full GT reset via debugfs
      read current_mocs
      assert(first read matches second)

   suspend mocs test:
      read current_mocs
      trigger a suspend/resume cycle
      read current_mocs
      assert(first read matches second)

While we're at it, it would probably be good to have all the same tests
for PAT as well.


Matt

> +
> +		kunit_info(test, "mocs_suspend_resume_test after suspend resume\n");
> +		check_mocs_table(gt, &mocs, flags);
> +	}
> +	return 0;
> +}
> +
> +void xe_live_mocs_suspend_resume_kunit(struct kunit *test)
> +{
> +	xe_call_for_each_device(mocs_suspend_resume_test_run_device);
> +}
> +EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_suspend_resume_kunit);
> diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
> index 4f62e7a4270b..0d813472cdd3 100644
> --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c
> +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
> @@ -10,6 +10,7 @@
>  static struct kunit_case xe_mocs_tests[] = {
>  	KUNIT_CASE(xe_live_mocs_kernel_kunit),
>  	KUNIT_CASE(xe_live_mocs_reset_kunit),
> +	KUNIT_CASE(xe_live_mocs_suspend_resume_kunit),
>  	{}
>  };
>  
> diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
> index e7699d495411..117164cc0df7 100644
> --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.h
> +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
> @@ -10,5 +10,6 @@ struct kunit;
>  
>  void xe_live_mocs_kernel_kunit(struct kunit *test);
>  void xe_live_mocs_reset_kunit(struct kunit *test);
> +void xe_live_mocs_suspend_resume_kunit(struct kunit *test);
>  
>  #endif
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list