[PATCH i-g-t v2 1/2] lib/xe: Add sync and async xe_force_gt_reset options

Cavitt, Jonathan jonathan.cavitt at intel.com
Wed Jun 5 18:05:45 UTC 2024


-----Original Message-----
From: Brost, Matthew <matthew.brost at intel.com> 
Sent: Wednesday, June 5, 2024 10:05 AM
To: Cavitt, Jonathan <jonathan.cavitt at intel.com>
Cc: igt-dev at lists.freedesktop.org; Gupta, saurabhg <saurabhg.gupta at intel.com>; Harrison, John C <john.c.harrison at intel.com>; Summers, Stuart <stuart.summers at intel.com>
Subject: Re: [PATCH i-g-t v2 1/2] lib/xe: Add sync and async xe_force_gt_reset options
> 
> On Wed, Jun 05, 2024 at 09:33:43AM -0700, Jonathan Cavitt wrote:
> > Add a new xe_force_gt_reset function, xe_force_gt_reset_sync, renaming
> > the original xe_force_gt_reset to xe_force_gt_reset_async.  This allows
> > the user to decide whether or not they want to initiate a synchronous or
> > asynchronous gt reset.  The asynchronous reset function was otherwise
> > unchanged, but the synchronous reset function operates by calling a new
> > debugfs function.
> > 
> > For now, default to using the asynchronous version for all current use
> > cases.
> > 
> 
> Best practices are to add a change log to patches. So here is would be
> something like:
> 
> v2:
>  - Add *async and *sync versions of xe_force_gt_reset (Matthew Brost)

I was under the impression that this was unnecessary because this is pretty much
an entirely different patch from what was written for v1, but I'll add the missing
revision markers for v4.
-Jonathan Cavitt

> 
> > Suggested-by: Matthew Brost <matthew.brost at intel.com>
> > Signed-off-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
> > CC: John Harrison <john.c.harrison at intel.com>
> > CC: Stuart Summers <stuart.summers at intel.com>
> > ---
> >  lib/xe/xe_gt.c              |  2 +-
> >  lib/xe/xe_ioctl.c           | 15 ++++++++++++++-
> >  lib/xe/xe_ioctl.h           |  3 ++-
> >  tests/intel/xe_exec_reset.c |  8 ++++----
> >  tests/intel/xe_gt_freq.c    |  2 +-
> >  tests/intel/xe_wedged.c     |  2 +-
> >  6 files changed, 23 insertions(+), 9 deletions(-)
> > 
> > diff --git a/lib/xe/xe_gt.c b/lib/xe/xe_gt.c
> > index 743d7a26ec..36e8fde363 100644
> > --- a/lib/xe/xe_gt.c
> > +++ b/lib/xe/xe_gt.c
> > @@ -69,7 +69,7 @@ void xe_force_gt_reset_all(int xe_fd)
> >  	int gt;
> >  
> >  	xe_for_each_gt(xe_fd, gt)
> > -		xe_force_gt_reset(xe_fd, gt);
> > +		xe_force_gt_reset_async(xe_fd, gt);
> >  }
> >  
> >  /**
> > diff --git a/lib/xe/xe_ioctl.c b/lib/xe/xe_ioctl.c
> > index 934c877ebc..6e3234cd3d 100644
> > --- a/lib/xe/xe_ioctl.c
> > +++ b/lib/xe/xe_ioctl.c
> > @@ -529,7 +529,7 @@ int64_t xe_wait_ufence(int fd, uint64_t *addr, uint64_t value,
> >  	return timeout;
> >  }
> >  
> > -void xe_force_gt_reset(int fd, int gt)
> > +void xe_force_gt_reset_async(int fd, int gt)
> >  {
> >  	char reset_string[128];
> >  	struct stat st;
> > @@ -541,3 +541,16 @@ void xe_force_gt_reset(int fd, int gt)
> >  		 minor(st.st_rdev), gt);
> >  	system(reset_string);
> >  }
> > +
> > +void xe_force_gt_reset_sync(int fd, int gt)
> > +{
> > +	char reset_string[128];
> > +	struct stat st;
> > +
> > +	igt_assert_eq(fstat(fd, &st), 0);
> > +
> > +	snprintf(reset_string, sizeof(reset_string),
> > +		 "cat /sys/kernel/debug/dri/%d/gt%d/force_reset_sync"
> > +		 minor(st.st_rdev), gt);
> 
> Again another nit, we can have an internal (static) function which
> accepts the 'bool sync' argument. The xe_force_gt_reset_sync and
> xe_force_gt_reset_async would call with correct argument. This gives use
> a single implmentation but hides 'bool sync' argument from the IGT
> layers of the code. Make sense?
> 
> Matt
> 
> > +	system(reset_string);
> > +}
> > diff --git a/lib/xe/xe_ioctl.h b/lib/xe/xe_ioctl.h
> > index 4d08402e0b..b27c0053f0 100644
> > --- a/lib/xe/xe_ioctl.h
> > +++ b/lib/xe/xe_ioctl.h
> > @@ -91,6 +91,7 @@ int __xe_wait_ufence(int fd, uint64_t *addr, uint64_t value,
> >  		     uint32_t exec_queue, int64_t *timeout);
> >  int64_t xe_wait_ufence(int fd, uint64_t *addr, uint64_t value,
> >  		       uint32_t exec_queue, int64_t timeout);
> > -void xe_force_gt_reset(int fd, int gt);
> > +void xe_force_gt_reset_async(int fd, int gt);
> > +void xe_force_gt_reset_sync(int fd, int gt);
> >  
> >  #endif /* XE_IOCTL_H */
> > diff --git a/tests/intel/xe_exec_reset.c b/tests/intel/xe_exec_reset.c
> > index e47c3730dd..05d63c0ba5 100644
> > --- a/tests/intel/xe_exec_reset.c
> > +++ b/tests/intel/xe_exec_reset.c
> > @@ -239,7 +239,7 @@ test_balancer(int fd, int gt, int class, int n_exec_queues, int n_execs,
> >  	}
> >  
> >  	if (flags & GT_RESET)
> > -		xe_force_gt_reset(fd, gt);
> > +		xe_force_gt_reset_async(fd, gt);
> >  
> >  	if (flags & CLOSE_FD) {
> >  		if (flags & CLOSE_EXEC_QUEUES) {
> > @@ -383,7 +383,7 @@ test_legacy_mode(int fd, struct drm_xe_engine_class_instance *eci,
> >  	}
> >  
> >  	if (flags & GT_RESET)
> > -		xe_force_gt_reset(fd, eci->gt_id);
> > +		xe_force_gt_reset_async(fd, eci->gt_id);
> >  
> >  	if (flags & CLOSE_FD) {
> >  		if (flags & CLOSE_EXEC_QUEUES) {
> > @@ -530,7 +530,7 @@ test_compute_mode(int fd, struct drm_xe_engine_class_instance *eci,
> >  	}
> >  
> >  	if (flags & GT_RESET)
> > -		xe_force_gt_reset(fd, eci->gt_id);
> > +		xe_force_gt_reset_async(fd, eci->gt_id);
> >  
> >  	if (flags & CLOSE_FD) {
> >  		if (flags & CLOSE_EXEC_QUEUES) {
> > @@ -590,7 +590,7 @@ static void do_resets(struct gt_thread_data *t)
> >  	while (!*(t->exit)) {
> >  		usleep(250000);	/* 250 ms */
> >  		(*t->num_reset)++;
> > -		xe_force_gt_reset(t->fd, t->gt);
> > +		xe_force_gt_reset_async(t->fd, t->gt);
> >  	}
> >  }
> >  
> > diff --git a/tests/intel/xe_gt_freq.c b/tests/intel/xe_gt_freq.c
> > index ff99b46a08..d2e4d1a09c 100644
> > --- a/tests/intel/xe_gt_freq.c
> > +++ b/tests/intel/xe_gt_freq.c
> > @@ -324,7 +324,7 @@ static void test_reset(int fd, int gt_id, int cycles)
> >  		igt_assert_f(get_freq(fd, gt_id, "cur") == rpn,
> >  			     "Failed after %d good cycles\n", i);
> >  
> > -		xe_force_gt_reset(fd, gt_id);
> > +		xe_force_gt_reset_async(fd, gt_id);
> >  
> >  		igt_assert_f(get_freq(fd, gt_id, "min") == rpn,
> >  			     "Failed after %d good cycles\n", i);
> > diff --git a/tests/intel/xe_wedged.c b/tests/intel/xe_wedged.c
> > index aa4a452bfc..a4fc53869e 100644
> > --- a/tests/intel/xe_wedged.c
> > +++ b/tests/intel/xe_wedged.c
> > @@ -31,7 +31,7 @@ static void force_wedged(int fd)
> >  	igt_debugfs_write(fd, "fail_gt_reset/probability", "100");
> >  	igt_debugfs_write(fd, "fail_gt_reset/times", "2");
> >  
> > -	xe_force_gt_reset(fd, 0);
> > +	xe_force_gt_reset_async(fd, 0);
> >  	sleep(1);
> >  }
> >  
> > -- 
> > 2.25.1
> > 
> 


More information about the igt-dev mailing list