[Intel-gfx] [PATCH] drm/i915/gt: Consider multi-gt at all places

Upadhyay, Tejas tejas.upadhyay at intel.com
Mon Apr 17 18:13:40 UTC 2023



> -----Original Message-----
> From: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> Sent: Wednesday, April 5, 2023 2:01 PM
> To: Upadhyay, Tejas <tejas.upadhyay at intel.com>; Intel-
> GFX at Lists.FreeDesktop.Org
> Subject: Re: [Intel-gfx] [PATCH] drm/i915/gt: Consider multi-gt at all places
> 
> 
> On 05/04/2023 07:56, Upadhyay, Tejas wrote:
> >>> -int igt_live_test_end(struct igt_live_test *t)
> >>> +int igt_live_test_end(struct igt_live_test *t, struct intel_gt *gt)
> >>>    {
> >>> -	struct drm_i915_private *i915 = t->i915;
> >>> +	struct drm_i915_private *i915 = gt->i915;
> >>>    	struct intel_engine_cs *engine;
> >>>    	enum intel_engine_id id;
> >>>
> >>> @@ -57,7 +57,7 @@ int igt_live_test_end(struct igt_live_test *t)
> >>>    		return -EIO;
> >>>    	}
> >>>
> >>> -	for_each_engine(engine, to_gt(i915), id) {
> >>> +	for_each_engine(engine, gt, id) {
> >>>    		if (t->reset_engine[id] ==
> >>>    		    i915_reset_engine_count(&i915->gpu_error, engine))
> >>>    			continue;
> >>> diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h
> >>> b/drivers/gpu/drm/i915/selftests/igt_live_test.h
> >>> index 36ed42736c52..209b0548c603 100644
> >>> --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h
> >>> +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
> >>> @@ -27,9 +27,9 @@ struct igt_live_test {
> >>>     * e.g. if the GPU was reset.
> >>>     */
> >>>    int igt_live_test_begin(struct igt_live_test *t,
> >>> -			struct drm_i915_private *i915,
> >>> +			struct intel_gt *gt,
> >>>    			const char *func,
> >>>    			const char *name);
> >>> -int igt_live_test_end(struct igt_live_test *t);
> >>> +int igt_live_test_end(struct igt_live_test *t, struct intel_gt
> >>> +*gt);
> >>
> >> Back in the day the plan was that live selftests are device focused
> >> and then we also have intel_gt_live_subtests, which are obviously GT
> >> focused. So in that sense adding a single GT parameter to
> >> igt_live_test_begin isn't something I immediately understand.
> >>
> >> Could you explain in one or two practical examples what is not
> >> working properly and how is this patch fixing it?
> >
> > For example you are running test "live_all_engines(void *arg)",
> >
> > -- Below test begin, will reset counters for primary GT - Tile0 -- err
> > = igt_live_test_begin(&t, to_gt(i915), __func__, "");
> >          if (err)
> >                  goto out_free;
> >
> > --- Now we loop for all engines, note here for MTL vcs, vecs engines are not
> on primary GT or tile 0,
> >       So counters did not reset on test begin does not cover them. ---
> >
> >        In test_begin, below will not reset count for vcs, vecs engines on MTL,
> > 	for_each_engine(engine, gt, id)
> >                  t->reset_engine[id] =
> >                          i915_reset_engine_count(&i915->gpu_error,
> > engine);
> >
> > --- Then below will end test, again for primary GT where above
> > mentioned engines are not there --- err = igt_live_test_end(&t,
> > to_gt(i915));
> >
> > In short to me it looks like igt_live_test for device needs attention when we
> have different engines on different GTs like MTL.
> 
> If you either add for_each_gt to that for_each_engine in igt_live_test_begin
> and igt_live_test_end, or alternatively for_each_uabi_engine (maybe misses
> some internal engines?), everything works? That would be much smaller
> patch and wouldn't falsely associate live tests with a single gt.
> 

Below would work, if you agree I will rearrange and send patch.

--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -16,28 +16,31 @@ int igt_live_test_begin(struct igt_live_test *t,
                        const char *func,
                        const char *name)
 {
-       struct intel_gt *gt = to_gt(i915);
+       struct intel_gt *gt;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err;
+       unsigned int i;

-       t->i915 = i915;
-       t->func = func;
-       t->name = name;
+       for_each_gt(gt, i915, i) {
+               t->i915 = i915;
+               t->func = func;
+               t->name = name;

-       err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
-       if (err) {
-               pr_err("%s(%s): failed to idle before, with err=%d!",
-                      func, name, err);
-               return err;
-       }
+               err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
+               if (err) {
+                       pr_err("%s(%s): failed to idle before, with err=%d!",
+                                       func, name, err);
+                       return err;
+               }

-       t->reset_global = i915_reset_count(&i915->gpu_error);
+               t->reset_global = i915_reset_count(&i915->gpu_error);

-       for_each_engine(engine, gt, id)
-               t->reset_engine[id] =
+               for_each_engine(engine, gt, id)
+                       t->reset_engine[id] =
                        i915_reset_engine_count(&i915->gpu_error, engine);

+       }
        return 0;
 }

@@ -46,6 +49,7 @@ int igt_live_test_end(struct igt_live_test *t)
        struct drm_i915_private *i915 = t->i915;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
+       unsigned int i;

        if (igt_flush_test(i915))
                return -EIO;
@@ -57,17 +61,18 @@ int igt_live_test_end(struct igt_live_test *t)
                return -EIO;
        }

-       for_each_engine(engine, to_gt(i915), id) {
-               if (t->reset_engine[id] ==
-                   i915_reset_engine_count(&i915->gpu_error, engine))
-                       continue;
+       for_each_gt(gt, i915, i) {
+               for_each_engine(engine, gt, id) {
+                       if (t->reset_engine[id] ==
+                                       i915_reset_engine_count(&i915->gpu_error, engine))
+                               continue;

-               pr_err("%s(%s): engine '%s' was reset %d times!\n",
-                      t->func, t->name, engine->name,
-                      i915_reset_engine_count(&i915->gpu_error, engine) -
-                      t->reset_engine[id]);
-               return -EIO;
+                       pr_err("%s(%s): engine '%s' was reset %d times!\n",
+                                       t->func, t->name, engine->name,
+                                       i915_reset_engine_count(&i915->gpu_error, engine) -
+                                       t->reset_engine[id]);
+                       return -EIO;
+               }
        }
     
Regards,
Tejas                             
> Regards,
> 
> Tvrtko


More information about the Intel-gfx mailing list