[Intel-gfx] drm/i915: Watchdog timeout: Blindly trust watchdog timeout for reset?

kbuild test robot lkp at intel.com
Sat Jan 5 13:32:19 UTC 2019


Hi Michel,

Thank you for the patch! Perhaps there is something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on v4.20 next-20190103]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Carlos-Santa/drm-i915-Watchdog-timeout-Blindly-trust-watchdog-timeout-for-reset/20190105-111445
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-s4-01052002 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings and errors (new ones prefixed by >>):

   In file included from include/asm-generic/bug.h:5:0,
                    from arch/x86/include/asm/bug.h:47,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/gfp.h:5,
                    from include/linux/slab.h:15,
                    from include/linux/io-mapping.h:22,
                    from drivers/gpu/drm/i915/i915_drv.h:36,
                    from drivers/gpu/drm/i915/intel_hangcheck.c:25:
   drivers/gpu/drm/i915/intel_hangcheck.c: In function 'i915_hangcheck_elapsed':
   drivers/gpu/drm/i915/intel_hangcheck.c:443:24: error: 'struct intel_engine_hangcheck' has no member named 'watchdog'
          engine->hangcheck.watchdog == intel_engine_get_seqno(engine)) {
                           ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> drivers/gpu/drm/i915/intel_hangcheck.c:442:3: note: in expansion of macro 'if'
      if (engine->hangcheck.stalled ||
      ^~
   drivers/gpu/drm/i915/intel_hangcheck.c:443:24: error: 'struct intel_engine_hangcheck' has no member named 'watchdog'
          engine->hangcheck.watchdog == intel_engine_get_seqno(engine)) {
                           ^
   include/linux/compiler.h:58:42: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                             ^~~~
>> drivers/gpu/drm/i915/intel_hangcheck.c:442:3: note: in expansion of macro 'if'
      if (engine->hangcheck.stalled ||
      ^~
   drivers/gpu/drm/i915/intel_hangcheck.c:443:24: error: 'struct intel_engine_hangcheck' has no member named 'watchdog'
          engine->hangcheck.watchdog == intel_engine_get_seqno(engine)) {
                           ^
   include/linux/compiler.h:69:16: note: in definition of macro '__trace_if'
      ______r = !!(cond);     \
                   ^~~~
>> drivers/gpu/drm/i915/intel_hangcheck.c:442:3: note: in expansion of macro 'if'
      if (engine->hangcheck.stalled ||
      ^~

vim +/if +442 drivers/gpu/drm/i915/intel_hangcheck.c

   400	
   401	/*
   402	 * This is called when the chip hasn't reported back with completed
   403	 * batchbuffers in a long time. We keep track per ring seqno progress and
   404	 * if there are no progress, hangcheck score for that ring is increased.
   405	 * Further, acthd is inspected to see if the ring is stuck. On stuck case
   406	 * we kick the ring. If we see no progress on three subsequent calls
   407	 * we assume chip is wedged and try to fix it by resetting the chip.
   408	 */
   409	static void i915_hangcheck_elapsed(struct work_struct *work)
   410	{
   411		struct drm_i915_private *dev_priv =
   412			container_of(work, typeof(*dev_priv),
   413				     gpu_error.hangcheck_work.work);
   414		struct intel_engine_cs *engine;
   415		enum intel_engine_id id;
   416		unsigned int hung = 0, stuck = 0, wedged = 0;
   417	
   418		if (!i915_modparams.enable_hangcheck)
   419			return;
   420	
   421		if (!READ_ONCE(dev_priv->gt.awake))
   422			return;
   423	
   424		if (i915_terminally_wedged(&dev_priv->gpu_error))
   425			return;
   426	
   427		/* As enabling the GPU requires fairly extensive mmio access,
   428		 * periodically arm the mmio checker to see if we are triggering
   429		 * any invalid access.
   430		 */
   431		intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
   432	
   433		for_each_engine(engine, dev_priv, id) {
   434			struct intel_engine_hangcheck hc;
   435	
   436			semaphore_clear_deadlocks(dev_priv);
   437	
   438			hangcheck_load_sample(engine, &hc);
   439			hangcheck_accumulate_sample(engine, &hc);
   440			hangcheck_store_sample(engine, &hc);
   441	
 > 442			if (engine->hangcheck.stalled ||
   443			    engine->hangcheck.watchdog == intel_engine_get_seqno(engine)) {
   444				hung |= intel_engine_flag(engine);
   445				if (hc.action != ENGINE_DEAD)
   446					stuck |= intel_engine_flag(engine);
   447			}
   448	
   449			if (engine->hangcheck.wedged)
   450				wedged |= intel_engine_flag(engine);
   451		}
   452	
   453		if (wedged) {
   454			dev_err(dev_priv->drm.dev,
   455				"GPU recovery timed out,"
   456				" cancelling all in-flight rendering.\n");
   457			GEM_TRACE_DUMP();
   458			i915_gem_set_wedged(dev_priv);
   459		}
   460	
   461		if (hung)
   462			hangcheck_declare_hang(dev_priv, hung, stuck);
   463	
   464		/* Reset timer in case GPU hangs without another request being added */
   465		i915_queue_hangcheck(dev_priv);
   466	}
   467	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
-------------- next part --------------
A non-text attachment was scrubbed...
Name: .config.gz
Type: application/gzip
Size: 29663 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gfx/attachments/20190105/8f07b13f/attachment-0001.gz>


More information about the Intel-gfx mailing list