[Intel-gfx] [RFC 2/7] drm/i915/guc: Update GuC ADS size for error capture lists

Teres Alexis, Alan Previn alan.previn.teres.alexis at intel.com
Wed Nov 24 17:34:31 UTC 2021


Thanks Michal for the thorough review of the code (and the other patches). I will fix them all.

On the register-to-string helper function,
i'll have to think it through because i do want to keep future development
maintenance work when adding new registers simple (in the sense that
adding a single line into the table will be all thats needed).

Unless you are suggesting keeping a global i915-wide list somewhere?
which might be a bit of an overhead when searching through an offset list
to find the mmio being requested for string return - unless i keep a sorted tree
initialized with registers ordered by address, but would not work well for
different registers that share addresses on diff gen's).


...alan


On Tue, 2021-11-23 at 22:46 +0100, Michal Wajdeczko wrote:
> Hi,
> 
> just few random nits below
> 
> -Michal
> 
> 
> On 23.11.2021 00:03, Alan Previn wrote:
> > Update GuC ADS size allocation to include space for
> > the lists of error state capture register descriptors.
> > 
> > Also, populate the lists of registers we want GuC to report back to
> > Host on engine reset events. This list should include global,
> > engine-class and engine-instance registers for every engine-class
> > type on the current hardware.
> > 
> > NOTE: Start with a fake table of register lists to layout the
> > framework before adding real registers in subsequent patch.
> > 
> > Signed-off-by: Alan Previn <alan.previn.teres.alexis at intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile                 |   1 +
> >  drivers/gpu/drm/i915/gt/uc/intel_guc.c        |  10 +-
> >  drivers/gpu/drm/i915/gt/uc/intel_guc.h        |   5 +
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    | 176 ++++++++++++-
> >  .../gpu/drm/i915/gt/uc/intel_guc_capture.c    | 232 ++++++++++++++++++
> >  .../gpu/drm/i915/gt/uc/intel_guc_capture.h    |  47 ++++
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  19 +-
> >  7 files changed, 476 insertions(+), 14 deletions(-)
> >  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> >  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 074d6b8edd23..e3c4d5cea4c3 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -190,6 +190,7 @@ i915-y += gt/uc/intel_uc.o \
> >  	  gt/uc/intel_guc_rc.o \
> >  	  gt/uc/intel_guc_slpc.o \
> >  	  gt/uc/intel_guc_submission.o \
> > +	  gt/uc/intel_guc_capture.o \
> 
> use alphabetical order
> 
> >  	  gt/uc/intel_huc.o \
> >  	  gt/uc/intel_huc_debugfs.o \
> >  	  gt/uc/intel_huc_fw.o
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > index 5cf9ebd2ee55..458f0d248a5a 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > @@ -335,9 +335,14 @@ int intel_guc_init(struct intel_guc *guc)
> >  	if (ret)
> >  		goto err_fw;
> >  
> > -	ret = intel_guc_ads_create(guc);
> > +	ret = intel_guc_capture_init(guc);
> >  	if (ret)
> >  		goto err_log;
> > +
> > +	ret = intel_guc_ads_create(guc);
> > +	if (ret)
> > +		goto err_capture;
> > +
> >  	GEM_BUG_ON(!guc->ads_vma);
> >  
> >  	ret = intel_guc_ct_init(&guc->ct);
> > @@ -376,6 +381,8 @@ int intel_guc_init(struct intel_guc *guc)
> >  	intel_guc_ct_fini(&guc->ct);
> >  err_ads:
> >  	intel_guc_ads_destroy(guc);
> > +err_capture:
> > +	intel_guc_capture_destroy(guc);
> >  err_log:
> >  	intel_guc_log_destroy(&guc->log);
> >  err_fw:
> > @@ -403,6 +410,7 @@ void intel_guc_fini(struct intel_guc *guc)
> >  	intel_guc_ct_fini(&guc->ct);
> >  
> >  	intel_guc_ads_destroy(guc);
> > +	intel_guc_capture_destroy(guc);
> >  	intel_guc_log_destroy(&guc->log);
> >  	intel_uc_fw_fini(&guc->fw);
> >  }
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index 9de99772f916..d136c69abe12 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -16,6 +16,7 @@
> >  #include "intel_guc_log.h"
> >  #include "intel_guc_reg.h"
> >  #include "intel_guc_slpc_types.h"
> > +#include "intel_guc_capture.h"
> 
> use alphabetical order
> 
> >  #include "intel_uc_fw.h"
> >  #include "i915_utils.h"
> >  #include "i915_vma.h"
> > @@ -37,6 +38,8 @@ struct intel_guc {
> >  	struct intel_guc_ct ct;
> >  	/** @slpc: sub-structure containing SLPC related data and objects */
> >  	struct intel_guc_slpc slpc;
> > +	/** @capture: the error-state-capture module's data and objects */
> > +	struct intel_guc_state_capture capture;
> >  
> >  	/** @sched_engine: Global engine used to submit requests to GuC */
> >  	struct i915_sched_engine *sched_engine;
> > @@ -138,6 +141,8 @@ struct intel_guc {
> >  	u32 ads_regset_size;
> >  	/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
> >  	u32 ads_golden_ctxt_size;
> > +	/** @ads_capture_size: size of register lists in the ADS used for error capture */
> > +	u32 ads_capture_size;
> >  	/** @ads_engine_usage_size: size of engine usage in the ADS */
> >  	u32 ads_engine_usage_size;
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > index 6c81ddd303d3..2780c0fadd01 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > @@ -10,6 +10,7 @@
> >  #include "gt/shmem_utils.h"
> >  #include "intel_guc_ads.h"
> >  #include "intel_guc_fwif.h"
> > +#include "intel_guc_capture.h"
> 
> wrong order
> 
> >  #include "intel_uc.h"
> >  #include "i915_drv.h"
> >  
> > @@ -71,8 +72,7 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
> >  
> >  static u32 guc_ads_capture_size(struct intel_guc *guc)
> >  {
> > -	/* Basic support to init ADS without a proper GuC error capture list */
> > -	return PAGE_ALIGN(PAGE_SIZE);
> > +	return PAGE_ALIGN(guc->ads_capture_size);
> >  }
> >  
> >  static u32 guc_ads_private_data_size(struct intel_guc *guc)
> > @@ -519,24 +519,170 @@ static void guc_init_golden_context(struct intel_guc *guc)
> >  	GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
> >  }
> >  
> > -static void guc_capture_prep_lists(struct intel_guc *guc, struct __guc_ads_blob *blob)
> > +static int
> > +guc_fill_reglist(struct intel_guc *guc, struct __guc_ads_blob *blob, int vf, bool enabled,
> > +		 int classid, int type, char *typename, u16 *p_numregs, int newnum, u8 **p_virt_ptr,
> > +		 u32 *p_blobptr_to_ggtt, u32 *p_ggtt, u32 null_ggtt)
> 
> hmm, this does not look good - do we really need all these params ?
> 
> >  {
> > -	int i, j;
> > -	u32 addr_ggtt, offset;
> > +	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> > +	struct guc_debug_capture_list *listnode;
> > +	int size = 0;
> >  
> > -	offset = guc_ads_capture_offset(guc);
> > -	addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
> > +	if (blob && *p_numregs != newnum) {
> > +		if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
> > +			drm_warn(&i915->drm, "Guc-Cap VF%d-%s num-reg mismatch was=%d now=%d!\n",
> > +				 vf, typename, *p_numregs, newnum);
> > +		else
> > +			drm_warn(&i915->drm, "Guc-Cap VF%d-Class-%d-%s num-reg mismatch was=%d now=%d!\n",
> > +				 vf, classid, typename, *p_numregs, newnum);
> > +	}
> > +	/*
> > +	 * For enabled capture lists, we not only need to call capture module to help
> > +	 * populate the list-descriptor into the correct ads capture structures, but
> > +	 * we also need to increment the virtual pointers and ggtt offsets so that
> > +	 * caller has the subsequent gfx memory location.
> > +	 */
> > +	*p_numregs = newnum;
> > +	size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
> > +			  (newnum * sizeof(struct guc_mmio_reg)));
> > +	/* if caller hasn't allocated ADS blob, return size and counts, we're done */
> > +	if (!blob)
> > +		return size;
> > +	if (blob) {
> 
> redundant
> 
> > +		/* if caller allocated ADS blob, populate the capture register descriptors */
> > +		if (!newnum) {
> > +			*p_blobptr_to_ggtt = null_ggtt;
> > +		} else {
> > +			/* get ptr and populate header info: */
> > +			*p_blobptr_to_ggtt = *p_ggtt;
> > +			listnode = (struct guc_debug_capture_list *)*p_virt_ptr;
> > +			*p_ggtt += sizeof(struct guc_debug_capture_list);
> > +			*p_virt_ptr += sizeof(struct guc_debug_capture_list);
> > +			listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, *p_numregs);
> > +
> > +			/* get ptr and populate register descriptor list: */
> > +			intel_guc_capture_list_init(guc, vf, type, classid,
> > +						    (struct guc_mmio_reg *)*p_virt_ptr,
> > +						    *p_numregs);
> > +
> > +			/* increment ptrs for that header: */
> > +			*p_ggtt += size - sizeof(struct guc_debug_capture_list);
> > +			*p_virt_ptr += size - sizeof(struct guc_debug_capture_list);
> > +		}
> > +	}
> > +
> > +	return size;
> > +}
> > +
> > +static int guc_capture_prep_lists(struct intel_guc *guc, struct __guc_ads_blob *blob)
> > +{
> > +	struct intel_gt *gt = guc_to_gt(guc);
> > +	int i, j, size;
> > +	u32 ggtt, null_ggtt, offset, alloc_size = 0;
> > +	struct guc_gt_system_info *info, local_info;
> > +	struct guc_debug_capture_list *listnode;
> > +	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> > +	struct intel_guc_state_capture *gc = &guc->capture;
> > +	u16 tmp = 0;
> > +	u8 *ptr = NULL;
> > +
> > +	if (blob) {
> > +		offset = guc_ads_capture_offset(guc);
> > +		ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
> > +		ptr = ((u8 *)blob) + offset;
> > +		info = &blob->system_info;
> > +	} else {
> > +		memset(&local_info, 0, sizeof(local_info));
> > +		info = &local_info;
> > +		fill_engine_enable_masks(gt, info);
> > +	}
> > +
> > +	/* first, set aside the first page for a capture_list with zero descriptors */
> > +	alloc_size = PAGE_SIZE;
> > +	if (blob) {
> > +		listnode = (struct guc_debug_capture_list *)ptr;
> > +		listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, 0);
> > +		null_ggtt = ggtt;
> > +		ggtt += PAGE_SIZE;
> > +		ptr +=  PAGE_SIZE;
> > +	}
> >  
> > -	/* FIXME: Populate a proper capture list */
> > +#define COUNT_REGS intel_guc_capture_list_count
> > +#define FILL_REGS guc_fill_reglist
> > +#define TYPE_CLASS GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS
> > +#define TYPE_INSTANCE GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE
> >  
> >  	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
> >  		for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
> > -			blob->ads.capture_instance[i][j] = addr_ggtt;
> > -			blob->ads.capture_class[i][j] = addr_ggtt;
> > +			if (!info->engine_enabled_masks[j]) {
> > +				if (gc->num_class_regs[i][j])
> > +					drm_warn(&i915->drm, "GuC-Cap VF%d-class-%d "
> > +						 "class regs valid mismatch was=%d now=%d!\n",
> > +						 i, j, gc->num_class_regs[i][j], tmp);
> > +				if (gc->num_instance_regs[i][j])
> > +					drm_warn(&i915->drm, "GuC-Cap VF%d-class-%d "
> > +						 "inst regs valid mismatch was=%d now=%d!\n",
> > +						 i, j, gc->num_instance_regs[i][j], tmp);
> > +				gc->num_class_regs[i][j] = 0;
> > +				gc->num_instance_regs[i][j] = 0;
> > +				if (blob) {
> > +					blob->ads.capture_class[i][j] = null_ggtt;
> > +					blob->ads.capture_instance[i][j] = null_ggtt;
> > +				}
> > +			} else {
> > +				if (!COUNT_REGS(guc, i, TYPE_CLASS,
> > +						guc_class_to_engine_class(j), &tmp)) {
> > +					size = FILL_REGS(guc, blob, i, true, j, TYPE_CLASS,
> > +							 "class", &gc->num_class_regs[i][j],
> > +							 tmp, &ptr,
> > +							 &blob->ads.capture_class[i][j],
> > +							 &ggtt, null_ggtt);
> > +					gc->class_list_size += size;
> > +					alloc_size += size;
> > +				} else {
> > +					gc->num_class_regs[i][j] = 0;
> > +					if (blob)
> > +						blob->ads.capture_class[i][j] = null_ggtt;
> > +				}
> > +				if (!COUNT_REGS(guc, i, TYPE_INSTANCE,
> > +						guc_class_to_engine_class(j), &tmp)) {
> > +					size = FILL_REGS(guc, blob, i, true, j, TYPE_INSTANCE,
> > +							 "instance", &gc->num_instance_regs[i][j],
> > +							 tmp, &ptr,
> > +							 &blob->ads.capture_instance[i][j],
> > +							 &ggtt, null_ggtt);
> > +					gc->instance_list_size += size;
> > +					alloc_size += size;
> > +				} else {
> > +					gc->num_instance_regs[i][j] = 0;
> > +					if (blob)
> > +						blob->ads.capture_instance[i][j] = null_ggtt;
> > +				}
> > +			}
> > +		}
> > +		if (!COUNT_REGS(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp)) {
> > +			size = FILL_REGS(guc, blob, i, true, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL,
> > +					 "global", &gc->num_global_regs[i], tmp, &ptr,
> > +					 &blob->ads.capture_global[i], &ggtt, null_ggtt);
> > +			gc->global_list_size += size;
> > +			alloc_size += size;
> > +		} else {
> > +			gc->num_global_regs[i] = 0;
> > +			if (blob)
> > +				blob->ads.capture_global[i] = null_ggtt;
> >  		}
> > -
> > -		blob->ads.capture_global[i] = addr_ggtt;
> >  	}
> > +
> > +#undef COUNT_REGS
> > +#undef FILL_REGS
> > +#undef TYPE_CLASS
> > +#undef TYPE_INSTANCE
> > +
> > +	if (guc->ads_capture_size && guc->ads_capture_size != PAGE_ALIGN(alloc_size))
> > +		drm_warn(&i915->drm, "GuC->ADS->Capture alloc size changed from %d to %d\n",
> > +			 guc->ads_capture_size, PAGE_ALIGN(alloc_size));
> > +
> > +	return PAGE_ALIGN(alloc_size);
> >  }
> >  
> >  static void __guc_ads_init(struct intel_guc *guc)
> > @@ -614,6 +760,12 @@ int intel_guc_ads_create(struct intel_guc *guc)
> >  		return ret;
> >  	guc->ads_golden_ctxt_size = ret;
> >  
> > +	/* Likewise the capture lists: */
> > +	ret = guc_capture_prep_lists(guc, NULL);
> > +	if (ret < 0)
> > +		return ret;
> > +	guc->ads_capture_size = ret;
> > +
> >  	/* Now the total size can be determined: */
> >  	size = guc_ads_blob_size(guc);
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> > new file mode 100644
> > index 000000000000..c741c77b7fc8
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> > @@ -0,0 +1,232 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2021-2021 Intel Corporation
> > + */
> > +
> > +#include <drm/drm_print.h>
> > +
> > +#include "i915_drv.h"
> > +#include "i915_drv.h"
> 
> duplicated include
> 
> > +#include "i915_memcpy.h"
> > +#include "gt/intel_gt.h"
> > +
> > +#include "intel_guc_fwif.h"
> > +#include "intel_guc_capture.h"
> > +
> > +/* Define all device tables of GuC error capture register lists */
> > +
> > +/********************************* Gen12 LP  *********************************/
> 
> didn't we move away from "GEN" naming ?
> 
> > +/************** GLOBAL *************/
> 
> do we really need all these decorations ?
> 
> > +struct __guc_mmio_reg_descr gen12lp_global_regs[] = {
> > +	{SWF_ILK(0),               0,      0, "SWF_ILK0"},
> > +	/* Add additional register list */
> 
> do we need this reminder ?
> 
> > +/********** List of lists **********/
> > +struct __guc_mmio_reg_descr_group gen12lp_lists[] = {
> > +	{
> > +		.list = gen12lp_global_regs,
> > +		.num_regs = (sizeof(gen12lp_global_regs) / sizeof(struct __guc_mmio_reg_descr)),
> 
> ARRAY_SIZE ?
> 
> > +/************ FIXME: Populate tables for other devices in subsequent patch ************/
> > +
> > +static struct __guc_mmio_reg_descr_group *
> > +guc_capture_get_device_reglist(struct drm_i915_private *dev_priv)
> 
> in new code we are using "i915" instead of "dev_priv" and since this
> function has "guc" prefix it shall rather take "guc" as param:
> 
> guc_capture_get_device_reglist(struct intel_guc *guc)
> {
> 	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> 	...
> 
> 
> > +static inline void
> > +warn_with_capture_list_identifier(struct drm_i915_private *i915, char *msg,
> > +				  u32 owner, u32 type, u32 classid)
> > +{
> > +	const char *ownerstr[GUC_CAPTURE_LIST_INDEX_MAX] = {"PF", "VF"};
> > +	const char *typestr[GUC_CAPTURE_LIST_TYPE_MAX - 1] = {"Class", "Instance"};
> > +	const char *classstr[GUC_LAST_ENGINE_CLASS + 1] = {"Render", "Video", "VideoEnhance",
> > +							   "Blitter", "Reserved"};
> 
> better to wrap that into simple small helpers like
> 
> 	const char *stringify_guc_capture_owner(u32 owner) { .. }
> 	const char *stringify_guc_capture_type(u32 type) { .. }
> 	const char *stringify_guc_capture_class(u32 class) { .. }
> 
> > +int intel_guc_capture_list_count(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
> > +				 u16 *num_entries)
> > +{
> > +	struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915;
> 
> s/dev_priv/i915
> redundant ()
> 
> > +	struct __guc_mmio_reg_descr_group *reglists = guc->capture.reglists;
> > +	struct __guc_mmio_reg_descr_group *match;
> > +
> > +	if (!reglists)
> > +		return -ENODEV;
> > +
> > +	match = guc_capture_get_one_list(reglists, owner, type, classid);
> > +	if (match) {
> > +		*num_entries = match->num_regs;
> > +		return 0;
> 
> IIRC early returns are preferred for error cases, not success
> 
> > +int intel_guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
> > +				struct guc_mmio_reg *ptr, u16 num_entries)
> > +{
> > +	u32 j = 0;
> > +	struct drm_i915_private *dev_priv = (guc_to_gt(guc))->i915;
> 
> s/dev_priv/i915
> redundant ()
> 
> > +struct intel_guc;
> > +
> > +struct __guc_mmio_reg_descr {
> > +	i915_reg_t reg;
> > +	u32 flags;
> > +	u32 mask;
> > +	char *regname;
> 
> const char* ?
> 
> but maybe instead of adding reg name to the GuC specific struct we
> should add generic purpose function that will return pretty name of the
> register:
> 
> i915_reg.c:
> 
> const char *i915_reg_to_string(i915_reg_r reg)
> {
> 	...
> }
> 
> >  
> >  /* Capture-types of GuC capture register lists */
> > -enum
> > +enum guc_capture_owner
> >  {
> >  	GUC_CAPTURE_LIST_INDEX_PF = 0,
> >  	GUC_CAPTURE_LIST_INDEX_VF = 1,
> >  	GUC_CAPTURE_LIST_INDEX_MAX = 2,
> 
> s/INDEX/OWNER ?
> 
> >  };
> >  


More information about the Intel-gfx mailing list