[Intel-xe] [PATCH 4/8] RFC drm/xe/guc: Add PMU counter for total active ticks

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Nov 28 10:52:22 UTC 2023


On 28/11/2023 10:06, Aravind Iddamsetty wrote:
> 
> On 11/27/23 19:41, Riana Tauro wrote:
>> GuC provides engine busyness ticks as a 64-bit counter which counts
>> clock ticks. These counters are maintained in a
>> shared memory buffer and internally updated on a continuous basis.
>>
>> GuC also periodically provides the total active ticks that the GT has been
>> active for. This counter is exposed to the user so that busyness can
>> be calculated as a percentage using
>>
>> busyness % = (engine active ticks/total active ticks) * 100.
> The total active ticks are counted from epoch, but PMU values are relative to the instant the event is opened,
> so I'm wondering whether total active ticks could be less than engine active ticks, in which case the
> % would be greater than 100.

If you are saying engine and total reset internally at different points 
that might be a problem.

In general I suggest very extensive IGT coverage, akin to perf_pmu for
i915, especially of the edge cases like GPU/engine resets,
suspend/resume, driver unbind/unload, CPU hotplug, RC6 etc. This is
because the perf API is a bit tricky and certain things can surprise.

If the test coverage is on par with i915 PMU then I think you can be 
reasonably confident things work reliably and as expected.

> Rather, why don't we just expose engine busyness in ns like we did in i915?
> 
> @Tvrtko, any thoughts please?

AFAIR ticks are thought to work better from a VF but I don't remember 
the details.

Or outside the VF, do total ticks run when in RC6, and if they don't, will
percentage calculations from the host be meaningful?

Regards,

Tvrtko


> Thanks,
> Aravind.
>>
>> This patch provides a pmu counter for total active ticks.
>>
>> This is listed by perf tool as
>>
>> sudo ./perf list
>> 	  xe_0000_03_00.0/total-active-ticks-gt0/            [Kernel PMU event]
>>
>> and can be read using
>>
>> sudo ./perf stat -e xe_0000_03_00.0/total-active-ticks-gt0/ -I 1000
>>          time 	    counts  unit 	events
>>      1.001332764    58942964    xe_0000_03_00.0/total-active-ticks-gt0/
>>      2.011421147	   21191869    xe_0000_03_00.0/total-active-ticks-gt0/
>>      3.013223865	   19269012    xe_0000_03_00.0/total-active-ticks-gt0/
>>
>> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_gt.c                  | 11 +++
>>   drivers/gpu/drm/xe/xe_gt.h                  |  1 +
>>   drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 75 +++++++++++++++++----
>>   drivers/gpu/drm/xe/xe_guc_engine_busyness.h |  1 +
>>   drivers/gpu/drm/xe/xe_pmu.c                 |  7 ++
>>   include/uapi/drm/xe_drm.h                   |  1 +
>>   6 files changed, 83 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index f06c74a7c6d0..2379462685f2 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -765,6 +765,17 @@ struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
>>   	return NULL;
>>   }
>>   
>> +/**
>> + * xe_gt_total_active_ticks - Return total active ticks
>> + * @gt: GT structure
>> + *
>> + * Returns total active ticks that the GT was active for.
>> + */
>> +u64 xe_gt_total_active_ticks(struct xe_gt *gt)
>> +{
>> +	return xe_guc_engine_busyness_active_ticks(&gt->uc.guc);
>> +}
>> +
>>   /**
>>    * xe_gt_engine_busy_ticks - Return current accumulated engine busyness ticks
>>    * @gt: GT structure
>> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
>> index e23a6ceff863..023cb7c0c32d 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.h
>> +++ b/drivers/gpu/drm/xe/xe_gt.h
>> @@ -41,6 +41,7 @@ void xe_gt_reset_async(struct xe_gt *gt);
>>   void xe_gt_sanitize(struct xe_gt *gt);
>>   
>>   u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe);
>> +u64 xe_gt_total_active_ticks(struct xe_gt *gt);
>>   
>>   /**
>>    * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
>> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>> index 611bafd14d1a..dcf468ee30ea 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>> @@ -19,7 +19,16 @@
>>    * timer internal to GuC. The update rate is guaranteed to be at least 2Hz (but with
>>    * a caveat that is not real time, best effort only).
>>    *
>> + * In addition to the engine busyness ticks, there is also a total time count which
>> + * is a free running GT timestamp counter.
>> + *
>> + * Note that counters should be used as ratios of each other for calculating a
>> + * percentage.
>> + *
>>    * engine busyness ticks (ticks_engine) : clock ticks for which engine was active
>> + * total active ticks (ticks_gt)	: total clock ticks
>> + *
>> + * engine busyness % = (ticks_engine / ticks_gt) * 100
>>    */
>>   
>>   static bool guc_engine_busyness_supported(struct xe_guc *guc)
>> @@ -41,48 +50,69 @@ static bool guc_engine_busyness_supported(struct xe_guc *guc)
>>   
>>   static void guc_engine_busyness_usage_map(struct xe_guc *guc,
>>   					  struct xe_hw_engine *hwe,
>> -					  struct iosys_map *engine_map)
>> +					  struct iosys_map *engine_map,
>> +					  struct iosys_map *global_map)
>>   {
>>   	struct iosys_map *map;
>>   	size_t offset;
>>   	u32 instance;
>>   	u8 guc_class;
>>   
>> -	guc_class = xe_engine_class_to_guc_class(hwe->class);
>> -	instance = hwe->logical_instance;
>> +	if (hwe) {
>> +		guc_class = xe_engine_class_to_guc_class(hwe->class);
>> +		instance = hwe->logical_instance;
>> +	}
>>   
>>   	map = &guc->busy.bo->vmap;
>>   
>> -	offset = offsetof(struct guc_engine_observation_data,
>> -			  engine_data[guc_class][instance]);
>> +	if (hwe) {
>> +		offset = offsetof(struct guc_engine_observation_data,
>> +				  engine_data[guc_class][instance]);
>>   
>> -	*engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
>> +		*engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
>> +	}
>> +
>> +	*global_map = IOSYS_MAP_INIT_OFFSET(map, 0);
>>   }
>>   
>>   static void guc_engine_busyness_get_usage(struct xe_guc *guc,
>>   					  struct xe_hw_engine *hwe,
>> -					  u64 *_ticks_engine)
>> +					  u64 *_ticks_engine,
>> +					  u64 *_ticks_gt)
>>   {
>> -	struct iosys_map engine_map;
>> -	u64 ticks_engine = 0;
>> +	struct iosys_map engine_map, global_map;
>> +	u64 ticks_engine = 0, ticks_gt = 0;
>>   	int i = 0;
>>   
>> -	guc_engine_busyness_usage_map(guc, hwe, &engine_map);
>> +	guc_engine_busyness_usage_map(guc, hwe, &engine_map, &global_map);
>>   
>>   #define read_engine_usage(map_, field_) \
>>   	iosys_map_rd_field(map_, 0, struct guc_engine_data, field_)
>>   
>> +#define read_global_field(map_, field_) \
>> +	iosys_map_rd_field(map_, 0, struct guc_engine_observation_data, field_)
>> +
>>   	do {
>> -		ticks_engine = read_engine_usage(&engine_map, total_execution_ticks);
>> +		if (hwe)
>> +			ticks_engine = read_engine_usage(&engine_map, total_execution_ticks);
>> +
>> +		ticks_gt = read_global_field(&global_map, gt_timestamp);
>>   
>> -		if (read_engine_usage(&engine_map, total_execution_ticks) == ticks_engine)
>> +		if (hwe && read_engine_usage(&engine_map, total_execution_ticks) != ticks_engine)
>> +			continue;
>> +
>> +		if (read_global_field(&global_map, gt_timestamp) == ticks_gt)
>>   			break;
>>   	} while (++i < 6);
>>   
>>   #undef read_engine_usage
>> +#undef read_global_field
>>   
>>   	if (_ticks_engine)
>>   		*_ticks_engine = ticks_engine;
>> +
>> +	if (_ticks_gt)
>> +		*_ticks_gt = ticks_gt;
>>   }
>>   
>>   static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
>> @@ -112,6 +142,25 @@ static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
>>   	xe_bo_unpin_map_no_vm(guc->busy.bo);
>>   }
>>   
>> +/*
>> + * xe_guc_engine_busyness_active_ticks - Gets the total active ticks
>> + * @guc: The GuC object
>> + *
>> + * Returns total active ticks that the GT has been running for.
>> + */
>> +u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc)
>> +{
>> +	u64 ticks_gt;
>> +
>> +	/* Engine busyness supported only on GuC >= 70.11.1 */
>> +	if (!guc_engine_busyness_supported(guc))
>> +		return 0;
>> +
>> +	guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
>> +
>> +	return ticks_gt;
>> +}
>> +
>>   /*
>>    * xe_guc_engine_busyness_ticks - Gets current accumulated
>>    *				  engine busyness ticks
>> @@ -128,7 +177,7 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
>>   	if (!guc_engine_busyness_supported(guc))
>>   		return 0;
>>   
>> -	guc_engine_busyness_get_usage(guc, hwe, &ticks_engine);
>> +	guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
>>   
>>   	return ticks_engine;
>>   }
>> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>> index d70f06209896..57325910ebc4 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>> @@ -12,6 +12,7 @@ struct xe_hw_engine;
>>   struct xe_guc;
>>   
>>   int xe_guc_engine_busyness_init(struct xe_guc *guc);
>> +u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
>>   u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
>>   
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>> index 6dd5c97129f2..0beb660689da 100644
>> --- a/drivers/gpu/drm/xe/xe_pmu.c
>> +++ b/drivers/gpu/drm/xe/xe_pmu.c
>> @@ -9,6 +9,7 @@
>>   
>>   #include "regs/xe_gt_regs.h"
>>   #include "xe_device.h"
>> +#include "xe_gt.h"
>>   #include "xe_gt_clock.h"
>>   #include "xe_mmio.h"
>>   
>> @@ -124,6 +125,8 @@ config_status(struct xe_device *xe, u64 config)
>>   		if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
>>   			return -ENOENT;
>>   		break;
>> +	case DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
>> +		break;
>>   	default:
>>   		return -ENOENT;
>>   	}
>> @@ -186,6 +189,9 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
>>   	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
>>   		val = engine_group_busyness_read(gt, config);
>>   		break;
>> +	case DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
>> +		val = xe_gt_total_active_ticks(gt);
>> +		break;
>>   	default:
>>   		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
>>   	}
>> @@ -357,6 +363,7 @@ create_event_attributes(struct xe_pmu *pmu)
>>   		__event(1, "copy-group-busy", "ns"),
>>   		__event(2, "media-group-busy", "ns"),
>>   		__event(3, "any-engine-group-busy", "ns"),
>> +		__event(4, "total-active-ticks", NULL),
>>   	};
>>   
>>   	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>> index 88f3aca02b08..6f38f836b705 100644
>> --- a/include/uapi/drm/xe_drm.h
>> +++ b/include/uapi/drm/xe_drm.h
>> @@ -1016,6 +1016,7 @@ struct drm_xe_wait_user_fence {
>>   #define DRM_XE_PMU_COPY_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 1)
>>   #define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 2)
>>   #define DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___DRM_XE_PMU_OTHER(gt, 3)
>> +#define DRM_XE_PMU_TOTAL_ACTIVE_TICKS(gt)	___DRM_XE_PMU_OTHER(gt, 4)
>>   
>>   #if defined(__cplusplus)
>>   }


More information about the Intel-xe mailing list