[PATCH v4 3/3] drm/xe: Cache hwe used for timestamp

Fri Nov 8 18:10:31 UTC 2024

On Fri, Nov 08, 2024 at 03:26:35PM +0000, Cavitt, Jonathan wrote:
>-----Original Message-----
>From: De Marchi, Lucas <lucas.demarchi at intel.com>
>Sent: Thursday, November 7, 2024 9:33 PM
>To: intel-xe at lists.freedesktop.org
>Cc: Cavitt, Jonathan <jonathan.cavitt at intel.com>; Nerlige Ramappa, Umesh <umesh.nerlige.ramappa at intel.com>; Brost, Matthew <matthew.brost at intel.com>; De Marchi, Lucas <lucas.demarchi at intel.com>
>Subject: [PATCH v4 3/3] drm/xe: Cache hwe used for timestamp
>>
>> Cache the hwe used to read the timestamp so it doesn't need to be done
>> every time it's read.
>>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>
>I have some notes, but nothing worth blocking on.
>
>Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>

still not fully convinced about doing this when there's almost 0
performance impact. I will keep this out for now and measure things
again later with more time.

>
>> ---
>>  drivers/gpu/drm/xe/xe_device.c       |  2 ++
>>  drivers/gpu/drm/xe/xe_device_types.h |  5 +++++
>>  drivers/gpu/drm/xe/xe_drm_client.c   | 10 ++++++++++
>>  drivers/gpu/drm/xe/xe_drm_client.h   |  2 ++
>>  4 files changed, 19 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>> index 0e2dd691bdae9..9668a60e3aeae 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -759,6 +759,8 @@ int xe_device_probe(struct xe_device *xe)
>>  	for_each_gt(gt, xe, id)
>>  		xe_gt_sanitize_freq(gt);
>>
>> +	xe_drm_client_init(xe);
>> +
>>  	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
>>
>>  err_fini_display:
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>> index fffbb7d1c40b4..f13b738ce77a8 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -34,6 +34,7 @@
>>
>>  struct xe_ggtt;
>>  struct xe_pat_ops;
>> +struct xe_hw_engine;
>>
>>  #define XE_BO_INVALID_OFFSET	LONG_MAX
>>
>> @@ -509,6 +510,10 @@ struct xe_device {
>>  		int mode;
>>  	} wedged;
>>
>> +	struct {
>> +		struct xe_hw_engine *hwe;
>> +	} drm_client;
>> +
>>  #ifdef TEST_VM_OPS_ERROR
>>  	/**
>>  	 * @vm_inject_error_position: inject errors at different places in VM
>> diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
>> index 298a587da7f17..125ee18ab9f1f 100644
>> --- a/drivers/gpu/drm/xe/xe_drm_client.c
>> +++ b/drivers/gpu/drm/xe/xe_drm_client.c
>> @@ -274,6 +274,9 @@ static struct xe_hw_engine *any_engine(struct xe_device *xe)
>>  	struct xe_gt *gt;
>>  	unsigned long gt_id;
>>
>> +	if (xe->drm_client.hwe)
>> +		return xe->drm_client.hwe;
>> +
>>  	for_each_gt(gt, xe, gt_id) {
>>  		struct xe_hw_engine *hwe = xe_gt_any_hw_engine(gt);
>>
>> @@ -394,4 +397,11 @@ void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
>>  	show_meminfo(p, file);
>>  	show_run_ticks(p, file);
>>  }
>> +
>> +void xe_drm_client_init(struct xe_device *xe)
>> +{
>> +	xe->drm_client.hwe = NULL;
>> +	xe->drm_client.hwe = any_engine(xe);
>
>Two thoughts:
>
>1.
>I wonder if it would make sense for us to set xe->drm_client.hwe in any_engine, instead
>of as a separate assignment?  So, here we would call:
>
>"""
>void xe_drm_client_init(struct xe_device *xe)
>{
>	xe->drm_client.hwe = NULL;
>	any_engine(xe);

yes, that's possible but surprising for a function that in theory should
just lookup.

>}
>"""
>
>Then, in any_engine:
>
>"""
>static struct xe_hw_engine *any_engine(struct xe_device *xe)
>{
>	struct xe_gt *gt;
>	unsigned long gt_id;
>
>	if (xe->drm_client.hwe)
>		return xe->drm_client.hwe;
>
>	for_each_gt(gt, xe, gt_id) {
>		struct xe_hw_engine *hwe = xe_gt_any_hw_engine(gt);
>
>		if (hwe) {
>			xe->drm_client.hwe = hwe;
>			return hwe;
>		}
>	}
>
>	return NULL;
>}
>"""
>
>I can see why we might want to avoid doing something like this, but it's just a thought.

As said above, I will keep this out for now and think about improvements
here later. I've used both patterns in the past. Not decided here.

Lucas De Marchi

>
>2.
>I understand why drm_client.hwe needs to be assigned twice here (to avoid unassigned
>data comparison in any_engine call), but it still looks a bit strange at a glance.  It might
>almost make sense to restructure the xe->drm_client structure to have a separate "found"
>Boolean that can be set here:
>
>"""
>	struct {
>		bool found;
>		struct xe_hw_engine *hwe;
>	} drm_client;
>"""
>
>Then, in any_engine:
>
>"""
>	unsigned long gt_id;
>
>	if (xe->drm_client.found)
>		return xe->drm_client.hwe;
>
>...
>"""
>
>And finally, in xe_drm_client_init:
>
>"""
>void xe_drm_client_init(struct xe_device *xe)
>{
>	xe->drm_client.found = false;
>...
>"""
>
>-Jonathan Cavitt
>
>> +}
>> +
>>  #endif
>> diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
>> index a9649aa360110..5609fbd682650 100644
>> --- a/drivers/gpu/drm/xe/xe_drm_client.h
>> +++ b/drivers/gpu/drm/xe/xe_drm_client.h
>> @@ -16,6 +16,7 @@
>>  struct drm_file;
>>  struct drm_printer;
>>  struct xe_bo;
>> +struct xe_device;
>>
>>  struct xe_drm_client {
>>  	struct kref kref;
>> @@ -48,6 +49,7 @@ static inline void xe_drm_client_put(struct xe_drm_client *client)
>>  	kref_put(&client->kref, __xe_drm_client_free);
>>  }
>>
>> +void xe_drm_client_init(struct xe_device *xe);
>>  struct xe_drm_client *xe_drm_client_alloc(void);
>>  static inline struct xe_drm_client *
>>  xe_drm_client_get(struct xe_drm_client *client);
>> --
>> 2.47.0
>>
>>