[Intel-gfx] [Patch v2] Add uAPI to query microcontroller fw version

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Oct 4 08:20:10 UTC 2023


On 04/10/2023 04:40, Vivaik Balasubrawmanian wrote:
> Due to a bug in GuC firmware, Mesa can't enable by default the usage of
> async compute engines feature in DG2 and newer. A new GuC firmware fixed the issue but
> until now there was no way for Mesa to know if KMD was running with the fixed GuC version or not,
> so this uAPI is required.
> 
> More context on the issue:
> Vulkan allows applications to create types of queues: graphics, compute and copy.
> Today Intel Vulkan driver uses Render engine to implement all those 3 queues types.
> 
> There is a set of operations that a queue type is required to implement,
> DG2 compute engine have almost all the operations required by compute queue but still lacks some.
> So the solution is to send those operations not supported by compute engine to render engine
> and do some synchronization around it. But doing so causes the GuC scheduler to get stuck
> around the synchronization, until KMD resets the engine and ban the application context.
> This issue was root caused to a GuC firmware issue and was fixed in newer version.
> 
> So Mesa can't enable the "async compute" without knowing for sure that KMD is running
> with a GuC version that has the scheduler fix. Same will happen when Mesa start to use
> copy engine.
> 
> This uAPI  may be expanded in future to query other firmware versions too.
> 
> More information:
> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23661
> Mesa usage: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25233
> 
> v2:
> - incorporated feedback from Tvrtko Ursulin:
>    - updated patch description to clarify the use case that identified
>      this issue.
>    - updated query_uc_fw_version() to use copy_query_item() helper.
>    - updated the implemented GuC version query to return Submission
>      version.
> 
> Cc: John Harrison <John.C.Harrison at Intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: José Roberto de Souza <jose.souza at intel.com>
> 
> Signed-off-by: Vivaik Balasubrawmanian <vivaik.balasubrawmanian at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_query.c | 42 +++++++++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h       | 32 +++++++++++++++++++++++
>   2 files changed, 74 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 00871ef99792..3e3563ab62b7 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -551,6 +551,47 @@ static int query_hwconfig_blob(struct drm_i915_private *i915,
>   	return hwconfig->size;
>   }
>   
> +static int
> +query_uc_fw_version(struct drm_i915_private *i915, struct drm_i915_query_item *query)
> +{
> +	struct drm_i915_query_uc_fw_version __user *query_ptr = u64_to_user_ptr(query->data_ptr);
> +	size_t size = sizeof(struct drm_i915_query_uc_fw_version);
> +	struct drm_i915_query_uc_fw_version resp;
> +	int ret;
> +
> +	ret = copy_query_item(&resp, size, size, query);
> +	if (ret == size) {
> +		query->length = size;
> +		return 0;
> +	} else if (ret != 0)
> +		return ret;

First block is not needed, see how other queries do it:

	ret = copy_query_item(&query, sizeof(query), total_length, query_item);
	if (ret != 0)
		return ret;

Top level dispatcher will update query->length:

		/* Only write the length back to userspace if they differ. */
		if (ret != item.length && put_user(ret, &user_item_ptr->length))
			return -EFAULT;

> +
> +	if (resp.pad || resp.pad2 || resp.reserved) {
> +		drm_dbg(&i915->drm,
> +			"Invalid input fw version query structure parameters received");
> +		return -EINVAL;
> +	}
> +
> +	switch (resp.uc_type) {
> +	case I915_QUERY_UC_TYPE_GUC_SUBMISSION: {
> +		struct intel_guc *guc = &i915->gt0.uc.guc;
> +
> +		resp.major_ver = guc->submission_version.major;
> +		resp.minor_ver = guc->submission_version.minor;
> +		resp.patch_ver = guc->submission_version.patch;
> +		resp.branch_ver = 0;
> +		break;
> +	}
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (copy_to_user(query_ptr, &resp, size))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>   static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>   					struct drm_i915_query_item *query_item) = {
>   	query_topology_info,
> @@ -559,6 +600,7 @@ static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>   	query_memregion_info,
>   	query_hwconfig_blob,
>   	query_geometry_subslices,
> +	query_uc_fw_version,
>   };
>   
>   int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7000e5910a1d..6f9d52263c77 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3013,6 +3013,7 @@ struct drm_i915_query_item {
>   	 *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
>   	 *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
>   	 *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
> +	 *  - %DRM_I915_QUERY_UC_FW_VERSION (see struct drm_i915_query_uc_fw_version)
>   	 */
>   	__u64 query_id;
>   #define DRM_I915_QUERY_TOPOLOGY_INFO		1
> @@ -3021,6 +3022,7 @@ struct drm_i915_query_item {
>   #define DRM_I915_QUERY_MEMORY_REGIONS		4
>   #define DRM_I915_QUERY_HWCONFIG_BLOB		5
>   #define DRM_I915_QUERY_GEOMETRY_SUBSLICES	6
> +#define DRM_I915_QUERY_UC_FW_VERSION        7
>   /* Must be kept compact -- no holes and well documented */
>   
>   	/**
> @@ -3213,6 +3215,36 @@ struct drm_i915_query_topology_info {
>   	__u8 data[];
>   };
>   
> +/**
> +* struct drm_i915_query_uc_fw_version - query a micro-controller firmware version
> +*
> +* Given a uc_type this will return the major, minor, patch and branch version
> +* of the micro-controller firmware.
> +*/
> +struct drm_i915_query_uc_fw_version {
> +	/** @uc: The micro-controller type to query firmware version */
> +#define I915_QUERY_UC_TYPE_GUC_SUBMISSION 0
> +	__u16 uc_type;
> +
> +	/** @pad: MBZ */
> +	__u16 pad;

Or just make uc_type u32 and avoid the need for this padding?

> +
> +	/* @major_ver: major uc fw version */
> +	__u32 major_ver;
> +	/* @minor_ver: minor uc fw version */
> +	__u32 minor_ver;
> +	/* @patch_ver: patch uc fw version */
> +	__u32 patch_ver;
> +	/* @branch_ver: branch uc fw version */
> +	__u32 branch_ver;
> +
> +	/** @pad2: MBZ */
> +	__u32 pad2;
> +
> +	/** @reserved: Reserved */
> +	__u64 reserved;

Alternative could be to make the trailing padding like u32 reserved[$some_odd_number] which maybe keeps things visually tidier. No strong opinion from me.

Regards,

Tvrtko

> +};
> +
>   /**
>    * DOC: Engine Discovery uAPI
>    *
> 
> base-commit: e2d29b46ca6d480bc3bc328a7775c3028bc1e5c8


More information about the Intel-gfx mailing list