[RFC 10/14] drm/xe/pxp: add a query for PXP status
Souza, Jose
jose.souza at intel.com
Mon Jul 15 20:16:37 UTC 2024
On Mon, 2024-07-15 at 13:06 -0700, Daniele Ceraolo Spurio wrote:
>
> On 7/15/2024 12:52 PM, Souza, Jose wrote:
> > On Mon, 2024-07-15 at 12:38 -0700, Daniele Ceraolo Spurio wrote:
> > > On 7/15/2024 11:41 AM, Souza, Jose wrote:
> > > > On Mon, 2024-07-15 at 11:03 -0700, Daniele Ceraolo Spurio wrote:
> > > > > On 7/15/2024 10:54 AM, Souza, Jose wrote:
> > > > > > On Fri, 2024-07-12 at 14:28 -0700, Daniele Ceraolo Spurio wrote:
> > > > > > > PXP prerequisites (SW proxy and HuC auth via GSC) are completed
> > > > > > > asynchronously from driver load, which means that userspace can start
> > > > > > > submitting before we're ready to start a PXP session. Therefore, we need
> > > > > > > a query that userspace can use to check not only if PXP is supported by
> > > > > > > also to wait until the prerequisites are done.
> > > > > > >
> > > > > > > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> > > > > > > ---
> > > > > > > drivers/gpu/drm/xe/xe_pxp.c | 33 +++++++++++++++++++++++++++++++++
> > > > > > > drivers/gpu/drm/xe/xe_pxp.h | 1 +
> > > > > > > drivers/gpu/drm/xe/xe_query.c | 33 +++++++++++++++++++++++++++++++++
> > > > > > > include/uapi/drm/xe_drm.h | 28 ++++++++++++++++++++++++++++
> > > > > > > 4 files changed, 95 insertions(+)
> > > > > > >
> > > > > > > diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
> > > > > > > index e979b47f155a..e39a47aeb050 100644
> > > > > > > --- a/drivers/gpu/drm/xe/xe_pxp.c
> > > > > > > +++ b/drivers/gpu/drm/xe/xe_pxp.c
> > > > > > > @@ -60,6 +60,39 @@ static bool pxp_prerequisites_done(const struct xe_pxp *pxp)
> > > > > > > return ready;
> > > > > > > }
> > > > > > >
> > > > > > > +/**
> > > > > > > + * xe_pxp_get_readiness_status - check whether PXP is ready for userspace use
> > > > > > > + * @pxp: the xe_pxp pointer (can be NULL if PXP is disabled)
> > > > > > > + *
> > > > > > > + * This function is used for status query from userspace, so the returned value
> > > > > > > + * follow the uapi (see drm_xe_query_pxp_status)
> > > > > > > + *
> > > > > > > + * Returns: 0 if PXP is not ready yet, 1 if it is ready, an errno value if PXP
> > > > > > > + * is not supported/enabled or if something went wrong in the initialization of
> > > > > > > + * the prerequisites.
> > > > > > > + */
> > > > > > > +int xe_pxp_get_readiness_status(struct xe_pxp *pxp)
> > > > > > > +{
> > > > > > > + int ret = 0;
> > > > > > > +
> > > > > > > + if (!xe_pxp_is_enabled(pxp))
> > > > > > > + return -ENODEV;
> > > > > > > +
> > > > > > > + /* if the GSC or HuC FW are in an error state, PXP will never work */
> > > > > > > + if (xe_uc_fw_status_to_error(pxp->gt->uc.huc.fw.status) ||
> > > > > > > + xe_uc_fw_status_to_error(pxp->gt->uc.gsc.fw.status))
> > > > > > > + return -EIO;
> > > > > > > +
> > > > > > > + xe_pm_runtime_get(pxp->xe);
> > > > > > > +
> > > > > > > + /* PXP requires both HuC loaded and GSC proxy initialized */
> > > > > > > + if (pxp_prerequisites_done(pxp))
> > > > > > > + ret = 1;
> > > > > > > +
> > > > > > > + xe_pm_runtime_put(pxp->xe);
> > > > > > > + return ret;
> > > > > > > +}
> > > > > > > +
> > > > > > > static bool pxp_session_is_in_play(struct xe_pxp *pxp, u32 id)
> > > > > > > {
> > > > > > > struct xe_gt *gt = pxp->gt;
> > > > > > > diff --git a/drivers/gpu/drm/xe/xe_pxp.h b/drivers/gpu/drm/xe/xe_pxp.h
> > > > > > > index 8f0a3a514fb8..b2aae4abdd0e 100644
> > > > > > > --- a/drivers/gpu/drm/xe/xe_pxp.h
> > > > > > > +++ b/drivers/gpu/drm/xe/xe_pxp.h
> > > > > > > @@ -14,6 +14,7 @@ struct xe_pxp;
> > > > > > >
> > > > > > > bool xe_pxp_is_supported(const struct xe_device *xe);
> > > > > > > bool xe_pxp_is_enabled(const struct xe_pxp *pxp);
> > > > > > > +int xe_pxp_get_readiness_status(struct xe_pxp *pxp);
> > > > > > >
> > > > > > > int xe_pxp_init(struct xe_device *xe);
> > > > > > > void xe_pxp_irq_handler(struct xe_device *xe, u16 iir);
> > > > > > > diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> > > > > > > index 4e01df6b1b7a..5da9f403c2b9 100644
> > > > > > > --- a/drivers/gpu/drm/xe/xe_query.c
> > > > > > > +++ b/drivers/gpu/drm/xe/xe_query.c
> > > > > > > @@ -22,6 +22,7 @@
> > > > > > > #include "xe_guc_hwconfig.h"
> > > > > > > #include "xe_macros.h"
> > > > > > > #include "xe_mmio.h"
> > > > > > > +#include "xe_pxp.h"
> > > > > > > #include "xe_ttm_vram_mgr.h"
> > > > > > >
> > > > > > > static const u16 xe_to_user_engine_class[] = {
> > > > > > > @@ -678,6 +679,37 @@ static int query_oa_units(struct xe_device *xe,
> > > > > > > return ret ? -EFAULT : 0;
> > > > > > > }
> > > > > > >
> > > > > > > +static int query_pxp_status(struct xe_device *xe, struct drm_xe_device_query *query)
> > > > > > > +{
> > > > > > > + struct drm_xe_query_pxp_status __user *query_ptr = u64_to_user_ptr(query->data);
> > > > > > > + size_t size = sizeof(struct drm_xe_query_pxp_status);
> > > > > > > + struct drm_xe_query_pxp_status resp;
> > > > > > > + int ret;
> > > > > > > +
> > > > > > > + if (query->size == 0) {
> > > > > > > + query->size = size;
> > > > > > > + return 0;
> > > > > > > + } else if (XE_IOCTL_DBG(xe, query->size != size)) {
> > > > > > > + return -EINVAL;
> > > > > > > + }
> > > > > > > +
> > > > > > > + if (copy_from_user(&resp, query_ptr, size))
> > > > > > > + return -EFAULT;
> > > > > > > +
> > > > > > > + ret = xe_pxp_get_readiness_status(xe->pxp);
> > > > > > > + if (ret < 0)
> > > > > > > + return ret;
> > > > > > > +
> > > > > > > + resp.status = ret;
> > > > > > when status is == 0? does it means that will always succeed? it just matter of time.
> > > > > no, it just means the init is still in progress. It can still end up in
> > > > > an error state if something goes wrong during the init (e.g. GSC proxy
> > > > > init failure), although that's unlikely.
> > > > >
> > > > > > For i915 in some older kernel versions we had to create a gem_context with I915_CONTEXT_PARAM_PROTECTED_CONTENT set to verify for sure if supported or
> > > > > > not.
> > > > > This query is enough to confirm. That's true for i915 as well, the query
> > > > > was added explicitly to avoid having to create a context just to check
> > > > > for PXP.
> > > > But as you said above, if status == 0 it could still fail, so UMD will still need create a exec_queue to verify because UMD needs to report to
> > > > applications if protected stuff is supported or not as one of first steps of Vulkan API.
> > > If you need the answer and the status is 0, you need to wait a bit an
> > > then check again. There is no other safe way of knowing if PXP fully is
> > > supported or not.
> > > Note that we expect this to complete relatively quickly after driver
> > > load; the init itself takes ~500ms, but since it is done in a worker
> > > thread it can be delayed a bit if the system is busy. The only exception
> > > is if the mei driver doesn't load, for which case the timeout is 10s,
> > > but that should in theory never happen unless the HW is in a very bad
> > > state or the mei driver is totally busted.
> > >
> > > > From what I looked to the code, exec_queue will return EBUSY if UMD tries to create a exec_queue with DRM_XE_PXP_TYPE_HWDRM but
> > > > pxp_prerequisites_done() is still returning false. Can't you make something block in KMD until PXP initialization is running? Maybe add a blocking
> > > > parameter in the status uAPI?
> > > I can't block the driver init given that GSC init can be slow and we
> > > don't want to delay the whole driver load by 500ms or more.
> > I mean not block driver init but block exec_queue_create or DRM_XE_DEVICE_QUERY_PXP_STATUS.
> >
> > > I could add a parameter to block the pxp_status query until the result
> > > is certain, but is there any reason you can't do such a wait in
> > > userspace? I think it's better to block in userspace when possible and
> > > it should be relatively simple to implement, something like:
> > >
> > > while (time_ms < timeout_ms) {
> > > err = ioctl (<query pxp status>);
> > > if (err || pxp_status == 1)
> > > break;
> > >
> > > msleep(50);
> > > time_ms += 50;
> > > }
> > The problem of doing this in UMD is that we would need to wait up to 10s in all cases.
> > KMD could do some checking like if mei is loaded then wait up to 500ms, if not ready after 500ms mark as not supported.
>
> We actually can't do any extra checking inside KMD. I haven't found a
> way of figuring out if mei is working; the mei driver has a number of
> sub-components, one of which is the one we need, and I couldn't find any
> way to figure out if it's not up because we're still waiting for it to
> load or if something went wrong. The only thing we can do is timeout on
> it (more on this below).
>
> >
> > And from what I understood there is no timeout in pxp_prerequisites_done() so even after 10s it would still return 1, so the next application starting
> > would need to wait again up to 10s.
>
> There is a timeout on the GSC FW init and HuC auth, so the FW status
> will move to error if the init doesn't complete or something goes wrong.
> We check the FW status at the beginning of
> xe_pxp_get_readiness_status(), so all calls from that point onward
> would return an error code. The status can only be 0 in the first 10s
> from driver load (which is the worst case and should never happen, in
> normal scenario it should resolve within 1s even if there is a problem),
> after that it'll either move to 1 or return an error.
Okay will do the wait on UMD but can you add some comments to drm_xe_query_pxp_status with the time expected for PXP to finish initialization?
Mesa reviewers will ask from where did I got the timeout value.
>
> >
> > > I have something similar in the local IGT that I'm using for testing.
> > >
> > > Also note that the proposed behavior is basically the same as i915.
> > > Apart from the defines, the only difference is that in i915 we wait for
> > > a bit (250ms) on context creation, but if PXP is not ready within that
> > > time we do return a try again later error. Relevant i915 snippet:
> > >
> > > ret = intel_pxp_get_readiness_status(pxp, PXP_READINESS_TIMEOUT);
> > > if (ret < 0) {
> > > drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: tried but not-avail
> > > (%d)", ret);
> > > return ret;
> > > } else if (ret > 1) {
> > > return -EIO; /* per UAPI spec, user may retry later */
> > > }
> > >
> > > > > > > + resp.supported_session_types =
> > > > > > > + BIT(DRM_XE_PXP_TYPE_NONE) | BIT(DRM_XE_PXP_TYPE_HWDRM);
> > > > > > Looks odd to me that you return DRM_XE_PXP_TYPE_NONE as supported...
> > > > > I just added it in to have all values of the enum represented. I can
> > > > > take it out no problem if you think it is clearer that way, I'll just
> > > > > add a note in the interface doc that DRM_XE_PXP_TYPE_NONE is always
> > > > > supported.
> > > > Ah just notice that enum drm_xe_pxp_session_type is set in exec_queue. But still, if PXP is not supported/available this will fail, so I can't see a
> > > > case were only DRM_XE_PXP_TYPE_NONE is returned.
> > > > So I don't think you need to return it as supported.
> > > ok, I'll update it.
> > >
> > > Daniele
> > >
> > > >
> > > > > Daniele
> > > > >
> > > > > > > +
> > > > > > > + if (copy_to_user(query_ptr, &resp, size))
> > > > > > > + return -EFAULT;
> > > > > > > +
> > > > > > > + return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > static int (* const xe_query_funcs[])(struct xe_device *xe,
> > > > > > > struct drm_xe_device_query *query) = {
> > > > > > > query_engines,
> > > > > > > @@ -689,6 +721,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
> > > > > > > query_engine_cycles,
> > > > > > > query_uc_fw_version,
> > > > > > > query_oa_units,
> > > > > > > + query_pxp_status,
> > > > > > > };
> > > > > > >
> > > > > > > int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > > > > > > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > > > > > > index 631fdc2ed493..746bc16bd220 100644
> > > > > > > --- a/include/uapi/drm/xe_drm.h
> > > > > > > +++ b/include/uapi/drm/xe_drm.h
> > > > > > > @@ -619,6 +619,32 @@ struct drm_xe_query_uc_fw_version {
> > > > > > > __u64 reserved;
> > > > > > > };
> > > > > > >
> > > > > > > +/**
> > > > > > > + * struct drm_xe_query_pxp_status - query if PXP is ready
> > > > > > > + *
> > > > > > > + * If PXP is enabled and no fatal error as occurred, the status will be set to
> > > > > > > + * one of the following values:
> > > > > > > + * 0: PXP init still in progress
> > > > > > > + * 1: PXP init complete
> > > > > > > + *
> > > > > > > + * The supported session type bitmask is based on the values in
> > > > > > > + * enum drm_xe_pxp_session_type, including TYPE_NONE.
> > > > > > > + *
> > > > > > > + * If PXP is not enabled or something has gone wrong, the query will be failed
> > > > > > > + * with one of the following error codes:
> > > > > > > + * -ENODEV: PXP not supported or disabled;
> > > > > > > + * -EIO: fatal error occurred during init, so PXP will never be enabled;
> > > > > > > + * -EINVAL: incorrect value provided as part of the query;
> > > > > > > + * -EFAULT: error copying the memory between kernel and userspace.
> > > > > > > + */
> > > > > > > +struct drm_xe_query_pxp_status {
> > > > > > > + /** @status: current PXP status */
> > > > > > > + __u32 status;
> > > > > > > +
> > > > > > > + /** @supported_session_types: bitmask of supported PXP session types */
> > > > > > > + __u32 supported_session_types;
> > > > > > > +};
> > > > > > > +
> > > > > > > /**
> > > > > > > * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main
> > > > > > > * structure to query device information
> > > > > > > @@ -638,6 +664,7 @@ struct drm_xe_query_uc_fw_version {
> > > > > > > * attributes.
> > > > > > > * - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY
> > > > > > > * - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES
> > > > > > > + * - %DRM_XE_DEVICE_QUERY_PXP_STATUS
> > > > > > > *
> > > > > > > * If size is set to 0, the driver fills it with the required size for
> > > > > > > * the requested type of data to query. If size is equal to the required
> > > > > > > @@ -690,6 +717,7 @@ struct drm_xe_device_query {
> > > > > > > #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6
> > > > > > > #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7
> > > > > > > #define DRM_XE_DEVICE_QUERY_OA_UNITS 8
> > > > > > > +#define DRM_XE_DEVICE_QUERY_PXP_STATUS 9
> > > > > > > /** @query: The type of data to query */
> > > > > > > __u32 query;
> > > > > > >
>
More information about the Intel-xe
mailing list