[PATCH v2] drm/xe: Add device CXL capabilities identification

Michal Wajdeczko michal.wajdeczko at intel.com
Sun May 26 17:08:27 UTC 2024



On 26.05.2024 15:02, Farah Kassabri wrote:
> As future Intel GPUs will use CXL interface with the host
> servers, this patch will add check if the xe device has CXL
> capabilities or not, by reading the PCIe standard DVSEC register
> and identify the CXL vendor id.
> 
> Signed-off-by: Farah Kassabri <fkassabri at habana.ai>
> Reviewed by: Wajdeczko, Michal <Michal.Wajdeczko at intel.com>

please scratch that, giving comments does not imply r-b, you need
explicit r-b statement, see [1]

[1]
https://www.kernel.org/doc/html/latest/process/submitting-patches.html#using-reported-by-tested-by-reviewed-by-suggested-by-and-fixes

> ---
> changes in v2:
> fixes following review
> 
>  drivers/gpu/drm/xe/Makefile          |  3 ++-
>  drivers/gpu/drm/xe/xe_cxl.c          | 35 ++++++++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_cxl.h          | 26 +++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_device.c       |  5 ++++
>  drivers/gpu/drm/xe/xe_device_types.h | 10 ++++++++
>  5 files changed, 78 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/xe/xe_cxl.c
>  create mode 100644 drivers/gpu/drm/xe/xe_cxl.h
> 
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index a5959bb7b1fb..5f35dff98238 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -65,6 +65,7 @@ $(uses_generated_oob): $(generated_oob)
>  xe-y += xe_bb.o \
>  	xe_bo.o \
>  	xe_bo_evict.o \
> +	xe_cxl.o \
>  	xe_debugfs.o \
>  	xe_devcoredump.o \
>  	xe_device.o \
> @@ -146,7 +147,7 @@ xe-y += xe_bb.o \
>  	xe_vram_freq.o \
>  	xe_wait_user_fence.o \
>  	xe_wa.o \
> -	xe_wopcm.o
> +	xe_wopcm.o \

this is now an unrelated change

>  
>  xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
>  
> diff --git a/drivers/gpu/drm/xe/xe_cxl.c b/drivers/gpu/drm/xe/xe_cxl.c
> new file mode 100644
> index 000000000000..13c4bf7fe35e
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_cxl.c
> @@ -0,0 +1,35 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#include "xe_cxl.h"
> +
> +int xe_cxl_init_capabilities(struct xe_device *xe)
> +{
> +	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
> +	u16 ctrl, cxl_dvsec;
> +	int rc;
> +
> +	cxl_dvsec = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
> +	if (!cxl_dvsec)
> +		return 0;
> +
> +	xe->info.has_cxl_cap = true;
> +	xe->cxl.dvsec = cxl_dvsec;
> +
> +	rc = pci_read_config_word(pdev, cxl_dvsec + CXL_DVSEC_CTRL_OFFSET, &ctrl);


> +	if (rc < 0) {
> +		drm_err(&xe->drm, "Failed to read dvsec ctrl register(%d)\n", rc);
> +		return rc;

is it ok to have xe->info.has_cxl_cap set but with invalid xe->cxl.type?

> +	}
> +
> +	if (ctrl & CXL_DVSEC_MEM_ENABLE)
> +		xe->cxl.type = (ctrl & BIT(0)) ? CXL_DEV_TYPE_TWO : CXL_DEV_TYPE_THREE;
> +	else
> +		xe->cxl.type = CXL_DEV_TYPE_ONE;
> +
> +	drm_dbg(&xe->drm, "The device has CXL capability, type %u\n", xe->cxl.type);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_cxl.h b/drivers/gpu/drm/xe/xe_cxl.h
> new file mode 100644
> index 000000000000..6ba89d527c84
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_cxl.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#ifndef _XE_CXL_H_
> +#define _XE_CXL_H_
> +
> +#include "xe_device.h"
> +#include "../drivers/cxl/cxlpci.h"

again, you likely need them in .c but not in .h

btw, it looks that cxlpci.h is private, unlikely to be meant to be
included by other modules, are we doing right thing ?

> +
> +enum {
> +	CXL_DEV_TYPE_ONE = 1,
> +	CXL_DEV_TYPE_TWO = 2,
> +	CXL_DEV_TYPE_THREE = 3,
> +};

kind of still cryptic without any kernel-doc

if not used beyond xe_cxl.c then maybe move it there?

or define it inside xe_device.cxl as cxl.type ?

> +
> +/**
> + * xe_cxl_init_capabilities - set CXL device capabilities
> + * @xe: xe device structure
> + *
> + * Set CXL capabilities and information of the xe device.

also document "Return:"

> + */

nit: in Xe driver I can see that most (all?) function kernel-doc are
placed together with the implementation, not near the declaration

> +int xe_cxl_init_capabilities(struct xe_device *xe);
> +
> +#endif
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 5acf2c92789f..04677c9ff10f 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -50,6 +50,7 @@
>  #include "xe_ttm_sys_mgr.h"
>  #include "xe_vm.h"
>  #include "xe_wait_user_fence.h"
> +#include "xe_cxl.h"
>  
>  static int xe_file_open(struct drm_device *dev, struct drm_file *file)
>  {
> @@ -312,6 +313,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>  	if (WARN_ON(err))
>  		goto err;
>  
> +	err = xe_cxl_init_capabilities(xe);
> +	if (err)
> +		goto err;

btw, do we want to treat an error from CXL pci_read_config_word() as a
reason to abort driver load ?

> +
>  	return xe;
>  
>  err:
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 8e7048ff3ee5..323c3d57fdb8 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -285,6 +285,8 @@ struct xe_device {
>  		u8 has_atomic_enable_pte_bit:1;
>  		/** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
>  		u8 has_device_atomics_on_smem:1;
> +		/** @info.has_cxl_cap: device has CXL capabilities */
> +		u8 has_cxl_cap:1;
>  
>  #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>  		struct {
> @@ -321,6 +323,14 @@ struct xe_device {
>  		struct ttm_resource_manager sys_mgr;
>  	} mem;
>  
> +	/** @cxl: CXL info for device */
> +	struct {
> +		/** @cxl.dvsec: offset to device DVSEC */
> +		u16 dvsec;
> +		/** @cxl.type: CXL device type */
> +		u16 type;
> +	} cxl;
> +
>  	/** @sriov: device level virtualization data */
>  	struct {
>  		/** @sriov.__mode: SR-IOV mode (Don't access directly!) */


More information about the Intel-xe mailing list