[PATCH 1/2] drm/nouveau: Add DRM_IOCTL_NOUVEAU_GET_ZCULL_INFO
Ben Skeggs
bskeggs at nvidia.com
Thu Mar 20 19:57:55 UTC 2025
On 21/3/25 04:18, Danilo Krummrich wrote:
> Hi Mel,
>
> On Wed, Mar 12, 2025 at 05:36:14PM -0400, Mel Henning wrote:
>> Userspace needs this information to set up zcull correctly.
> This is a very brief motivation for the commit, please also describe what the
> commit does using imperative form.
>
>> Signed-off-by: Mel Henning <mhenning at darkrefraction.com>
>> ---
>> .../drm/nouveau/include/nvkm/core/device.h | 6 ++
>> .../sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h | 69 +++++++++++++++++++
>> drivers/gpu/drm/nouveau/nouveau_abi16.c | 15 ++++
>> drivers/gpu/drm/nouveau/nouveau_abi16.h | 1 +
>> drivers/gpu/drm/nouveau/nouveau_drm.c | 1 +
>> drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c | 34 +++++++++
>> include/uapi/drm/nouveau_drm.h | 19 +++++
>> 7 files changed, 145 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
>> index 46afb877a296..d1742ff1cf6b 100644
>> --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
>> +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
>> @@ -3,6 +3,9 @@
>> #define __NVKM_DEVICE_H__
>> #include <core/oclass.h>
>> #include <core/intr.h>
>> +
>> +#include "uapi/drm/nouveau_drm.h"
>> +
>> enum nvkm_subdev_type;
>>
>> enum nvkm_device_type {
>> @@ -72,6 +75,9 @@ struct nvkm_device {
>> bool armed;
>> bool legacy_done;
>> } intr;
>> +
>> + bool has_zcull_info;
>> + struct drm_nouveau_get_zcull_info zcull_info;
> This is bypassing the nvif layer entirely. I think you should store the contents
> of struct NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS in struct r535_gr and have an
> nvif interface to access the data.
I agree here, though nvkm_gr would be a better choice for a couple of
reasons, not least that it's possible (and should be reasonably trivial)
to support this on earlier GPUs - should someone desire to at a later point.
Ben.
>
>> };
>>
>> struct nvkm_subdev *nvkm_device_subdev(struct nvkm_device *, int type, int inst);
>> diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h
>> index 59f8895bc5d7..01884b926c9c 100644
>> --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h
>> +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h
>> @@ -26,6 +26,75 @@
>> * DEALINGS IN THE SOFTWARE.
>> */
>>
>> +/**
>> + * NV2080_CTRL_CMD_GR_GET_ZCULL_INFO
>> + *
>> + * This command is used to query the RM for zcull information that the
>> + * driver will need to allocate and manage the zcull regions.
>> + *
>> + * widthAlignPixels
>> + * This parameter returns the width alignment restrictions in pixels
>> + * used to adjust a surface for proper aliquot coverage (typically
>> + * #TPC's * 16).
>> + *
>> + * heightAlignPixels
>> + * This parameter returns the height alignment restrictions in pixels
>> + * used to adjust a surface for proper aliquot coverage (typically 32).
>> + *
>> + * pixelSquaresByAliquots
>> + * This parameter returns the pixel area covered by an aliquot
>> + * (typically #Zcull_banks * 16 * 16).
>> + *
>> + * aliquotTotal
>> + * This parameter returns the total aliquot pool available in HW.
>> + *
>> + * zcullRegionByteMultiplier
>> + * This parameter returns multiplier used to convert aliquots in a region
>> + * to the number of bytes required to save/restore them.
>> + *
>> + * zcullRegionHeaderSize
>> + * This parameter returns the region header size which is required to be
>> + * allocated and accounted for in any save/restore operation on a region.
>> + *
>> + * zcullSubregionHeaderSize
>> + * This parameter returns the subregion header size which is required to be
>> + * allocated and accounted for in any save/restore operation on a region.
>> + *
>> + * subregionCount
>> + * This parameter returns the subregion count.
>> + *
>> + * subregionWidthAlignPixels
>> + * This parameter returns the subregion width alignment restrictions in
>> + * pixels used to adjust a surface for proper aliquot coverage
>> + * (typically #TPC's * 16).
>> + *
>> + * subregionHeightAlignPixels
>> + * This parameter returns the subregion height alignment restrictions in
>> + * pixels used to adjust a surface for proper aliquot coverage
>> + * (typically 62).
>> + *
>> + * The callee should compute the size of a zcull region as follows.
>> + * (numBytes = aliquots * zcullRegionByteMultiplier +
>> + * zcullRegionHeaderSize + zcullSubregionHeaderSize)
>> + */
>> +#define NV2080_CTRL_CMD_GR_GET_ZCULL_INFO (0x20801206U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_GR_INTERFACE_ID << 8) | NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS_MESSAGE_ID" */
>> +
>> +#define NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS_SUBREGION_SUPPORTED
>> +#define NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS_MESSAGE_ID (0x6U)
>> +
>> +typedef struct NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS {
>> + NvU32 widthAlignPixels;
>> + NvU32 heightAlignPixels;
>> + NvU32 pixelSquaresByAliquots;
>> + NvU32 aliquotTotal;
>> + NvU32 zcullRegionByteMultiplier;
>> + NvU32 zcullRegionHeaderSize;
>> + NvU32 zcullSubregionHeaderSize;
>> + NvU32 subregionCount;
>> + NvU32 subregionWidthAlignPixels;
>> + NvU32 subregionHeightAlignPixels;
>> +} NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS;
>> +
>> typedef enum NV2080_CTRL_CMD_GR_CTXSW_PREEMPTION_BIND_BUFFERS {
>> NV2080_CTRL_CMD_GR_CTXSW_PREEMPTION_BIND_BUFFERS_MAIN = 0,
>> NV2080_CTRL_CMD_GR_CTXSW_PREEMPTION_BIND_BUFFERS_SPILL = 1,
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
>> index 2a0617e5fe2a..981abea97546 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
>> @@ -333,6 +333,21 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
>> return 0;
>> }
>>
>> +int
>> +nouveau_abi16_ioctl_get_zcull_info(ABI16_IOCTL_ARGS)
>> +{
>> + struct nouveau_drm *drm = nouveau_drm(dev);
>> + struct nvkm_device *device = drm->nvkm;
>> + struct drm_nouveau_get_zcull_info *out = data;
>> +
>> + if (device->has_zcull_info) {
>> + *out = device->zcull_info;
> This needs copy_to_user().
>
>> + return 0;
>> + } else {
>> + return -ENODEV;
>> + }
>> +}
>> +
>> int
>> nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
>> {
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
>> index af6b4e1cefd2..134b3ab58719 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
>> +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
>> @@ -6,6 +6,7 @@
>> struct drm_device *dev, void *data, struct drm_file *file_priv
>>
>> int nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS);
>> +int nouveau_abi16_ioctl_get_zcull_info(ABI16_IOCTL_ARGS);
>> int nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS);
>> int nouveau_abi16_ioctl_channel_free(ABI16_IOCTL_ARGS);
>> int nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS);
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
>> index 107f63f08bd9..4c4168b50e60 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
>> @@ -1244,6 +1244,7 @@ nouveau_ioctls[] = {
>> DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_RENDER_ALLOW),
>> + DRM_IOCTL_DEF_DRV(NOUVEAU_GET_ZCULL_INFO, nouveau_abi16_ioctl_get_zcull_info, DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_BIND, nouveau_svmm_bind, DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_RENDER_ALLOW),
>> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c
>> index f4bed3eb1ec2..6669f2b1f492 100644
>> --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c
>> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c
>> @@ -34,6 +34,7 @@
>> #include <nvrm/535.113.01/common/sdk/nvidia/inc/alloc/alloc_channel.h>
>> #include <nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl0080/ctrl0080fifo.h>
>> #include <nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h>
>> +#include <nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h>
>> #include <nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080internal.h>
>> #include <nvrm/535.113.01/nvidia/generated/g_kernel_channel_nvoc.h>
>>
>> @@ -244,6 +245,8 @@ static int
>> r535_gr_oneinit(struct nvkm_gr *base)
>> {
>> NV2080_CTRL_INTERNAL_STATIC_GR_GET_CONTEXT_BUFFERS_INFO_PARAMS *info;
>> + NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS *zcull_info;
>> + u32 zcull_ctxsw_size, zcull_ctxsw_align;
>> struct r535_gr *gr = container_of(base, typeof(*gr), base);
>> struct nvkm_subdev *subdev = &gr->base.engine.subdev;
>> struct nvkm_device *device = subdev->device;
>> @@ -418,6 +421,11 @@ r535_gr_oneinit(struct nvkm_gr *base)
>> }
>> }
>>
>> + zcull_ctxsw_size = info->engineContextBuffersInfo[0]
>> + .engine[NV0080_CTRL_FIFO_GET_ENGINE_CONTEXT_PROPERTIES_ENGINE_ID_GRAPHICS_ZCULL].size;
>> + zcull_ctxsw_align = info->engineContextBuffersInfo[0]
>> + .engine[NV0080_CTRL_FIFO_GET_ENGINE_CONTEXT_PROPERTIES_ENGINE_ID_GRAPHICS_ZCULL].alignment;
>> +
>> nvkm_gsp_rm_ctrl_done(&gsp->internal.device.subdevice, info);
>>
>> /* Promote golden context to RM. */
>> @@ -450,6 +458,32 @@ r535_gr_oneinit(struct nvkm_gr *base)
>> }
>> }
>>
>> + zcull_info = nvkm_gsp_rm_ctrl_rd(&gsp->internal.device.subdevice,
>> + NV2080_CTRL_CMD_GR_GET_ZCULL_INFO,
>> + sizeof(*zcull_info));
>> + if (WARN_ON(IS_ERR(zcull_info))) {
>> + ret = PTR_ERR(zcull_info);
>> + goto done;
>> + }
>> +
>> + device->has_zcull_info = true;
>> +
>> + device->zcull_info.width_align_pixels = zcull_info->widthAlignPixels;
>> + device->zcull_info.height_align_pixels = zcull_info->heightAlignPixels;
>> + device->zcull_info.pixel_squares_by_aliquots = zcull_info->pixelSquaresByAliquots;
>> + device->zcull_info.aliquot_total = zcull_info->aliquotTotal;
>> + device->zcull_info.zcull_region_byte_multiplier = zcull_info->zcullRegionByteMultiplier;
>> + device->zcull_info.zcull_region_header_size = zcull_info->zcullRegionHeaderSize;
>> + device->zcull_info.zcull_subregion_header_size = zcull_info->zcullSubregionHeaderSize;
>> + device->zcull_info.subregion_count = zcull_info->subregionCount;
>> + device->zcull_info.subregion_width_align_pixels = zcull_info->subregionWidthAlignPixels;
>> + device->zcull_info.subregion_height_align_pixels = zcull_info->subregionHeightAlignPixels;
>> +
>> + device->zcull_info.ctxsw_size = zcull_ctxsw_size;
>> + device->zcull_info.ctxsw_align = zcull_ctxsw_align;
>> +
>> + nvkm_gsp_rm_ctrl_done(&gsp->internal.device.subdevice, zcull_info);
> Please move this to a separate function.
>
>> +
>> done:
>> nvkm_gsp_rm_free(&golden.chan);
>> for (int i = gr->ctxbuf_nr - 1; i >= 0; i--)
>> diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
>> index dd87f8f30793..33361784eb4e 100644
>> --- a/include/uapi/drm/nouveau_drm.h
>> +++ b/include/uapi/drm/nouveau_drm.h
> Please do the uAPI change in a separate commit.
>
>> @@ -432,6 +432,22 @@ struct drm_nouveau_exec {
>> __u64 push_ptr;
>> };
>>
>> +struct drm_nouveau_get_zcull_info {
> Please add some documentation to this structure.
>
>> + __u32 width_align_pixels;
>> + __u32 height_align_pixels;
>> + __u32 pixel_squares_by_aliquots;
>> + __u32 aliquot_total;
>> + __u32 zcull_region_byte_multiplier;
>> + __u32 zcull_region_header_size;
>> + __u32 zcull_subregion_header_size;
>> + __u32 subregion_count;
>> + __u32 subregion_width_align_pixels;
>> + __u32 subregion_height_align_pixels;
>> +
>> + __u32 ctxsw_size;
>> + __u32 ctxsw_align;
>> +};
> What if this ever changes between hardware revisions or firmware versions?
>
>> +
>> #define DRM_NOUVEAU_GETPARAM 0x00
>> #define DRM_NOUVEAU_SETPARAM 0x01 /* deprecated */
>> #define DRM_NOUVEAU_CHANNEL_ALLOC 0x02
>> @@ -445,6 +461,7 @@ struct drm_nouveau_exec {
>> #define DRM_NOUVEAU_VM_INIT 0x10
>> #define DRM_NOUVEAU_VM_BIND 0x11
>> #define DRM_NOUVEAU_EXEC 0x12
>> +#define DRM_NOUVEAU_GET_ZCULL_INFO 0x13
>> #define DRM_NOUVEAU_GEM_NEW 0x40
>> #define DRM_NOUVEAU_GEM_PUSHBUF 0x41
>> #define DRM_NOUVEAU_GEM_CPU_PREP 0x42
>> @@ -513,6 +530,8 @@ struct drm_nouveau_svm_bind {
>> #define DRM_IOCTL_NOUVEAU_VM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_INIT, struct drm_nouveau_vm_init)
>> #define DRM_IOCTL_NOUVEAU_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_BIND, struct drm_nouveau_vm_bind)
>> #define DRM_IOCTL_NOUVEAU_EXEC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_EXEC, struct drm_nouveau_exec)
>> +
>> +#define DRM_IOCTL_NOUVEAU_GET_ZCULL_INFO DRM_IOR (DRM_COMMAND_BASE + DRM_NOUVEAU_GET_ZCULL_INFO, struct drm_nouveau_get_zcull_info)
>> #if defined(__cplusplus)
>> }
>> #endif
>> --
>> 2.48.1
>>
More information about the Nouveau
mailing list