[Intel-xe] [PATCH 4/4] drm/xe/xe2: Update MOCS fields in blitter instructions
Matt Roper
matthew.d.roper at intel.com
Fri Sep 29 16:20:07 UTC 2023
On Thu, Sep 28, 2023 at 09:43:51PM -0700, Lucas De Marchi wrote:
> From: Haridhar Kalvala <haridhar.kalvala at intel.com>
>
> Xe2 changes or adds bits for mocs in a few BLT instructions:
> XY_CTRL_SURF_COPY_BLT, XY_FAST_COLOR_BLT, XY_FAST_COPY_BLT, and MEM_SET.
> Modify the code to deal with the new location.
The commit message should also mention that, unlike Xe1, the field
documented as "MOCS" on Xe2 holds only the MOCS index, and that the PXP
bit is now explicitly documented separately.
Aside from that,
Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
>
> Bspec: 57567,57566,57565,57562
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Signed-off-by: Haridhar Kalvala <haridhar.kalvala at intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 4 ++
> drivers/gpu/drm/xe/xe_migrate.c | 47 +++++++++++++++--------
> 2 files changed, 34 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> index cc7b56763f10..21738281bdd0 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> @@ -45,6 +45,7 @@
> #define CCS_SIZE_MASK 0x3FF
> #define CCS_SIZE_SHIFT 8
> #define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26)
> +#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28)
> #define NUM_CCS_BYTES_PER_BLOCK 256
> #define NUM_BYTES_PER_CCS_BYTE 256
> #define NUM_CCS_BLKS_PER_XFER 1024
> @@ -53,12 +54,14 @@
> #define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19)
> #define XY_FAST_COLOR_BLT_DW 16
> #define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22)
> +#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24)
> #define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
>
> #define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
> #define XY_FAST_COPY_BLT_DEPTH_32 (3<<24)
> #define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31)
> #define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30)
> +#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20)
>
> #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22)
> #define PVC_MEM_SET_CMD_LEN_DW 7
> @@ -66,6 +69,7 @@
> #define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24)
> /* Bspec lists field as [6:0], but index alone is from [6:1] */
> #define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1)
> +#define XE2_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 3)
>
> #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
>
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index b682d34bc1e5..1b4afa8c0560 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -517,23 +517,28 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
> u64 src_ofs, bool src_is_indirect,
> u32 size)
> {
> + struct xe_device *xe = gt_to_xe(gt);
> u32 *cs = bb->cs + bb->len;
> u32 num_ccs_blks;
> - u32 mocs = gt->mocs.uc_index;
> + u32 mocs;
>
> num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
> NUM_CCS_BYTES_PER_BLOCK);
> xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER);
> +
> + if (GRAPHICS_VERx100(xe) >= 2000)
> + mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
> + else
> + mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
> +
> *cs++ = XY_CTRL_SURF_COPY_BLT |
> (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
> (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
> ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
> *cs++ = lower_32_bits(src_ofs);
> - *cs++ = upper_32_bits(src_ofs) |
> - FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
> + *cs++ = upper_32_bits(src_ofs) | mocs;
> *cs++ = lower_32_bits(dst_ofs);
> - *cs++ = upper_32_bits(dst_ofs) |
> - FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
> + *cs++ = upper_32_bits(dst_ofs) | mocs;
>
> bb->len = cs - bb->cs;
> }
> @@ -544,24 +549,26 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
> unsigned int pitch)
> {
> struct xe_device *xe = gt_to_xe(gt);
> + u32 mocs = 0;
> + u32 tile_y = 0;
>
> xe_gt_assert(gt, size / pitch <= S16_MAX);
> xe_gt_assert(gt, pitch / 4 <= S16_MAX);
> xe_gt_assert(gt, pitch <= U16_MAX);
>
> + if (GRAPHICS_VERx100(xe) >= 2000) {
> + mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
> + tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
> + }
> +
> bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
> - if (GRAPHICS_VER(xe) >= 20)
> - bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch |
> - XY_FAST_COPY_BLT_D1_SRC_TILE4 |
> - XY_FAST_COPY_BLT_D1_DST_TILE4;
> - else
> - bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
> + bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
> bb->cs[bb->len++] = 0;
> bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
> bb->cs[bb->len++] = lower_32_bits(dst_ofs);
> bb->cs[bb->len++] = upper_32_bits(dst_ofs);
> bb->cs[bb->len++] = 0;
> - bb->cs[bb->len++] = pitch;
> + bb->cs[bb->len++] = pitch | mocs;
> bb->cs[bb->len++] = lower_32_bits(src_ofs);
> bb->cs[bb->len++] = upper_32_bits(src_ofs);
> }
> @@ -812,8 +819,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
> static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
> u32 size, u32 pitch)
> {
> + struct xe_device *xe = gt_to_xe(gt);
> u32 *cs = bb->cs + bb->len;
> - u32 mocs = gt->mocs.uc_index;
> u32 len = PVC_MEM_SET_CMD_LEN_DW;
>
> *cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
> @@ -822,7 +829,10 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
> *cs++ = pitch - 1;
> *cs++ = lower_32_bits(src_ofs);
> *cs++ = upper_32_bits(src_ofs);
> - *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs);
> + if (GRAPHICS_VERx100(xe) >= 2000)
> + *cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
> + else
> + *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
>
> xe_gt_assert(gt, cs - bb->cs == len + bb->len);
>
> @@ -835,15 +845,18 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
> struct xe_device *xe = gt_to_xe(gt);
> u32 *cs = bb->cs + bb->len;
> u32 len = XY_FAST_COLOR_BLT_DW;
> - u32 mocs = gt->mocs.uc_index;
>
> if (GRAPHICS_VERx100(xe) < 1250)
> len = 11;
>
> *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
> (len - 2);
> - *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
> - (pitch - 1);
> + if (GRAPHICS_VERx100(xe) >= 2000)
> + *cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
> + (pitch - 1);
> + else
> + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
> + (pitch - 1);
> *cs++ = 0;
> *cs++ = (size / pitch) << 16 | pitch / 4;
> *cs++ = lower_32_bits(src_ofs);
> --
> 2.40.1
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe mailing list