[Intel-xe] [PATCH v4 1/2] drm/xe: Skip XY_FAST_COLOR instruction on link copy engines
Balasubramani Vivekanandan
balasubramani.vivekanandan at intel.com
Fri Mar 17 15:31:23 UTC 2023
On 16.03.2023 17:47, Lucas De Marchi wrote:
> On Wed, Mar 15, 2023 at 01:44:59PM +0530, Balasubramani Vivekanandan wrote:
> > Link copy engines doesn't support the XY_FAST_COLOR instruction.
> > Currently this instruction is used only at one place to clear a ttm
> > resource while migrating a BO.
> > A new device_info member is created to know if a platform has link copy
> > engine. If it supports, then instead of using XY_FAST_COLOR instruction,
> > MEM_SET is used which is available both in main and link copy engines.
> >
> > Since MEM_SET only supports setting 8-bit value, to keep things simple
> > emit_clear funciton is restricted only to clear memory address range
> > when platform has link copy engine.
> >
> > BSpec: 68433
> >
> > Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan at intel.com>
> > ---
> > drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 9 ++++
> > drivers/gpu/drm/xe/xe_device_types.h | 2 +
> > drivers/gpu/drm/xe/xe_migrate.c | 66 +++++++++++++++++++++--
> > drivers/gpu/drm/xe/xe_pci.c | 4 ++
> > 4 files changed, 76 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > index 288576035ce3..e60372a82723 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > @@ -6,6 +6,8 @@
> > #ifndef _XE_GPU_COMMANDS_H_
> > #define _XE_GPU_COMMANDS_H_
> >
> > +#include "regs/xe_reg_defs.h"
> > +
> > #define INSTR_CLIENT_SHIFT 29
> > #define INSTR_MI_CLIENT 0x0
> > #define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
> > @@ -56,6 +58,13 @@
> > #define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
> > #define BLT_DEPTH_32 (3<<24)
> >
> > +#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22)
> > +#define PVC_MEM_SET_CMD_LEN_DW 7
> > +#define PVC_MS_MATRIX REG_BIT(17)
> > +#define PVC_MS_DATA_FIELD GENMASK(31, 24)
> > +/* Bspec lists field as [6:0], but index alone is from [6:1] */
> > +#define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1)
> > +
> > #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
> > #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28)
> > #define PIPE_CONTROL_AMFS_FLUSH (1<<25)
> > diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> > index 034e0956f4ea..46cf37224090 100644
> > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > @@ -95,6 +95,8 @@ struct xe_device {
> > bool has_4tile;
> > /** @has_range_tlb_invalidation: Has range based TLB invalidations */
> > bool has_range_tlb_invalidation;
> > + /** @has_link_copy_engines: Whether the platform has link copy engines */
> > + bool has_link_copy_engine;
> > /** @enable_display: display enabled */
> > bool enable_display;
> >
> > diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> > index c0523d8fe944..44a4712f8c83 100644
> > --- a/drivers/gpu/drm/xe/xe_migrate.c
> > +++ b/drivers/gpu/drm/xe/xe_migrate.c
> > @@ -746,14 +746,37 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
> > return fence;
> > }
> >
> > -static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
> > - u32 size, u32 pitch, u32 value, bool is_vram)
> > +static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
> > + u32 size, u32 pitch)
> > +{
> > + u32 *cs = bb->cs + bb->len;
> > + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
> > + u32 len = PVC_MEM_SET_CMD_LEN_DW;
> > +
> > + *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2);
> > + *cs++ = pitch - 1;
> > + *cs++ = (size / pitch) - 1;
> > + *cs++ = pitch - 1;
> > + *cs++ = lower_32_bits(src_ofs);
> > + *cs++ = upper_32_bits(src_ofs);
> > + *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
> > +
> > + XE_BUG_ON(cs - bb->cs != len + bb->len);
> > +
> > + bb->len += len;
> > +}
> > +
> > +static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
> > + u64 src_ofs, u32 size, u32 pitch, u32 value,
> > + bool is_vram)
> > {
> > u32 *cs = bb->cs + bb->len;
> > - u32 len = XY_FAST_COLOR_BLT_DW;
> > + u32 len;
> > u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
> > + struct xe_device *xe = gt_to_xe(gt);
> >
> > - if (GRAPHICS_VERx100(gt->xe) < 1250)
> > + len = XY_FAST_COLOR_BLT_DW;
> > + if (GRAPHICS_VERx100(xe) < 1250)
> > len = 11;
> >
> > *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
> > @@ -779,7 +802,39 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
> > }
> >
> > XE_BUG_ON(cs - bb->cs != len + bb->len);
> > +
> > bb->len += len;
> > +}
> > +
> > +static u32 emit_clear_cmd_len(struct xe_device *xe)
> > +{
> > + if (xe->info.has_link_copy_engine)
> > + return PVC_MEM_SET_CMD_LEN_DW;
> > + else
> > + return XY_FAST_COLOR_BLT_DW;
> > +}
> > +
> > +static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
> > + u32 size, u32 pitch, u32 value, bool is_vram)
> > +{
> > + struct xe_device *xe = gt_to_xe(gt);
> > +
> > + if (xe->info.has_link_copy_engine) {
> > + /* MEM_SET command supports setting only 8-bit value. So it
> > + * can't be used to set a 32-bit value. And emit_clear function
> > + * is currently used only to clear the address range. Instead
> > + * of limiting value argument to a 8-bit range, for platforms
> > + * with link copy engines, lets not accept any value to set and
> > + * use emit_clear to only clear the memory range.
> > + */
> > + XE_WARN_ON(value);
>
> since this patch comes first, this would create a warning on
> uses of emit_clear() not changed in this patch.
>
> Also, this is further exported by xe_migrate_clear(), which received
> value as a paramenter. By doing what you are doing here you basically
> require every caller of that function to know about the limitation.
>
> Humn... options:
>
> 1) Would it be possible to make sure the migrate clear is executed on
> the main copy engine? I'm not sure there will be contention on using
> that engine, but I think in i915 it was a possible option
>
> 2) Remove the value from xe_migrate_clear() and adjust the callers.
> Document that this is being cleared with 0.
>
> 2b) if there is a need outside of test (why would we test
> something that we are not making use of?), then add another
> function like xe_migrate_clear_with_val() that may be
> implemented differently for PVC or that just returns
> -ENOTSUPP. In which case it shoudl be documented that not
> all platforms support it.
Thanks Lucas. I have reworked the patch as per the option 2. I will send
a new version for review.
Regards,
Bala
>
>
> Lucas De Marchi
>
> > + emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
> > +
> > + } else {
> > + emit_clear_main_copy(gt, bb, src_ofs, size, pitch, value,
> > + is_vram);
> > + }
> > +
> >
> > return 0;
> > }
> > @@ -836,7 +891,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> > batch_size = 2 +
> > pte_update_size(m, clear_vram, &src_it,
> > &clear_L0, &clear_L0_ofs, &clear_L0_pt,
> > - XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
> > + emit_clear_cmd_len(xe), 0,
> > + NUM_PT_PER_BLIT);
> > if (xe_device_has_flat_ccs(xe) && clear_vram)
> > batch_size += EMIT_COPY_CCS_DW;
> >
> > diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> > index c4d9fd2e7b2b..e555f13395ab 100644
> > --- a/drivers/gpu/drm/xe/xe_pci.c
> > +++ b/drivers/gpu/drm/xe/xe_pci.c
> > @@ -72,6 +72,8 @@ struct xe_device_desc {
> > bool has_4tile;
> > bool has_range_tlb_invalidation;
> > bool has_asid;
> > +
> > + bool has_link_copy_engine;
> > };
> >
> > __diag_push();
> > @@ -224,6 +226,7 @@ static const struct xe_device_desc pvc_desc = {
> > .vm_max_level = 4,
> > .supports_usm = true,
> > .has_asid = true,
> > + .has_link_copy_engine = true,
> > };
> >
> > #define MTL_MEDIA_ENGINES \
> > @@ -413,6 +416,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> > xe->info.has_flat_ccs = desc->has_flat_ccs;
> > xe->info.has_4tile = desc->has_4tile;
> > xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
> > + xe->info.has_link_copy_engine = desc->has_link_copy_engine;
> >
> > spd = subplatform_get(xe, desc);
> > xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
> > --
> > 2.25.1
> >
More information about the Intel-xe
mailing list