[PATCH 2/3] drm/xe: Update xe_migrate_vram to support compression

Matthew Brost matthew.brost at intel.com
Tue Jul 15 15:58:47 UTC 2025


On Tue, Jul 15, 2025 at 09:49:01AM +0100, Matthew Auld wrote:
> On 15/07/2025 09:37, Matthew Auld wrote:
> > On 14/07/2025 18:33, Matthew Brost wrote:
> > > While SVM does not currently support compression, other users of
> > > xe_migrate_vram (e.g., devcoredump) expect the data to be read back
> > > uncompressed. Update xe_migrate_vram to support compressed data.
> > > 
> > > Cc: stable at vger.kernel.org
> > > Fixes: 9c44fd5f6e8a ("drm/xe: Add migrate layer functions for SVM support")
> > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > ---
> > >   drivers/gpu/drm/xe/xe_migrate.c | 31 ++++++++++++++++++++++++-------
> > >   1 file changed, 24 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> > > index ba1cff2e4cda..936daa2b363d 100644
> > > --- a/drivers/gpu/drm/xe/xe_migrate.c
> > > +++ b/drivers/gpu/drm/xe/xe_migrate.c
> > > @@ -1613,7 +1613,8 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> > >                        unsigned long len,
> > >                        unsigned long sram_offset,
> > >                        dma_addr_t *sram_addr, u64 vram_addr,
> > > -                     const enum xe_migrate_copy_dir dir)
> > > +                     const enum xe_migrate_copy_dir dir,
> > > +                     bool needs_ccs_emit)
> > >   {
> > >       struct xe_gt *gt = m->tile->primary_gt;
> > >       struct xe_device *xe = gt_to_xe(gt);
> > > @@ -1623,10 +1624,12 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> > >       u64 src_L0_ofs, dst_L0_ofs;
> > >       struct xe_sched_job *job;
> > >       struct xe_bb *bb;
> > > -    u32 update_idx, pt_slot = 0;
> > > +    u32 update_idx, pt_slot = 0, flush_flags = 0;
> > >       unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE);
> > >       unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ?
> > >           PAGE_SIZE : 4;
> > > +    bool use_comp_pat = xe_device_has_flat_ccs(xe) &&
> > > +        GRAPHICS_VER(xe) >= 20 && dir == XE_MIGRATE_COPY_TO_SRAM;
> > >       int err;
> > >       if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) ||
> > > @@ -1637,6 +1640,8 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> > >       batch_size += pte_update_cmd_size(len);
> > >       batch_size += EMIT_COPY_DW;
> > > +    if (needs_ccs_emit)
> > > +        batch_size += EMIT_COPY_CCS_DW;
> > >       bb = xe_bb_new(gt, batch_size, use_usm_batch);
> > >       if (IS_ERR(bb)) {
> > > @@ -1652,7 +1657,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> > >           dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
> > >       } else {
> > > -        src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
> > > +        src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, use_comp_pat);
> > >           dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
> > >       }
> > > @@ -1661,6 +1666,17 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> > >       emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, len, pitch);
> > > +    if (needs_ccs_emit) {
> > > +        if (dir == XE_MIGRATE_COPY_TO_VRAM)
> > > +            flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
> > > +                              false, dst_L0_ofs,
> > > +                              true, len, 0, true);
> > > +        else
> > > +            flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
> > > +                              true, dst_L0_ofs,
> > > +                              false, len, 0, true);
> > > +    }
> > 
> > I think we can drop this and anything related to needs_ccs_emit. In
> > theory we should only need the use_comp_pat change. IIUC this path is
> > VRAM only, only xe2+ can decompress in the KMD (dg2 is a no-go), and
> > there we don't manage the raw CCS state.
> 
> So xe_migrate_ccs_copy() is not actually decompressing anything, but rather
> copying the raw CCS state around, which I think we use for manual
> save/restore during swap and things like that to preserve the compression
> state.
> 
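
(So for the save/restore case both sides keep the compressed bits plus
their CCS metadata: the main blit moves the data as-is and a second blit
moves the raw CCS state. Same call shape as in the hunk above -- the
bool names below are my own shorthand, so treat this as a sketch:)

	/* main copy moves the (possibly compressed) data untouched ... */
	emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, len, pitch);
	/* ... and a second blit moves the raw CCS state alongside it,
	 * preserving the compression across e.g. vram <-> system swaps */
	flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
					  dst_L0_ofs, dst_is_vram,
					  len, 0, true);
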
> I think we do also use xe_migrate_ccs_copy() to initially clear the CCS
> state, but I think that has already happened somewhere else? Or do we
> need it here, if we say go from tt -> vram on platforms like dg2?

Yes, the clear has already happened on BO allocation (or on free, once that part merges).
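
(Roughly where that happens today, from memory -- treat the flag names
as a sketch, they may be slightly off:)

	/* On first clear of a VRAM BO the CCS state is zeroed along with
	 * the data, so later reads through the compressed PAT are well
	 * defined without any extra work in xe_migrate_vram().
	 */
	fence = xe_migrate_clear(m, bo, bo->ttm.resource,
				 XE_MIGRATE_CLEAR_FLAG_BO_DATA |
				 XE_MIGRATE_CLEAR_FLAG_CCS_DATA);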

This path copies data out to be user readable, either for devcoredump or an SVM page fault, so the tt -> vram concern doesn't really apply here.

I agree, this part can be dropped.
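
Something like this is what I'd expect the simplified version to look
like (untested sketch, helper signatures as in the hunks above):

	static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
						 unsigned long len,
						 unsigned long sram_offset,
						 dma_addr_t *sram_addr,
						 u64 vram_addr,
						 const enum xe_migrate_copy_dir dir)
	{
		...
		/* xe2+ with flat CCS: read VRAM through the compressed PAT
		 * so the copy engine decompresses in flight; writes to VRAM
		 * are unchanged */
		bool use_comp_pat = xe_device_has_flat_ccs(xe) &&
			GRAPHICS_VER(xe) >= 20 && dir == XE_MIGRATE_COPY_TO_SRAM;
		...
		if (dir == XE_MIGRATE_COPY_TO_VRAM) {
			src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
			dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
		} else {
			src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, use_comp_pat);
			dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
		}
		...
		emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, len, pitch);
		/* no xe_migrate_ccs_copy() here, so no extra flush flags */
		xe_sched_job_add_migrate_flush(job, 0);
		...
	}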

Matt

> > 
> > > +
> > >       job = xe_bb_create_migration_job(m->q, bb,
> > >                        xe_migrate_batch_base(m, use_usm_batch),
> > >                        update_idx);
> > > @@ -1669,7 +1685,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> > >           goto err;
> > >       }
> > > -    xe_sched_job_add_migrate_flush(job, 0);
> > > +    xe_sched_job_add_migrate_flush(job, flush_flags);
> > >       mutex_lock(&m->job_mutex);
> > >       xe_sched_job_arm(job);
> > > @@ -1708,7 +1724,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
> > >                        u64 dst_addr)
> > >   {
> > >       return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr,
> > > -                   XE_MIGRATE_COPY_TO_VRAM);
> > > +                   XE_MIGRATE_COPY_TO_VRAM, false);
> > >   }
> > >   /**
> > > @@ -1729,7 +1745,7 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
> > >                          dma_addr_t *dst_addr)
> > >   {
> > >       return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr,
> > > -                   XE_MIGRATE_COPY_TO_SRAM);
> > > +                   XE_MIGRATE_COPY_TO_SRAM, false);
> > >   }
> > >   static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr,
> > > @@ -1890,7 +1906,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
> > >                         dma_addr + current_page,
> > >                         vram_addr, write ?
> > >                         XE_MIGRATE_COPY_TO_VRAM :
> > > -                      XE_MIGRATE_COPY_TO_SRAM);
> > > +                      XE_MIGRATE_COPY_TO_SRAM,
> > > +                      xe_bo_needs_ccs_pages(bo));
> > >           if (IS_ERR(__fence)) {
> > >               if (fence)
> > >                   dma_fence_wait(fence, false);
> > 
> 

