[PATCH] drm/radeon: use IBs for VM page table updates v2

Alex Deucher alexdeucher at gmail.com
Fri Feb 1 08:58:05 PST 2013


On Fri, Feb 1, 2013 at 11:32 AM, Christian König
<deathsimple at vodafone.de> wrote:
> From: Alex Deucher <alexander.deucher at amd.com>
>
> For very large page table updates, we can exceed the
> size of the ring.  To avoid this, use an IB to perform
> the page table update.
>
> v2(ck): cleanup the IB infrastructure and the use it instead
>         of filling the struct ourself.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Christian König <christian.koenig at amd.com>

Looks good!  thanks for fixing that up.  I'll apply it to my WIP 3.9
tree and test on cayman.

Alex

> ---
>  drivers/gpu/drm/radeon/ni.c          |   30 +++++++++--------
>  drivers/gpu/drm/radeon/radeon.h      |    7 ++--
>  drivers/gpu/drm/radeon/radeon_asic.h |    8 +++--
>  drivers/gpu/drm/radeon/radeon_cs.c   |   19 +++--------
>  drivers/gpu/drm/radeon/radeon_gart.c |   60 ++++++++++++++--------------------
>  drivers/gpu/drm/radeon/radeon_ring.c |   19 +++++++++++
>  drivers/gpu/drm/radeon/si.c          |   52 +++++++++++++++--------------
>  7 files changed, 103 insertions(+), 92 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
> index 170bd03..7cead76 100644
> --- a/drivers/gpu/drm/radeon/ni.c
> +++ b/drivers/gpu/drm/radeon/ni.c
> @@ -1946,19 +1946,21 @@ uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
>   * cayman_vm_set_page - update the page tables using the CP
>   *
>   * @rdev: radeon_device pointer
> + * @ib: indirect buffer to fill with commands
>   * @pe: addr of the page entry
>   * @addr: dst addr to write into pe
>   * @count: number of page entries to update
>   * @incr: increase next addr by incr bytes
>   * @flags: access flags
>   *
> - * Update the page tables using the CP (cayman-si).
> + * Update the page tables using the CP (cayman/TN).
>   */
> -void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
> +void cayman_vm_set_page(struct radeon_device *rdev,
> +                       struct radeon_ib *ib,
> +                       uint64_t pe,
>                         uint64_t addr, unsigned count,
>                         uint32_t incr, uint32_t flags)
>  {
> -       struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
>         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
>         uint64_t value;
>         unsigned ndw;
> @@ -1969,9 +1971,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                         if (ndw > 0x3FFF)
>                                 ndw = 0x3FFF;
>
> -                       radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
> -                       radeon_ring_write(ring, pe);
> -                       radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
> +                       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
> +                       ib->ptr[ib->length_dw++] = pe;
> +                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
>                         for (; ndw > 1; ndw -= 2, --count, pe += 8) {
>                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
>                                         value = radeon_vm_map_gart(rdev, addr);
> @@ -1983,8 +1985,8 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                                 }
>                                 addr += incr;
>                                 value |= r600_flags;
> -                               radeon_ring_write(ring, value);
> -                               radeon_ring_write(ring, upper_32_bits(value));
> +                               ib->ptr[ib->length_dw++] = value;
> +                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
>                         }
>                 }
>         } else {
> @@ -1994,9 +1996,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                                 ndw = 0xFFFFE;
>
>                         /* for non-physically contiguous pages (system) */
> -                       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
> -                       radeon_ring_write(ring, pe);
> -                       radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
> +                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
> +                       ib->ptr[ib->length_dw++] = pe;
> +                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
>                         for (; ndw > 0; ndw -= 2, --count, pe += 8) {
>                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
>                                         value = radeon_vm_map_gart(rdev, addr);
> @@ -2008,10 +2010,12 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                                 }
>                                 addr += incr;
>                                 value |= r600_flags;
> -                               radeon_ring_write(ring, value);
> -                               radeon_ring_write(ring, upper_32_bits(value));
> +                               ib->ptr[ib->length_dw++] = value;
> +                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
>                         }
>                 }
> +               while (ib->length_dw & 0x7)
> +                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
>         }
>  }
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 6539d6c..307d681 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -780,6 +780,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
>                   struct radeon_ib *ib, struct radeon_vm *vm,
>                   unsigned size);
>  void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
> +void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence);
>  int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
>                        struct radeon_ib *const_ib);
>  int radeon_ib_pool_init(struct radeon_device *rdev);
> @@ -1188,7 +1189,9 @@ struct radeon_asic {
>                 void (*fini)(struct radeon_device *rdev);
>
>                 u32 pt_ring_index;
> -               void (*set_page)(struct radeon_device *rdev, uint64_t pe,
> +               void (*set_page)(struct radeon_device *rdev,
> +                                struct radeon_ib *ib,
> +                                uint64_t pe,
>                                  uint64_t addr, unsigned count,
>                                  uint32_t incr, uint32_t flags);
>         } vm;
> @@ -1810,7 +1813,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
>  #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
>  #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
>  #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
> -#define radeon_asic_vm_set_page(rdev, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (pe), (addr), (count), (incr), (flags)))
> +#define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
>  #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
>  #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp))
>  #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp))
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
> index e429e25..f4134a8 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.h
> +++ b/drivers/gpu/drm/radeon/radeon_asic.h
> @@ -474,7 +474,9 @@ int cayman_vm_init(struct radeon_device *rdev);
>  void cayman_vm_fini(struct radeon_device *rdev);
>  void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
>  uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags);
> -void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
> +void cayman_vm_set_page(struct radeon_device *rdev,
> +                       struct radeon_ib *ib,
> +                       uint64_t pe,
>                         uint64_t addr, unsigned count,
>                         uint32_t incr, uint32_t flags);
>  int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
> @@ -506,7 +508,9 @@ int si_irq_set(struct radeon_device *rdev);
>  int si_irq_process(struct radeon_device *rdev);
>  int si_vm_init(struct radeon_device *rdev);
>  void si_vm_fini(struct radeon_device *rdev);
> -void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
> +void si_vm_set_page(struct radeon_device *rdev,
> +                   struct radeon_ib *ib,
> +                   uint64_t pe,
>                     uint64_t addr, unsigned count,
>                     uint32_t incr, uint32_t flags);
>  void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index 1d214b6..70d3824 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -125,18 +125,6 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
>         return 0;
>  }
>
> -static void radeon_cs_sync_to(struct radeon_cs_parser *p,
> -                             struct radeon_fence *fence)
> -{
> -       struct radeon_fence *other;
> -
> -       if (!fence)
> -               return;
> -
> -       other = p->ib.sync_to[fence->ring];
> -       p->ib.sync_to[fence->ring] = radeon_fence_later(fence, other);
> -}
> -
>  static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
>  {
>         int i;
> @@ -145,7 +133,7 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
>                 if (!p->relocs[i].robj)
>                         continue;
>
> -               radeon_cs_sync_to(p, p->relocs[i].robj->tbo.sync_obj);
> +               radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
>         }
>  }
>
> @@ -472,8 +460,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
>                 goto out;
>         }
>         radeon_cs_sync_rings(parser);
> -       radeon_cs_sync_to(parser, vm->fence);
> -       radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
> +       radeon_ib_sync_to(&parser->ib, vm->fence);
> +       radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
> +               rdev, vm, parser->ring));
>
>         if ((rdev->family >= CHIP_TAHITI) &&
>             (parser->chunk_const_ib_idx != -1)) {
> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
> index 6e24f84..2c1341f 100644
> --- a/drivers/gpu/drm/radeon/radeon_gart.c
> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
> @@ -929,6 +929,7 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
>   */
>  static int radeon_vm_update_pdes(struct radeon_device *rdev,
>                                  struct radeon_vm *vm,
> +                                struct radeon_ib *ib,
>                                  uint64_t start, uint64_t end)
>  {
>         static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
> @@ -971,7 +972,7 @@ retry:
>                     ((last_pt + incr * count) != pt)) {
>
>                         if (count) {
> -                               radeon_asic_vm_set_page(rdev, last_pde,
> +                               radeon_asic_vm_set_page(rdev, ib, last_pde,
>                                                         last_pt, count, incr,
>                                                         RADEON_VM_PAGE_VALID);
>                         }
> @@ -985,7 +986,7 @@ retry:
>         }
>
>         if (count) {
> -               radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
> +               radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
>                                         incr, RADEON_VM_PAGE_VALID);
>
>         }
> @@ -1009,6 +1010,7 @@ retry:
>   */
>  static void radeon_vm_update_ptes(struct radeon_device *rdev,
>                                   struct radeon_vm *vm,
> +                                 struct radeon_ib *ib,
>                                   uint64_t start, uint64_t end,
>                                   uint64_t dst, uint32_t flags)
>  {
> @@ -1038,7 +1040,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
>                 if ((last_pte + 8 * count) != pte) {
>
>                         if (count) {
> -                               radeon_asic_vm_set_page(rdev, last_pte,
> +                               radeon_asic_vm_set_page(rdev, ib, last_pte,
>                                                         last_dst, count,
>                                                         RADEON_GPU_PAGE_SIZE,
>                                                         flags);
> @@ -1056,7 +1058,8 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
>         }
>
>         if (count) {
> -               radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
> +               radeon_asic_vm_set_page(rdev, ib, last_pte,
> +                                       last_dst, count,
>                                         RADEON_GPU_PAGE_SIZE, flags);
>         }
>  }
> @@ -1080,8 +1083,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>                             struct ttm_mem_reg *mem)
>  {
>         unsigned ridx = rdev->asic->vm.pt_ring_index;
> -       struct radeon_ring *ring = &rdev->ring[ridx];
> -       struct radeon_semaphore *sem = NULL;
> +       struct radeon_ib ib;
>         struct radeon_bo_va *bo_va;
>         unsigned nptes, npdes, ndw;
>         uint64_t addr;
> @@ -1124,25 +1126,13 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>                 bo_va->valid = false;
>         }
>
> -       if (vm->fence && radeon_fence_signaled(vm->fence)) {
> -               radeon_fence_unref(&vm->fence);
> -       }
> -
> -       if (vm->fence && vm->fence->ring != ridx) {
> -               r = radeon_semaphore_create(rdev, &sem);
> -               if (r) {
> -                       return r;
> -               }
> -       }
> -
>         nptes = radeon_bo_ngpu_pages(bo);
>
>         /* assume two extra pdes in case the mapping overlaps the borders */
>         npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
>
> -       /* estimate number of dw needed */
> -       /* semaphore, fence and padding */
> -       ndw = 32;
> +       /* padding, etc. */
> +       ndw = 64;
>
>         if (RADEON_VM_BLOCK_SIZE > 11)
>                 /* reserve space for one header for every 2k dwords */
> @@ -1161,33 +1151,31 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>         /* reserve space for pde addresses */
>         ndw += npdes * 2;
>
> -       r = radeon_ring_lock(rdev, ring, ndw);
> -       if (r) {
> -               return r;
> -       }
> +       /* update too big for an IB */
> +       if (ndw > 0xfffff)
> +               return -ENOMEM;
>
> -       if (sem && radeon_fence_need_sync(vm->fence, ridx)) {
> -               radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx);
> -               radeon_fence_note_sync(vm->fence, ridx);
> -       }
> +       r = radeon_ib_get(rdev, ridx, &ib, NULL, ndw * 4);
> +       ib.length_dw = 0;
>
> -       r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
> +       r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
>         if (r) {
> -               radeon_ring_unlock_undo(rdev, ring);
> +               radeon_ib_free(rdev, &ib);
>                 return r;
>         }
>
> -       radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
> +       radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
>                               addr, bo_va->flags);
>
> -       radeon_fence_unref(&vm->fence);
> -       r = radeon_fence_emit(rdev, &vm->fence, ridx);
> +       radeon_ib_sync_to(&ib, vm->fence);
> +       r = radeon_ib_schedule(rdev, &ib, NULL);
>         if (r) {
> -               radeon_ring_unlock_undo(rdev, ring);
> +               radeon_ib_free(rdev, &ib);
>                 return r;
>         }
> -       radeon_ring_unlock_commit(rdev, ring);
> -       radeon_semaphore_free(rdev, &sem, vm->fence);
> +       radeon_fence_unref(&vm->fence);
> +       vm->fence = radeon_fence_ref(ib.fence);
> +       radeon_ib_free(rdev, &ib);
>         radeon_fence_unref(&vm->last_flush);
>
>         return 0;
> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
> index cd72062..8d58e26 100644
> --- a/drivers/gpu/drm/radeon/radeon_ring.c
> +++ b/drivers/gpu/drm/radeon/radeon_ring.c
> @@ -109,6 +109,25 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
>  }
>
>  /**
> + * radeon_ib_sync_to - sync to fence before executing the IB
> + *
> + * @ib: IB object to add fence to
> + * @fence: fence to sync to
> + *
> + * Sync to the fence before executing the IB
> + */
> +void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence)
> +{
> +       struct radeon_fence *other;
> +
> +       if (!fence)
> +               return;
> +
> +       other = ib->sync_to[fence->ring];
> +       ib->sync_to[fence->ring] = radeon_fence_later(fence, other);
> +}
> +
> +/**
>   * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring
>   *
>   * @rdev: radeon_device pointer
> diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
> index cd83bc5..a910cb9 100644
> --- a/drivers/gpu/drm/radeon/si.c
> +++ b/drivers/gpu/drm/radeon/si.c
> @@ -3043,19 +3043,21 @@ void si_vm_fini(struct radeon_device *rdev)
>   * si_vm_set_page - update the page tables using the CP
>   *
>   * @rdev: radeon_device pointer
> + * @ib: indirect buffer to fill with commands
>   * @pe: addr of the page entry
>   * @addr: dst addr to write into pe
>   * @count: number of page entries to update
>   * @incr: increase next addr by incr bytes
>   * @flags: access flags
>   *
> - * Update the page tables using the CP (cayman-si).
> + * Update the page tables using the CP (SI).
>   */
> -void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
> +void si_vm_set_page(struct radeon_device *rdev,
> +                   struct radeon_ib *ib,
> +                   uint64_t pe,
>                     uint64_t addr, unsigned count,
>                     uint32_t incr, uint32_t flags)
>  {
> -       struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
>         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
>         uint64_t value;
>         unsigned ndw;
> @@ -3066,11 +3068,11 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                         if (ndw > 0x3FFE)
>                                 ndw = 0x3FFE;
>
> -                       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
> -                       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> -                                                WRITE_DATA_DST_SEL(1)));
> -                       radeon_ring_write(ring, pe);
> -                       radeon_ring_write(ring, upper_32_bits(pe));
> +                       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
> +                       ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
> +                                       WRITE_DATA_DST_SEL(1));
> +                       ib->ptr[ib->length_dw++] = pe;
> +                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
>                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
>                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
>                                         value = radeon_vm_map_gart(rdev, addr);
> @@ -3082,8 +3084,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                                 }
>                                 addr += incr;
>                                 value |= r600_flags;
> -                               radeon_ring_write(ring, value);
> -                               radeon_ring_write(ring, upper_32_bits(value));
> +                               ib->ptr[ib->length_dw++] = value;
> +                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
>                         }
>                 }
>         } else {
> @@ -3095,9 +3097,9 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                                         ndw = 0xFFFFE;
>
>                                 /* for non-physically contiguous pages (system) */
> -                               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
> -                               radeon_ring_write(ring, pe);
> -                               radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
> +                               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
> +                               ib->ptr[ib->length_dw++] = pe;
> +                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
>                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
>                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
>                                                 value = radeon_vm_map_gart(rdev, addr);
> @@ -3109,8 +3111,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                                         }
>                                         addr += incr;
>                                         value |= r600_flags;
> -                                       radeon_ring_write(ring, value);
> -                                       radeon_ring_write(ring, upper_32_bits(value));
> +                                       ib->ptr[ib->length_dw++] = value;
> +                                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
>                                 }
>                         }
>                 } else {
> @@ -3124,20 +3126,22 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                                 else
>                                         value = 0;
>                                 /* for physically contiguous pages (vram) */
> -                               radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
> -                               radeon_ring_write(ring, pe); /* dst addr */
> -                               radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
> -                               radeon_ring_write(ring, r600_flags); /* mask */
> -                               radeon_ring_write(ring, 0);
> -                               radeon_ring_write(ring, value); /* value */
> -                               radeon_ring_write(ring, upper_32_bits(value));
> -                               radeon_ring_write(ring, incr); /* increment size */
> -                               radeon_ring_write(ring, 0);
> +                               ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
> +                               ib->ptr[ib->length_dw++] = pe; /* dst addr */
> +                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
> +                               ib->ptr[ib->length_dw++] = r600_flags; /* mask */
> +                               ib->ptr[ib->length_dw++] = 0;
> +                               ib->ptr[ib->length_dw++] = value; /* value */
> +                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
> +                               ib->ptr[ib->length_dw++] = incr; /* increment size */
> +                               ib->ptr[ib->length_dw++] = 0;
>                                 pe += ndw * 4;
>                                 addr += (ndw / 2) * incr;
>                                 count -= ndw / 2;
>                         }
>                 }
> +               while (ib->length_dw & 0x7)
> +                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
>         }
>  }
>
> --
> 1.7.9.5
>


More information about the dri-devel mailing list