[PATCH] drm/amdgpu: simplify padding calculations (v2) (REPOST)
Christian König
ckoenig.leichtzumerken at gmail.com
Tue Nov 26 11:54:30 UTC 2019
Am 20.11.19 um 23:29 schrieb Luben Tuikov:
> Simplify padding calculations.
>
> v2: Comment update and spacing.
>
> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 17 ++++++++++++-----
> 5 files changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index 82cdb8f57bfd..909c9c0b4233 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
> u32 extra_bits = vmid & 0xf;
>
> /* IB packet must end on a 8 DW boundary */
> - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
> amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index 89e8c74a40f4..1c49f78b2014 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 011fd12c41fe..db752f2e05f9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index d209cbf54e7d..7d8c2ec7df5b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 64c53eed7fac..51ebb2147ee7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
> uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
>
> - /* IB packet must end on a 8 DW boundary */
> - sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + /* An IB packet must end on an 8 DW boundary, i.e. the next dword
> + * must be on an 8-dword boundary. Our IB packet below is 6
> + * dwords long, so add x NOPs such that, in
> + * modular arithmetic,
> + * wptr + 6 + x = 8k, k >= 0, which in C is,
> + * (wptr + 6 + x) % 8 == 0.
> + * The expression below is a solution for x, since -6 is congruent to 2 modulo 8.
> + */
> + sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1086,10 +1093,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
> }
>
> /**
> - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
> - *
> + * sdma_v5_0_ring_pad_ib - pad the IB
> * @ib: indirect buffer to fill with padding
> *
> + * Pad the IB with NOPs so that its length is a multiple of 8 dwords.
> */
> static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
> {
> @@ -1097,7 +1104,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 0x7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
More information about the amd-gfx mailing list