[Mesa-dev] [PATCH 2/9] ac: change legacy_surf_level::slice_size to dword units
Nicolai Hähnle
nhaehnle at gmail.com
Thu Nov 23 08:18:15 UTC 2017
On 22.11.2017 22:33, Marek Olšák wrote:
> On Wed, Nov 22, 2017 at 7:46 PM, Nicolai Hähnle <nhaehnle at gmail.com
> <mailto:nhaehnle at gmail.com>> wrote:
> > On 21.11.2017 18:30, Marek Olšák wrote:
> >>
> >> From: Marek Olšák <marek.olsak at amd.com <mailto:marek.olsak at amd.com>>
> >>
> >> The next commit will reduce the size even more.
> >> ---
> >> src/amd/common/ac_surface.c | 2 +-
> >> src/amd/common/ac_surface.h | 2 +-
> >> src/amd/vulkan/radv_image.c | 8 ++++----
> >> src/gallium/drivers/r600/evergreen_state.c | 8 ++++----
> >> src/gallium/drivers/r600/r600_state.c | 8 ++++----
> >> src/gallium/drivers/r600/r600_texture.c | 14 +++++++-------
> >> src/gallium/drivers/r600/radeon_uvd.c | 2 +-
> >> src/gallium/drivers/radeon/r600_texture.c | 14 +++++++-------
> >> src/gallium/drivers/radeon/radeon_uvd.c | 2 +-
> >> src/gallium/drivers/radeonsi/cik_sdma.c | 4 ++--
> >> src/gallium/drivers/radeonsi/si_dma.c | 8 ++++----
> >> src/gallium/winsys/radeon/drm/radeon_drm_surface.c | 4 ++--
> >> 12 files changed, 38 insertions(+), 38 deletions(-)
> >>
> >> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
> >> index f7600a3..2b6c3fb 100644
> >> --- a/src/amd/common/ac_surface.c
> >> +++ b/src/amd/common/ac_surface.c
> >> @@ -297,21 +297,21 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
> >> ret = AddrComputeSurfaceInfo(addrlib,
> >> AddrSurfInfoIn,
> >> AddrSurfInfoOut);
> >> if (ret != ADDR_OK) {
> >> return ret;
> >> }
> >> surf_level = is_stencil ? &surf->u.legacy.stencil_level[level] :
> >> &surf->u.legacy.level[level];
> >> surf_level->offset = align64(surf->surf_size,
> >> AddrSurfInfoOut->baseAlign);
> >> - surf_level->slice_size = AddrSurfInfoOut->sliceSize;
> >> + surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
> >> surf_level->nblk_x = AddrSurfInfoOut->pitch;
> >> surf_level->nblk_y = AddrSurfInfoOut->height;
> >> switch (AddrSurfInfoOut->tileMode) {
> >> case ADDR_TM_LINEAR_ALIGNED:
> >> surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
> >> break;
> >> case ADDR_TM_1D_TILED_THIN1:
> >> surf_level->mode = RADEON_SURF_MODE_1D;
> >> break;
> >> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
> >> index 1dc95cd..ec89f6b 100644
> >> --- a/src/amd/common/ac_surface.h
> >> +++ b/src/amd/common/ac_surface.h
> >> @@ -64,21 +64,21 @@ enum radeon_micro_mode {
> >> /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
> >> #define RADEON_SURF_FMASK (1 << 21)
> >> #define RADEON_SURF_DISABLE_DCC (1 << 22)
> >> #define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
> >> #define RADEON_SURF_IMPORTED (1 << 24)
> >> #define RADEON_SURF_OPTIMIZE_FOR_SPACE (1 << 25)
> >> #define RADEON_SURF_SHAREABLE (1 << 26)
> >> struct legacy_surf_level {
> >> uint64_t offset;
> >> - uint64_t slice_size;
> >> + uint32_t slice_size_dw; /* in dwords; max =
> 4GB /
> >> 4. */
> >> uint32_t dcc_offset; /* relative offset within
> >> DCC mip tree */
> >> uint32_t dcc_fast_clear_size;
> >> uint16_t nblk_x;
> >> uint16_t nblk_y;
> >> enum radeon_surf_mode mode;
> >> };
> >> struct legacy_surf_layout {
> >> unsigned bankw:4; /* max 8 */
> >> unsigned bankh:4; /* max 8 */
> >> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> >> index b532aa9..fb7bbde 100644
> >> --- a/src/amd/vulkan/radv_image.c
> >> +++ b/src/amd/vulkan/radv_image.c
> >> @@ -1149,25 +1149,25 @@ void radv_GetImageSubresourceLayout(
> >> if (device->physical_device->rad_info.chip_class >= GFX9) {
> >> pLayout->offset = surface->u.gfx9.offset[level] +
> >> surface->u.gfx9.surf_slice_size * layer;
> >> pLayout->rowPitch = surface->u.gfx9.surf_pitch *
> >> surface->bpe;
> >> pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
> >> pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
> >> pLayout->size = surface->u.gfx9.surf_slice_size;
> >> if (image->type == VK_IMAGE_TYPE_3D)
> >> pLayout->size *= u_minify(image->info.depth,
> >> level);
> >> } else {
> >> - pLayout->offset =
> surface->u.legacy.level[level].offset +
> >> surface->u.legacy.level[level].slice_size * layer;
> >> + pLayout->offset =
> surface->u.legacy.level[level].offset +
> >> surface->u.legacy.level[level].slice_size_dw * 4 * layer;
> >
> >
> > I believe the maximum slice size in bytes is (with an RGBA32 texture)
> >
> > 16384 * 16384 * 16 = 2^14 * 2^14 * 2^4 = 2^32
> >
> > The problem with this code is that the multiplication is now performed as
> > uint32_t and can therefore wrap-around. So an explicit cast to 64-bits is
> > required.
> >
> > In practice, I guess this rather becomes an issue with smaller slice
> sizes
> > but larger layer indices. We really need test case to exercise >= 4 GB
> > textures...
>
> This should do it:
>
> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
> index f18548f..fa17b34 100644
> --- a/src/amd/common/ac_surface.h
> +++ b/src/amd/common/ac_surface.h
> @@ -71,7 +71,8 @@ enum radeon_micro_mode {
>
> struct legacy_surf_level {
> uint64_t offset;
> - uint32_t slice_size_dw; /* in dwords; max = 4GB
> / 4. */
> + /* Declare 32 bits of uint64_t, so that multiplication results in
> 64 bits. */
> + uint64_t slice_size_dw:32; /* in dwords; max =
> 4GB / 4. */
> uint32_t dcc_offset; /* relative offset within
> DCC mip tree */
> uint32_t dcc_fast_clear_size;
> unsigned nblk_x:15;
Congratulations, you found a compiler bug! :)
I like this idea very much; however:
nha at capella:~/tmp$ cat foo.c
#include <stdint.h>
struct s {
uint64_t foo:32;
};
uint64_t foo(uint32_t x, struct s* p)
{
return x * p->foo;
}
uint64_t foo2(uint32_t x, struct s* p)
{
return x * (uint64_t)p->foo;
}
nha at capella:~/tmp$ gcc -Wall -Wextra -O2 -S foo.c
nha at capella:~/tmp$ cat foo.s
.file "foo.c"
.text
.p2align 4,,15
.globl foo
.type foo, @function
foo:
.LFB0:
.cfi_startproc
movl %edi, %eax
imull (%rsi), %eax
ret
.cfi_endproc
.LFE0:
.size foo, .-foo
.p2align 4,,15
.globl foo2
.type foo2, @function
foo2:
.LFB1:
.cfi_startproc
movl (%rsi), %eax
movl %edi, %edi
imulq %rdi, %rax
ret
.cfi_endproc
.LFE1:
.size foo2, .-foo2
.ident "GCC: (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406"
.section .note.GNU-stack,"", at progbits
This is with gcc-7.2.
Some more observations:
- Clang 5.0 produces the same code
- gcc produces entirely ridiculous code when the bitfield has 33 or more
bits (it ends up producing what is essentially a 33-bit wide multiplication)
- Clang actually produces the expected code when the bitfield has 33 or
more bits
- gcc produces the expected code when compiling it as C++
Could you please file bugs against both gcc and clang for this?
I mean, it's entirely possible that the C standard is crazy enough that
it says this behavior is technically correct -- though I doubt it given
the behavior for 33 bits and the inconsistency between the compilers for
C++. In any case, *if* treating the value as less than 64 bits is
correct, they should really produce a warning since the resulting code
is dangerously different from what you'd expect.
Cheers,
Nicolai
>
> Marek
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list