<div dir="ltr">On Wed, Nov 22, 2017 at 7:46 PM, Nicolai Hähnle <<a href="mailto:nhaehnle@gmail.com">nhaehnle@gmail.com</a>> wrote:<br>> On 21.11.2017 18:30, Marek Olšák wrote:<br>>><br>>> From: Marek Olšák <<a href="mailto:marek.olsak@amd.com">marek.olsak@amd.com</a>><br>>><br>>> The next commit will reduce the size even more.<br>>> ---<br>>>   src/amd/common/ac_surface.c                        |  2 +-<br>>>   src/amd/common/ac_surface.h                        |  2 +-<br>>>   src/amd/vulkan/radv_image.c                        |  8 ++++----<br>>>   src/gallium/drivers/r600/evergreen_state.c         |  8 ++++----<br>>>   src/gallium/drivers/r600/r600_state.c              |  8 ++++----<br>>>   src/gallium/drivers/r600/r600_texture.c            | 14 +++++++-------<br>>>   src/gallium/drivers/r600/radeon_uvd.c              |  2 +-<br>>>   src/gallium/drivers/radeon/r600_texture.c          | 14 +++++++-------<br>>>   src/gallium/drivers/radeon/radeon_uvd.c            |  2 +-<br>>>   src/gallium/drivers/radeonsi/cik_sdma.c            |  4 ++--<br>>>   src/gallium/drivers/radeonsi/si_dma.c              |  8 ++++----<br>>>   src/gallium/winsys/radeon/drm/radeon_drm_surface.c |  4 ++--<br>>>   12 files changed, 38 insertions(+), 38 deletions(-)<br>>><br>>> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c<br>>> index f7600a3..2b6c3fb 100644<br>>> --- a/src/amd/common/ac_surface.c<br>>> +++ b/src/amd/common/ac_surface.c<br>>> @@ -297,21 +297,21 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,<br>>>         ret = AddrComputeSurfaceInfo(addrlib,<br>>>                                      AddrSurfInfoIn,<br>>>                                      AddrSurfInfoOut);<br>>>         if (ret != ADDR_OK) {<br>>>                 return ret;<br>>>         }<br>>>         surf_level = is_stencil ? 
&surf->u.legacy.stencil_level[level] :<br>>> &surf->u.legacy.level[level];<br>>>         surf_level->offset = align64(surf->surf_size,<br>>> AddrSurfInfoOut->baseAlign);<br>>> -       surf_level->slice_size = AddrSurfInfoOut->sliceSize;<br>>> +       surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;<br>>>         surf_level->nblk_x = AddrSurfInfoOut->pitch;<br>>>         surf_level->nblk_y = AddrSurfInfoOut->height;<br>>>         switch (AddrSurfInfoOut->tileMode) {<br>>>         case ADDR_TM_LINEAR_ALIGNED:<br>>>                 surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;<br>>>                 break;<br>>>         case ADDR_TM_1D_TILED_THIN1:<br>>>                 surf_level->mode = RADEON_SURF_MODE_1D;<br>>>                 break;<br>>> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h<br>>> index 1dc95cd..ec89f6b 100644<br>>> --- a/src/amd/common/ac_surface.h<br>>> +++ b/src/amd/common/ac_surface.h<br>>> @@ -64,21 +64,21 @@ enum radeon_micro_mode {<br>>>   /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */<br>>>   #define RADEON_SURF_FMASK                       (1 << 21)<br>>>   #define RADEON_SURF_DISABLE_DCC                 (1 << 22)<br>>>   #define RADEON_SURF_TC_COMPATIBLE_HTILE         (1 << 23)<br>>>   #define RADEON_SURF_IMPORTED                    (1 << 24)<br>>>   #define RADEON_SURF_OPTIMIZE_FOR_SPACE          (1 << 25)<br>>>   #define RADEON_SURF_SHAREABLE                   (1 << 26)<br>>>     struct legacy_surf_level {<br>>>       uint64_t                    offset;<br>>> -    uint64_t                    slice_size;<br>>> +    uint32_t                    slice_size_dw; /* in dwords; max = 4GB /<br>>> 4. 
*/<br>>>       uint32_t                    dcc_offset; /* relative offset within<br>>> DCC mip tree */<br>>>       uint32_t                    dcc_fast_clear_size;<br>>>       uint16_t                    nblk_x;<br>>>       uint16_t                    nblk_y;<br>>>       enum radeon_surf_mode       mode;<br>>>   };<br>>>     struct legacy_surf_layout {<br>>>       unsigned                    bankw:4;  /* max 8 */<br>>>       unsigned                    bankh:4;  /* max 8 */<br>>> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c<br>>> index b532aa9..fb7bbde 100644<br>>> --- a/src/amd/vulkan/radv_image.c<br>>> +++ b/src/amd/vulkan/radv_image.c<br>>> @@ -1149,25 +1149,25 @@ void radv_GetImageSubresourceLayout(<br>>>         if (device->physical_device->rad_info.chip_class >= GFX9) {<br>>>                 pLayout->offset = surface->u.gfx9.offset[level] +<br>>> surface->u.gfx9.surf_slice_size * layer;<br>>>                 pLayout->rowPitch = surface->u.gfx9.surf_pitch *<br>>> surface->bpe;<br>>>                 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;<br>>>                 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;<br>>>                 pLayout->size = surface->u.gfx9.surf_slice_size;<br>>>                 if (image->type == VK_IMAGE_TYPE_3D)<br>>>                         pLayout->size *= u_minify(image->info.depth,<br>>> level);<br>>>         } else {<br>>> -               pLayout->offset = surface->u.legacy.level[level].offset +<br>>> surface->u.legacy.level[level].slice_size * layer;<br>>> +               pLayout->offset = surface->u.legacy.level[level].offset +<br>>> surface->u.legacy.level[level].slice_size_dw * 4 * layer;<br>><br>><br>> I believe the maximum slice size in bytes is (with an RGBA32 texture)<br>><br>> 16384 * 16384 * 16 = 2^14 * 2^14 * 2^4 = 2^32<br>><br>> The problem with this code is that the multiplication is now performed as<br>> uint32_t and can therefore wrap-around. 
So an explicit cast to 64 bits is<br>> required.<br>><br>> In practice, I guess this rather becomes an issue with smaller slice sizes<br>> but larger layer indices. We really need a test case to exercise >= 4 GB<br>> textures...<br><br>This should do it:<br><br>diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h<br>index f18548f..fa17b34 100644<br>--- a/src/amd/common/ac_surface.h<br>+++ b/src/amd/common/ac_surface.h<br>@@ -71,7 +71,8 @@ enum radeon_micro_mode {<br> <br> struct legacy_surf_level {<br>     uint64_t                    offset;<br>-    uint32_t                    slice_size_dw; /* in dwords; max = 4GB / 4. */<br>+    /* Declare 32 bits of uint64_t, so that multiplication results in 64 bits. */<br>+    uint64_t                    slice_size_dw:32; /* in dwords; max = 4GB / 4. */<br>     uint32_t                    dcc_offset; /* relative offset within DCC mip tree */<br>     uint32_t                    dcc_fast_clear_size;<br>     unsigned                    nblk_x:15;<br><br>Marek<br></div>