[Mesa-stable] [Mesa-dev] [PATCH] anv: Use separate MOCS settings for external BOs
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Wed Oct 3 13:47:46 UTC 2018
On 02/10/2018 23:11, Jason Ekstrand wrote:
> On Broadwell and above, we have to use different MOCS settings to allow
> the kernel to take over and disable caching when needed for external
> buffers. On Broadwell, this is especially important because the kernel
> can't disable eLLC so we have to do it in userspace. We very badly
> don't want to do that on everything so we need separate MOCS for
> external and internal BOs.
>
> In order to do this, we add an anv-specific BO flag for "external" and
> use that to distinguish between buffers which may be shared with other
> processes and/or display and those which are entirely internal. That,
> together with an anv_mocs_for_bo helper lets us choose the right MOCS
> settings for each BO use.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99507
> Cc: mesa-stable at lists.freedesktop.org
Looks good, and thanks for adding the anv BO flags; I'll reuse them!
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> src/intel/vulkan/anv_allocator.c | 12 ++++++++--
> src/intel/vulkan/anv_batch_chain.c | 2 +-
> src/intel/vulkan/anv_blorp.c | 15 ++++++------
> src/intel/vulkan/anv_device.c | 9 +++++--
> src/intel/vulkan/anv_image.c | 5 ++--
> src/intel/vulkan/anv_intel.c | 2 +-
> src/intel/vulkan/anv_private.h | 38 +++++++++++++++++++++++-------
> src/intel/vulkan/gen7_cmd_buffer.c | 3 ++-
> src/intel/vulkan/gen8_cmd_buffer.c | 3 ++-
> src/intel/vulkan/genX_cmd_buffer.c | 18 +++++++-------
> src/intel/vulkan/genX_gpu_memcpy.c | 5 ++--
> src/intel/vulkan/genX_state.c | 6 +++++
> 12 files changed, 80 insertions(+), 38 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
> index ab01d46cbeb..f62d48ae3fe 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -1253,7 +1253,8 @@ anv_bo_cache_lookup(struct anv_bo_cache *cache, uint32_t gem_handle)
> (EXEC_OBJECT_WRITE | \
> EXEC_OBJECT_ASYNC | \
> EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
> - EXEC_OBJECT_PINNED)
> + EXEC_OBJECT_PINNED | \
> + ANV_BO_EXTERNAL)
>
> VkResult
> anv_bo_cache_alloc(struct anv_device *device,
> @@ -1311,6 +1312,7 @@ anv_bo_cache_import(struct anv_device *device,
> struct anv_bo **bo_out)
> {
> assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
> + assert(bo_flags & ANV_BO_EXTERNAL);
>
> pthread_mutex_lock(&cache->mutex);
>
> @@ -1327,7 +1329,7 @@ anv_bo_cache_import(struct anv_device *device,
> * client has imported a BO twice in different ways and they get what
> * they have coming.
> */
> - uint64_t new_flags = 0;
> + uint64_t new_flags = ANV_BO_EXTERNAL;
> new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
> new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
> new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> @@ -1411,6 +1413,12 @@ anv_bo_cache_export(struct anv_device *device,
> assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
> struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
>
> + /* This BO must have been flagged external in order for us to be able
> + * to export it. This is done based on external options passed into
> + * anv_AllocateMemory.
> + */
> + assert(bo->bo.flags & ANV_BO_EXTERNAL);
> +
> int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
> if (fd < 0)
> return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
> diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
> index 0f7c8325ea4..3e13553ac18 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -1088,7 +1088,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
> obj->relocs_ptr = 0;
> obj->alignment = 0;
> obj->offset = bo->offset;
> - obj->flags = bo->flags | extra_flags;
> + obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
> obj->rsvd1 = 0;
> obj->rsvd2 = 0;
> }
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index fa7936d0981..29ed6b2ee35 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -156,7 +156,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
> .addr = {
> .buffer = buffer->address.bo,
> .offset = buffer->address.offset + offset,
> - .mocs = device->default_mocs,
> + .mocs = anv_mocs_for_bo(device, buffer->address.bo),
> },
> };
>
> @@ -209,7 +209,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
> .addr = {
> .buffer = image->planes[plane].address.bo,
> .offset = image->planes[plane].address.offset + surface->offset,
> - .mocs = device->default_mocs,
> + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
> },
> };
>
> @@ -219,7 +219,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
> blorp_surf->aux_addr = (struct blorp_address) {
> .buffer = image->planes[plane].address.bo,
> .offset = image->planes[plane].address.offset + aux_surface->offset,
> - .mocs = device->default_mocs,
> + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
> };
> blorp_surf->aux_usage = aux_usage;
>
> @@ -669,12 +669,12 @@ void anv_CmdCopyBuffer(
> struct blorp_address src = {
> .buffer = src_buffer->address.bo,
> .offset = src_buffer->address.offset + pRegions[r].srcOffset,
> - .mocs = cmd_buffer->device->default_mocs,
> + .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo),
> };
> struct blorp_address dst = {
> .buffer = dst_buffer->address.bo,
> .offset = dst_buffer->address.offset + pRegions[r].dstOffset,
> - .mocs = cmd_buffer->device->default_mocs,
> + .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
> };
>
> blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
> @@ -727,7 +727,7 @@ void anv_CmdUpdateBuffer(
> struct blorp_address dst = {
> .buffer = dst_buffer->address.bo,
> .offset = dst_buffer->address.offset + dstOffset,
> - .mocs = cmd_buffer->device->default_mocs,
> + .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
> };
>
> blorp_buffer_copy(&batch, src, dst, copy_size);
> @@ -1437,7 +1437,8 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
> .buffer = image->planes[0].address.bo,
> .offset = image->planes[0].address.offset +
> image->planes[0].shadow_surface.offset,
> - .mocs = cmd_buffer->device->default_mocs,
> + .mocs = anv_mocs_for_bo(cmd_buffer->device,
> + image->planes[0].address.bo),
> },
> };
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index eae93587349..d9d9553aec2 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -2278,8 +2278,8 @@ VkResult anv_AllocateMemory(
> fd_info->handleType ==
> VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
>
> - result = anv_bo_cache_import(device, &device->bo_cache,
> - fd_info->fd, bo_flags, &mem->bo);
> + result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd,
> + bo_flags | ANV_BO_EXTERNAL, &mem->bo);
> if (result != VK_SUCCESS)
> goto fail;
>
> @@ -2316,6 +2316,11 @@ VkResult anv_AllocateMemory(
> */
> close(fd_info->fd);
> } else {
> + const VkExportMemoryAllocateInfoKHR *fd_info =
> + vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
> + if (fd_info && fd_info->handleTypes)
> + bo_flags |= ANV_BO_EXTERNAL;
> +
> result = anv_bo_cache_alloc(device, &device->bo_cache,
> pAllocateInfo->allocationSize, bo_flags,
> &mem->bo);
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index b0d8c560adb..ad820917f23 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -1097,7 +1097,7 @@ anv_image_fill_surface_state(struct anv_device *device,
> .size_B = surface->isl.size_B,
> .format = ISL_FORMAT_RAW,
> .stride_B = 1,
> - .mocs = device->default_mocs);
> + .mocs = anv_mocs_for_bo(device, address.bo));
> state_inout->address = address,
> state_inout->aux_address = ANV_NULL_ADDRESS;
> state_inout->clear_address = ANV_NULL_ADDRESS;
> @@ -1198,7 +1198,8 @@ anv_image_fill_surface_state(struct anv_device *device,
> .aux_address = anv_address_physical(aux_address),
> .clear_address = anv_address_physical(clear_address),
> .use_clear_address = !anv_address_is_null(clear_address),
> - .mocs = device->default_mocs,
> + .mocs = anv_mocs_for_bo(device,
> + state_inout->address.bo),
> .x_offset_sa = tile_x_sa,
> .y_offset_sa = tile_y_sa);
>
> diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
> index 06db5787a9c..ed1bc096c66 100644
> --- a/src/intel/vulkan/anv_intel.c
> +++ b/src/intel/vulkan/anv_intel.c
> @@ -73,7 +73,7 @@ VkResult anv_CreateDmaBufImageINTEL(
>
> image = anv_image_from_handle(image_h);
>
> - uint64_t bo_flags = 0;
> + uint64_t bo_flags = ANV_BO_EXTERNAL;
> if (device->instance->physicalDevice.supports_48bit_addresses)
> bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> if (device->instance->physicalDevice.use_softpin)
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 0b78d59e567..926f353aa4b 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -558,6 +558,10 @@ anv_multialloc_alloc2(struct anv_multialloc *ma,
> return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
> }
>
> +/* Extra ANV-defined BO flags which won't be passed to the kernel */
> +#define ANV_BO_EXTERNAL (1ull << 31)
> +#define ANV_BO_FLAG_MASK (1ull << 31)
> +
> struct anv_bo {
> uint32_t gem_handle;
>
> @@ -1018,6 +1022,7 @@ struct anv_device {
> struct anv_scratch_pool scratch_pool;
>
> uint32_t default_mocs;
> + uint32_t external_mocs;
>
> pthread_mutex_t mutex;
> pthread_cond_t queue_submit;
> @@ -1047,6 +1052,15 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
> anv_state_pool_free(anv_binding_table_pool(device), state);
> }
>
> +static inline uint32_t
> +anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
> +{
> + if (bo->flags & ANV_BO_EXTERNAL)
> + return device->external_mocs;
> + else
> + return device->default_mocs;
> +}
> +
> static void inline
> anv_state_flush(struct anv_device *device, struct anv_state state)
> {
> @@ -1328,6 +1342,12 @@ _anv_combine_address(struct anv_batch *batch, void *location,
> .AgeforQUADLRU = 0 \
> }
>
> +#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \
> + .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, \
> + .TargetCache = L3DefertoPATforLLCeLLCselection, \
> + .AgeforQUADLRU = 0 \
> + }
> +
> /* Skylake: MOCS is now an index into an array of 62 different caching
> * configurations programmed by the kernel.
> */
> @@ -1337,9 +1357,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
> .IndextoMOCSTables = 2 \
> }
>
> -#define GEN9_MOCS_PTE { \
> - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
> - .IndextoMOCSTables = 1 \
> +#define GEN9_EXTERNAL_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \
> + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
> + .IndextoMOCSTables = 1 \
> }
>
> /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
> @@ -1348,9 +1368,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
> .IndextoMOCSTables = 2 \
> }
>
> -#define GEN10_MOCS_PTE { \
> - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
> - .IndextoMOCSTables = 1 \
> +#define GEN10_EXTERNAL_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) { \
> + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
> + .IndextoMOCSTables = 1 \
> }
>
> /* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
> @@ -1359,9 +1379,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
> .IndextoMOCSTables = 2 \
> }
>
> -#define GEN11_MOCS_PTE { \
> - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
> - .IndextoMOCSTables = 1 \
> +#define GEN11_EXTERNAL_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) { \
> + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
> + .IndextoMOCSTables = 1 \
> }
>
> struct anv_device_memory {
> diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
> index cf1f8ee2829..da51cb9781c 100644
> --- a/src/intel/vulkan/gen7_cmd_buffer.c
> +++ b/src/intel/vulkan/gen7_cmd_buffer.c
> @@ -246,7 +246,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
> ib.CutIndexEnable = pipeline->primitive_restart;
> #endif
> ib.IndexFormat = cmd_buffer->state.gfx.gen7.index_type;
> - ib.MemoryObjectControlState = GENX(MOCS);
> + ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
> + buffer->address.bo);
>
> ib.BufferStartingAddress = anv_address_add(buffer->address,
> offset);
> diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
> index ca2baf84a19..752d04f3013 100644
> --- a/src/intel/vulkan/gen8_cmd_buffer.c
> +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> @@ -565,7 +565,8 @@ void genX(CmdBindIndexBuffer)(
>
> anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
> ib.IndexFormat = vk_to_gen_index_type[indexType];
> - ib.MemoryObjectControlState = GENX(MOCS);
> + ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
> + buffer->address.bo);
> ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
> ib.BufferSize = buffer->size - offset;
> }
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index a543860b976..c112a4afa10 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -2547,12 +2547,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
> struct GENX(VERTEX_BUFFER_STATE) state = {
> .VertexBufferIndex = vb,
>
> -#if GEN_GEN >= 8
> - .MemoryObjectControlState = GENX(MOCS),
> -#else
> + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
> + buffer->address.bo),
> +#if GEN_GEN <= 7
> .BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
> .InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
> - .VertexBufferMemoryObjectControlState = GENX(MOCS),
> #endif
>
> .AddressModifyEnable = true,
> @@ -2691,12 +2690,11 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
> .VertexBufferIndex = index,
> .AddressModifyEnable = true,
> .BufferPitch = 0,
> + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
> #if (GEN_GEN >= 8)
> - .MemoryObjectControlState = GENX(MOCS),
> .BufferStartingAddress = addr,
> .BufferSize = size
> #else
> - .VertexBufferMemoryObjectControlState = GENX(MOCS),
> .BufferStartingAddress = addr,
> .EndAddress = anv_address_add(addr, size),
> #endif
> @@ -3681,9 +3679,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
> if (dw == NULL)
> return;
>
> - struct isl_depth_stencil_hiz_emit_info info = {
> - .mocs = device->default_mocs,
> - };
> + struct isl_depth_stencil_hiz_emit_info info = { };
>
> if (iview)
> info.view = &iview->planes[0].isl;
> @@ -3701,6 +3697,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
> image->planes[depth_plane].address.bo,
> image->planes[depth_plane].address.offset +
> surface->offset);
> + info.mocs =
> + anv_mocs_for_bo(device, image->planes[depth_plane].address.bo);
>
> const uint32_t ds =
> cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
> @@ -3732,6 +3730,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
> image->planes[stencil_plane].address.bo,
> image->planes[stencil_plane].address.offset +
> surface->offset);
> + info.mocs =
> + anv_mocs_for_bo(device, image->planes[stencil_plane].address.bo);
> }
>
> isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
> diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c
> index fd78f4d125b..81522986550 100644
> --- a/src/intel/vulkan/genX_gpu_memcpy.c
> +++ b/src/intel/vulkan/genX_gpu_memcpy.c
> @@ -167,11 +167,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> .AddressModifyEnable = true,
> .BufferStartingAddress = src,
> .BufferPitch = bs,
> + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo),
> #if (GEN_GEN >= 8)
> - .MemoryObjectControlState = GENX(MOCS),
> .BufferSize = size,
> #else
> - .VertexBufferMemoryObjectControlState = GENX(MOCS),
> .EndAddress = anv_address_add(src, size - 1),
> #endif
> });
> @@ -228,7 +227,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
>
> anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
> sob.SOBufferIndex = 0;
> - sob.SOBufferObjectControlState = GENX(MOCS);
> + sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo),
> sob.SurfaceBaseAddress = dst;
>
> #if GEN_GEN >= 8
> diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
> index aa5bce5a801..75bcd96d78a 100644
> --- a/src/intel/vulkan/genX_state.c
> +++ b/src/intel/vulkan/genX_state.c
> @@ -93,6 +93,12 @@ genX(init_device_state)(struct anv_device *device)
> {
> GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
> &GENX(MOCS));
> +#if GEN_GEN >= 8
> + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs,
> + &GENX(EXTERNAL_MOCS));
> +#else
> + device->external_mocs = device->default_mocs;
> +#endif
>
> struct anv_batch batch;
>
More information about the mesa-stable
mailing list