[Mesa-dev] [PATCH] anv: Use separate MOCS settings for external BOs on gen8
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Aug 2 10:42:23 UTC 2018
On 10/07/18 05:59, Jason Ekstrand wrote:
> On all other platforms, it's safe to use the usual PTE settings for both
> internal and external BOs. On Broadwell, however, we can't get the
> right caching behavior for scanout without disabling eLLC and we really
> don't want to do this on everything.
>
> In order to do this, we add an anv-specific BO flag for "external" and
> use that to distinguish between buffers which may be shared with other
> processes and/or display and those which are entirely internal. That,
> together with an anv_mocs_for_bo helper lets us choose the right MOCS
> settings for each BO use.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99507
Hey Jason,
Looking at Eero's comment on the bug entry, it seems we would need this
external MOCS on eLLC platforms, not just Gen8?
Cheers,
-
Lionel
> Cc: mesa-stable at lists.freedesktop.org
> ---
> src/intel/vulkan/anv_allocator.c | 12 ++++++++++--
> src/intel/vulkan/anv_batch_chain.c | 2 +-
> src/intel/vulkan/anv_blorp.c | 15 ++++++++-------
> src/intel/vulkan/anv_device.c | 9 +++++++--
> src/intel/vulkan/anv_image.c | 5 +++--
> src/intel/vulkan/anv_intel.c | 2 +-
> src/intel/vulkan/anv_private.h | 20 ++++++++++++++++++++
> src/intel/vulkan/gen7_cmd_buffer.c | 3 ++-
> src/intel/vulkan/gen8_cmd_buffer.c | 3 ++-
> src/intel/vulkan/genX_cmd_buffer.c | 18 +++++++++---------
> src/intel/vulkan/genX_gpu_memcpy.c | 5 ++---
> src/intel/vulkan/genX_state.c | 6 ++++++
> 12 files changed, 71 insertions(+), 29 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
> index ab01d46cbeb..f62d48ae3fe 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -1253,7 +1253,8 @@ anv_bo_cache_lookup(struct anv_bo_cache *cache, uint32_t gem_handle)
> (EXEC_OBJECT_WRITE | \
> EXEC_OBJECT_ASYNC | \
> EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
> - EXEC_OBJECT_PINNED)
> + EXEC_OBJECT_PINNED | \
> + ANV_BO_EXTERNAL)
>
> VkResult
> anv_bo_cache_alloc(struct anv_device *device,
> @@ -1311,6 +1312,7 @@ anv_bo_cache_import(struct anv_device *device,
> struct anv_bo **bo_out)
> {
> assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
> + assert(bo_flags & ANV_BO_EXTERNAL);
>
> pthread_mutex_lock(&cache->mutex);
>
> @@ -1327,7 +1329,7 @@ anv_bo_cache_import(struct anv_device *device,
> * client has imported a BO twice in different ways and they get what
> * they have coming.
> */
> - uint64_t new_flags = 0;
> + uint64_t new_flags = ANV_BO_EXTERNAL;
> new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
> new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
> new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> @@ -1411,6 +1413,12 @@ anv_bo_cache_export(struct anv_device *device,
> assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
> struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
>
> + /* This BO must have been flagged external in order for us to be able
> + * to export it. This is done based on external options passed into
> + * anv_AllocateMemory.
> + */
> + assert(bo->bo.flags & ANV_BO_EXTERNAL);
> +
> int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
> if (fd < 0)
> return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
> diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
> index c47a81c8a4d..33b1735460b 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -1088,7 +1088,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
> obj->relocs_ptr = 0;
> obj->alignment = 0;
> obj->offset = bo->offset;
> - obj->flags = bo->flags | extra_flags;
> + obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
> obj->rsvd1 = 0;
> obj->rsvd2 = 0;
> }
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index c76392fcd97..a024f8a380a 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -155,7 +155,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
> .addr = {
> .buffer = buffer->address.bo,
> .offset = buffer->address.offset + offset,
> - .mocs = device->default_mocs,
> + .mocs = anv_mocs_for_bo(device, buffer->address.bo),
> },
> };
>
> @@ -208,7 +208,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
> .addr = {
> .buffer = image->planes[plane].address.bo,
> .offset = image->planes[plane].address.offset + surface->offset,
> - .mocs = device->default_mocs,
> + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
> },
> };
>
> @@ -218,7 +218,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
> blorp_surf->aux_addr = (struct blorp_address) {
> .buffer = image->planes[plane].address.bo,
> .offset = image->planes[plane].address.offset + aux_surface->offset,
> - .mocs = device->default_mocs,
> + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
> };
> blorp_surf->aux_usage = aux_usage;
>
> @@ -663,12 +663,12 @@ void anv_CmdCopyBuffer(
> struct blorp_address src = {
> .buffer = src_buffer->address.bo,
> .offset = src_buffer->address.offset + pRegions[r].srcOffset,
> - .mocs = cmd_buffer->device->default_mocs,
> + .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo),
> };
> struct blorp_address dst = {
> .buffer = dst_buffer->address.bo,
> .offset = dst_buffer->address.offset + pRegions[r].dstOffset,
> - .mocs = cmd_buffer->device->default_mocs,
> + .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
> };
>
> blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
> @@ -721,7 +721,7 @@ void anv_CmdUpdateBuffer(
> struct blorp_address dst = {
> .buffer = dst_buffer->address.bo,
> .offset = dst_buffer->address.offset + dstOffset,
> - .mocs = cmd_buffer->device->default_mocs,
> + .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
> };
>
> blorp_buffer_copy(&batch, src, dst, copy_size);
> @@ -1413,7 +1413,8 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
> .buffer = image->planes[0].address.bo,
> .offset = image->planes[0].address.offset +
> image->planes[0].shadow_surface.offset,
> - .mocs = cmd_buffer->device->default_mocs,
> + .mocs = anv_mocs_for_bo(cmd_buffer->device,
> + image->planes[0].address.bo),
> },
> };
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 6c8eb673f7c..954847bb91c 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -2191,8 +2191,8 @@ VkResult anv_AllocateMemory(
> fd_info->handleType ==
> VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
>
> - result = anv_bo_cache_import(device, &device->bo_cache,
> - fd_info->fd, bo_flags, &mem->bo);
> + result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd,
> + bo_flags | ANV_BO_EXTERNAL, &mem->bo);
> if (result != VK_SUCCESS)
> goto fail;
>
> @@ -2229,6 +2229,11 @@ VkResult anv_AllocateMemory(
> */
> close(fd_info->fd);
> } else {
> + const VkExportMemoryAllocateInfoKHR *fd_info =
> + vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
> + if (fd_info && fd_info->handleTypes)
> + bo_flags |= ANV_BO_EXTERNAL;
> +
> result = anv_bo_cache_alloc(device, &device->bo_cache,
> pAllocateInfo->allocationSize, bo_flags,
> &mem->bo);
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index c62bf7ae2b5..31813ca7ec4 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -1086,7 +1086,7 @@ anv_image_fill_surface_state(struct anv_device *device,
> .size = surface->isl.size,
> .format = ISL_FORMAT_RAW,
> .stride = 1,
> - .mocs = device->default_mocs);
> + .mocs = anv_mocs_for_bo(device, address.bo));
> state_inout->address = address,
> state_inout->aux_address = ANV_NULL_ADDRESS;
> state_inout->clear_address = ANV_NULL_ADDRESS;
> @@ -1187,7 +1187,8 @@ anv_image_fill_surface_state(struct anv_device *device,
> .aux_address = anv_address_physical(aux_address),
> .clear_address = anv_address_physical(clear_address),
> .use_clear_address = !anv_address_is_null(clear_address),
> - .mocs = device->default_mocs,
> + .mocs = anv_mocs_for_bo(device,
> + state_inout->address.bo),
> .x_offset_sa = tile_x_sa,
> .y_offset_sa = tile_y_sa);
>
> diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
> index 06db5787a9c..ed1bc096c66 100644
> --- a/src/intel/vulkan/anv_intel.c
> +++ b/src/intel/vulkan/anv_intel.c
> @@ -73,7 +73,7 @@ VkResult anv_CreateDmaBufImageINTEL(
>
> image = anv_image_from_handle(image_h);
>
> - uint64_t bo_flags = 0;
> + uint64_t bo_flags = ANV_BO_EXTERNAL;
> if (device->instance->physicalDevice.supports_48bit_addresses)
> bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> if (device->instance->physicalDevice.use_softpin)
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index a9a7bb8cd86..ea1536fa464 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -555,6 +555,10 @@ anv_multialloc_alloc2(struct anv_multialloc *ma,
> return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
> }
>
> +/* Extra ANV-defined BO flags which won't be passed to the kernel */
> +#define ANV_BO_EXTERNAL (1ull << 31)
> +#define ANV_BO_FLAG_MASK (1ull << 31)
> +
> struct anv_bo {
> uint32_t gem_handle;
>
> @@ -1003,6 +1007,7 @@ struct anv_device {
> struct anv_scratch_pool scratch_pool;
>
> uint32_t default_mocs;
> + uint32_t external_mocs;
>
> pthread_mutex_t mutex;
> pthread_cond_t queue_submit;
> @@ -1032,6 +1037,15 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
> anv_state_pool_free(anv_binding_table_pool(device), state);
> }
>
> +static inline uint32_t
> +anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
> +{
> + if (bo->flags & ANV_BO_EXTERNAL)
> + return device->external_mocs;
> + else
> + return device->default_mocs;
> +}
> +
> static void inline
> anv_state_flush(struct anv_device *device, struct anv_state state)
> {
> @@ -1322,6 +1336,12 @@ _anv_combine_address(struct anv_batch *batch, void *location,
> .AgeforQUADLRU = 0 \
> }
>
> +#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \
> + .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, \
> + .TargetCache = L3DefertoPATforLLCeLLCselection, \
> + .AgeforQUADLRU = 0 \
> + }
> +
> /* Skylake: MOCS is now an index into an array of 62 different caching
> * configurations programmed by the kernel.
> */
> diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
> index 3acfbb710c0..fc51c067439 100644
> --- a/src/intel/vulkan/gen7_cmd_buffer.c
> +++ b/src/intel/vulkan/gen7_cmd_buffer.c
> @@ -245,7 +245,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
> ib.CutIndexEnable = pipeline->primitive_restart;
> #endif
> ib.IndexFormat = cmd_buffer->state.gfx.gen7.index_type;
> - ib.MemoryObjectControlState = GENX(MOCS);
> + ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
> + buffer->address.bo);
>
> ib.BufferStartingAddress = anv_address_add(buffer->address,
> offset);
> diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
> index ca2baf84a19..752d04f3013 100644
> --- a/src/intel/vulkan/gen8_cmd_buffer.c
> +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> @@ -565,7 +565,8 @@ void genX(CmdBindIndexBuffer)(
>
> anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
> ib.IndexFormat = vk_to_gen_index_type[indexType];
> - ib.MemoryObjectControlState = GENX(MOCS);
> + ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
> + buffer->address.bo);
> ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
> ib.BufferSize = buffer->size - offset;
> }
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index b7ed817d3a0..b55acca4521 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -2514,12 +2514,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
> struct GENX(VERTEX_BUFFER_STATE) state = {
> .VertexBufferIndex = vb,
>
> -#if GEN_GEN >= 8
> - .MemoryObjectControlState = GENX(MOCS),
> -#else
> + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
> + buffer->address.bo),
> +#if GEN_GEN <= 7
> .BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
> .InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
> - .VertexBufferMemoryObjectControlState = GENX(MOCS),
> #endif
>
> .AddressModifyEnable = true,
> @@ -2633,12 +2632,11 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
> .VertexBufferIndex = index,
> .AddressModifyEnable = true,
> .BufferPitch = 0,
> + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
> #if (GEN_GEN >= 8)
> - .MemoryObjectControlState = GENX(MOCS),
> .BufferStartingAddress = addr,
> .BufferSize = size
> #else
> - .VertexBufferMemoryObjectControlState = GENX(MOCS),
> .BufferStartingAddress = addr,
> .EndAddress = anv_address_add(addr, size),
> #endif
> @@ -3390,9 +3388,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
> if (dw == NULL)
> return;
>
> - struct isl_depth_stencil_hiz_emit_info info = {
> - .mocs = device->default_mocs,
> - };
> + struct isl_depth_stencil_hiz_emit_info info = { };
>
> if (iview)
> info.view = &iview->planes[0].isl;
> @@ -3410,6 +3406,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
> image->planes[depth_plane].address.bo,
> image->planes[depth_plane].address.offset +
> surface->offset);
> + info.mocs =
> + anv_mocs_for_bo(device, image->planes[depth_plane].address.bo);
>
> const uint32_t ds =
> cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
> @@ -3441,6 +3439,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
> image->planes[stencil_plane].address.bo,
> image->planes[stencil_plane].address.offset +
> surface->offset);
> + info.mocs =
> + anv_mocs_for_bo(device, image->planes[stencil_plane].address.bo);
> }
>
> isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
> diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c
> index eaafcfa3b22..b51c1804659 100644
> --- a/src/intel/vulkan/genX_gpu_memcpy.c
> +++ b/src/intel/vulkan/genX_gpu_memcpy.c
> @@ -158,11 +158,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> .AddressModifyEnable = true,
> .BufferStartingAddress = { src, src_offset },
> .BufferPitch = bs,
> + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src),
> #if (GEN_GEN >= 8)
> - .MemoryObjectControlState = GENX(MOCS),
> .BufferSize = size,
> #else
> - .VertexBufferMemoryObjectControlState = GENX(MOCS),
> .EndAddress = { src, src_offset + size - 1 },
> #endif
> });
> @@ -219,7 +218,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
>
> anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
> sob.SOBufferIndex = 0;
> - sob.SOBufferObjectControlState = GENX(MOCS);
> + sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst),
> sob.SurfaceBaseAddress = (struct anv_address) { dst, dst_offset };
>
> #if GEN_GEN >= 8
> diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
> index b1014d9e797..d049bcadfda 100644
> --- a/src/intel/vulkan/genX_state.c
> +++ b/src/intel/vulkan/genX_state.c
> @@ -93,6 +93,12 @@ genX(init_device_state)(struct anv_device *device)
> {
> GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
> &GENX(MOCS));
> +#if GEN_GEN == 8
> + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs,
> + &GENX(EXTERNAL_MOCS));
> +#else
> + device->external_mocs = device->default_mocs;
> +#endif
>
> struct anv_batch batch;
>
More information about the mesa-dev
mailing list