[Mesa-stable] [Mesa-dev] [PATCH] anv: Use separate MOCS settings for external BOs

Lionel Landwerlin lionel.g.landwerlin at intel.com
Wed Oct 3 13:47:46 UTC 2018


On 02/10/2018 23:11, Jason Ekstrand wrote:
> On Broadwell and above, we have to use different MOCS settings to allow
> the kernel to take over and disable caching when needed for external
> buffers.  On Broadwell, this is especially important because the kernel
> can't disable eLLC so we have to do it in userspace.  We very badly
> don't want to do that on everything so we need separate MOCS for
> external and internal BOs.
>
> In order to do this, we add an anv-specific BO flag for "external" and
> use that to distinguish between buffers which may be shared with other
> processes and/or display and those which are entirely internal.  That,
> together with an anv_mocs_for_bo helper, lets us choose the right MOCS
> settings for each BO use.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99507
> Cc: mesa-stable at lists.freedesktop.org


Looks good, and thanks for adding the anv BO flags; I'll reuse them!


Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>


> ---
>   src/intel/vulkan/anv_allocator.c   | 12 ++++++++--
>   src/intel/vulkan/anv_batch_chain.c |  2 +-
>   src/intel/vulkan/anv_blorp.c       | 15 ++++++------
>   src/intel/vulkan/anv_device.c      |  9 +++++--
>   src/intel/vulkan/anv_image.c       |  5 ++--
>   src/intel/vulkan/anv_intel.c       |  2 +-
>   src/intel/vulkan/anv_private.h     | 38 +++++++++++++++++++++++-------
>   src/intel/vulkan/gen7_cmd_buffer.c |  3 ++-
>   src/intel/vulkan/gen8_cmd_buffer.c |  3 ++-
>   src/intel/vulkan/genX_cmd_buffer.c | 18 +++++++-------
>   src/intel/vulkan/genX_gpu_memcpy.c |  5 ++--
>   src/intel/vulkan/genX_state.c      |  6 +++++
>   12 files changed, 80 insertions(+), 38 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
> index ab01d46cbeb..f62d48ae3fe 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -1253,7 +1253,8 @@ anv_bo_cache_lookup(struct anv_bo_cache *cache, uint32_t gem_handle)
>      (EXEC_OBJECT_WRITE | \
>       EXEC_OBJECT_ASYNC | \
>       EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
> -    EXEC_OBJECT_PINNED)
> +    EXEC_OBJECT_PINNED | \
> +    ANV_BO_EXTERNAL)
>   
>   VkResult
>   anv_bo_cache_alloc(struct anv_device *device,
> @@ -1311,6 +1312,7 @@ anv_bo_cache_import(struct anv_device *device,
>                       struct anv_bo **bo_out)
>   {
>      assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
> +   assert(bo_flags & ANV_BO_EXTERNAL);
>   
>      pthread_mutex_lock(&cache->mutex);
>   
> @@ -1327,7 +1329,7 @@ anv_bo_cache_import(struct anv_device *device,
>          * client has imported a BO twice in different ways and they get what
>          * they have coming.
>          */
> -      uint64_t new_flags = 0;
> +      uint64_t new_flags = ANV_BO_EXTERNAL;
>         new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
>         new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
>         new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> @@ -1411,6 +1413,12 @@ anv_bo_cache_export(struct anv_device *device,
>      assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
>      struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
>   
> +   /* This BO must have been flagged external in order for us to be able
> +    * to export it.  This is done based on external options passed into
> +    * anv_AllocateMemory.
> +    */
> +   assert(bo->bo.flags & ANV_BO_EXTERNAL);
> +
>      int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
>      if (fd < 0)
>         return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
> diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
> index 0f7c8325ea4..3e13553ac18 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -1088,7 +1088,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
>         obj->relocs_ptr = 0;
>         obj->alignment = 0;
>         obj->offset = bo->offset;
> -      obj->flags = bo->flags | extra_flags;
> +      obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
>         obj->rsvd1 = 0;
>         obj->rsvd2 = 0;
>      }
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index fa7936d0981..29ed6b2ee35 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -156,7 +156,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
>         .addr = {
>            .buffer = buffer->address.bo,
>            .offset = buffer->address.offset + offset,
> -         .mocs = device->default_mocs,
> +         .mocs = anv_mocs_for_bo(device, buffer->address.bo),
>         },
>      };
>   
> @@ -209,7 +209,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
>         .addr = {
>            .buffer = image->planes[plane].address.bo,
>            .offset = image->planes[plane].address.offset + surface->offset,
> -         .mocs = device->default_mocs,
> +         .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
>         },
>      };
>   
> @@ -219,7 +219,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
>         blorp_surf->aux_addr = (struct blorp_address) {
>            .buffer = image->planes[plane].address.bo,
>            .offset = image->planes[plane].address.offset + aux_surface->offset,
> -         .mocs = device->default_mocs,
> +         .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
>         };
>         blorp_surf->aux_usage = aux_usage;
>   
> @@ -669,12 +669,12 @@ void anv_CmdCopyBuffer(
>         struct blorp_address src = {
>            .buffer = src_buffer->address.bo,
>            .offset = src_buffer->address.offset + pRegions[r].srcOffset,
> -         .mocs = cmd_buffer->device->default_mocs,
> +         .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo),
>         };
>         struct blorp_address dst = {
>            .buffer = dst_buffer->address.bo,
>            .offset = dst_buffer->address.offset + pRegions[r].dstOffset,
> -         .mocs = cmd_buffer->device->default_mocs,
> +         .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
>         };
>   
>         blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
> @@ -727,7 +727,7 @@ void anv_CmdUpdateBuffer(
>         struct blorp_address dst = {
>            .buffer = dst_buffer->address.bo,
>            .offset = dst_buffer->address.offset + dstOffset,
> -         .mocs = cmd_buffer->device->default_mocs,
> +         .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
>         };
>   
>         blorp_buffer_copy(&batch, src, dst, copy_size);
> @@ -1437,7 +1437,8 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
>            .buffer = image->planes[0].address.bo,
>            .offset = image->planes[0].address.offset +
>                      image->planes[0].shadow_surface.offset,
> -         .mocs = cmd_buffer->device->default_mocs,
> +         .mocs = anv_mocs_for_bo(cmd_buffer->device,
> +                                 image->planes[0].address.bo),
>         },
>      };
>   
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index eae93587349..d9d9553aec2 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -2278,8 +2278,8 @@ VkResult anv_AllocateMemory(
>                fd_info->handleType ==
>                  VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
>   
> -      result = anv_bo_cache_import(device, &device->bo_cache,
> -                                   fd_info->fd, bo_flags, &mem->bo);
> +      result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd,
> +                                   bo_flags | ANV_BO_EXTERNAL, &mem->bo);
>         if (result != VK_SUCCESS)
>            goto fail;
>   
> @@ -2316,6 +2316,11 @@ VkResult anv_AllocateMemory(
>          */
>         close(fd_info->fd);
>      } else {
> +      const VkExportMemoryAllocateInfoKHR *fd_info =
> +         vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
> +      if (fd_info && fd_info->handleTypes)
> +         bo_flags |= ANV_BO_EXTERNAL;
> +
>         result = anv_bo_cache_alloc(device, &device->bo_cache,
>                                     pAllocateInfo->allocationSize, bo_flags,
>                                     &mem->bo);
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index b0d8c560adb..ad820917f23 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -1097,7 +1097,7 @@ anv_image_fill_surface_state(struct anv_device *device,
>                               .size_B = surface->isl.size_B,
>                               .format = ISL_FORMAT_RAW,
>                               .stride_B = 1,
> -                            .mocs = device->default_mocs);
> +                            .mocs = anv_mocs_for_bo(device, address.bo));
>         state_inout->address = address,
>         state_inout->aux_address = ANV_NULL_ADDRESS;
>         state_inout->clear_address = ANV_NULL_ADDRESS;
> @@ -1198,7 +1198,8 @@ anv_image_fill_surface_state(struct anv_device *device,
>                             .aux_address = anv_address_physical(aux_address),
>                             .clear_address = anv_address_physical(clear_address),
>                             .use_clear_address = !anv_address_is_null(clear_address),
> -                          .mocs = device->default_mocs,
> +                          .mocs = anv_mocs_for_bo(device,
> +                                                  state_inout->address.bo),
>                             .x_offset_sa = tile_x_sa,
>                             .y_offset_sa = tile_y_sa);
>   
> diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
> index 06db5787a9c..ed1bc096c66 100644
> --- a/src/intel/vulkan/anv_intel.c
> +++ b/src/intel/vulkan/anv_intel.c
> @@ -73,7 +73,7 @@ VkResult anv_CreateDmaBufImageINTEL(
>   
>      image = anv_image_from_handle(image_h);
>   
> -   uint64_t bo_flags = 0;
> +   uint64_t bo_flags = ANV_BO_EXTERNAL;
>      if (device->instance->physicalDevice.supports_48bit_addresses)
>         bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>      if (device->instance->physicalDevice.use_softpin)
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 0b78d59e567..926f353aa4b 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -558,6 +558,10 @@ anv_multialloc_alloc2(struct anv_multialloc *ma,
>      return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
>   }
>   
> +/* Extra ANV-defined BO flags which won't be passed to the kernel */
> +#define ANV_BO_EXTERNAL    (1ull << 31)
> +#define ANV_BO_FLAG_MASK   (1ull << 31)
> +
>   struct anv_bo {
>      uint32_t gem_handle;
>   
> @@ -1018,6 +1022,7 @@ struct anv_device {
>       struct anv_scratch_pool                     scratch_pool;
>   
>       uint32_t                                    default_mocs;
> +    uint32_t                                    external_mocs;
>   
>       pthread_mutex_t                             mutex;
>       pthread_cond_t                              queue_submit;
> @@ -1047,6 +1052,15 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
>      anv_state_pool_free(anv_binding_table_pool(device), state);
>   }
>   
> +static inline uint32_t
> +anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
> +{
> +   if (bo->flags & ANV_BO_EXTERNAL)
> +      return device->external_mocs;
> +   else
> +      return device->default_mocs;
> +}
> +
>   static void inline
>   anv_state_flush(struct anv_device *device, struct anv_state state)
>   {
> @@ -1328,6 +1342,12 @@ _anv_combine_address(struct anv_batch *batch, void *location,
>         .AgeforQUADLRU = 0                                       \
>      }
>   
> +#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) {     \
> +      .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,  \
> +      .TargetCache = L3DefertoPATforLLCeLLCselection,                      \
> +      .AgeforQUADLRU = 0                                                   \
> +   }
> +
>   /* Skylake: MOCS is now an index into an array of 62 different caching
>    * configurations programmed by the kernel.
>    */
> @@ -1337,9 +1357,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
>         .IndextoMOCSTables                           = 2         \
>      }
>   
> -#define GEN9_MOCS_PTE {                                 \
> -      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
> -      .IndextoMOCSTables                           = 1  \
> +#define GEN9_EXTERNAL_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) {  \
> +      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                       \
> +      .IndextoMOCSTables                           = 1                  \
>      }
>   
>   /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
> @@ -1348,9 +1368,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
>         .IndextoMOCSTables                           = 2         \
>      }
>   
> -#define GEN10_MOCS_PTE {                                 \
> -      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
> -      .IndextoMOCSTables                           = 1  \
> +#define GEN10_EXTERNAL_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) {   \
> +      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                          \
> +      .IndextoMOCSTables                           = 1                     \
>      }
>   
>   /* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
> @@ -1359,9 +1379,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
>         .IndextoMOCSTables                           = 2         \
>      }
>   
> -#define GEN11_MOCS_PTE {                                 \
> -      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
> -      .IndextoMOCSTables                           = 1  \
> +#define GEN11_EXTERNAL_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) {   \
> +      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                          \
> +      .IndextoMOCSTables                           = 1                     \
>      }
>   
>   struct anv_device_memory {
> diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
> index cf1f8ee2829..da51cb9781c 100644
> --- a/src/intel/vulkan/gen7_cmd_buffer.c
> +++ b/src/intel/vulkan/gen7_cmd_buffer.c
> @@ -246,7 +246,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
>            ib.CutIndexEnable             = pipeline->primitive_restart;
>   #endif
>            ib.IndexFormat                = cmd_buffer->state.gfx.gen7.index_type;
> -         ib.MemoryObjectControlState   = GENX(MOCS);
> +         ib.IndexBufferMOCS            = anv_mocs_for_bo(cmd_buffer->device,
> +                                                         buffer->address.bo);
>   
>            ib.BufferStartingAddress      = anv_address_add(buffer->address,
>                                                            offset);
> diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
> index ca2baf84a19..752d04f3013 100644
> --- a/src/intel/vulkan/gen8_cmd_buffer.c
> +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> @@ -565,7 +565,8 @@ void genX(CmdBindIndexBuffer)(
>   
>      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
>         ib.IndexFormat                = vk_to_gen_index_type[indexType];
> -      ib.MemoryObjectControlState   = GENX(MOCS);
> +      ib.IndexBufferMOCS            = anv_mocs_for_bo(cmd_buffer->device,
> +                                                      buffer->address.bo);
>         ib.BufferStartingAddress      = anv_address_add(buffer->address, offset);
>         ib.BufferSize                 = buffer->size - offset;
>      }
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index a543860b976..c112a4afa10 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -2547,12 +2547,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
>            struct GENX(VERTEX_BUFFER_STATE) state = {
>               .VertexBufferIndex = vb,
>   
> -#if GEN_GEN >= 8
> -            .MemoryObjectControlState = GENX(MOCS),
> -#else
> +            .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
> +                                                buffer->address.bo),
> +#if GEN_GEN <= 7
>               .BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
>               .InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
> -            .VertexBufferMemoryObjectControlState = GENX(MOCS),
>   #endif
>   
>               .AddressModifyEnable = true,
> @@ -2691,12 +2690,11 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
>            .VertexBufferIndex = index,
>            .AddressModifyEnable = true,
>            .BufferPitch = 0,
> +         .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
>   #if (GEN_GEN >= 8)
> -         .MemoryObjectControlState = GENX(MOCS),
>            .BufferStartingAddress = addr,
>            .BufferSize = size
>   #else
> -         .VertexBufferMemoryObjectControlState = GENX(MOCS),
>            .BufferStartingAddress = addr,
>            .EndAddress = anv_address_add(addr, size),
>   #endif
> @@ -3681,9 +3679,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
>      if (dw == NULL)
>         return;
>   
> -   struct isl_depth_stencil_hiz_emit_info info = {
> -      .mocs = device->default_mocs,
> -   };
> +   struct isl_depth_stencil_hiz_emit_info info = { };
>   
>      if (iview)
>         info.view = &iview->planes[0].isl;
> @@ -3701,6 +3697,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
>                                 image->planes[depth_plane].address.bo,
>                                 image->planes[depth_plane].address.offset +
>                                 surface->offset);
> +      info.mocs =
> +         anv_mocs_for_bo(device, image->planes[depth_plane].address.bo);
>   
>         const uint32_t ds =
>            cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
> @@ -3732,6 +3730,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
>                                 image->planes[stencil_plane].address.bo,
>                                 image->planes[stencil_plane].address.offset +
>                                 surface->offset);
> +      info.mocs =
> +         anv_mocs_for_bo(device, image->planes[stencil_plane].address.bo);
>      }
>   
>      isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
> diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c
> index fd78f4d125b..81522986550 100644
> --- a/src/intel/vulkan/genX_gpu_memcpy.c
> +++ b/src/intel/vulkan/genX_gpu_memcpy.c
> @@ -167,11 +167,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
>            .AddressModifyEnable = true,
>            .BufferStartingAddress = src,
>            .BufferPitch = bs,
> +         .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo),
>   #if (GEN_GEN >= 8)
> -         .MemoryObjectControlState = GENX(MOCS),
>            .BufferSize = size,
>   #else
> -         .VertexBufferMemoryObjectControlState = GENX(MOCS),
>            .EndAddress = anv_address_add(src, size - 1),
>   #endif
>         });
> @@ -228,7 +227,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
>   
>      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
>         sob.SOBufferIndex = 0;
> -      sob.SOBufferObjectControlState = GENX(MOCS);
> +      sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo),
>         sob.SurfaceBaseAddress = dst;
>   
>   #if GEN_GEN >= 8
> diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
> index aa5bce5a801..75bcd96d78a 100644
> --- a/src/intel/vulkan/genX_state.c
> +++ b/src/intel/vulkan/genX_state.c
> @@ -93,6 +93,12 @@ genX(init_device_state)(struct anv_device *device)
>   {
>      GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
>                                             &GENX(MOCS));
> +#if GEN_GEN >= 8
> +   GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs,
> +                                          &GENX(EXTERNAL_MOCS));
> +#else
> +   device->external_mocs = device->default_mocs;
> +#endif
>   
>      struct anv_batch batch;
>   




More information about the mesa-stable mailing list