[Mesa-dev] [PATCH 2/2] anv: add support for VK_EXT_inline_uniform_block

Lionel Landwerlin lionel.g.landwerlin at intel.com
Sun Sep 16 19:51:55 UTC 2018


Hey Tapani,

Descriptors were kind of tricky to get my head around so thanks a lot 
for looking into this.

Regarding these max values, there isn't really a limit with our hardware. 
I just picked the minimum required by the spec.
I think the asserts are somewhat unnecessary but I don't really object to 
them.

Thanks again for your time on this!

-
Lionel

On 14/09/2018 11:32, Tapani Pälli wrote:
> I can't say I know enough of all these parts but I went through the 
> API functions and tried to check that you have proper checks in place. 
> Will try to still review :)
>
> I did not see any check against MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 
> when creating the pipeline layout. I'm not sure if such a check is 
> necessary (since it's an implicit rule); do you think there should be 
> a check/assert?
>
> one minor possible addition below ..
>
> On 11.09.2018 23:22, Lionel Landwerlin wrote:
>> This new extension adds an implicitly allocated block of uniforms into
>> the descriptor sets through a new descriptor type. We implement
>> this by having a single BO in the descriptor set pool
>> from which we source uniforms.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>>   src/intel/vulkan/anv_cmd_buffer.c             |   3 +
>>   src/intel/vulkan/anv_descriptor_set.c         | 238 +++++++++++++++++-
>>   src/intel/vulkan/anv_device.c                 |  22 ++
>>   src/intel/vulkan/anv_extensions.py            |   1 +
>>   .../vulkan/anv_nir_apply_pipeline_layout.c    |  52 ++++
>>   src/intel/vulkan/anv_private.h                |  33 +++
>>   src/intel/vulkan/genX_cmd_buffer.c            |  32 ++-
>>   7 files changed, 367 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
>> b/src/intel/vulkan/anv_cmd_buffer.c
>> index 8ef71b0ed9c..b14be94f470 100644
>> --- a/src/intel/vulkan/anv_cmd_buffer.c
>> +++ b/src/intel/vulkan/anv_cmd_buffer.c
>> @@ -651,6 +651,7 @@ 
>> anv_isl_format_for_descriptor_type(VkDescriptorType type)
>>      switch (type) {
>>      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
>>      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
>> +   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>>         return ISL_FORMAT_R32G32B32A32_FLOAT;
>>        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
>> @@ -1039,6 +1040,8 @@ void anv_CmdPushDescriptorSetKHR(
>>            }
>>            break;
>>   +      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>> +         unreachable("Invalid descriptor type for push descriptors");
>>         default:
>>            break;
>>         }
>> diff --git a/src/intel/vulkan/anv_descriptor_set.c 
>> b/src/intel/vulkan/anv_descriptor_set.c
>> index 3439f828900..2e5f2a1f288 100644
>> --- a/src/intel/vulkan/anv_descriptor_set.c
>> +++ b/src/intel/vulkan/anv_descriptor_set.c
>> @@ -26,8 +26,10 @@
>>   #include <string.h>
>>   #include <unistd.h>
>>   #include <fcntl.h>
>> +#include <sys/mman.h>
>>     #include "util/mesa-sha1.h"
>> +#include "vk_util.h"
>>     #include "anv_private.h"
>>   @@ -40,7 +42,8 @@ void anv_GetDescriptorSetLayoutSupport(
>>       const VkDescriptorSetLayoutCreateInfo*      pCreateInfo,
>>       VkDescriptorSetLayoutSupport*               pSupport)
>>   {
>> -   uint32_t surface_count[MESA_SHADER_STAGES] = { 0, };
>> +   int16_t surface_count[MESA_SHADER_STAGES] = { 0, };
>> +   int16_t inline_surface_indexes[MESA_SHADER_STAGES] = { -1, };
>>        for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) {
>>         const VkDescriptorSetLayoutBinding *binding = 
>> &pCreateInfo->pBindings[b];
>> @@ -50,6 +53,15 @@ void anv_GetDescriptorSetLayoutSupport(
>>            /* There is no real limit on samplers */
>>            break;
>>   +      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>> +         anv_foreach_stage(s, binding->stageFlags) {
>> +            if (inline_surface_indexes[s] < 0) {
>> +               inline_surface_indexes[s] = surface_count[s];
>> +               surface_count[s] += 1;
>> +            }
>> +         }
>> +         break;
>> +
>>         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
>>            if (binding->pImmutableSamplers) {
>>               for (uint32_t i = 0; i < binding->descriptorCount; i++) {
>> @@ -118,6 +130,9 @@ VkResult anv_CreateDescriptorSetLayout(
>>      memset(set_layout, 0, sizeof(*set_layout));
>>      set_layout->ref_cnt = 1;
>>      set_layout->binding_count = max_binding + 1;
>> +   set_layout->inline_blocks_descriptor_index = -1;
>> +   memset(set_layout->inline_blocks_surface_indexes,
>> +          -1, sizeof(set_layout->inline_blocks_surface_indexes));
>>        for (uint32_t b = 0; b <= max_binding; b++) {
>>         /* Initialize all binding_layout entries to -1 */
>> @@ -159,9 +174,24 @@ VkResult anv_CreateDescriptorSetLayout(
>>   #ifndef NDEBUG
>>         set_layout->binding[b].type = binding->descriptorType;
>>   #endif
>> -      set_layout->binding[b].array_size = binding->descriptorCount;
>> -      set_layout->binding[b].descriptor_index = set_layout->size;
>> -      set_layout->size += binding->descriptorCount;
>> +
>> +      if (binding->descriptorType == 
>> VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
>
> Maybe add here
>
> assert(binding->descriptorCount % 4 == 0 &&
>        binding->descriptorCount <= MAX_INLINE_UNIFORM_BLOCK_SIZE);


Sure, added locally.


>
> ?
>
>> +         /* We only need a single descriptor entry for all the inline 
>> uniforms. */
>> +         set_layout->binding[b].array_size = 1;
>> +         if (set_layout->inline_blocks_descriptor_index < 0) {
>> +            set_layout->binding[b].descriptor_index =
>> +               set_layout->inline_blocks_descriptor_index =
>> +               set_layout->size;
>> +            set_layout->size += 1;
>> +         } else {
>> +            set_layout->binding[b].descriptor_index =
>> +               set_layout->inline_blocks_descriptor_index;
>> +         }
>> +      } else {
>> +         set_layout->binding[b].array_size = binding->descriptorCount;
>> +         set_layout->binding[b].descriptor_index = set_layout->size;
>> +         set_layout->size += binding->descriptorCount;
>> +      }
>>           switch (binding->descriptorType) {
>>         case VK_DESCRIPTOR_TYPE_SAMPLER:
>> @@ -176,6 +206,24 @@ VkResult anv_CreateDescriptorSetLayout(
>>         }
>>           switch (binding->descriptorType) {
>> +      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>> +         set_layout->binding[b].inline_block_offset = 
>> set_layout->inline_blocks_size;
>> +         set_layout->binding[b].inline_block_length = 
>> binding->descriptorCount;
>> +         set_layout->inline_blocks_size += binding->descriptorCount;
>> +
>> +         anv_foreach_stage(s, binding->stageFlags) {
>> +            if (set_layout->inline_blocks_surface_indexes[s] < 0) {
>> +               set_layout->binding[b].stage[s].surface_index =
>> + set_layout->inline_blocks_surface_indexes[s] =
>> +                  surface_count[s];
>> +               surface_count[s] += 1;
>> +            } else {
>> +               set_layout->binding[b].stage[s].surface_index =
>> + set_layout->inline_blocks_surface_indexes[s];
>> +            }
>> +         }
>> +         break;
>> +
>>         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
>>         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
>>         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
>> @@ -235,6 +283,12 @@ VkResult anv_CreateDescriptorSetLayout(
>>         set_layout->shader_stages |= binding->stageFlags;
>>      }
>>   +   /* Align inline uniforms total size to 32 because we source the 
>> allocation
>> +    * from a single BO in the descriptor set pool and we want the 
>> alignment to
>> +    * match the push constant alignment constraint.
>> +    */
>> +   set_layout->inline_blocks_size = 
>> ALIGN(set_layout->inline_blocks_size, 32);
>> +
>>      set_layout->buffer_count = buffer_count;
>>      set_layout->dynamic_offset_count = dynamic_offset_count;
>>   @@ -405,21 +459,45 @@ VkResult anv_CreateDescriptorPool(
>>      ANV_FROM_HANDLE(anv_device, device, _device);
>>      struct anv_descriptor_pool *pool;
>>   +   vk_foreach_struct(ext, pCreateInfo->pNext) {
>> +      switch (ext->sType) {
>> +      case 
>> VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT:
>> +         /* Our driver doesn't need to know about this as we use a 
>> single
>> +          * binding table entry per stage if one or more inline 
>> descriptor
>> +          * blocks are used.
>> +          */
>> +         break;
>> +
>> +      default:
>> +         anv_debug_ignored_stype(ext->sType);
>> +         break;
>> +      }
>> +   }
>> +
>>      uint32_t descriptor_count = 0;
>>      uint32_t buffer_count = 0;
>> +   uint32_t inline_blocks_size = 0;
>>      for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
>>         switch (pCreateInfo->pPoolSizes[i].type) {
>> +      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>> +         inline_blocks_size += 
>> pCreateInfo->pPoolSizes[i].descriptorCount;
>> +         break;
>>         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
>>         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
>>         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
>>         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
>>            buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount;
>> +         /* Fallthrough */
>>         default:
>>            descriptor_count += 
>> pCreateInfo->pPoolSizes[i].descriptorCount;
>>            break;
>>         }
>>      }
>>   +   /* We'll need one more descriptor for inline uniforms. */
>> +   if (inline_blocks_size > 0)
>> +      descriptor_count += MIN2(pCreateInfo->maxSets, 
>> inline_blocks_size / 4);
>> +
>>      const size_t pool_size =
>>         pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
>>         descriptor_count * sizeof(struct anv_descriptor) +
>> @@ -435,6 +513,34 @@ VkResult anv_CreateDescriptorPool(
>>      pool->next_set = 0;
>>      pool->free_set_list = EMPTY;
>>   +   pool->inline_blocks_size = inline_blocks_size;
>> +   pool->next_block = 0;
>> +   pool->free_block_list = EMPTY;
>> +
>> +   pool->inline_blocks_bo = NULL;
>> +   if (pool->inline_blocks_size > 0) {
>> +      struct anv_physical_device *pdevice = 
>> &device->instance->physicalDevice;
>> +      uint64_t bo_flags =
>> +         (pdevice->supports_48bit_addresses ? 
>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) |
>> +         (pdevice->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
>> +         (pdevice->use_softpin ? EXEC_OBJECT_PINNED : 0);
>> +
>> +      VkResult result = anv_bo_cache_alloc(device, &device->bo_cache,
>> + pool->inline_blocks_size,
>> +                                           bo_flags,
>> + &pool->inline_blocks_bo);
>> +      if (result != VK_SUCCESS)
>> +         goto fail_iubo_alloc;
>> +
>> +      uint32_t gem_flags = !device->info.has_llc ? I915_MMAP_WC : 0;
>> +      void *map = anv_gem_mmap(device, 
>> pool->inline_blocks_bo->gem_handle,
>> +                               0, pool->inline_blocks_bo->size, 
>> gem_flags);
>> +      if (map == MAP_FAILED)
>> +         goto fail_iubo_mmap;
>> +
>> +      pool->inline_blocks_bo->map = map;
>> +   }
>> +
>>      anv_state_stream_init(&pool->surface_state_stream,
>>                            &device->surface_state_pool, 4096);
>>      pool->surface_state_free_list = NULL;
>> @@ -442,6 +548,12 @@ VkResult anv_CreateDescriptorPool(
>>      *pDescriptorPool = anv_descriptor_pool_to_handle(pool);
>>        return VK_SUCCESS;
>> +
>> +fail_iubo_mmap:
>> +   anv_bo_cache_release(device, &device->bo_cache, 
>> pool->inline_blocks_bo);
>> +fail_iubo_alloc:
>> +   vk_free2(&device->alloc, pAllocator, pool);
>> +   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
>>   }
>>     void anv_DestroyDescriptorPool(
>> @@ -455,6 +567,8 @@ void anv_DestroyDescriptorPool(
>>      if (!pool)
>>         return;
>>   +   if (pool->inline_blocks_bo)
>> +      anv_bo_cache_release(device, &device->bo_cache, 
>> pool->inline_blocks_bo);
>> anv_state_stream_finish(&pool->surface_state_stream);
>>      vk_free2(&device->alloc, pAllocator, pool);
>>   }
>> @@ -469,6 +583,8 @@ VkResult anv_ResetDescriptorPool(
>>        pool->next_set = 0;
>>      pool->free_set_list = EMPTY;
>> +   pool->next_block = 0;
>> +   pool->free_block_list = EMPTY;
>> anv_state_stream_finish(&pool->surface_state_stream);
>>      anv_state_stream_init(&pool->surface_state_stream,
>>                            &device->surface_state_pool, 4096);
>> @@ -496,6 +612,36 @@ struct surface_state_free_list_entry {
>>      struct anv_state state;
>>   };
>>   +static struct anv_descriptor_set *
>> +anv_descriptor_inline_block_alloc(struct anv_descriptor_pool *pool,
>> +                                  struct anv_descriptor_set_layout 
>> *layout,
>> +                                  struct anv_descriptor_set *set)
>> +{
>> +   if (layout->inline_blocks_size == 0) {
>> +      set->inline_blocks = NULL;
>> +      return set;
>> +   }
>> +
>> +   if (layout->inline_blocks_size <= pool->inline_blocks_size - 
>> pool->next_block) {
>> +      set->inline_blocks = pool->inline_blocks_bo->map + 
>> pool->next_block;
>> +      pool->next_block += layout->inline_blocks_size;
>> +      return set;
>> +   }
>> +
>> +   struct pool_free_list_entry *entry;
>> +   uint32_t *link = &pool->free_block_list;
>> +   for (uint32_t f = pool->free_block_list; f != EMPTY; f = 
>> entry->next) {
>> +      entry = (struct pool_free_list_entry *) 
>> (pool->inline_blocks_bo->map + f);
>> +      if (layout->inline_blocks_size <= entry->size) {
>> +         *link = entry->next;
>> +         set->inline_blocks = entry;
>> +         return set;
>> +      }
>> +   }
>> +
>> +   return NULL;
>> +}
>> +
>>   static struct anv_descriptor_set *
>>   anv_descriptor_alloc(struct anv_descriptor_pool *pool,
>>                        struct anv_descriptor_set_layout *layout,
>> @@ -504,8 +650,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool 
>> *pool,
>>      struct anv_descriptor_set *set = NULL;
>>        if (size <= pool->size - pool->next_set) {
>> -      set = (struct anv_descriptor_set *) (pool->data + 
>> pool->next_set);
>> -      pool->next_set += size;
>> +      set = anv_descriptor_inline_block_alloc(pool, layout,
>> +         (struct anv_descriptor_set *) (pool->data + pool->next_set));
>> +      if (set)
>> +         pool->next_set += size;
>>         return set;
>>      }
>>   @@ -515,8 +663,10 @@ anv_descriptor_alloc(struct 
>> anv_descriptor_pool *pool,
>>         entry = (struct pool_free_list_entry *) (pool->data + f);
>>         if (size <= entry->size) {
>>            uint32_t next = entry->next;
>> -         set = (struct anv_descriptor_set *) entry;
>> -         *link = next;
>> +         set = anv_descriptor_inline_block_alloc(pool, layout,
>> +            (struct anv_descriptor_set *) entry);
>> +         if (set)
>> +            *link = next;
>>            return set;
>>         }
>>         link = &entry->next;
>> @@ -573,6 +723,18 @@ anv_descriptor_set_create(struct anv_device 
>> *device,
>>         desc += layout->binding[b].array_size;
>>      }
>>   +   /* Also fill the inline uniforms blocks if needed. */
>> +   if (layout->inline_blocks_descriptor_index >= 0) {
>> + set->descriptors[layout->inline_blocks_descriptor_index] = (struct 
>> anv_descriptor) {
>> +         .type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT,
>> +         .inline_address = (struct anv_address) {
>> +            .bo = pool->inline_blocks_bo,
>> +            .offset = set->inline_blocks - pool->inline_blocks_bo->map,
>> +         },
>> +         .inline_range = layout->inline_blocks_size,
>> +      };
>> +   }
>> +
>>      /* Allocate surface state for the buffer views. */
>>      for (uint32_t b = 0; b < layout->buffer_count; b++) {
>>         struct surface_state_free_list_entry *entry =
>> @@ -600,8 +762,6 @@ anv_descriptor_set_destroy(struct anv_device 
>> *device,
>>                              struct anv_descriptor_pool *pool,
>>                              struct anv_descriptor_set *set)
>>   {
>> -   anv_descriptor_set_layout_unref(device, set->layout);
>> -
>>      /* Put the buffer view surface state back on the free list. */
>>      for (uint32_t b = 0; b < set->buffer_count; b++) {
>>         struct surface_state_free_list_entry *entry =
>> @@ -611,7 +771,22 @@ anv_descriptor_set_destroy(struct anv_device 
>> *device,
>>         pool->surface_state_free_list = entry;
>>      }
>>   -   /* Put the descriptor set allocation back on the free list. */
>> +   /* Put the inline uniform blocks back on the free list. */
>> +   if (set->inline_blocks) {
>> +      const uint32_t index = set->inline_blocks - 
>> pool->inline_blocks_bo->map;
>> +      if (index + set->layout->inline_blocks_size == 
>> pool->next_block) {
>> +         pool->next_block = index;
>> +      } else {
>> +         struct pool_free_list_entry *entry = (struct 
>> pool_free_list_entry *) set->inline_blocks;
>> +         entry->next = pool->free_block_list;
>> +         entry->size = set->layout->inline_blocks_size;
>> +         pool->free_block_list = (char *) entry - pool->data;
>> +      }
>> +   }
>> +
>> +   anv_descriptor_set_layout_unref(device, set->layout);
>> +
>> +   /* Finally, put the descriptor set allocation back on the free 
>> list. */
>>      const uint32_t index = (char *) set - pool->data;
>>      if (index + set->size == pool->next_set) {
>>         pool->next_set = index;
>> @@ -798,6 +973,23 @@ anv_descriptor_set_write_buffer(struct 
>> anv_descriptor_set *set,
>>      }
>>   }
>>   +static void
>> +anv_descriptor_set_write_inline_uniforms(struct anv_descriptor_set 
>> *set,
>> +                                         VkDescriptorType type,
>> +                                         uint32_t binding,
>> +                                         uint32_t dst_offset,
>> +                                         const void *data,
>> +                                         uint32_t data_length)
>> +{
>> +   const struct anv_descriptor_set_binding_layout *bind_layout =
>> +      &set->layout->binding[binding];
>> +
>> +   assert(type == bind_layout->type);
>> +
>> +   memcpy(set->inline_blocks + bind_layout->inline_block_offset + 
>> dst_offset,
>> +          data, data_length);
>> +}
>> +
>>   void anv_UpdateDescriptorSets(
>>       VkDevice                                    _device,
>>       uint32_t descriptorWriteCount,
>> @@ -826,6 +1018,20 @@ void anv_UpdateDescriptorSets(
>>            }
>>            break;
>>   +      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
>> +         const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
>> +            vk_find_struct_const(write->pNext,
>> + WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
>> +         assert(inline_ub);
>> +         anv_descriptor_set_write_inline_uniforms(set,
>> + write->descriptorType,
>> + write->dstBinding,
>> + write->dstArrayElement,
>> + inline_ub->pData,
>> + inline_ub->dataSize);
>> +         break;
>> +      }
>> +
>>         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
>>         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
>>            for (uint32_t j = 0; j < write->descriptorCount; j++) {
>> @@ -954,6 +1160,16 @@ anv_descriptor_set_write_template(struct 
>> anv_descriptor_set *set,
>>            }
>>            break;
>>   +      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
>> +         anv_descriptor_set_write_inline_uniforms(set,
>> + entry->type,
>> + entry->binding,
>> + entry->array_element,
>> +                                                  data + entry->offset,
>> + entry->array_count);
>> +         break;
>> +      }
>> +
>>         default:
>>            break;
>>         }
>> diff --git a/src/intel/vulkan/anv_device.c 
>> b/src/intel/vulkan/anv_device.c
>> index 1e37876eb43..5bc9f7e7e88 100644
>> --- a/src/intel/vulkan/anv_device.c
>> +++ b/src/intel/vulkan/anv_device.c
>> @@ -942,6 +942,15 @@ void anv_GetPhysicalDeviceFeatures2(
>>            break;
>>         }
>>   +      case 
>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
>> +         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
>> +            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
>> +
>> +         features->inlineUniformBlock = true;
>> + features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
>> +         break;
>> +      }
>> +
>>         default:
>>            anv_debug_ignored_stype(ext->sType);
>>            break;
>> @@ -1198,6 +1207,19 @@ void anv_GetPhysicalDeviceProperties2(
>>            break;
>>         }
>>   +      case 
>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
>> +         VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
>> +            (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
>> +
>> +         /* All required minimum values. */
>> +         props->maxInlineUniformBlockSize = 
>> MAX_INLINE_UNIFORM_BLOCK_SIZE;
>> +         props->maxPerStageDescriptorInlineUniformBlocks = 
>> MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
>> + props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 4;
>> +         props->maxDescriptorSetInlineUniformBlocks = 4;
>> + props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 4;
>> +         break;
>> +      }
>> +
>>         default:
>>            anv_debug_ignored_stype(ext->sType);
>>            break;
>> diff --git a/src/intel/vulkan/anv_extensions.py 
>> b/src/intel/vulkan/anv_extensions.py
>> index 951505a854e..61803c9d7fa 100644
>> --- a/src/intel/vulkan/anv_extensions.py
>> +++ b/src/intel/vulkan/anv_extensions.py
>> @@ -125,6 +125,7 @@ EXTENSIONS = [
>>       Extension('VK_EXT_vertex_attribute_divisor',          3, True),
>>       Extension('VK_EXT_post_depth_coverage',               1, 
>> 'device->info.gen >= 9'),
>>       Extension('VK_EXT_sampler_filter_minmax',             1, 
>> 'device->info.gen >= 9'),
>> +    Extension('VK_EXT_inline_uniform_block',              1, True),
>>   ]
>>     class VkVersion:
>> diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
>> b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
>> index 856101cc2ff..75bf33806f9 100644
>> --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
>> +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
>> @@ -160,6 +160,22 @@ lower_res_index_intrinsic(nir_intrinsic_instr 
>> *intrin,
>>         block_index = nir_iadd(b, nir_imm_int(b, surface_index), 
>> block_index);
>>      }
>>   +   /* We use a single binding table entry for all inline uniform 
>> blocks. That
>> +    * means each block is at a different offset in that entry. We 
>> can get this
>> +    * offset from the layout using (set, binding) but we need to add 
>> that
>> +    * offset into the actual load_ubo intrinsic.
>> +    *
>> +    * Here, instead of just setting the block index, we set a tuple
>> +    * (block_index, inline_block_offset) which will be replaced when 
>> we run into a
>> +    * load_ubo intrinsic (see lower_inline_uniform_block function).
>> +    */
>> +   uint32_t inline_block_offset =
>> + state->layout->set[set].layout->binding[binding].inline_block_offset;
>> +   if (inline_block_offset != -1) {
>> +      block_index = nir_vec2(b, block_index,
>> +                             nir_imm_int(b, inline_block_offset));
>> +   }
>> +
>>      assert(intrin->dest.is_ssa);
>>      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 
>> nir_src_for_ssa(block_index));
>>      nir_instr_remove(&intrin->instr);
>> @@ -268,6 +284,39 @@ lower_load_constant(nir_intrinsic_instr *intrin,
>>      nir_instr_remove(&intrin->instr);
>>   }
>>   +static void
>> +lower_inline_uniform_block(nir_intrinsic_instr *intrin,
>> +                           struct apply_pipeline_layout_state *state)
>> +{
>> +   if (!intrin->src[0].ssa->parent_instr ||
>> +       intrin->src[0].ssa->parent_instr->type != nir_instr_type_alu)
>> +      return;
>> +
>> +   nir_alu_instr *alu = 
>> nir_instr_as_alu(intrin->src[0].ssa->parent_instr);
>> +   if (alu->op != nir_op_vec2)
>> +      return;
>> +
>> +   nir_builder *b = &state->builder;
>> +   b->cursor = nir_before_instr(&intrin->instr);
>> +
>> +   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], 
>> alu->src[0].src);
>> +
>> +   nir_const_value *const_inline_block_offset =
>> +      nir_src_as_const_value(alu->src[1].src);
>> +   assert(const_inline_block_offset);
>> +
>> +   nir_const_value *const_offset = 
>> nir_src_as_const_value(intrin->src[1]);
>> +   nir_ssa_def *offset;
>> +   if (const_offset) {
>> +      offset = nir_iadd(b, nir_imm_int(b, const_offset->u32[0]),
>> +                        nir_imm_int(b, 
>> const_inline_block_offset->u32[0]));
>> +   } else {
>> +      offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[1], 1),
>> +                        nir_imm_int(b, 
>> const_inline_block_offset->u32[0]));
>> +   }
>> +   nir_instr_rewrite_src(&intrin->instr, &intrin->src[1], 
>> nir_src_for_ssa(offset));
>> +}
>> +
>>   static void
>>   lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
>>                   unsigned *base_index,
>> @@ -401,6 +450,9 @@ apply_pipeline_layout_block(nir_block *block,
>>            case nir_intrinsic_load_constant:
>>               lower_load_constant(intrin, state);
>>               break;
>> +         case nir_intrinsic_load_ubo:
>> +            lower_inline_uniform_block(intrin, state);
>> +            break;
>>            default:
>>               break;
>>            }
>> diff --git a/src/intel/vulkan/anv_private.h 
>> b/src/intel/vulkan/anv_private.h
>> index 372b7c69635..cea8e5786f5 100644
>> --- a/src/intel/vulkan/anv_private.h
>> +++ b/src/intel/vulkan/anv_private.h
>> @@ -159,6 +159,8 @@ struct gen_l3_config;
>>   #define MAX_DYNAMIC_BUFFERS 16
>>   #define MAX_IMAGES 8
>>   #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
>> +#define MAX_INLINE_UNIFORM_BLOCK_SIZE 256 /* Minimum requirement */
>> +#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 4 /* Minimum 
>> requirement */
>>     /* The kernel relocation API has a limitation of a 32-bit delta 
>> value
>>    * applied to the address before it is written which, in spite of 
>> it being
>> @@ -1394,6 +1396,16 @@ struct anv_descriptor_set_binding_layout {
>>      /* Index into the descriptor set buffer views */
>>      int16_t buffer_index;
>>   +   /* TODO/question: should we union fields a bit? inline uniform 
>> blocks have
>> +    * no use for array_size, buffer_index & dynamic_offset_index.
>> +    */
>> +
>> +   /* Offset into the portion of data allocated for the inline 
>> uniforms. */
>> +   uint32_t inline_block_offset;
>> +
>> +   /* Length of the portion of data allocated for inline uniforms */
>> +   uint32_t inline_block_length;
>> +
>>      struct {
>>         /* Index into the binding table for the associated surface */
>>         int16_t surface_index;
>> @@ -1428,6 +1440,15 @@ struct anv_descriptor_set_layout {
>>      /* Number of dynamic offsets used by this descriptor set */
>>      uint16_t dynamic_offset_count;
>>   +   /* Index into the flattened descriptor set (-1 if unused). */
>> +   int16_t inline_blocks_descriptor_index;
>> +
>> +   /* Data to allocate into the pool descriptor's inline uniforms BO */
>> +   uint32_t inline_blocks_size;
>> +
>> +   /* Index into the binding table for the associated surface */
>> +   int16_t inline_blocks_surface_indexes[MESA_SHADER_STAGES];
>> +
>>      /* Bindings in this descriptor set */
>>      struct anv_descriptor_set_binding_layout binding[0];
>>   };
>> @@ -1464,6 +1485,11 @@ struct anv_descriptor {
>>            uint64_t range;
>>         };
>>   +      struct {
>> +         struct anv_address inline_address;
>> +         uint64_t inline_range;
>> +      };
>> +
>>         struct anv_buffer_view *buffer_view;
>>      };
>>   };
>> @@ -1472,6 +1498,7 @@ struct anv_descriptor_set {
>>      struct anv_descriptor_set_layout *layout;
>>      uint32_t size;
>>      uint32_t buffer_count;
>> +   void *inline_blocks;
>>      struct anv_buffer_view *buffer_views;
>>      struct anv_descriptor descriptors[0];
>>   };
>> @@ -1507,6 +1534,12 @@ struct anv_descriptor_pool {
>>      struct anv_state_stream surface_state_stream;
>>      void *surface_state_free_list;
>>   +   struct anv_bo *inline_blocks_bo;
>> +   uint32_t inline_blocks_size;
>> +
>> +   uint32_t free_block_list;
>> +   uint32_t next_block;
>> +
>>      char data[0];
>>   };
>>   diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
>> b/src/intel/vulkan/genX_cmd_buffer.c
>> index 80bebf5a12c..5cb4c0f13af 100644
>> --- a/src/intel/vulkan/genX_cmd_buffer.c
>> +++ b/src/intel/vulkan/genX_cmd_buffer.c
>> @@ -2146,6 +2146,19 @@ emit_binding_table(struct anv_cmd_buffer 
>> *cmd_buffer,
>>                              desc->buffer_view->address);
>>            break;
>>   +      case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
>> +         surface_state =
>> + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
>> +         enum isl_format format =
>> +            anv_isl_format_for_descriptor_type(desc->type);
>> +
>> +         anv_fill_buffer_surface_state(cmd_buffer->device, 
>> surface_state,
>> +                                       format, desc->inline_address,
>> +                                       desc->inline_range, 1);
>> +         add_surface_reloc(cmd_buffer, surface_state, 
>> desc->inline_address);
>> +         break;
>> +      }
>> +
>>         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
>>         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
>>            /* Compute the offset within the buffer */
>> @@ -2445,14 +2458,15 @@ cmd_buffer_flush_push_constants(struct 
>> anv_cmd_buffer *cmd_buffer,
>>                     const struct anv_descriptor *desc =
>> anv_descriptor_for_binding(&gfx_state->base, binding);
>>   -                  if (desc->type == 
>> VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
>> +                  switch (desc->type) {
>> +                  case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
>>                        read_len = MIN2(range->length,
>> DIV_ROUND_UP(desc->buffer_view->range, 32) - range->start);
>>                        read_addr = 
>> anv_address_add(desc->buffer_view->address,
>> range->start * 32);
>> -                  } else {
>> -                     assert(desc->type == 
>> VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
>> +                     break;
>>   +                  case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
>>                        uint32_t dynamic_offset =
>> dynamic_offset_for_binding(&gfx_state->base, binding);
>>                        uint32_t buf_offset =
>> @@ -2464,6 +2478,18 @@ cmd_buffer_flush_push_constants(struct 
>> anv_cmd_buffer *cmd_buffer,
>>                           DIV_ROUND_UP(buf_range, 32) - range->start);
>>                        read_addr = 
>> anv_address_add(desc->buffer->address,
>>                                                    buf_offset + 
>> range->start * 32);
>> +                     break;
>> +                  }
>> +
>> +                  case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>> +                     read_len = MIN2(range->length,
>> +                        DIV_ROUND_UP(desc->inline_range, 32) - 
>> range->start);
>> +                     read_addr = anv_address_add(desc->inline_address,
>> + range->start * 32);
>> +                     break;
>> +
>> +                  default:
>> +                     unreachable("Invalid descriptor");
>>                     }
>>                  }
>>
>



More information about the mesa-dev mailing list