[Mesa-dev] [PATCH 2/2] anv: add support for VK_EXT_inline_uniform_block
Tapani Pälli
tapani.palli at intel.com
Thu Oct 4 06:45:57 UTC 2018
On 9/17/18 1:43 AM, Lionel Landwerlin wrote:
> On 16/09/2018 21:57, Bas Nieuwenhuizen wrote:
>> On Tue, Sep 11, 2018 at 10:23 PM Lionel Landwerlin
>> <lionel.g.landwerlin at intel.com> wrote:
>>> This new extension adds an implicitly allocated block of uniforms into
>>> the descriptors sets through a new descriptor type.
>>>
>>> We implement this by having a single BO in the descriptor set pool
>>> from which we source uniforms.
>>>
>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>>> ---
>>> src/intel/vulkan/anv_cmd_buffer.c | 3 +
>>> src/intel/vulkan/anv_descriptor_set.c | 238 +++++++++++++++++-
>>> src/intel/vulkan/anv_device.c | 22 ++
>>> src/intel/vulkan/anv_extensions.py | 1 +
>>> .../vulkan/anv_nir_apply_pipeline_layout.c | 52 ++++
>>> src/intel/vulkan/anv_private.h | 33 +++
>>> src/intel/vulkan/genX_cmd_buffer.c | 32 ++-
>>> 7 files changed, 367 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/src/intel/vulkan/anv_cmd_buffer.c
>>> b/src/intel/vulkan/anv_cmd_buffer.c
>>> index 8ef71b0ed9c..b14be94f470 100644
>>> --- a/src/intel/vulkan/anv_cmd_buffer.c
>>> +++ b/src/intel/vulkan/anv_cmd_buffer.c
>>> @@ -651,6 +651,7 @@
>>> anv_isl_format_for_descriptor_type(VkDescriptorType type)
>>> switch (type) {
>>> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
>>> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>>> return ISL_FORMAT_R32G32B32A32_FLOAT;
>>>
>>> case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
>>> @@ -1039,6 +1040,8 @@ void anv_CmdPushDescriptorSetKHR(
>>> }
>>> break;
>>>
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>>> + unreachable("Invalid descriptor type for push descriptors");
>>> default:
>>> break;
>>> }
>>> diff --git a/src/intel/vulkan/anv_descriptor_set.c
>>> b/src/intel/vulkan/anv_descriptor_set.c
>>> index 3439f828900..2e5f2a1f288 100644
>>> --- a/src/intel/vulkan/anv_descriptor_set.c
>>> +++ b/src/intel/vulkan/anv_descriptor_set.c
>>> @@ -26,8 +26,10 @@
>>> #include <string.h>
>>> #include <unistd.h>
>>> #include <fcntl.h>
>>> +#include <sys/mman.h>
>>>
>>> #include "util/mesa-sha1.h"
>>> +#include "vk_util.h"
>>>
>>> #include "anv_private.h"
>>>
>>> @@ -40,7 +42,8 @@ void anv_GetDescriptorSetLayoutSupport(
>>> const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
>>> VkDescriptorSetLayoutSupport* pSupport)
>>> {
>>> - uint32_t surface_count[MESA_SHADER_STAGES] = { 0, };
>>> + int16_t surface_count[MESA_SHADER_STAGES] = { 0, };
>>> + int16_t inline_surface_indexes[MESA_SHADER_STAGES] = { -1, };
>>>
>>> for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) {
>>> const VkDescriptorSetLayoutBinding *binding =
>>> &pCreateInfo->pBindings[b];
>>> @@ -50,6 +53,15 @@ void anv_GetDescriptorSetLayoutSupport(
>>> /* There is no real limit on samplers */
>>> break;
>>>
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>>> + anv_foreach_stage(s, binding->stageFlags) {
>>> + if (inline_surface_indexes[s] < 0) {
>>> + inline_surface_indexes[s] = surface_count[s];
>>> + surface_count[s] += 1;
>>> + }
>>> + }
>>> + break;
>>> +
>>> case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
>>> if (binding->pImmutableSamplers) {
>>> for (uint32_t i = 0; i < binding->descriptorCount; i++) {
>>> @@ -118,6 +130,9 @@ VkResult anv_CreateDescriptorSetLayout(
>>> memset(set_layout, 0, sizeof(*set_layout));
>>> set_layout->ref_cnt = 1;
>>> set_layout->binding_count = max_binding + 1;
>>> + set_layout->inline_blocks_descriptor_index = -1;
>>> + memset(set_layout->inline_blocks_surface_indexes,
>>> + -1, sizeof(set_layout->inline_blocks_surface_indexes));
>>>
>>> for (uint32_t b = 0; b <= max_binding; b++) {
>>> /* Initialize all binding_layout entries to -1 */
>>> @@ -159,9 +174,24 @@ VkResult anv_CreateDescriptorSetLayout(
>>> #ifndef NDEBUG
>>> set_layout->binding[b].type = binding->descriptorType;
>>> #endif
>>> - set_layout->binding[b].array_size = binding->descriptorCount;
>>> - set_layout->binding[b].descriptor_index = set_layout->size;
>>> - set_layout->size += binding->descriptorCount;
>>> +
>>> + if (binding->descriptorType ==
>>> VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
>>> + /* We only a single descriptor entry for all the inline
>>> uniforms. */
>>> + set_layout->binding[b].array_size = 1;
>>> + if (set_layout->inline_blocks_descriptor_index < 0) {
>>> + set_layout->binding[b].descriptor_index =
>>> + set_layout->inline_blocks_descriptor_index =
>>> + set_layout->size;
>>> + set_layout->size += 1;
>>> + } else {
>>> + set_layout->binding[b].descriptor_index =
>>> + set_layout->inline_blocks_descriptor_index;
>>> + }
>>> + } else {
>>> + set_layout->binding[b].array_size = binding->descriptorCount;
>>> + set_layout->binding[b].descriptor_index = set_layout->size;
>>> + set_layout->size += binding->descriptorCount;
>>> + }
>>>
>>> switch (binding->descriptorType) {
>>> case VK_DESCRIPTOR_TYPE_SAMPLER:
>>> @@ -176,6 +206,24 @@ VkResult anv_CreateDescriptorSetLayout(
>>> }
>>>
>>> switch (binding->descriptorType) {
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>>> + set_layout->binding[b].inline_block_offset =
>>> set_layout->inline_blocks_size;
>>> + set_layout->binding[b].inline_block_length =
>>> binding->descriptorCount;
>>> + set_layout->inline_blocks_size += binding->descriptorCount;
>>> +
>>> + anv_foreach_stage(s, binding->stageFlags) {
>>> + if (set_layout->inline_blocks_surface_indexes[s] < 0) {
>>> + set_layout->binding[b].stage[s].surface_index =
>>> + set_layout->inline_blocks_surface_indexes[s] =
>>> + surface_count[s];
>>> + surface_count[s] += 1;
>>> + } else {
>>> + set_layout->binding[b].stage[s].surface_index =
>>> + set_layout->inline_blocks_surface_indexes[s];
>>> + }
>>> + }
>>> + break;
>>> +
>>> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
>>> case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
>>> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
>>> @@ -235,6 +283,12 @@ VkResult anv_CreateDescriptorSetLayout(
>>> set_layout->shader_stages |= binding->stageFlags;
>>> }
>>>
>>> + /* Align inline uniforms total size to 32 because we source the
>>> allocation
>>> + * from a single BO in the descriptor set pool and we want the
>>> alignment to
>>> + * match the push constant alignment constraint.
>>> + */
>>> + set_layout->inline_blocks_size =
>>> ALIGN(set_layout->inline_blocks_size, 32);
>>> +
>>> set_layout->buffer_count = buffer_count;
>>> set_layout->dynamic_offset_count = dynamic_offset_count;
>>>
>>> @@ -405,21 +459,45 @@ VkResult anv_CreateDescriptorPool(
>>> ANV_FROM_HANDLE(anv_device, device, _device);
>>> struct anv_descriptor_pool *pool;
>>>
>>> + vk_foreach_struct(ext, pCreateInfo->pNext) {
>>> + switch (ext->sType) {
>>> + case
>>> VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT:
>>> + /* Our driver doesn't need to know about this as we use a
>>> single
>>> + * binding table entry per stage if one of more inline
>>> descriptor
>>> + * blocks are used.
>>> + */
>>> + break;
>>> +
>>> + default:
>>> + anv_debug_ignored_stype(ext->sType);
>>> + break;
>>> + }
>>> + }
>>> +
>>> uint32_t descriptor_count = 0;
>>> uint32_t buffer_count = 0;
>>> + uint32_t inline_blocks_size = 0;
>>> for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
>>> switch (pCreateInfo->pPoolSizes[i].type) {
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>>> + inline_blocks_size +=
>>> pCreateInfo->pPoolSizes[i].descriptorCount;
>>> + break;
>>> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
>>> case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
>>> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
>>> case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
>>> buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount;
>>> + /* Fallthrough */
>>> default:
>>> descriptor_count +=
>>> pCreateInfo->pPoolSizes[i].descriptorCount;
>>> break;
>>> }
>>> }
>>>
>>> + /* We'll need one more descriptor for inline uniforms. */
>>> + if (inline_blocks_size > 0)
>>> + descriptor_count += MIN2(pCreateInfo->maxSets,
>>> inline_blocks_size / 4);
>>> +
>>> const size_t pool_size =
>>> pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
>>> descriptor_count * sizeof(struct anv_descriptor) +
>>> @@ -435,6 +513,34 @@ VkResult anv_CreateDescriptorPool(
>>> pool->next_set = 0;
>>> pool->free_set_list = EMPTY;
>>>
>>> + pool->inline_blocks_size = inline_blocks_size;
>>> + pool->next_block = 0;
>>> + pool->free_block_list = EMPTY;
>>> +
>>> + pool->inline_blocks_bo = NULL;
>>> + if (pool->inline_blocks_size > 0) {
>>> + struct anv_physical_device *pdevice =
>>> &device->instance->physicalDevice;
>>> + uint64_t bo_flags =
>>> + (pdevice->supports_48bit_addresses ?
>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) |
>>> + (pdevice->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
>>> + (pdevice->use_softpin ? EXEC_OBJECT_PINNED : 0);
>>> +
>>> + VkResult result = anv_bo_cache_alloc(device, &device->bo_cache,
>>> + pool->inline_blocks_size,
>>> + bo_flags,
>>> + &pool->inline_blocks_bo);
>>> + if (result != VK_SUCCESS)
>>> + goto fail_iubo_alloc;
>>> +
>>> + uint32_t gem_flags = !device->info.has_llc ? I915_MMAP_WC : 0;
>>> + void *map = anv_gem_mmap(device,
>>> pool->inline_blocks_bo->gem_handle,
>>> + 0, pool->inline_blocks_bo->size,
>>> gem_flags);
>>> + if (map == MAP_FAILED)
>>> + goto fail_iubo_mmap;
>>> +
>>> + pool->inline_blocks_bo->map = map;
>>> + }
>>> +
>>> anv_state_stream_init(&pool->surface_state_stream,
>>> &device->surface_state_pool, 4096);
>>> pool->surface_state_free_list = NULL;
>>> @@ -442,6 +548,12 @@ VkResult anv_CreateDescriptorPool(
>>> *pDescriptorPool = anv_descriptor_pool_to_handle(pool);
>>>
>>> return VK_SUCCESS;
>>> +
>>> +fail_iubo_mmap:
>>> + anv_bo_cache_release(device, &device->bo_cache,
>>> pool->inline_blocks_bo);
>>> +fail_iubo_alloc:
>>> + vk_free2(&device->alloc, pAllocator, pool);
>>> + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
>>> }
>>>
>>> void anv_DestroyDescriptorPool(
>>> @@ -455,6 +567,8 @@ void anv_DestroyDescriptorPool(
>>> if (!pool)
>>> return;
>>>
>>> + if (pool->inline_blocks_bo)
>>> + anv_bo_cache_release(device, &device->bo_cache,
>>> pool->inline_blocks_bo);
>>> anv_state_stream_finish(&pool->surface_state_stream);
>>> vk_free2(&device->alloc, pAllocator, pool);
>>> }
>>> @@ -469,6 +583,8 @@ VkResult anv_ResetDescriptorPool(
>>>
>>> pool->next_set = 0;
>>> pool->free_set_list = EMPTY;
>>> + pool->next_block = 0;
>>> + pool->free_block_list = EMPTY;
>>> anv_state_stream_finish(&pool->surface_state_stream);
>>> anv_state_stream_init(&pool->surface_state_stream,
>>> &device->surface_state_pool, 4096);
>>> @@ -496,6 +612,36 @@ struct surface_state_free_list_entry {
>>> struct anv_state state;
>>> };
>>>
>>> +static struct anv_descriptor_set *
>>> +anv_descriptor_inline_block_alloc(struct anv_descriptor_pool *pool,
>>> + struct anv_descriptor_set_layout
>>> *layout,
>>> + struct anv_descriptor_set *set)
>>> +{
>>> + if (layout->inline_blocks_size == 0) {
>>> + set->inline_blocks = NULL;
>>> + return set;
>>> + }
>>> +
>>> + if (layout->inline_blocks_size <= pool->inline_blocks_size -
>>> pool->next_block) {
>>> + set->inline_blocks = pool->inline_blocks_bo->map +
>>> pool->next_block;
>>> + pool->next_block += layout->inline_blocks_size;
>>> + return set;
>>> + }
>>> +
>>> + struct pool_free_list_entry *entry;
>>> + uint32_t *link = &pool->free_block_list;
>>> + for (uint32_t f = pool->free_block_list; f != EMPTY; f =
>>> entry->next) {
>>> + entry = (struct pool_free_list_entry *)
>>> (pool->inline_blocks_bo->map + f);
>>> + if (layout->inline_blocks_size <= entry->size) {
>>> + *link = entry->next;
>>> + set->inline_blocks = entry;
>>> + return set;
>>> + }
>>> + }
>>> +
>>> + return NULL;
>>> +}
>>> +
>>> static struct anv_descriptor_set *
>>> anv_descriptor_alloc(struct anv_descriptor_pool *pool,
>>> struct anv_descriptor_set_layout *layout,
>>> @@ -504,8 +650,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool
>>> *pool,
>>> struct anv_descriptor_set *set = NULL;
>>>
>>> if (size <= pool->size - pool->next_set) {
>>> - set = (struct anv_descriptor_set *) (pool->data +
>>> pool->next_set);
>>> - pool->next_set += size;
>>> + set = anv_descriptor_inline_block_alloc(pool, layout,
>>> + (struct anv_descriptor_set *) (pool->data + pool->next_set));
>>> + if (set)
>>> + pool->next_set += size;
>>> return set;
>>> }
>>>
>>> @@ -515,8 +663,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool
>>> *pool,
>>> entry = (struct pool_free_list_entry *) (pool->data + f);
>>> if (size <= entry->size) {
>>> uint32_t next = entry->next;
>>> - set = (struct anv_descriptor_set *) entry;
>>> - *link = next;
>>> + set = anv_descriptor_inline_block_alloc(pool, layout,
>>> + (struct anv_descriptor_set *) entry);
>>> + if (set)
>>> + *link = next;
>>> return set;
>>> }
>>> link = &entry->next;
>>> @@ -573,6 +723,18 @@ anv_descriptor_set_create(struct anv_device
>>> *device,
>>> desc += layout->binding[b].array_size;
>>> }
>>>
>>> + /* Also fill the inline uniforms blocks if needed. */
>>> + if (layout->inline_blocks_descriptor_index >= 0) {
>>> + set->descriptors[layout->inline_blocks_descriptor_index] =
>>> (struct anv_descriptor) {
>>> + .type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT,
>>> + .inline_address = (struct anv_address) {
>>> + .bo = pool->inline_blocks_bo,
>>> + .offset = set->inline_blocks - pool->inline_blocks_bo->map,
>>> + },
>>> + .inline_range = layout->inline_blocks_size,
>>> + };
>>> + }
>>> +
>>> /* Allocate surface state for the buffer views. */
>>> for (uint32_t b = 0; b < layout->buffer_count; b++) {
>>> struct surface_state_free_list_entry *entry =
>>> @@ -600,8 +762,6 @@ anv_descriptor_set_destroy(struct anv_device
>>> *device,
>>> struct anv_descriptor_pool *pool,
>>> struct anv_descriptor_set *set)
>>> {
>>> - anv_descriptor_set_layout_unref(device, set->layout);
>>> -
>>> /* Put the buffer view surface state back on the free list. */
>>> for (uint32_t b = 0; b < set->buffer_count; b++) {
>>> struct surface_state_free_list_entry *entry =
>>> @@ -611,7 +771,22 @@ anv_descriptor_set_destroy(struct anv_device
>>> *device,
>>> pool->surface_state_free_list = entry;
>>> }
>>>
>>> - /* Put the descriptor set allocation back on the free list. */
>>> + /* Put the inline uniform blocks back on the free list. */
>>> + if (set->inline_blocks) {
>>> + const uint32_t index = set->inline_blocks -
>>> pool->inline_blocks_bo->map;
>>> + if (index + set->layout->inline_blocks_size ==
>>> pool->next_block) {
>>> + pool->next_block = index;
>>> + } else {
>>> + struct pool_free_list_entry *entry = (struct
>>> pool_free_list_entry *) set->inline_blocks;
>>> + entry->next = pool->free_block_list;
>>> + entry->size = set->layout->inline_blocks_size;
>>> + pool->free_block_list = (char *) entry - pool->data;
>>> + }
>>> + }
>>> +
>>> + anv_descriptor_set_layout_unref(device, set->layout);
>>> +
>>> + /* Finally, put the descriptor set allocation back on the free
>>> list. */
>>> const uint32_t index = (char *) set - pool->data;
>>> if (index + set->size == pool->next_set) {
>>> pool->next_set = index;
>>> @@ -798,6 +973,23 @@ anv_descriptor_set_write_buffer(struct
>>> anv_descriptor_set *set,
>>> }
>>> }
>>>
>>> +static void
>>> +anv_descriptor_set_write_inline_uniforms(struct anv_descriptor_set
>>> *set,
>>> + VkDescriptorType type,
>>> + uint32_t binding,
>>> + uint32_t dst_offset,
>>> + const void *data,
>>> + uint32_t data_length)
>>> +{
>>> + const struct anv_descriptor_set_binding_layout *bind_layout =
>>> + &set->layout->binding[binding];
>>> +
>>> + assert(type == bind_layout->type);
>>> +
>>> + memcpy(set->inline_blocks + bind_layout->inline_block_offset +
>>> dst_offset,
>>> + data, data_length);
>>> +}
>>> +
>>> void anv_UpdateDescriptorSets(
>>> VkDevice _device,
>>> uint32_t descriptorWriteCount,
>>> @@ -826,6 +1018,20 @@ void anv_UpdateDescriptorSets(
>>> }
>>> break;
>>>
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
>>> + const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
>>> + vk_find_struct_const(write->pNext,
>>> +
>>> WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
>>> + assert(inline_ub);
>>> + anv_descriptor_set_write_inline_uniforms(set,
>>> +
>>> write->descriptorType,
>>> + write->dstBinding,
>>> +
>>> write->dstArrayElement,
>>> + inline_ub->pData,
>>> + inline_ub->dataSize);
>>> + break;
>>> + }
>>> +
>>> case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
>>> case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
>>> for (uint32_t j = 0; j < write->descriptorCount; j++) {
>>> @@ -954,6 +1160,16 @@ anv_descriptor_set_write_template(struct
>>> anv_descriptor_set *set,
>>> }
>>> break;
>>>
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
>>> + anv_descriptor_set_write_inline_uniforms(set,
>>> + entry->type,
>>> + entry->binding,
>>> + entry->array_element,
>>> + data + entry->offset,
>>> + entry->array_count);
>> Where in the spec did you find that the data is specified directly
>> instead of a VkWriteDescriptorSetInlineUniformBlockEXT struct?
>
>
> It's not well stated, but what else could the data be about?
>
> The description of the fields in VkDescriptorUpdateTemplateEntryKHR was
> the best hint for me.
>
FWIW, this makes some of the dEQP-VK.binding_model.descriptorset_random*
tests pass, so I think vk-gl-cts uses it this way.
I agree it seems strange (at least from a user's POV) that the usage is not
similar to vkUpdateDescriptorSets, i.e. that it does not go through
VkWriteDescriptorSetInlineUniformBlockEXT as one would expect from the extension.
>
>
>>
>>> + break;
>>> + }
>>> +
>>> default:
>>> break;
>>> }
>>> diff --git a/src/intel/vulkan/anv_device.c
>>> b/src/intel/vulkan/anv_device.c
>>> index 1e37876eb43..5bc9f7e7e88 100644
>>> --- a/src/intel/vulkan/anv_device.c
>>> +++ b/src/intel/vulkan/anv_device.c
>>> @@ -942,6 +942,15 @@ void anv_GetPhysicalDeviceFeatures2(
>>> break;
>>> }
>>>
>>> + case
>>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
>>> + VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
>>> + (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
>>> +
>>> + features->inlineUniformBlock = true;
>>> +
>>> features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
>>> + break;
>>> + }
>>> +
>>> default:
>>> anv_debug_ignored_stype(ext->sType);
>>> break;
>>> @@ -1198,6 +1207,19 @@ void anv_GetPhysicalDeviceProperties2(
>>> break;
>>> }
>>>
>>> + case
>>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
>>> + VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
>>> + (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
>>> +
>>> + /* All required minimum values. */
>>> + props->maxInlineUniformBlockSize =
>>> MAX_INLINE_UNIFORM_BLOCK_SIZE;
>>> + props->maxPerStageDescriptorInlineUniformBlocks =
>>> MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
>>> +
>>> props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 4;
>>> + props->maxDescriptorSetInlineUniformBlocks = 4;
>>> + props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 4;
>>> + break;
>>> + }
>>> +
>>> default:
>>> anv_debug_ignored_stype(ext->sType);
>>> break;
>>> diff --git a/src/intel/vulkan/anv_extensions.py
>>> b/src/intel/vulkan/anv_extensions.py
>>> index 951505a854e..61803c9d7fa 100644
>>> --- a/src/intel/vulkan/anv_extensions.py
>>> +++ b/src/intel/vulkan/anv_extensions.py
>>> @@ -125,6 +125,7 @@ EXTENSIONS = [
>>> Extension('VK_EXT_vertex_attribute_divisor', 3, True),
>>> Extension('VK_EXT_post_depth_coverage', 1,
>>> 'device->info.gen >= 9'),
>>> Extension('VK_EXT_sampler_filter_minmax', 1,
>>> 'device->info.gen >= 9'),
>>> + Extension('VK_EXT_inline_uniform_block', 1, True),
>>> ]
>>>
>>> class VkVersion:
>>> diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
>>> b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
>>> index 856101cc2ff..75bf33806f9 100644
>>> --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
>>> +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
>>> @@ -160,6 +160,22 @@ lower_res_index_intrinsic(nir_intrinsic_instr
>>> *intrin,
>>> block_index = nir_iadd(b, nir_imm_int(b, surface_index),
>>> block_index);
>>> }
>>>
>>> + /* We use a single binding table entry for all inline uniform
>>> blocks. That
>>> + * means each block is at a different offset in that entry. We
>>> can get this
>>> + * offset from the layout using (set, binding) but we need to add
>>> that
>>> + * offset into the actual load_ubo intrinsic.
>>> + *
>>> + * Here instead of just setting the block index we set a tuple
>>> + * (block_index, inline_block_offset) which will be replace when
>>> run into a
>>> + * load_ubo intrinsic (see lower_inline_uniform_block function).
>>> + */
>>> + uint32_t inline_block_offset =
>>> +
>>> state->layout->set[set].layout->binding[binding].inline_block_offset;
>>> + if (inline_block_offset != -1) {
>>> + block_index = nir_vec2(b, block_index,
>>> + nir_imm_int(b, inline_block_offset));
>>> + }
>>> +
>>> assert(intrin->dest.is_ssa);
>>> nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
>>> nir_src_for_ssa(block_index));
>>> nir_instr_remove(&intrin->instr);
>>> @@ -268,6 +284,39 @@ lower_load_constant(nir_intrinsic_instr *intrin,
>>> nir_instr_remove(&intrin->instr);
>>> }
>>>
>>> +static void
>>> +lower_inline_uniform_block(nir_intrinsic_instr *intrin,
>>> + struct apply_pipeline_layout_state *state)
>>> +{
>>> + if (!intrin->src[0].ssa->parent_instr ||
>>> + intrin->src[0].ssa->parent_instr->type != nir_instr_type_alu)
>>> + return;
>>> +
>>> + nir_alu_instr *alu =
>>> nir_instr_as_alu(intrin->src[0].ssa->parent_instr);
>>> + if (alu->op != nir_op_vec2)
>>> + return;
>>> +
>>> + nir_builder *b = &state->builder;
>>> + b->cursor = nir_before_instr(&intrin->instr);
>>> +
>>> + nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
>>> alu->src[0].src);
>>> +
>>> + nir_const_value *const_inline_block_offset =
>>> + nir_src_as_const_value(alu->src[1].src);
>>> + assert(const_inline_block_offset);
>>> +
>>> + nir_const_value *const_offset =
>>> nir_src_as_const_value(intrin->src[1]);
>>> + nir_ssa_def *offset;
>>> + if (const_offset) {
>>> + offset = nir_iadd(b, nir_imm_int(b, const_offset->u32[0]),
>>> + nir_imm_int(b,
>>> const_inline_block_offset->u32[0]));
>>> + } else {
>>> + offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[1], 1),
>>> + nir_imm_int(b,
>>> const_inline_block_offset->u32[0]));
>>> + }
>>> + nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
>>> nir_src_for_ssa(offset));
>>> +}
>>> +
>>> static void
>>> lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
>>> unsigned *base_index,
>>> @@ -401,6 +450,9 @@ apply_pipeline_layout_block(nir_block *block,
>>> case nir_intrinsic_load_constant:
>>> lower_load_constant(intrin, state);
>>> break;
>>> + case nir_intrinsic_load_ubo:
>>> + lower_inline_uniform_block(intrin, state);
>>> + break;
>>> default:
>>> break;
>>> }
>>> diff --git a/src/intel/vulkan/anv_private.h
>>> b/src/intel/vulkan/anv_private.h
>>> index 372b7c69635..cea8e5786f5 100644
>>> --- a/src/intel/vulkan/anv_private.h
>>> +++ b/src/intel/vulkan/anv_private.h
>>> @@ -159,6 +159,8 @@ struct gen_l3_config;
>>> #define MAX_DYNAMIC_BUFFERS 16
>>> #define MAX_IMAGES 8
>>> #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
>>> +#define MAX_INLINE_UNIFORM_BLOCK_SIZE 256 /* Minimum requirement */
>>> +#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 4 /* Minimum
>>> requirement */
>>>
>>> /* The kernel relocation API has a limitation of a 32-bit delta value
>>> * applied to the address before it is written which, in spite of
>>> it being
>>> @@ -1394,6 +1396,16 @@ struct anv_descriptor_set_binding_layout {
>>> /* Index into the descriptor set buffer views */
>>> int16_t buffer_index;
>>>
>>> + /* TODO/question: should we union fields a bit? inline uniform
>>> blocks have
>>> + * no use for array_size, buffer_index & dynamic_offset_index.
>>> + */
>>> +
>>> + /* Offset into the portion of data allocated for the inline
>>> uniforms. */
>>> + uint32_t inline_block_offset;
>>> +
>>> + /* Length of the portion of data allocated for inline uniforms */
>>> + uint32_t inline_block_length;
>>> +
>>> struct {
>>> /* Index into the binding table for the associated surface */
>>> int16_t surface_index;
>>> @@ -1428,6 +1440,15 @@ struct anv_descriptor_set_layout {
>>> /* Number of dynamic offsets used by this descriptor set */
>>> uint16_t dynamic_offset_count;
>>>
>>> + /* Index into the flattend descriptor set (-1 if unused). */
>>> + int16_t inline_blocks_descriptor_index;
>>> +
>>> + /* Data to allocate into the pool descriptor's inline uniforms BO */
>>> + uint32_t inline_blocks_size;
>>> +
>>> + /* Index into the binding table for the associated surface */
>>> + int16_t inline_blocks_surface_indexes[MESA_SHADER_STAGES];
>>> +
>>> /* Bindings in this descriptor set */
>>> struct anv_descriptor_set_binding_layout binding[0];
>>> };
>>> @@ -1464,6 +1485,11 @@ struct anv_descriptor {
>>> uint64_t range;
>>> };
>>>
>>> + struct {
>>> + struct anv_address inline_address;
>>> + uint64_t inline_range;
>>> + };
>>> +
>>> struct anv_buffer_view *buffer_view;
>>> };
>>> };
>>> @@ -1472,6 +1498,7 @@ struct anv_descriptor_set {
>>> struct anv_descriptor_set_layout *layout;
>>> uint32_t size;
>>> uint32_t buffer_count;
>>> + void *inline_blocks;
>>> struct anv_buffer_view *buffer_views;
>>> struct anv_descriptor descriptors[0];
>>> };
>>> @@ -1507,6 +1534,12 @@ struct anv_descriptor_pool {
>>> struct anv_state_stream surface_state_stream;
>>> void *surface_state_free_list;
>>>
>>> + struct anv_bo *inline_blocks_bo;
>>> + uint32_t inline_blocks_size;
>>> +
>>> + uint32_t free_block_list;
>>> + uint32_t next_block;
>>> +
>>> char data[0];
>>> };
>>>
>>> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
>>> b/src/intel/vulkan/genX_cmd_buffer.c
>>> index 80bebf5a12c..5cb4c0f13af 100644
>>> --- a/src/intel/vulkan/genX_cmd_buffer.c
>>> +++ b/src/intel/vulkan/genX_cmd_buffer.c
>>> @@ -2146,6 +2146,19 @@ emit_binding_table(struct anv_cmd_buffer
>>> *cmd_buffer,
>>> desc->buffer_view->address);
>>> break;
>>>
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
>>> + surface_state =
>>> +
>>> anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
>>> + enum isl_format format =
>>> + anv_isl_format_for_descriptor_type(desc->type);
>>> +
>>> + anv_fill_buffer_surface_state(cmd_buffer->device,
>>> surface_state,
>>> + format, desc->inline_address,
>>> + desc->inline_range, 1);
>>> + add_surface_reloc(cmd_buffer, surface_state,
>>> desc->inline_address);
>>> + break;
>>> + }
>>> +
>>> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
>>> case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
>>> /* Compute the offset within the buffer */
>>> @@ -2445,14 +2458,15 @@ cmd_buffer_flush_push_constants(struct
>>> anv_cmd_buffer *cmd_buffer,
>>> const struct anv_descriptor *desc =
>>> anv_descriptor_for_binding(&gfx_state->base,
>>> binding);
>>>
>>> - if (desc->type ==
>>> VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
>>> + switch (desc->type) {
>>> + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
>>> read_len = MIN2(range->length,
>>> DIV_ROUND_UP(desc->buffer_view->range, 32)
>>> - range->start);
>>> read_addr =
>>> anv_address_add(desc->buffer_view->address,
>>> range->start * 32);
>>> - } else {
>>> - assert(desc->type ==
>>> VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
>>> + break;
>>>
>>> + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
>>> uint32_t dynamic_offset =
>>>
>>> dynamic_offset_for_binding(&gfx_state->base, binding);
>>> uint32_t buf_offset =
>>> @@ -2464,6 +2478,18 @@ cmd_buffer_flush_push_constants(struct
>>> anv_cmd_buffer *cmd_buffer,
>>> DIV_ROUND_UP(buf_range, 32) - range->start);
>>> read_addr =
>>> anv_address_add(desc->buffer->address,
>>> buf_offset +
>>> range->start * 32);
>>> + break;
>>> + }
>>> +
>>> + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
>>> + read_len = MIN2(range->length,
>>> + DIV_ROUND_UP(desc->inline_range, 32) -
>>> range->start);
>>> + read_addr = anv_address_add(desc->inline_address,
>>> + range->start * 32);
>>> + break;
>>> +
>>> + default:
>>> + unreachable("Invalid descriptor");
>>> }
>>> }
>>>
>>> --
>>> 2.19.0.rc1
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list