[Mesa-dev] [PATCH v2 1/3] Revert "radv: Don't store buffer references in the descriptor set."

Samuel Pitoiset samuel.pitoiset at gmail.com
Thu Apr 19 15:46:37 UTC 2018


In order to reduce a performance regression introduced by
4b13fe55a4 ("radv: Keep a global BO list for VkMemory."),
we are going to maintain two different paths.

One when VK_EXT_descriptor_indexing is enabled by the
application because we need to have a global BO list, and
one (the old one) when it's not enabled.

With Talos on Polaris, the global BO list reduces performance
by 10% which is too much for me.

This reverts commit ab6cadd3ecc7fbdd9079808b407674e0b19c52f0.
---
 src/amd/vulkan/radv_cmd_buffer.c     |  4 ++
 src/amd/vulkan/radv_debug.c          |  3 +
 src/amd/vulkan/radv_descriptor_set.c | 82 +++++++++++++++++++++++-----
 src/amd/vulkan/radv_descriptor_set.h |  4 ++
 src/amd/vulkan/radv_private.h        |  2 +
 5 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f6b23f6e739..72fb6d63576 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2206,6 +2206,10 @@ radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
 
 	assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
 
+	for (unsigned j = 0; j < set->layout->buffer_count; ++j)
+		if (set->descriptors[j])
+			radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j], 7);
+
 	if(set->bo)
 		radv_cs_add_buffer(ws, cmd_buffer->cs, set->bo, 8);
 }
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index 2e9e0165237..368bc4b5d05 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -250,6 +250,7 @@ radv_dump_descriptor_set(enum chip_class chip_class,
 	fprintf(f, "\tshader_stages: %x\n", layout->shader_stages);
 	fprintf(f, "\tdynamic_shader_stages: %x\n",
 		layout->dynamic_shader_stages);
+	fprintf(f, "\tbuffer_count: %d\n", layout->buffer_count);
 	fprintf(f, "\tdynamic_offset_count: %d\n",
 		layout->dynamic_offset_count);
 	fprintf(f, "\n");
@@ -265,6 +266,8 @@ radv_dump_descriptor_set(enum chip_class chip_class,
 			layout->binding[i].array_size);
 		fprintf(f, "\t\toffset: %d\n",
 			layout->binding[i].offset);
+		fprintf(f, "\t\tbuffer_offset: %d\n",
+			layout->binding[i].buffer_offset);
 		fprintf(f, "\t\tdynamic_offset_offset: %d\n",
 			layout->binding[i].dynamic_offset_offset);
 		fprintf(f, "\t\tdynamic_offset_count: %d\n",
diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index 55b4aaa388c..4b08a1f0f80 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -117,12 +117,14 @@ VkResult radv_CreateDescriptorSetLayout(
 
 	memset(set_layout->binding, 0, size - sizeof(struct radv_descriptor_set_layout));
 
+	uint32_t buffer_count = 0;
 	uint32_t dynamic_offset_count = 0;
 
 	for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
 		const VkDescriptorSetLayoutBinding *binding = bindings + j;
 		uint32_t b = binding->binding;
 		uint32_t alignment;
+		unsigned binding_buffer_count = 0;
 
 		switch (binding->descriptorType) {
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -131,6 +133,7 @@ VkResult radv_CreateDescriptorSetLayout(
 			set_layout->binding[b].dynamic_offset_count = 1;
 			set_layout->dynamic_shader_stages |= binding->stageFlags;
 			set_layout->binding[b].size = 0;
+			binding_buffer_count = 1;
 			alignment = 1;
 			break;
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
@@ -138,6 +141,7 @@ VkResult radv_CreateDescriptorSetLayout(
 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
 		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
 			set_layout->binding[b].size = 16;
+			binding_buffer_count = 1;
 			alignment = 16;
 			break;
 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
@@ -145,11 +149,13 @@ VkResult radv_CreateDescriptorSetLayout(
 		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
 			/* main descriptor + fmask descriptor */
 			set_layout->binding[b].size = 64;
+			binding_buffer_count = 1;
 			alignment = 32;
 			break;
 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
 			/* main descriptor + fmask descriptor + sampler */
 			set_layout->binding[b].size = 96;
+			binding_buffer_count = 1;
 			alignment = 32;
 			break;
 		case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -165,6 +171,7 @@ VkResult radv_CreateDescriptorSetLayout(
 		set_layout->binding[b].type = binding->descriptorType;
 		set_layout->binding[b].array_size = binding->descriptorCount;
 		set_layout->binding[b].offset = set_layout->size;
+		set_layout->binding[b].buffer_offset = buffer_count;
 		set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
 
 		if (variable_flags && binding->binding < variable_flags->bindingCount &&
@@ -197,6 +204,7 @@ VkResult radv_CreateDescriptorSetLayout(
 		}
 
 		set_layout->size += binding->descriptorCount * set_layout->binding[b].size;
+		buffer_count += binding->descriptorCount * binding_buffer_count;
 		dynamic_offset_count += binding->descriptorCount *
 			set_layout->binding[b].dynamic_offset_count;
 		set_layout->shader_stages |= binding->stageFlags;
@@ -204,6 +212,7 @@ VkResult radv_CreateDescriptorSetLayout(
 
 	free(bindings);
 
+	set_layout->buffer_count = buffer_count;
 	set_layout->dynamic_offset_count = dynamic_offset_count;
 
 	*pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);
@@ -396,7 +405,8 @@ radv_descriptor_set_create(struct radv_device *device,
 			   struct radv_descriptor_set **out_set)
 {
 	struct radv_descriptor_set *set;
-	unsigned range_offset = sizeof(struct radv_descriptor_set);
+	unsigned range_offset = sizeof(struct radv_descriptor_set) +
+		sizeof(struct radeon_winsys_bo *) * layout->buffer_count;
 	unsigned mem_size = range_offset +
 		sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;
 
@@ -700,16 +710,25 @@ VkResult radv_FreeDescriptorSets(
 }
 
 static void write_texel_buffer_descriptor(struct radv_device *device,
+					  struct radv_cmd_buffer *cmd_buffer,
 					  unsigned *dst,
+					  struct radeon_winsys_bo **buffer_list,
 					  const VkBufferView _buffer_view)
 {
 	RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
 
 	memcpy(dst, buffer_view->state, 4 * 4);
+
+	if (cmd_buffer)
+		radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo, 7);
+	else
+		*buffer_list = buffer_view->bo;
 }
 
 static void write_buffer_descriptor(struct radv_device *device,
+                                    struct radv_cmd_buffer *cmd_buffer,
                                     unsigned *dst,
+                                    struct radeon_winsys_bo **buffer_list,
                                     const VkDescriptorBufferInfo *buffer_info)
 {
 	RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
@@ -730,10 +749,15 @@ static void write_buffer_descriptor(struct radv_device *device,
 		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
 		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
+	if (cmd_buffer)
+		radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo, 7);
+	else
+		*buffer_list = buffer->bo;
 }
 
 static void write_dynamic_buffer_descriptor(struct radv_device *device,
                                             struct radv_descriptor_range *range,
+                                            struct radeon_winsys_bo **buffer_list,
                                             const VkDescriptorBufferInfo *buffer_info)
 {
 	RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
@@ -746,11 +770,15 @@ static void write_dynamic_buffer_descriptor(struct radv_device *device,
 	va += buffer_info->offset + buffer->offset;
 	range->va = va;
 	range->size = size;
+
+	*buffer_list = buffer->bo;
 }
 
 static void
 write_image_descriptor(struct radv_device *device,
+		       struct radv_cmd_buffer *cmd_buffer,
 		       unsigned *dst,
+		       struct radeon_winsys_bo **buffer_list,
 		       VkDescriptorType descriptor_type,
 		       const VkDescriptorImageInfo *image_info)
 {
@@ -764,18 +792,25 @@ write_image_descriptor(struct radv_device *device,
 	}
 
 	memcpy(dst, descriptor, 16 * 4);
+
+	if (cmd_buffer)
+		radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo, 7);
+	else
+		*buffer_list = iview->bo;
 }
 
 static void
 write_combined_image_sampler_descriptor(struct radv_device *device,
+					struct radv_cmd_buffer *cmd_buffer,
 					unsigned *dst,
+					struct radeon_winsys_bo **buffer_list,
 					VkDescriptorType descriptor_type,
 					const VkDescriptorImageInfo *image_info,
 					bool has_sampler)
 {
 	RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
 
-	write_image_descriptor(device, dst, descriptor_type, image_info);
+	write_image_descriptor(device, cmd_buffer, dst, buffer_list, descriptor_type, image_info);
 	/* copy over sampler state */
 	if (has_sampler)
 		memcpy(dst + 16, sampler->state, 16);
@@ -808,7 +843,7 @@ void radv_update_descriptor_sets(
 		const struct radv_descriptor_set_binding_layout *binding_layout =
 			set->layout->binding + writeset->dstBinding;
 		uint32_t *ptr = set->mapped_ptr;
-
+		struct radeon_winsys_bo **buffer_list =  set->descriptors;
 		/* Immutable samplers are not copied into push descriptors when they are
 		 * allocated, so if we are writing push descriptors we have to copy the
 		 * immutable samplers into them now.
@@ -819,6 +854,8 @@ void radv_update_descriptor_sets(
 
 		ptr += binding_layout->offset / 4;
 		ptr += binding_layout->size * writeset->dstArrayElement / 4;
+		buffer_list += binding_layout->buffer_offset;
+		buffer_list += writeset->dstArrayElement;
 		for (j = 0; j < writeset->descriptorCount; ++j) {
 			switch(writeset->descriptorType) {
 			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -827,25 +864,28 @@ void radv_update_descriptor_sets(
 				idx += binding_layout->dynamic_offset_offset;
 				assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
 				write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx,
-								writeset->pBufferInfo + j);
+								buffer_list, writeset->pBufferInfo + j);
 				break;
 			}
 			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
 			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-				write_buffer_descriptor(device, ptr, writeset->pBufferInfo + j);
+				write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+							writeset->pBufferInfo + j);
 				break;
 			case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
 			case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-				write_texel_buffer_descriptor(device, ptr, writeset->pTexelBufferView[j]);
+				write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+							      writeset->pTexelBufferView[j]);
 				break;
 			case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
 			case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
 			case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-				write_image_descriptor(device, ptr, writeset->descriptorType,
+				write_image_descriptor(device, cmd_buffer, ptr, buffer_list,
+						       writeset->descriptorType,
 						       writeset->pImageInfo + j);
 				break;
 			case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-				write_combined_image_sampler_descriptor(device, ptr,
+				write_combined_image_sampler_descriptor(device, cmd_buffer, ptr, buffer_list,
 									writeset->descriptorType,
 									writeset->pImageInfo + j,
 									!binding_layout->immutable_samplers_offset);
@@ -868,6 +908,7 @@ void radv_update_descriptor_sets(
 				break;
 			}
 			ptr += binding_layout->size / 4;
+			++buffer_list;
 		}
 
 	}
@@ -884,6 +925,8 @@ void radv_update_descriptor_sets(
 			dst_set->layout->binding + copyset->dstBinding;
 		uint32_t *src_ptr = src_set->mapped_ptr;
 		uint32_t *dst_ptr = dst_set->mapped_ptr;
+		struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
+		struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
 
 		src_ptr += src_binding_layout->offset / 4;
 		dst_ptr += dst_binding_layout->offset / 4;
@@ -891,6 +934,12 @@ void radv_update_descriptor_sets(
 		src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
 		dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
 
+		src_buffer_list += src_binding_layout->buffer_offset;
+		src_buffer_list += copyset->srcArrayElement;
+
+		dst_buffer_list += dst_binding_layout->buffer_offset;
+		dst_buffer_list += copyset->dstArrayElement;
+
 		for (j = 0; j < copyset->descriptorCount; ++j) {
 			switch (src_binding_layout->type) {
 			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -911,6 +960,9 @@ void radv_update_descriptor_sets(
 			}
 			src_ptr += src_binding_layout->size / 4;
 			dst_ptr += dst_binding_layout->size / 4;
+			dst_buffer_list[j] = src_buffer_list[j];
+			++src_buffer_list;
+			++dst_buffer_list;
 		}
 	}
 }
@@ -952,6 +1004,7 @@ VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device,
 		const VkDescriptorUpdateTemplateEntryKHR *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
 		const struct radv_descriptor_set_binding_layout *binding_layout =
 			set_layout->binding + entry->dstBinding;
+		const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
 		const uint32_t *immutable_samplers = NULL;
 		uint32_t dst_offset;
 		uint32_t dst_stride;
@@ -990,6 +1043,7 @@ VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device,
 			.src_stride = entry->stride,
 			.dst_offset = dst_offset,
 			.dst_stride = dst_stride,
+			.buffer_offset = buffer_offset,
 			.has_sampler = !binding_layout->immutable_samplers_offset,
 			.immutable_samplers = immutable_samplers
 		};
@@ -1022,6 +1076,7 @@ void radv_update_descriptor_set_with_template(struct radv_device *device,
 	uint32_t i;
 
 	for (i = 0; i < templ->entry_count; ++i) {
+		struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset;
 		uint32_t *pDst = set->mapped_ptr + templ->entry[i].dst_offset;
 		const uint8_t *pSrc = ((const uint8_t *) pData) + templ->entry[i].src_offset;
 		uint32_t j;
@@ -1033,28 +1088,28 @@ void radv_update_descriptor_set_with_template(struct radv_device *device,
 				const unsigned idx = templ->entry[i].dst_offset + j;
 				assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
 				write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx,
-								(struct VkDescriptorBufferInfo *) pSrc);
+								buffer_list, (struct VkDescriptorBufferInfo *) pSrc);
 				break;
 			}
 			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
 			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-				write_buffer_descriptor(device, pDst,
+				write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
 				                        (struct VkDescriptorBufferInfo *) pSrc);
 				break;
 			case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
 			case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-				write_texel_buffer_descriptor(device, pDst,
+				write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
 						              *(VkBufferView *) pSrc);
 				break;
 			case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
 			case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
 			case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-				write_image_descriptor(device, pDst,
+				write_image_descriptor(device, cmd_buffer, pDst, buffer_list,
 						       templ->entry[i].descriptor_type,
 					               (struct VkDescriptorImageInfo *) pSrc);
 				break;
 			case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-				write_combined_image_sampler_descriptor(device, pDst,
+				write_combined_image_sampler_descriptor(device, cmd_buffer, pDst, buffer_list,
 									templ->entry[i].descriptor_type,
 									(struct VkDescriptorImageInfo *) pSrc,
 									templ->entry[i].has_sampler);
@@ -1074,6 +1129,7 @@ void radv_update_descriptor_set_with_template(struct radv_device *device,
 			}
 		        pSrc += templ->entry[i].src_stride;
 			pDst += templ->entry[i].dst_stride;
+			++buffer_list;
 		}
 	}
 }
diff --git a/src/amd/vulkan/radv_descriptor_set.h b/src/amd/vulkan/radv_descriptor_set.h
index d1cba953f7e..d8431241fd9 100644
--- a/src/amd/vulkan/radv_descriptor_set.h
+++ b/src/amd/vulkan/radv_descriptor_set.h
@@ -35,6 +35,7 @@ struct radv_descriptor_set_binding_layout {
    uint32_t array_size;
 
    uint32_t offset;
+   uint32_t buffer_offset;
    uint16_t dynamic_offset_offset;
 
    uint16_t dynamic_offset_count;
@@ -61,6 +62,9 @@ struct radv_descriptor_set_layout {
    uint16_t shader_stages;
    uint16_t dynamic_shader_stages;
 
+   /* Number of buffers in this descriptor set */
+   uint32_t buffer_count;
+
    /* Number of dynamic offsets used by this descriptor set */
    uint16_t dynamic_offset_count;
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4a860c595fb..dfe4c5f9422 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -698,6 +698,8 @@ struct radv_descriptor_set {
 	uint64_t va;
 	uint32_t *mapped_ptr;
 	struct radv_descriptor_range *dynamic_descriptors;
+
+	struct radeon_winsys_bo *descriptors[0];
 };
 
 struct radv_push_descriptor_set
-- 
2.17.0



More information about the mesa-dev mailing list