[Mesa-dev] [PATCH 4/9] radv: Add compute DCC decompress.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Fri Dec 29 02:06:12 UTC 2017


We do an in-place copy where we read the compressed data and write it back
decompressed. By doing this in sizes that cover entire DCC blocks, and by
waiting for all reads in a block to finish before starting to write, we avoid
corruption.

At the end we clear the DCC metadata to 0xffffffff.
---
 src/amd/vulkan/radv_meta.h            |   3 +
 src/amd/vulkan/radv_meta_fast_clear.c | 268 ++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h         |   4 +
 3 files changed, 275 insertions(+)

diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index 3edf5fa6461..9f3198e8797 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -171,6 +171,9 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 const VkImageSubresourceRange *subresourceRange);
+void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
+			struct radv_image *image,
+                        const VkImageSubresourceRange *subresourceRange);
 
 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     struct radv_image *src_image,
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index 2603229a1f7..98e8f6ac18a 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -28,6 +28,160 @@
 #include "radv_private.h"
 #include "sid.h"
 
+/* Compute shader: txf each texel from binding 0, barrier, then store it to the binding 1 image at the same coordinate. */
+static nir_shader *
+build_dcc_decompress_compute_shader(struct radv_device *dev)
+{
+	nir_builder b;
+	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, "dcc_decompress_compute");
+
+	/* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
+	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+						      buf_type, "s_tex");
+	input_img->data.descriptor_set = 0;
+	input_img->data.binding = 0;
+
+	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+						       img_type, "out_img");
+	output_img->data.descriptor_set = 0;
+	output_img->data.binding = 1;
+
+	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
+	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
+	nir_ssa_def *block_size = nir_imm_ivec4(&b,
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);
+
+	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
+	tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+	tex->op = nir_texop_txf;
+	tex->src[0].src_type = nir_tex_src_coord;
+	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
+	tex->src[1].src_type = nir_tex_src_lod;
+	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+	tex->dest_type = nir_type_float;
+	tex->is_array = false;
+	tex->coord_components = 2;
+	tex->texture = nir_deref_var_create(tex, input_img);
+	tex->sampler = NULL;
+
+	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+	nir_builder_instr_insert(&b, &tex->instr);
+	/* Barriers between the fetch and the store: the copy is in place, so all reads in the workgroup must precede any write. */
+	nir_intrinsic_instr *membar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_memory_barrier);
+	nir_builder_instr_insert(&b, &membar->instr);
+
+	nir_intrinsic_instr *bar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_barrier);
+	nir_builder_instr_insert(&b, &bar->instr);
+
+	nir_ssa_def *outval = &tex->dest.ssa;
+	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
+	store->src[0] = nir_src_for_ssa(global_id);
+	store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); /* presumably the unused sample index - TODO confirm */
+	store->src[2] = nir_src_for_ssa(outval);
+	store->variables[0] = nir_deref_var_create(store, output_img);
+
+	nir_builder_instr_insert(&b, &store->instr);
+	return b.shader;
+}
+
+static VkResult
+create_dcc_compress_compute(struct radv_device *device)
+{
+	VkResult result = VK_SUCCESS;
+	struct radv_shader_module cs = { .nir = NULL };
+	/* NOTE(review): despite "compress" in the name, this sets up the DCC decompress compute pipeline and its layouts. */
+	cs.nir = build_dcc_decompress_compute_shader(device);
+	/* Push-descriptor set layout: binding 0 is a sampled image, binding 1 a storage image, both compute-stage only. */
+	VkDescriptorSetLayoutCreateInfo ds_create_info = {
+		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+		.bindingCount = 2,
+		.pBindings = (VkDescriptorSetLayoutBinding[]) {
+			{
+				.binding = 0,
+				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+				.descriptorCount = 1,
+				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+				.pImmutableSamplers = NULL
+			},
+			{
+				.binding = 1,
+				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+				.descriptorCount = 1,
+				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+				.pImmutableSamplers = NULL
+			},
+		}
+	};
+
+	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
+						&ds_create_info,
+						&device->meta_state.alloc,
+						&device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
+	if (result != VK_SUCCESS)
+		goto cleanup;
+
+	/* Pipeline layout: the single set layout created above plus an 8-byte compute push-constant range. */
+	VkPipelineLayoutCreateInfo pl_create_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 1,
+		.pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
+		.pushConstantRangeCount = 1,
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 8},
+	};
+
+	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+					  &pl_create_info,
+					  &device->meta_state.alloc,
+					  &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
+	if (result != VK_SUCCESS)
+		goto cleanup;
+
+	/* Compute pipeline using the NIR shader module built at the top of this function. */
+
+	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+		.module = radv_shader_module_to_handle(&cs),
+		.pName = "main",
+		.pSpecializationInfo = NULL,
+	};
+
+	VkComputePipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+		.stage = pipeline_shader_stage,
+		.flags = 0,
+		.layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
+	};
+
+	result = radv_CreateComputePipelines(radv_device_to_handle(device),
+					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					     1, &vk_pipeline_info, NULL,
+					     &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+	if (result != VK_SUCCESS)
+		goto cleanup;
+
+cleanup:
+	ralloc_free(cs.nir); /* reached on success too: the NIR is freed on every path */
+	return result;
+}
+
 static VkResult
 create_pass(struct radv_device *device)
 {
@@ -322,6 +476,16 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
 				   state->fast_clear_flush.p_layout,
 				   &state->alloc);
+
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->fast_clear_flush.dcc_decompress_compute_pipeline,
+			     &state->alloc);
+	radv_DestroyPipelineLayout(radv_device_to_handle(device),
+				   state->fast_clear_flush.dcc_decompress_compute_p_layout,
+				   &state->alloc);
+	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+	                                state->fast_clear_flush.dcc_decompress_compute_ds_layout,
+	                                &state->alloc);
 }
 
 VkResult
@@ -351,6 +515,10 @@ radv_device_init_meta_fast_clear_flush_state(struct radv_device *device)
 	if (res != VK_SUCCESS)
 		goto fail;
 
+	res = create_dcc_compress_compute(device);
+	if (res != VK_SUCCESS)
+		goto fail;
+
 	goto cleanup;
 
 fail:
@@ -521,3 +689,103 @@ radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer,
 {
 	radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
 }
+
+static void
+radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
+                            struct radv_image *image,
+                            const VkImageSubresourceRange *subresourceRange)
+{
+	struct radv_meta_saved_state saved_state;
+	struct radv_image_view iview = {0};
+	struct radv_device *device = cmd_buffer->device;
+
+	/* Compute-path DCC decompress. Assumes a 2D image with 1 layer and 1 mip level; subresourceRange is not consulted. */
+	struct radv_cmd_state *state = &cmd_buffer->state;
+
+	state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+			     RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+	radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
+	                                         RADV_META_SAVE_COMPUTE_PIPELINE);
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+	                     VK_PIPELINE_BIND_POINT_COMPUTE,
+	                     device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+	/* One view of level 0 / layer 0 is bound below as both the sampled source and the storage destination. */
+	radv_image_view_init(&iview, cmd_buffer->device,
+			     &(VkImageViewCreateInfo) {
+				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					     .image = radv_image_to_handle(image),
+					     .viewType = VK_IMAGE_VIEW_TYPE_2D,
+					     .format = image->vk_format,
+					     .subresourceRange = {
+						.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+						.baseMipLevel = 0,
+						.levelCount = 1,
+						.baseArrayLayer = 0,
+						.layerCount = 1
+					     },
+			     });
+
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_COMPUTE,
+				      device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
+				      0, /* set */
+				      2, /* descriptorWriteCount */
+				      (VkWriteDescriptorSet[]) {
+				              {
+				                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+				                       .dstBinding = 0,
+				                       .dstArrayElement = 0,
+				                       .descriptorCount = 1,
+				                       .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+				                       .pImageInfo = (VkDescriptorImageInfo[]) {
+				                               {
+				                                       .sampler = VK_NULL_HANDLE,
+				                                       .imageView = radv_image_view_to_handle(&iview),
+				                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+				                               },
+				                       }
+				              },
+				              {
+				                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+				                       .dstBinding = 1,
+				                       .dstArrayElement = 0,
+				                       .descriptorCount = 1,
+				                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+				                       .pImageInfo = (VkDescriptorImageInfo[]) {
+				                               {
+				                                       .sampler = VK_NULL_HANDLE,
+				                                       .imageView = radv_image_view_to_handle(&iview),
+				                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+				                               },
+				                       }
+				              }
+				      });
+
+	radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1);
+
+	/* Restore the saved state now: the fill buffer below does its own saving. */
+	radv_meta_restore(&saved_state, cmd_buffer);
+
+	state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+			     RADV_CMD_FLAG_INV_VMEM_L1;
+	/* Finally overwrite the image's whole DCC metadata range with 0xffffffff. */
+	state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo,
+					      image->offset + image->dcc_offset,
+					      image->surface.dcc_size, 0xffffffff);
+
+	state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+			     RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+}
+
+void
+radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
+                    struct radv_image *image,
+                    const VkImageSubresourceRange *subresourceRange)
+{
+	if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
+		radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
+	else /* every queue family other than RADV_QUEUE_GENERAL takes the compute shader path */
+		radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4fb3c218eb3..d7e9070fbb8 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -492,6 +492,10 @@ struct radv_meta_state {
 		VkPipeline                                fmask_decompress_pipeline;
 		VkPipeline                                dcc_decompress_pipeline;
 		VkRenderPass                              pass;
+
+		VkDescriptorSetLayout                     dcc_decompress_compute_ds_layout;
+		VkPipelineLayout                          dcc_decompress_compute_p_layout;
+		VkPipeline                                dcc_decompress_compute_pipeline;
 	} fast_clear_flush;
 
 	struct {
-- 
2.15.1



More information about the mesa-dev mailing list