Mesa (master): radv: implement buffer to image operations for R32G32B32

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Oct 16 07:21:54 UTC 2018


Module: Mesa
Branch: master
Commit: 593996bc026c9e383da9683ff30e784b0ea09015
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=593996bc026c9e383da9683ff30e784b0ea09015

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Fri Oct 12 11:30:13 2018 +0200

radv: implement buffer to image operations for R32G32B32

This should fix rendering issues with Batman Arkham City.
We will probably need to implement itob and itoi at some
point, but currently nothing hits these paths.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107765
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/amd/vulkan/radv_meta_bufimage.c | 342 ++++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_meta_copy.c     |   8 +-
 src/amd/vulkan/radv_private.h       |   5 +
 3 files changed, 353 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c
index 9efb971638..73a5034222 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -483,6 +483,214 @@ radv_device_finish_meta_btoi_state(struct radv_device *device)
 			     state->btoi.pipeline_3d, &state->alloc);
 }
 
+/* Buffer to image - special path for R32G32B32 (one store per 32-bit channel) */
+static nir_shader *
+build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
+{
+	nir_builder b;
+	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
+	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+						      buf_type, "s_tex");
+	input_img->data.descriptor_set = 0;
+	input_img->data.binding = 0;
+
+	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+						       img_type, "out_img");
+	output_img->data.descriptor_set = 0;
+	output_img->data.binding = 1;
+	/* global_id = work group id * work group size + local invocation id */
+	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
+	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
+	nir_ssa_def *block_size = nir_imm_ivec4(&b,
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);
+
+	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+	/* 16 bytes of push constants: offset (x, y) at byte 0, pitch at 8, stride at 12 */
+	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(offset, 0);
+	nir_intrinsic_set_range(offset, 16);
+	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	offset->num_components = 2;
+	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
+	nir_builder_instr_insert(&b, &offset->instr);
+
+	nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(pitch, 0);
+	nir_intrinsic_set_range(pitch, 16);
+	pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+	pitch->num_components = 1;
+	nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
+	nir_builder_instr_insert(&b, &pitch->instr);
+
+	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(stride, 0);
+	nir_intrinsic_set_range(stride, 16);
+	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
+	stride->num_components = 1;
+	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
+	nir_builder_instr_insert(&b, &stride->instr);
+
+	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
+	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+	/* Source texel index: pos_y * stride + pos_x */
+	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
+	tmp = nir_iadd(&b, tmp, pos_x);
+
+	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
+	/* Destination position = invocation position + pushed offset */
+	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
+	/* First 32-bit element of the destination texel: y * pitch + x * 3 */
+	nir_ssa_def *global_pos =
+		nir_iadd(&b,
+			 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
+			 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
+
+	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+	tex->op = nir_texop_txf;
+	tex->src[0].src_type = nir_tex_src_coord;
+	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
+	tex->src[1].src_type = nir_tex_src_lod;
+	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+	tex->src[2].src_type = nir_tex_src_texture_deref;
+	tex->src[2].src = nir_src_for_ssa(input_img_deref);
+	tex->dest_type = nir_type_float;
+	tex->is_array = false;
+	tex->coord_components = 1;
+	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+	nir_builder_instr_insert(&b, &tex->instr);
+
+	nir_ssa_def *outval = &tex->dest.ssa;
+	/* Write the 3 channels to consecutive elements, one store each */
+	for (int chan = 0; chan < 3; chan++) {
+		nir_ssa_def *local_pos =
+			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
+
+		nir_ssa_def *coord =
+			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
+
+		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+		store->num_components = 1;
+		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
+		store->src[1] = nir_src_for_ssa(coord);
+		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
+		nir_builder_instr_insert(&b, &store->instr);
+	}
+
+	return b.shader;
+}
+
+static VkResult
+radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
+{
+	VkResult result;
+	struct radv_shader_module cs = { .nir = NULL };
+	/* Create the set layout, pipeline layout and pipeline of meta_state.btoi_r32g32b32. */
+	cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
+	/* Push-descriptor set layout: binding 0 uniform texel buffer, binding 1 storage texel buffer. */
+	VkDescriptorSetLayoutCreateInfo ds_create_info = {
+		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+		.bindingCount = 2,
+		.pBindings = (VkDescriptorSetLayoutBinding[]) {
+			{
+				.binding = 0,
+				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+				.descriptorCount = 1,
+				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+				.pImmutableSamplers = NULL
+			},
+			{
+				.binding = 1,
+				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+				.descriptorCount = 1,
+				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+				.pImmutableSamplers = NULL
+			},
+		}
+	};
+
+	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
+						&ds_create_info,
+						&device->meta_state.alloc,
+						&device->meta_state.btoi_r32g32b32.img_ds_layout);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	/* Pipeline layout: the set layout above plus 16 bytes of compute push constants. */
+	VkPipelineLayoutCreateInfo pl_create_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 1,
+		.pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
+		.pushConstantRangeCount = 1,
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+	};
+
+	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+					  &pl_create_info,
+					  &device->meta_state.alloc,
+					  &device->meta_state.btoi_r32g32b32.img_p_layout);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	/* compute pipeline built from the NIR shader created at the top */
+
+	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+		.module = radv_shader_module_to_handle(&cs),
+		.pName = "main",
+		.pSpecializationInfo = NULL,
+	};
+
+	VkComputePipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+		.stage = pipeline_shader_stage,
+		.flags = 0,
+		.layout = device->meta_state.btoi_r32g32b32.img_p_layout,
+	};
+
+	result = radv_CreateComputePipelines(radv_device_to_handle(device),
+					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					     1, &vk_pipeline_info, NULL,
+					     &device->meta_state.btoi_r32g32b32.pipeline);
+	/* Also reached on success: the NIR shader is freed on every path. */
+fail:
+	ralloc_free(cs.nir);
+	return result;
+}
+
+static void
+radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
+{
+	struct radv_meta_state *state = &device->meta_state;
+	/* Destroy the three objects stored in meta_state.btoi_r32g32b32. */
+	radv_DestroyPipelineLayout(radv_device_to_handle(device),
+				   state->btoi_r32g32b32.img_p_layout, &state->alloc);
+	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+				        state->btoi_r32g32b32.img_ds_layout,
+					&state->alloc);
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->btoi_r32g32b32.pipeline, &state->alloc);
+}
+
 static nir_shader *
 build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
 {
@@ -1056,6 +1264,7 @@ radv_device_finish_meta_bufimage_state(struct radv_device *device)
 {
 	radv_device_finish_meta_itob_state(device);
 	radv_device_finish_meta_btoi_state(device);
+	radv_device_finish_meta_btoi_r32g32b32_state(device);
 	radv_device_finish_meta_itoi_state(device);
 	radv_device_finish_meta_cleari_state(device);
 	radv_device_finish_meta_cleari_r32g32b32_state(device);
@@ -1074,6 +1283,10 @@ radv_device_init_meta_bufimage_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail_btoi;
 
+	result = radv_device_init_meta_btoi_r32g32b32_state(device);
+	if (result != VK_SUCCESS)
+		goto fail_btoi_r32g32b32;
+
 	result = radv_device_init_meta_itoi_state(device);
 	if (result != VK_SUCCESS)
 		goto fail_itoi;
@@ -1093,6 +1306,8 @@ fail_cleari:
 	radv_device_finish_meta_cleari_state(device);
 fail_itoi:
 	radv_device_finish_meta_itoi_state(device);
+fail_btoi_r32g32b32:
+	radv_device_finish_meta_btoi_r32g32b32_state(device);
 fail_btoi:
 	radv_device_finish_meta_btoi_state(device);
 fail_itob:
@@ -1220,6 +1435,125 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 }
 
 static void
+btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
+				struct radv_buffer_view *src,
+				struct radv_buffer_view *dst)
+{
+	struct radv_device *device = cmd_buffer->device;
+	/* Push the source view (binding 0) and destination view (binding 1) as set 0. */
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_COMPUTE,
+				      device->meta_state.btoi_r32g32b32.img_p_layout,
+				      0, /* set */
+				      2, /* descriptorWriteCount */
+				      (VkWriteDescriptorSet[]) {
+				              {
+				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+				                      .dstBinding = 0,
+				                      .dstArrayElement = 0,
+				                      .descriptorCount = 1,
+				                      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
+				              },
+				              {
+				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+				                      .dstBinding = 1,
+				                      .dstArrayElement = 0,
+				                      .descriptorCount = 1,
+				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
+				              }
+				      });
+}
+
+static void
+radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
+				       struct radv_meta_blit2d_buffer *src,
+				       struct radv_meta_blit2d_surf *dst,
+				       unsigned num_rects,
+				       struct radv_meta_blit2d_rect *rects)
+{
+	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
+	struct radv_device_memory mem = { .bo = dst->image->bo };
+	struct radv_device *device = cmd_buffer->device;
+	struct radv_buffer_view src_view, dst_view;
+	unsigned dst_offset = 0;
+	unsigned stride;
+	VkFormat dst_format;
+	VkBuffer buffer;
+	/* Pick the single-channel R32 format matching the 3-channel destination format. */
+	switch (dst->format) {
+	case VK_FORMAT_R32G32B32_UINT:
+		dst_format = VK_FORMAT_R32_UINT;
+		break;
+	case VK_FORMAT_R32G32B32_SINT:
+		dst_format = VK_FORMAT_R32_SINT;
+		break;
+	case VK_FORMAT_R32G32B32_SFLOAT:
+		dst_format = VK_FORMAT_R32_SFLOAT;
+		break;
+	default:
+		unreachable("invalid R32G32B32 format");
+	}
+
+	/* This special btoi path for R32G32B32 formats will write the linear
+	 * image as a buffer with the same underlying memory. The compute
+	 * shader will store all components separately using a R32 format.
+	 */
+	radv_CreateBuffer(radv_device_to_handle(device),
+			  &(VkBufferCreateInfo) {
+				.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+				.flags = 0,
+				.size = dst->image->size,
+				.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
+				.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+			  }, NULL, &buffer);
+
+	radv_BindBufferMemory2(radv_device_to_handle(device), 1,
+			       (VkBindBufferMemoryInfoKHR[]) {
+				    {
+					.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+					.buffer = buffer,
+					.memory = radv_device_memory_to_handle(&mem),
+					.memoryOffset = dst->image->offset,
+				    }
+			       });
+
+	create_bview(cmd_buffer, src->buffer, src->offset,
+		     src->format, &src_view);
+	create_bview(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset,
+		     dst_format, &dst_view);
+	btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+	/* Third push constant; the shader loads it as "pitch" from byte offset 8. */
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+		stride = dst->image->surface.u.gfx9.surf_pitch;
+	} else {
+		stride = dst->image->surface.u.legacy.level[0].nblk_x * 3;
+	}
+	/* Per rect: push { dst_x, dst_y, stride, src->pitch }, then dispatch width x height. */
+	for (unsigned r = 0; r < num_rects; ++r) {
+		unsigned push_constants[4] = {
+			rects[r].dst_x,
+			rects[r].dst_y,
+			stride,
+			src->pitch,
+		};
+
+		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+				      device->meta_state.btoi_r32g32b32.img_p_layout,
+				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+				      push_constants);
+
+		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+	}
+	/* Destroy the temporary buffer handle created above. */
+	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
+}
+
+static void
 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 		      struct radv_buffer_view *src,
 		      struct radv_image_view *dst)
@@ -1269,6 +1603,14 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_buffer_view src_view;
 	struct radv_image_view dst_view;
 
+	if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+	    dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+	    dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
+		radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
+						       num_rects, rects);
+		return;
+	}
+
 	create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
 	create_iview(cmd_buffer, dst, &dst_view);
 	btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c
index f4de5528ed..41da302cf8 100644
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -195,10 +195,14 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 
 
 			/* Perform Blit */
-			if (cs)
+			if (cs ||
+			    (img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+			     img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+			     img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)) {
 				radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
-			else
+			} else {
 				radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
+			}
 
 			/* Once we've done the blit, all of the actual information about
 			 * the image is embedded in the command buffer so we can just
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b35aa8d818..0464fa4a41 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -509,6 +509,11 @@ struct radv_meta_state {
 		VkPipelineLayout                          img_p_layout;
 		VkDescriptorSetLayout                     img_ds_layout;
 		VkPipeline pipeline;
+	} btoi_r32g32b32;
+	struct {
+		VkPipelineLayout                          img_p_layout;
+		VkDescriptorSetLayout                     img_ds_layout;
+		VkPipeline pipeline;
 		VkPipeline pipeline_3d;
 	} itoi;
 	struct {




More information about the mesa-commit mailing list