Mesa (master): radv: implement all depth/stencil resolve modes using compute

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jun 21 12:48:02 UTC 2019


Module: Mesa
Branch: master
Commit: 5cf350f56511e94caccdab4afafadca7c6f316f2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5cf350f56511e94caccdab4afafadca7c6f316f2

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Wed May 22 09:42:12 2019 +0200

radv: implement all depth/stencil resolve modes using compute

This path supports layers, but it requires decompressing HTILE
before resolving. The driver also needs to fix up HTILE after
the resolve. This path is probably slower than the graphics one.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/amd/vulkan/radv_meta_resolve_cs.c | 506 ++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h         |  16 ++
 2 files changed, 522 insertions(+)

diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index fc4bcf27bb9..c06f0f2c5ce 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -139,6 +139,165 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
 	return b.shader;
 }
 
+enum {
+	DEPTH_RESOLVE,
+	STENCIL_RESOLVE,
+};
+
+static const char *
+get_resolve_mode_str(VkResolveModeFlagBitsKHR resolve_mode)
+{
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		return "zero";
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		return "average";
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		return "min";
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		return "max";
+	default:
+		unreachable("invalid resolve mode");
+	}
+}
+
+static nir_shader *
+build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
+					   int index,
+					   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	nir_builder b;
+	char name[64];
+	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+								 false,
+								 false,
+								 GLSL_TYPE_FLOAT);
+	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	snprintf(name, 64, "meta_resolve_cs_%s-%s-%d",
+		 index == DEPTH_RESOLVE ? "depth" : "stencil",
+		 get_resolve_mode_str(resolve_mode), samples);
+
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
+
+	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+						      sampler_type, "s_tex");
+	input_img->data.descriptor_set = 0;
+	input_img->data.binding = 0;
+
+	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+						       img_type, "out_img");
+	output_img->data.descriptor_set = 0;
+	output_img->data.binding = 1;
+	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+	nir_ssa_def *block_size = nir_imm_ivec4(&b,
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);
+
+	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(src_offset, 0);
+	nir_intrinsic_set_range(src_offset, 16);
+	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	src_offset->num_components = 2;
+	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
+	nir_builder_instr_insert(&b, &src_offset->instr);
+
+	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(dst_offset, 0);
+	nir_intrinsic_set_range(dst_offset, 16);
+	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+	dst_offset->num_components = 2;
+	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
+	nir_builder_instr_insert(&b, &dst_offset->instr);
+
+	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
+
+	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+	nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float : nir_type_uint;
+
+	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+	tex->op = nir_texop_txf_ms;
+	tex->src[0].src_type = nir_tex_src_coord;
+	tex->src[0].src = nir_src_for_ssa(img_coord);
+	tex->src[1].src_type = nir_tex_src_ms_index;
+	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+	tex->src[2].src_type = nir_tex_src_texture_deref;
+	tex->src[2].src = nir_src_for_ssa(input_img_deref);
+	tex->dest_type = type;
+	tex->is_array = false;
+	tex->coord_components = 2;
+
+	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+	nir_builder_instr_insert(&b, &tex->instr);
+
+	nir_ssa_def *outval = &tex->dest.ssa;
+
+	if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+		for (int i = 1; i < samples; i++) {
+			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+			tex_add->op = nir_texop_txf_ms;
+			tex_add->src[0].src_type = nir_tex_src_coord;
+			tex_add->src[0].src = nir_src_for_ssa(img_coord);
+			tex_add->src[1].src_type = nir_tex_src_ms_index;
+			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+			tex_add->src[2].src_type = nir_tex_src_texture_deref;
+			tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+			tex_add->dest_type = type;
+			tex_add->is_array = false;
+			tex_add->coord_components = 2;
+
+			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+			nir_builder_instr_insert(&b, &tex_add->instr);
+
+			switch (resolve_mode) {
+			case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+				assert(index == DEPTH_RESOLVE);
+				outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+				break;
+			case VK_RESOLVE_MODE_MIN_BIT_KHR:
+				if (index == DEPTH_RESOLVE)
+					outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+				else
+					outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+				break;
+			case VK_RESOLVE_MODE_MAX_BIT_KHR:
+				if (index == DEPTH_RESOLVE)
+					outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+				else
+					outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+				break;
+			default:
+				unreachable("invalid resolve mode");
+			}
+		}
+
+		if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+			outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+	}
+
+	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
+	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+	store->num_components = 4;
+	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
+	store->src[1] = nir_src_for_ssa(coord);
+	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+	store->src[3] = nir_src_for_ssa(outval);
+	nir_builder_instr_insert(&b, &store->instr);
+	return b.shader;
+}
 
 static VkResult
 create_layout(struct radv_device *device)
@@ -248,6 +407,57 @@ fail:
 	return result;
 }
 
+static VkResult
+create_depth_stencil_resolve_pipeline(struct radv_device *device,
+				      int samples,
+				      int index,
+				      VkResolveModeFlagBitsKHR resolve_mode,
+				      VkPipeline *pipeline)
+{
+	VkResult result;
+	struct radv_shader_module cs = { .nir = NULL };
+
+	mtx_lock(&device->meta_state.mtx);
+	if (*pipeline) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
+	cs.nir = build_depth_stencil_resolve_compute_shader(device, samples,
+							    index, resolve_mode);
+
+	/* compute shader */
+	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+		.module = radv_shader_module_to_handle(&cs),
+		.pName = "main",
+		.pSpecializationInfo = NULL,
+	};
+
+	VkComputePipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+		.stage = pipeline_shader_stage,
+		.flags = 0,
+		.layout = device->meta_state.resolve_compute.p_layout,
+	};
+
+	result = radv_CreateComputePipelines(radv_device_to_handle(device),
+					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					     1, &vk_pipeline_info, NULL,
+					     pipeline);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	ralloc_free(cs.nir);
+	mtx_unlock(&device->meta_state.mtx);
+	return VK_SUCCESS;
+fail:
+	ralloc_free(cs.nir);
+	mtx_unlock(&device->meta_state.mtx);
+	return result;
+}
+
 VkResult
 radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
 {
@@ -279,8 +489,56 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_
 		if (res != VK_SUCCESS)
 			goto fail;
 
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
+							    &state->resolve_compute.depth[i].average_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_MAX_BIT_KHR,
+							    &state->resolve_compute.depth[i].max_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_MIN_BIT_KHR,
+							    &state->resolve_compute.depth[i].min_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    STENCIL_RESOLVE,
+							    VK_RESOLVE_MODE_MAX_BIT_KHR,
+							    &state->resolve_compute.stencil[i].max_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    STENCIL_RESOLVE,
+							    VK_RESOLVE_MODE_MIN_BIT_KHR,
+							    &state->resolve_compute.stencil[i].min_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
 	}
 
+	res = create_depth_stencil_resolve_pipeline(device, 0,
+						    DEPTH_RESOLVE,
+						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+						    &state->resolve_compute.depth_zero_pipeline);
+	if (res != VK_SUCCESS)
+		goto fail;
+
+	res = create_depth_stencil_resolve_pipeline(device, 0,
+						    STENCIL_RESOLVE,
+						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+						    &state->resolve_compute.stencil_zero_pipeline);
+	if (res != VK_SUCCESS)
+		goto fail;
+
 	return VK_SUCCESS;
 fail:
 	radv_device_finish_meta_resolve_compute_state(device);
@@ -303,8 +561,36 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
 		radv_DestroyPipeline(radv_device_to_handle(device),
 				     state->resolve_compute.rc[i].srgb_pipeline,
 				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].average_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].max_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].min_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.stencil[i].max_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.stencil[i].min_pipeline,
+				     &state->alloc);
 	}
 
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->resolve_compute.depth_zero_pipeline,
+			     &state->alloc);
+
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->resolve_compute.stencil_zero_pipeline,
+			     &state->alloc);
+
 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
 					state->resolve_compute.ds_layout,
 					&state->alloc);
@@ -411,6 +697,113 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 
 }
 
+static void
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
+			   struct radv_image_view *src_iview,
+			   struct radv_image_view *dest_iview,
+			   const VkOffset2D *src_offset,
+			   const VkOffset2D *dest_offset,
+			   const VkExtent2D *resolve_extent,
+			   VkImageAspectFlags aspects,
+			   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_device *device = cmd_buffer->device;
+	const uint32_t samples = src_iview->image->info.samples;
+	const uint32_t samples_log2 = ffs(samples) - 1;
+	VkPipeline *pipeline;
+
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_COMPUTE,
+				      device->meta_state.resolve_compute.p_layout,
+				      0, /* set */
+				      2, /* descriptorWriteCount */
+				      (VkWriteDescriptorSet[]) {
+					{
+						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+						.dstBinding = 0,
+						.dstArrayElement = 0,
+						.descriptorCount = 1,
+						.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+			                      .pImageInfo = (VkDescriptorImageInfo[]) {
+		                              {
+	                                      .sampler = VK_NULL_HANDLE,
+					      .imageView = radv_image_view_to_handle(src_iview),
+	                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL	                              },
+	                      }
+		              },
+		              {
+		                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+		                      .dstBinding = 1,
+		                      .dstArrayElement = 0,
+				      .descriptorCount = 1,
+				      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+	                      .pImageInfo = (VkDescriptorImageInfo[]) {
+                              {
+                                      .sampler = VK_NULL_HANDLE,
+                                     .imageView = radv_image_view_to_handle(dest_iview),
+                                     .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+                              },
+                      }
+			      }
+				      });
+
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
+		break;
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+		pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
+		break;
+	default:
+		unreachable("invalid resolve mode");
+	}
+
+	if (!*pipeline) {
+		int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+		VkResult ret;
+
+		ret = create_depth_stencil_resolve_pipeline(device, samples,
+							    index, resolve_mode,
+							    pipeline);
+		if (ret != VK_SUCCESS) {
+			cmd_buffer->record_result = ret;
+			return;
+		}
+	}
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+			     VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+
+	unsigned push_constants[4] = {
+		src_offset->x,
+		src_offset->y,
+		dest_offset->x,
+		dest_offset->y,
+	};
+	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+			      device->meta_state.resolve_compute.p_layout,
+			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+			      push_constants);
+	radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
+
+}
+
 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     struct radv_image *src_image,
 				     VkFormat src_format,
@@ -561,3 +954,116 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
 	                                RADV_CMD_FLAG_INV_VMEM_L1;
 }
+
+void
+radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+				      VkImageAspectFlags aspects,
+				      VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	struct radv_meta_saved_state saved_state;
+	struct radv_subpass_barrier barrier;
+	uint32_t layer_count = fb->layers;
+
+	if (subpass->view_mask)
+		layer_count = util_last_bit(subpass->view_mask);
+
+	/* Resolves happen before the end-of-subpass barriers get executed, so
+	 * we have to make the attachment shader-readable.
+	 */
+	barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+	barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+	barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+	radv_subpass_barrier(cmd_buffer, &barrier);
+
+	radv_decompress_resolve_subpass_src(cmd_buffer);
+
+	radv_meta_save(&saved_state, cmd_buffer,
+		       RADV_META_SAVE_COMPUTE_PIPELINE |
+		       RADV_META_SAVE_CONSTANTS |
+		       RADV_META_SAVE_DESCRIPTORS);
+
+	struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+	struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
+
+	struct radv_image_view *src_iview =
+		cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
+	struct radv_image_view *dst_iview =
+		cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
+
+	struct radv_image *src_image = src_iview->image;
+	struct radv_image *dst_image = dst_iview->image;
+
+	for (uint32_t layer = 0; layer < layer_count; layer++) {
+		struct radv_image_view tsrc_iview;
+		radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+				     &(VkImageViewCreateInfo) {
+					.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					.image = radv_image_to_handle(src_image),
+					.viewType = radv_meta_get_view_type(src_image),
+					.format = src_iview->vk_format,
+					.subresourceRange = {
+						.aspectMask = aspects,
+						.baseMipLevel = src_iview->base_mip,
+						.levelCount = 1,
+						.baseArrayLayer = src_iview->base_layer + layer,
+						.layerCount = 1,
+					},
+				     });
+
+		struct radv_image_view tdst_iview;
+		radv_image_view_init(&tdst_iview, cmd_buffer->device,
+				     &(VkImageViewCreateInfo) {
+					.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					.image = radv_image_to_handle(dst_image),
+					.viewType = radv_meta_get_view_type(dst_image),
+					.format = dst_iview->vk_format,
+					.subresourceRange = {
+						.aspectMask = aspects,
+						.baseMipLevel = dst_iview->base_mip,
+						.levelCount = 1,
+						.baseArrayLayer = dst_iview->base_layer + layer,
+						.layerCount = 1,
+					},
+				     });
+
+		emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
+					   &(VkOffset2D) { 0, 0 },
+					   &(VkOffset2D) { 0, 0 },
+					   &(VkExtent2D) { fb->width, fb->height },
+					   aspects,
+					   resolve_mode);
+	}
+
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+	                                RADV_CMD_FLAG_INV_VMEM_L1;
+
+	if (radv_image_has_htile(dst_image)) {
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+			VkImageSubresourceRange range = {};
+			range.aspectMask = aspects;
+			range.baseMipLevel = dst_iview->base_mip;
+			range.levelCount = 1;
+			range.baseArrayLayer = dst_iview->base_layer;
+			range.layerCount = layer_count;
+
+			uint32_t clear_value = 0xfffc000f;
+
+			if (vk_format_is_stencil(dst_image->vk_format) &&
+			    subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+				/* Only clear the stencil part of the HTILE
+				 * buffer if it's resolved, otherwise this
+				 * might break if the stencil has been cleared.
+				 */
+				clear_value = 0xfffff30f;
+			}
+
+			cmd_buffer->state.flush_bits |=
+				radv_clear_htile(cmd_buffer, dst_image, &range,
+						 clear_value);
+		}
+	}
+
+	radv_meta_restore(&saved_state, cmd_buffer);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 66483e306d9..9de46494454 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -577,6 +577,19 @@ struct radv_meta_state {
 			VkPipeline                                i_pipeline;
 			VkPipeline                                srgb_pipeline;
 		} rc[MAX_SAMPLES_LOG2];
+
+		VkPipeline depth_zero_pipeline;
+		struct {
+			VkPipeline average_pipeline;
+			VkPipeline max_pipeline;
+			VkPipeline min_pipeline;
+		} depth[MAX_SAMPLES_LOG2];
+
+		VkPipeline stencil_zero_pipeline;
+		struct {
+			VkPipeline max_pipeline;
+			VkPipeline min_pipeline;
+		} stencil[MAX_SAMPLES_LOG2];
 	} resolve_compute;
 
 	struct {
@@ -1256,6 +1269,9 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
+void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+					   VkImageAspectFlags aspects,
+					   VkResolveModeFlagBitsKHR resolve_mode);
 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
 void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
 					   VkImageAspectFlags aspects,




More information about the mesa-commit mailing list