[Mesa-dev] [PATCH v2 4/9] radv: implement all depth/stencil resolve modes using compute

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Jun 12 09:47:46 UTC 2019


This path supports layers, but it requires decompressing HTILE before
resolving. The driver also needs to fix up HTILE after the resolve.
This path is probably slower than the graphics one.

v2: - use image view format
    - make HTILE uncompressed after resolving

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/vulkan/radv_meta_resolve_cs.c | 506 ++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h         |  16 +
 2 files changed, 522 insertions(+)
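
Note for reviewers (not part of the patch): a minimal sketch, with made-up
names, of the per-pixel math the generated compute shader performs for each
resolve mode, written as plain C over the depth samples of one pixel. Stencil
behaves the same way except that it uses unsigned integer min/max and never
supports the average mode:

	enum resolve_mode { RESOLVE_SAMPLE_ZERO, RESOLVE_AVERAGE, RESOLVE_MIN, RESOLVE_MAX };

	static float
	resolve_depth_samples(const float *samples, unsigned count, enum resolve_mode mode)
	{
		float out = samples[0];	/* sample 0 is always the starting value */

		if (mode == RESOLVE_SAMPLE_ZERO)
			return out;	/* SAMPLE_ZERO ignores the remaining samples */

		for (unsigned i = 1; i < count; i++) {
			switch (mode) {
			case RESOLVE_AVERAGE:
				out += samples[i];	/* accumulate, divide at the end */
				break;
			case RESOLVE_MIN:
				out = samples[i] < out ? samples[i] : out;
				break;
			case RESOLVE_MAX:
				out = samples[i] > out ? samples[i] : out;
				break;
			default:
				break;
			}
		}

		if (mode == RESOLVE_AVERAGE)
			out /= (float)count;	/* average is only valid for the depth aspect */

		return out;
	}

The shader builder below unrolls this loop for a fixed sample count and bakes
the mode into the shader, which is why one pipeline is created per
(aspect, resolve mode, sample count) combination.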

diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index fc4bcf27bb9..c06f0f2c5ce 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -139,6 +139,165 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
 	return b.shader;
 }
 
+enum {
+	DEPTH_RESOLVE,
+	STENCIL_RESOLVE,
+};
+
+static const char *
+get_resolve_mode_str(VkResolveModeFlagBitsKHR resolve_mode)
+{
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		return "zero";
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		return "average";
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		return "min";
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		return "max";
+	default:
+		unreachable("invalid resolve mode");
+	}
+}
+
+static nir_shader *
+build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
+					   int index,
+					   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	nir_builder b;
+	char name[64];
+	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+								 false,
+								 false,
+								 GLSL_TYPE_FLOAT);
+	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	snprintf(name, 64, "meta_resolve_cs_%s-%s-%d",
+		 index == DEPTH_RESOLVE ? "depth" : "stencil",
+		 get_resolve_mode_str(resolve_mode), samples);
+
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
+
+	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+						      sampler_type, "s_tex");
+	input_img->data.descriptor_set = 0;
+	input_img->data.binding = 0;
+
+	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+						       img_type, "out_img");
+	output_img->data.descriptor_set = 0;
+	output_img->data.binding = 1;
+	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+	nir_ssa_def *block_size = nir_imm_ivec4(&b,
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);
+
+	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(src_offset, 0);
+	nir_intrinsic_set_range(src_offset, 16);
+	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	src_offset->num_components = 2;
+	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
+	nir_builder_instr_insert(&b, &src_offset->instr);
+
+	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(dst_offset, 0);
+	nir_intrinsic_set_range(dst_offset, 16);
+	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+	dst_offset->num_components = 2;
+	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
+	nir_builder_instr_insert(&b, &dst_offset->instr);
+
+	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
+
+	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+	nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float : nir_type_uint;
+
+	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+	tex->op = nir_texop_txf_ms;
+	tex->src[0].src_type = nir_tex_src_coord;
+	tex->src[0].src = nir_src_for_ssa(img_coord);
+	tex->src[1].src_type = nir_tex_src_ms_index;
+	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+	tex->src[2].src_type = nir_tex_src_texture_deref;
+	tex->src[2].src = nir_src_for_ssa(input_img_deref);
+	tex->dest_type = type;
+	tex->is_array = false;
+	tex->coord_components = 2;
+
+	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+	nir_builder_instr_insert(&b, &tex->instr);
+
+	nir_ssa_def *outval = &tex->dest.ssa;
+
+	if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+		for (int i = 1; i < samples; i++) {
+			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+			tex_add->op = nir_texop_txf_ms;
+			tex_add->src[0].src_type = nir_tex_src_coord;
+			tex_add->src[0].src = nir_src_for_ssa(img_coord);
+			tex_add->src[1].src_type = nir_tex_src_ms_index;
+			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+			tex_add->src[2].src_type = nir_tex_src_texture_deref;
+			tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+			tex_add->dest_type = type;
+			tex_add->is_array = false;
+			tex_add->coord_components = 2;
+
+			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+			nir_builder_instr_insert(&b, &tex_add->instr);
+
+			switch (resolve_mode) {
+			case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+				assert(index == DEPTH_RESOLVE);
+				outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+				break;
+			case VK_RESOLVE_MODE_MIN_BIT_KHR:
+				if (index == DEPTH_RESOLVE)
+					outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+				else
+					outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+				break;
+			case VK_RESOLVE_MODE_MAX_BIT_KHR:
+				if (index == DEPTH_RESOLVE)
+					outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+				else
+					outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+				break;
+			default:
+				unreachable("invalid resolve mode");
+			}
+		}
+
+		if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+			outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+	}
+
+	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
+	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+	store->num_components = 4;
+	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
+	store->src[1] = nir_src_for_ssa(coord);
+	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+	store->src[3] = nir_src_for_ssa(outval);
+	nir_builder_instr_insert(&b, &store->instr);
+	return b.shader;
+}
 
 static VkResult
 create_layout(struct radv_device *device)
@@ -248,6 +407,57 @@ fail:
 	return result;
 }
 
+static VkResult
+create_depth_stencil_resolve_pipeline(struct radv_device *device,
+				      int samples,
+				      int index,
+				      VkResolveModeFlagBitsKHR resolve_mode,
+				      VkPipeline *pipeline)
+{
+	VkResult result;
+	struct radv_shader_module cs = { .nir = NULL };
+
+	mtx_lock(&device->meta_state.mtx);
+	if (*pipeline) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
+	cs.nir = build_depth_stencil_resolve_compute_shader(device, samples,
+							    index, resolve_mode);
+
+	/* compute shader */
+	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+		.module = radv_shader_module_to_handle(&cs),
+		.pName = "main",
+		.pSpecializationInfo = NULL,
+	};
+
+	VkComputePipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+		.stage = pipeline_shader_stage,
+		.flags = 0,
+		.layout = device->meta_state.resolve_compute.p_layout,
+	};
+
+	result = radv_CreateComputePipelines(radv_device_to_handle(device),
+					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					     1, &vk_pipeline_info, NULL,
+					     pipeline);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	ralloc_free(cs.nir);
+	mtx_unlock(&device->meta_state.mtx);
+	return VK_SUCCESS;
+fail:
+	ralloc_free(cs.nir);
+	mtx_unlock(&device->meta_state.mtx);
+	return result;
+}
+
 VkResult
 radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
 {
@@ -279,8 +489,56 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_
 		if (res != VK_SUCCESS)
 			goto fail;
 
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
+							    &state->resolve_compute.depth[i].average_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_MAX_BIT_KHR,
+							    &state->resolve_compute.depth[i].max_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_MIN_BIT_KHR,
+							    &state->resolve_compute.depth[i].min_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    STENCIL_RESOLVE,
+							    VK_RESOLVE_MODE_MAX_BIT_KHR,
+							    &state->resolve_compute.stencil[i].max_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    STENCIL_RESOLVE,
+							    VK_RESOLVE_MODE_MIN_BIT_KHR,
+							    &state->resolve_compute.stencil[i].min_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
 	}
 
+	res = create_depth_stencil_resolve_pipeline(device, 0,
+						    DEPTH_RESOLVE,
+						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+						    &state->resolve_compute.depth_zero_pipeline);
+	if (res != VK_SUCCESS)
+		goto fail;
+
+	res = create_depth_stencil_resolve_pipeline(device, 0,
+						    STENCIL_RESOLVE,
+						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+						    &state->resolve_compute.stencil_zero_pipeline);
+	if (res != VK_SUCCESS)
+		goto fail;
+
 	return VK_SUCCESS;
 fail:
 	radv_device_finish_meta_resolve_compute_state(device);
@@ -303,8 +561,36 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
 		radv_DestroyPipeline(radv_device_to_handle(device),
 				     state->resolve_compute.rc[i].srgb_pipeline,
 				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].average_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].max_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].min_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.stencil[i].max_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.stencil[i].min_pipeline,
+				     &state->alloc);
 	}
 
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->resolve_compute.depth_zero_pipeline,
+			     &state->alloc);
+
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->resolve_compute.stencil_zero_pipeline,
+			     &state->alloc);
+
 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
 					state->resolve_compute.ds_layout,
 					&state->alloc);
@@ -411,6 +697,113 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 
 }
 
+static void
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
+			   struct radv_image_view *src_iview,
+			   struct radv_image_view *dest_iview,
+			   const VkOffset2D *src_offset,
+			   const VkOffset2D *dest_offset,
+			   const VkExtent2D *resolve_extent,
+			   VkImageAspectFlags aspects,
+			   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_device *device = cmd_buffer->device;
+	const uint32_t samples = src_iview->image->info.samples;
+	const uint32_t samples_log2 = ffs(samples) - 1;
+	VkPipeline *pipeline;
+
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_COMPUTE,
+				      device->meta_state.resolve_compute.p_layout,
+				      0, /* set */
+				      2, /* descriptorWriteCount */
+				      (VkWriteDescriptorSet[]) {
+					{
+						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+						.dstBinding = 0,
+						.dstArrayElement = 0,
+						.descriptorCount = 1,
+						.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+						.pImageInfo = (VkDescriptorImageInfo[]) {
+							{
+								.sampler = VK_NULL_HANDLE,
+								.imageView = radv_image_view_to_handle(src_iview),
+								.imageLayout = VK_IMAGE_LAYOUT_GENERAL },
+						}
+					},
+					{
+						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+						.dstBinding = 1,
+						.dstArrayElement = 0,
+						.descriptorCount = 1,
+						.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+						.pImageInfo = (VkDescriptorImageInfo[]) {
+							{
+								.sampler = VK_NULL_HANDLE,
+								.imageView = radv_image_view_to_handle(dest_iview),
+								.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+							},
+						}
+					}
+				      });
+
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
+		break;
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+		pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
+		break;
+	default:
+		unreachable("invalid resolve mode");
+	}
+
+	if (!*pipeline) {
+		int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+		VkResult ret;
+
+		ret = create_depth_stencil_resolve_pipeline(device, samples,
+							    index, resolve_mode,
+							    pipeline);
+		if (ret != VK_SUCCESS) {
+			cmd_buffer->record_result = ret;
+			return;
+		}
+	}
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+			     VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+
+	unsigned push_constants[4] = {
+		src_offset->x,
+		src_offset->y,
+		dest_offset->x,
+		dest_offset->y,
+	};
+	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+			      device->meta_state.resolve_compute.p_layout,
+			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+			      push_constants);
+	radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
+
+}
+
 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     struct radv_image *src_image,
 				     VkFormat src_format,
@@ -561,3 +954,116 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
 	                                RADV_CMD_FLAG_INV_VMEM_L1;
 }
+
+void
+radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+				      VkImageAspectFlags aspects,
+				      VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	struct radv_meta_saved_state saved_state;
+	struct radv_subpass_barrier barrier;
+	uint32_t layer_count = fb->layers;
+
+	if (subpass->view_mask)
+		layer_count = util_last_bit(subpass->view_mask);
+
+	/* Resolves happen before the end-of-subpass barriers get executed, so
+	 * we have to make the attachment shader-readable.
+	 */
+	barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+	barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+	barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+	radv_subpass_barrier(cmd_buffer, &barrier);
+
+	radv_decompress_resolve_subpass_src(cmd_buffer);
+
+	radv_meta_save(&saved_state, cmd_buffer,
+		       RADV_META_SAVE_COMPUTE_PIPELINE |
+		       RADV_META_SAVE_CONSTANTS |
+		       RADV_META_SAVE_DESCRIPTORS);
+
+	struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+	struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
+
+	struct radv_image_view *src_iview =
+		cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
+	struct radv_image_view *dst_iview =
+		cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
+
+	struct radv_image *src_image = src_iview->image;
+	struct radv_image *dst_image = dst_iview->image;
+
+	for (uint32_t layer = 0; layer < layer_count; layer++) {
+		struct radv_image_view tsrc_iview;
+		radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+				     &(VkImageViewCreateInfo) {
+					.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					.image = radv_image_to_handle(src_image),
+					.viewType = radv_meta_get_view_type(src_image),
+					.format = src_iview->vk_format,
+					.subresourceRange = {
+						.aspectMask = aspects,
+						.baseMipLevel = src_iview->base_mip,
+						.levelCount = 1,
+						.baseArrayLayer = src_iview->base_layer + layer,
+						.layerCount = 1,
+					},
+				     });
+
+		struct radv_image_view tdst_iview;
+		radv_image_view_init(&tdst_iview, cmd_buffer->device,
+				     &(VkImageViewCreateInfo) {
+					.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					.image = radv_image_to_handle(dst_image),
+					.viewType = radv_meta_get_view_type(dst_image),
+					.format = dst_iview->vk_format,
+					.subresourceRange = {
+						.aspectMask = aspects,
+						.baseMipLevel = dst_iview->base_mip,
+						.levelCount = 1,
+						.baseArrayLayer = dst_iview->base_layer + layer,
+						.layerCount = 1,
+					},
+				     });
+
+		emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
+					   &(VkOffset2D) { 0, 0 },
+					   &(VkOffset2D) { 0, 0 },
+					   &(VkExtent2D) { fb->width, fb->height },
+					   aspects,
+					   resolve_mode);
+	}
+
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+	                                RADV_CMD_FLAG_INV_VMEM_L1;
+
+	if (radv_image_has_htile(dst_image)) {
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+			VkImageSubresourceRange range = {};
+			range.aspectMask = aspects;
+			range.baseMipLevel = dst_iview->base_mip;
+			range.levelCount = 1;
+			range.baseArrayLayer = dst_iview->base_layer;
+			range.layerCount = layer_count;
+
+			uint32_t clear_value = 0xfffc000f;
+
+			if (vk_format_is_stencil(dst_image->vk_format) &&
+			    subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+				/* Only clear the stencil part of the HTILE
+				 * buffer if it's resolved, otherwise this
+				 * might break if the stencil has been cleared.
+				 */
+				clear_value = 0xfffff30f;
+			}
+
+			cmd_buffer->state.flush_bits |=
+				radv_clear_htile(cmd_buffer, dst_image, &range,
+						 clear_value);
+		}
+	}
+
+	radv_meta_restore(&saved_state, cmd_buffer);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ad36096e68b..5c42f4f418a 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -574,6 +574,19 @@ struct radv_meta_state {
 			VkPipeline                                i_pipeline;
 			VkPipeline                                srgb_pipeline;
 		} rc[MAX_SAMPLES_LOG2];
+
+		VkPipeline depth_zero_pipeline;
+		struct {
+			VkPipeline average_pipeline;
+			VkPipeline max_pipeline;
+			VkPipeline min_pipeline;
+		} depth[MAX_SAMPLES_LOG2];
+
+		VkPipeline stencil_zero_pipeline;
+		struct {
+			VkPipeline max_pipeline;
+			VkPipeline min_pipeline;
+		} stencil[MAX_SAMPLES_LOG2];
 	} resolve_compute;
 
 	struct {
@@ -1252,6 +1265,9 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
+void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+					   VkImageAspectFlags aspects,
+					   VkResolveModeFlagBitsKHR resolve_mode);
 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
 void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
 					   VkImageAspectFlags aspects,
-- 
2.22.0


