[Mesa-dev] [PATCH 3/3] radv: Add on-demand compilation of built-in shaders.
Dave Airlie
airlied at gmail.com
Tue Aug 14 05:22:14 UTC 2018
On Tue, 14 Aug 2018 at 09:01, Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> wrote:
>
> In environments where we cannot cache, e.g. Android (no homedir),
> ChromeOS (readonly rootfs) or sandboxes (cannot open cache), the
> startup cost of creating a device in radv is rather high, due
> to compiling all possible built-in pipelines up front. This meant,
> depending on the CPU, a 1-4 second cost of creating a Device.
>
> For CTS this cost is unacceptable, and likely for starting random
> apps too.
>
> So if there is no cache, with this patch radv will compile shaders
> on demand. Once there is a cache from the first run, even if
> incomplete, the driver knows that it can likely write the cache
> and precompiles everything.
>
> Note that I did not switch the buffer and itob/btoi compute pipelines
> to on-demand, since you cannot really do anything in Vulkan without
> them and there are only a few.
>
> This reduces the CTS runtime for the no caches scenario on my
> threadripper from 32 minutes to 8 minutes.
> ---
> src/amd/vulkan/radv_meta.c | 35 ++++++----
> src/amd/vulkan/radv_meta.h | 18 ++---
> src/amd/vulkan/radv_meta_blit.c | 86 ++++++++++++++++++------
> src/amd/vulkan/radv_meta_blit2d.c | 69 ++++++++++++++++++-
> src/amd/vulkan/radv_meta_clear.c | 96 +++++++++++++++++++++++++--
> src/amd/vulkan/radv_meta_decompress.c | 32 ++++++++-
> src/amd/vulkan/radv_meta_fast_clear.c | 29 +++++++-
> src/amd/vulkan/radv_meta_resolve.c | 48 +++++++++++++-
> src/amd/vulkan/radv_meta_resolve_cs.c | 34 ++++++++--
> src/amd/vulkan/radv_meta_resolve_fs.c | 31 +++++++--
> src/amd/vulkan/radv_pipeline_cache.c | 16 +++--
> src/amd/vulkan/radv_private.h | 8 ++-
> src/amd/vulkan/radv_query.c | 32 +++++++--
> 13 files changed, 454 insertions(+), 80 deletions(-)
> - result = radv_device_init_meta_fast_clear_flush_state(device);
> + result = radv_device_init_meta_fast_clear_flush_state(device, on_demand);
> if (result != VK_SUCCESS)
> goto fail_fast_clear;
>
> - result = radv_device_init_meta_resolve_compute_state(device);
> + result = radv_device_init_meta_resolve_compute_state(device, on_demand);
> if (result != VK_SUCCESS)
> goto fail_resolve_compute;
>
> - result = radv_device_init_meta_resolve_fragment_state(device);
> + result = radv_device_init_meta_resolve_fragment_state(device, on_demand);
> if (result != VK_SUCCESS)
> goto fail_resolve_fragment;
> return VK_SUCCESS;
> @@ -407,6 +413,7 @@ fail_blit:
> fail_resolve:
> radv_device_finish_meta_clear_state(device);
> fail_clear:
> + mtx_destroy(&device->meta_state.mtx);
You destroy the mutex here in the error path, but as far as I can see
you don't destroy it in the normal cleanup path (device teardown).
>
> +static enum glsl_sampler_dim
> +translate_sampler_dim(VkImageType type) {
> + switch(type) {
> + case VK_IMAGE_TYPE_1D:
> + return GLSL_SAMPLER_DIM_1D;
> + case VK_IMAGE_TYPE_2D:
> + return GLSL_SAMPLER_DIM_2D;
> + case VK_IMAGE_TYPE_3D:
> + return GLSL_SAMPLER_DIM_3D;
> + default:
> + unreachable("Unhandled image type");
> + }
> +}
> +
> static void
> meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
> struct radv_image *src_image,
> @@ -333,11 +354,12 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
> .height = dst_height,
> .layers = 1,
> }, &cmd_buffer->pool->alloc, &fb);
> - VkPipeline pipeline;
> + VkPipeline* pipeline = NULL;
> + unsigned fs_key = 0;
> switch (src_iview->aspect_mask) {
> case VK_IMAGE_ASPECT_COLOR_BIT: {
> - unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
> unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
> + fs_key = radv_format_meta_fs_key(dest_image->vk_format);
>
> radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
> &(VkRenderPassBeginInfo) {
> @@ -353,13 +375,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
> }, VK_SUBPASS_CONTENTS_INLINE);
> switch (src_image->type) {
> case VK_IMAGE_TYPE_1D:
> - pipeline = device->meta_state.blit.pipeline_1d_src[fs_key];
> + pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key];
> break;
> case VK_IMAGE_TYPE_2D:
> - pipeline = device->meta_state.blit.pipeline_2d_src[fs_key];
> + pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key];
> break;
> case VK_IMAGE_TYPE_3D:
> - pipeline = device->meta_state.blit.pipeline_3d_src[fs_key];
> + pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key];
> break;
> default:
> unreachable(!"bad VkImageType");
> @@ -382,13 +404,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
> }, VK_SUBPASS_CONTENTS_INLINE);
> switch (src_image->type) {
> case VK_IMAGE_TYPE_1D:
> - pipeline = device->meta_state.blit.depth_only_1d_pipeline;
> + pipeline = &device->meta_state.blit.depth_only_1d_pipeline;
> break;
> case VK_IMAGE_TYPE_2D:
> - pipeline = device->meta_state.blit.depth_only_2d_pipeline;
> + pipeline = &device->meta_state.blit.depth_only_2d_pipeline;
> break;
> case VK_IMAGE_TYPE_3D:
> - pipeline = device->meta_state.blit.depth_only_3d_pipeline;
> + pipeline = &device->meta_state.blit.depth_only_3d_pipeline;
> break;
> default:
> unreachable(!"bad VkImageType");
> @@ -411,13 +433,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
> }, VK_SUBPASS_CONTENTS_INLINE);
> switch (src_image->type) {
> case VK_IMAGE_TYPE_1D:
> - pipeline = device->meta_state.blit.stencil_only_1d_pipeline;
> + pipeline = &device->meta_state.blit.stencil_only_1d_pipeline;
> break;
> case VK_IMAGE_TYPE_2D:
> - pipeline = device->meta_state.blit.stencil_only_2d_pipeline;
> + pipeline = &device->meta_state.blit.stencil_only_2d_pipeline;
> break;
> case VK_IMAGE_TYPE_3D:
> - pipeline = device->meta_state.blit.stencil_only_3d_pipeline;
> + pipeline = &device->meta_state.blit.stencil_only_3d_pipeline;
> break;
> default:
> unreachable(!"bad VkImageType");
> @@ -428,8 +450,16 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
> unreachable(!"bad VkImageType");
> }
>
> + if (!*pipeline) {
> + VkResult ret = build_pipeline(device, src_iview->aspect_mask, translate_sampler_dim(src_image->type), fs_key, pipeline);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + goto fail_pipeline;
> + }
> + }
> +
> radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
> - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
> + VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
>
> radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
> device->meta_state.blit.pipeline_layout,
> @@ -471,6 +501,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
>
> radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
>
> +fail_pipeline:
> radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
>
> /* At the point where we emit the draw call, all data from the
> @@ -722,6 +753,14 @@ build_pipeline(struct radv_device *device,
> VkPipeline *pipeline)
> {
> VkResult result = VK_SUCCESS;
> +
> + mtx_lock(&device->meta_state.mtx);
> +
> + if (*pipeline) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> struct radv_shader_module fs = {0};
> struct radv_shader_module vs = {.nir = build_nir_vertex_shader()};
> VkRenderPass rp;
> @@ -871,11 +910,12 @@ build_pipeline(struct radv_device *device,
> &device->meta_state.alloc, pipeline);
> ralloc_free(vs.nir);
> ralloc_free(fs.nir);
> + mtx_unlock(&device->meta_state.mtx);
> return result;
> }
>
> static VkResult
> -radv_device_init_meta_blit_color(struct radv_device *device)
> +radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand)
> {
> VkResult result;
>
> @@ -917,6 +957,9 @@ radv_device_init_meta_blit_color(struct radv_device *device)
> goto fail;
> }
>
> + if (on_demand)
> + continue;
> +
> result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key, &device->meta_state.blit.pipeline_1d_src[key]);
> if (result != VK_SUCCESS)
> goto fail;
> @@ -937,7 +980,7 @@ fail:
> }
>
> static VkResult
> -radv_device_init_meta_blit_depth(struct radv_device *device)
> +radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand)
> {
> VkResult result;
>
> @@ -974,6 +1017,9 @@ radv_device_init_meta_blit_depth(struct radv_device *device)
> goto fail;
> }
>
> + if (on_demand)
> + return VK_SUCCESS;
> +
> result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.depth_only_1d_pipeline);
> if (result != VK_SUCCESS)
> goto fail;
> @@ -991,7 +1037,7 @@ fail:
> }
>
> static VkResult
> -radv_device_init_meta_blit_stencil(struct radv_device *device)
> +radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand)
> {
> VkResult result;
>
> @@ -1028,6 +1074,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device)
> if (result != VK_SUCCESS)
> goto fail;
>
> + if (on_demand)
> + return VK_SUCCESS;
>
> result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.stencil_only_1d_pipeline);
> if (result != VK_SUCCESS)
> @@ -1047,7 +1095,7 @@ fail:
> }
>
> VkResult
> -radv_device_init_meta_blit_state(struct radv_device *device)
> +radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand)
> {
> VkResult result;
>
> @@ -1086,15 +1134,15 @@ radv_device_init_meta_blit_state(struct radv_device *device)
> if (result != VK_SUCCESS)
> goto fail;
>
> - result = radv_device_init_meta_blit_color(device);
> + result = radv_device_init_meta_blit_color(device, on_demand);
> if (result != VK_SUCCESS)
> goto fail;
>
> - result = radv_device_init_meta_blit_depth(device);
> + result = radv_device_init_meta_blit_depth(device, on_demand);
> if (result != VK_SUCCESS)
> goto fail;
>
> - result = radv_device_init_meta_blit_stencil(device);
> + result = radv_device_init_meta_blit_stencil(device, on_demand);
>
> fail:
> if (result != VK_SUCCESS)
> diff --git a/src/amd/vulkan/radv_meta_blit2d.c b/src/amd/vulkan/radv_meta_blit2d.c
> index 79652856942..d2975532d4b 100644
> --- a/src/amd/vulkan/radv_meta_blit2d.c
> +++ b/src/amd/vulkan/radv_meta_blit2d.c
> @@ -35,6 +35,22 @@ enum blit2d_src_type {
> BLIT2D_NUM_SRC_TYPES,
> };
>
> +static VkResult
> +blit2d_init_color_pipeline(struct radv_device *device,
> + enum blit2d_src_type src_type,
> + VkFormat format,
> + uint32_t log2_samples);
> +
> +static VkResult
> +blit2d_init_depth_only_pipeline(struct radv_device *device,
> + enum blit2d_src_type src_type,
> + uint32_t log2_samples);
> +
> +static VkResult
> +blit2d_init_stencil_only_pipeline(struct radv_device *device,
> + enum blit2d_src_type src_type,
> + uint32_t log2_samples);
> +
> static void
> create_iview(struct radv_cmd_buffer *cmd_buffer,
> struct radv_meta_blit2d_surf *surf,
> @@ -268,6 +284,14 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
> unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
> unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
>
> + if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
> + VkResult ret = blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + goto fail_pipeline;
> + }
> + }
> +
> radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
> &(VkRenderPassBeginInfo) {
> .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
> @@ -285,6 +309,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
> bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
> } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
> enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
> +
> + if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) {
> + VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + goto fail_pipeline;
> + }
> + }
> +
> radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
> &(VkRenderPassBeginInfo) {
> .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
> @@ -303,6 +336,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
>
> } else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
> enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
> +
> + if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) {
> + VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + goto fail_pipeline;
> + }
> + }
> +
> radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
> &(VkRenderPassBeginInfo) {
> .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
> @@ -357,6 +399,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
> radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
> radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
>
> +fail_pipeline:
> /* At the point where we emit the draw call, all data from the
> * descriptor sets, etc. has been used. We are free to delete it.
> */
> @@ -737,6 +780,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
> unsigned fs_key = radv_format_meta_fs_key(format);
> const char *name;
>
> + mtx_lock(&device->meta_state.mtx);
> + if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> texel_fetch_build_func src_func;
> switch(src_type) {
> case BLIT2D_SRC_TYPE_IMAGE:
> @@ -894,6 +943,7 @@ blit2d_init_color_pipeline(struct radv_device *device,
> ralloc_free(vs.nir);
> ralloc_free(fs.nir);
>
> + mtx_unlock(&device->meta_state.mtx);
> return result;
> }
>
> @@ -905,6 +955,12 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
> VkResult result;
> const char *name;
>
> + mtx_lock(&device->meta_state.mtx);
> + if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> texel_fetch_build_func src_func;
> switch(src_type) {
> case BLIT2D_SRC_TYPE_IMAGE:
> @@ -1057,6 +1113,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
> ralloc_free(vs.nir);
> ralloc_free(fs.nir);
>
> + mtx_unlock(&device->meta_state.mtx);
> return result;
> }
>
> @@ -1068,6 +1125,12 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
> VkResult result;
> const char *name;
>
> + mtx_lock(&device->meta_state.mtx);
> + if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> texel_fetch_build_func src_func;
> switch(src_type) {
> case BLIT2D_SRC_TYPE_IMAGE:
> @@ -1236,6 +1299,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
> ralloc_free(vs.nir);
> ralloc_free(fs.nir);
>
> + mtx_unlock(&device->meta_state.mtx);
> return result;
> }
>
> @@ -1287,7 +1351,7 @@ fail:
> }
>
> VkResult
> -radv_device_init_meta_blit2d_state(struct radv_device *device)
> +radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand)
> {
> VkResult result;
> bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
> @@ -1305,6 +1369,9 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
> if (result != VK_SUCCESS)
> goto fail;
>
> + if (on_demand)
> + continue;
> +
> for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
> result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples);
> if (result != VK_SUCCESS)
> diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
> index 4f77e32b83f..0ae7191f17d 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -200,7 +200,13 @@ create_color_renderpass(struct radv_device *device,
> uint32_t samples,
> VkRenderPass *pass)
> {
> - return radv_CreateRenderPass(radv_device_to_handle(device),
> + mtx_lock(&device->meta_state.mtx);
> + if (*pass) {
> + mtx_unlock (&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> + VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
> &(VkRenderPassCreateInfo) {
> .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
> .attachmentCount = 1,
> @@ -231,6 +237,8 @@ create_color_renderpass(struct radv_device *device,
> },
> .dependencyCount = 0,
> }, &device->meta_state.alloc, pass);
> + mtx_unlock(&device->meta_state.mtx);
> + return result;
> }
>
> static VkResult
> @@ -243,6 +251,13 @@ create_color_pipeline(struct radv_device *device,
> struct nir_shader *vs_nir;
> struct nir_shader *fs_nir;
> VkResult result;
> +
> + mtx_lock(&device->meta_state.mtx);
> + if (*pipeline) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> build_color_shaders(&vs_nir, &fs_nir, frag_output);
>
> const VkPipelineVertexInputStateCreateInfo vi_state = {
> @@ -284,6 +299,7 @@ create_color_pipeline(struct radv_device *device,
> device->meta_state.clear_color_p_layout,
> &extra, &device->meta_state.alloc, pipeline);
>
> + mtx_unlock(&device->meta_state.mtx);
> return result;
> }
>
> @@ -349,6 +365,26 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
> return;
> }
>
> + if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
> + VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key],
> + samples,
> + &device->meta_state.clear[samples_log2].render_pass[fs_key]);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + return;
> + }
> + }
> +
> + if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
> + VkResult ret = create_color_pipeline(device, samples, 0,
> + &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
> + device->meta_state.clear[samples_log2].render_pass[fs_key]);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + return;
> + }
> + }
> +
> pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
> if (!pipeline) {
> radv_finishme("color clears incomplete");
> @@ -449,7 +485,13 @@ create_depthstencil_renderpass(struct radv_device *device,
> uint32_t samples,
> VkRenderPass *render_pass)
> {
> - return radv_CreateRenderPass(radv_device_to_handle(device),
> + mtx_lock(&device->meta_state.mtx);
> + if (*render_pass) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> + VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
> &(VkRenderPassCreateInfo) {
> .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
> .attachmentCount = 1,
> @@ -477,6 +519,8 @@ create_depthstencil_renderpass(struct radv_device *device,
> },
> .dependencyCount = 0,
> }, &device->meta_state.alloc, render_pass);
> + mtx_unlock(&device->meta_state.mtx);
> + return result;
> }
>
> static VkResult
> @@ -489,6 +533,13 @@ create_depthstencil_pipeline(struct radv_device *device,
> {
> struct nir_shader *vs_nir, *fs_nir;
> VkResult result;
> +
> + mtx_lock(&device->meta_state.mtx);
> + if (*pipeline) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> build_depthstencil_shader(&vs_nir, &fs_nir);
>
> const VkPipelineVertexInputStateCreateInfo vi_state = {
> @@ -536,6 +587,8 @@ create_depthstencil_pipeline(struct radv_device *device,
> samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
> device->meta_state.clear_depth_p_layout,
> &extra, &device->meta_state.alloc, pipeline);
> +
> + mtx_unlock(&device->meta_state.mtx);
> return result;
> }
>
> @@ -579,6 +632,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
> {
> bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value);
> int index = DEPTH_CLEAR_SLOW;
> + VkPipeline *pipeline;
>
> if (fast) {
> /* we don't know the previous clear values, so we always have
> @@ -588,13 +642,36 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
>
> switch (aspects) {
> case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
> - return meta_state->clear[samples_log2].depthstencil_pipeline[index];
> + pipeline = &meta_state->clear[samples_log2].depthstencil_pipeline[index];
> + break;
> case VK_IMAGE_ASPECT_DEPTH_BIT:
> - return meta_state->clear[samples_log2].depth_only_pipeline[index];
> + pipeline = &meta_state->clear[samples_log2].depth_only_pipeline[index];
> + break;
> case VK_IMAGE_ASPECT_STENCIL_BIT:
> - return meta_state->clear[samples_log2].stencil_only_pipeline[index];
> + pipeline = &meta_state->clear[samples_log2].stencil_only_pipeline[index];
> + break;
> + default:
> + unreachable("expected depth or stencil aspect");
> + }
> +
> + if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
> + VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2,
> + &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + return VK_NULL_HANDLE;
> + }
> }
> - unreachable("expected depth or stencil aspect");
> +
> + if (*pipeline == VK_NULL_HANDLE) {
> + VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index,
> + pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + return VK_NULL_HANDLE;
> + }
> + }
> + return *pipeline;
> }
>
> static void
> @@ -638,6 +715,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
> subpass->depth_stencil_attachment.layout,
> clear_rect,
> clear_value);
> + if (!pipeline)
> + return;
>
> radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
> pipeline);
> @@ -758,7 +837,7 @@ fail:
> }
>
> VkResult
> -radv_device_init_meta_clear_state(struct radv_device *device)
> +radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
> {
> VkResult res;
> struct radv_meta_state *state = &device->meta_state;
> @@ -791,6 +870,9 @@ radv_device_init_meta_clear_state(struct radv_device *device)
> if (res != VK_SUCCESS)
> goto fail;
>
> + if (on_demand)
> + return VK_SUCCESS;
> +
> for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
> uint32_t samples = 1 << i;
> for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
> diff --git a/src/amd/vulkan/radv_meta_decompress.c b/src/amd/vulkan/radv_meta_decompress.c
> index 1a8058c7cc5..41ed7b6d043 100644
> --- a/src/amd/vulkan/radv_meta_decompress.c
> +++ b/src/amd/vulkan/radv_meta_decompress.c
> @@ -103,6 +103,18 @@ create_pipeline(struct radv_device *device,
> {
> VkResult result;
> VkDevice device_h = radv_device_to_handle(device);
> + struct radv_shader_module vs_module = {0};
> +
> + mtx_lock(&device->meta_state.mtx);
> + if (*decompress_pipeline) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> + if (!vs_module_h) {
> + vs_module.nir = radv_meta_build_nir_vs_generate_vertices();
> + vs_module_h = radv_shader_module_to_handle(&vs_module);
> + }
>
> struct radv_shader_module fs_module = {
> .nir = radv_meta_build_nir_fs_noop(),
> @@ -219,6 +231,9 @@ create_pipeline(struct radv_device *device,
>
> cleanup:
> ralloc_free(fs_module.nir);
> + if (vs_module.nir)
> + ralloc_free(vs_module.nir);
> + mtx_unlock(&device->meta_state.mtx);
> return result;
> }
>
> @@ -244,7 +259,7 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
> }
>
> VkResult
> -radv_device_init_meta_depth_decomp_state(struct radv_device *device)
> +radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
> {
> struct radv_meta_state *state = &device->meta_state;
> VkResult res = VK_SUCCESS;
> @@ -270,6 +285,9 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device)
> if (res != VK_SUCCESS)
> goto fail;
>
> + if (on_demand)
> + continue;
> +
> res = create_pipeline(device, vs_module_h, samples,
> state->depth_decomp[i].pass,
> state->depth_decomp[i].p_layout,
> @@ -343,6 +361,18 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
> if (!radv_image_has_htile(image))
> return;
>
> + if (!meta_state->depth_decomp[samples_log2].decompress_pipeline) {
> + VkResult ret = create_pipeline(cmd_buffer->device, NULL, samples,
> + meta_state->depth_decomp[samples_log2].pass,
> + meta_state->depth_decomp[samples_log2].p_layout,
> + &meta_state->depth_decomp[samples_log2].decompress_pipeline,
> + &meta_state->depth_decomp[samples_log2].resummarize_pipeline);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + return;
> + }
> + }
> +
> radv_meta_save(&saved_state, cmd_buffer,
> RADV_META_SAVE_GRAPHICS_PIPELINE |
> RADV_META_SAVE_PASS);
> diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
> index b42a6783fd2..f469a9ee8f8 100644
> --- a/src/amd/vulkan/radv_meta_fast_clear.c
> +++ b/src/amd/vulkan/radv_meta_fast_clear.c
> @@ -489,11 +489,17 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
> &state->alloc);
> }
>
> -VkResult
> -radv_device_init_meta_fast_clear_flush_state(struct radv_device *device)
> +static VkResult
> +radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
> {
> VkResult res = VK_SUCCESS;
>
> + mtx_lock(&device->meta_state.mtx);
> + if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
> + mtx_unlock(&device->meta_state.mtx);
> + return VK_SUCCESS;
> + }
> +
> struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
> if (!vs_module.nir) {
> /* XXX: Need more accurate error */
> @@ -527,10 +533,21 @@ fail:
>
> cleanup:
> ralloc_free(vs_module.nir);
> + mtx_unlock(&device->meta_state.mtx);
>
> return res;
> }
>
> +
> +VkResult
> +radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
> +{
> + if (on_demand)
> + return VK_SUCCESS;
> +
> + return radv_device_init_meta_fast_clear_flush_state_internal(device);
> +}
> +
> static void
> emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
> const VkExtent2D *resolve_extent,
> @@ -591,6 +608,14 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
>
> assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
>
> + if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
> + VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
> + if (ret != VK_SUCCESS) {
> + cmd_buffer->record_result = ret;
> + return;
> + }
> + }
> +
> radv_meta_save(&saved_state, cmd_buffer,
> RADV_META_SAVE_GRAPHICS_PIPELINE |
> RADV_META_SAVE_PASS);
> diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c
> index 30fed974414..f7cd7876701 100644
> --- a/src/amd/vulkan/radv_meta_resolve.c
> +++ b/src/amd/vulkan/radv_meta_resolve.c
> @@ -252,8 +252,11 @@ radv_device_finish_meta_resolve_state(struct radv_device *device)
> }
>
> VkResult
> -radv_device_init_meta_resolve_state(struct radv_device *device)
> +radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)
> {
> + if (on_demand)
> + return VK_SUCCESS;
> +
> VkResult res = VK_SUCCESS;
> struct radv_meta_state *state = &device->meta_state;
> struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
> @@ -353,6 +356,36 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image,
> }
> }
>
> +static VkResult
> +build_resolve_pipeline(struct radv_device *device,
> + unsigned fs_key)
> +{
> + VkResult result = VK_SUCCESS;
> +
> + if (device->meta_state.resolve.pipeline[fs_key])
> + return result;
> +
^ whitespace.
Otherwise, for the series:
Reviewed-by: Dave Airlie <airlied at redhat.com>
More information about the mesa-dev
mailing list