<div dir="auto">It's pushed already.<div dir="auto"><br></div><div dir="auto">Marek</div></div><br><div class="gmail_quote"><div dir="ltr">On Mon, Feb 11, 2019, 11:41 PM Dieter Nützel <<a href="mailto:Dieter@nuetzel-hh.de">Dieter@nuetzel-hh.de</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Maybe rebase?<br>
<br>
Dieter<br>
<br>
Am 24.01.2019 00:28, schrieb Marek Olšák:<br>
> From: Sonny Jiang <<a href="mailto:sonny.jiang@amd.com" target="_blank" rel="noreferrer">sonny.jiang@amd.com</a>><br>
> <br>
> Signed-off-by: Sonny Jiang <<a href="mailto:sonny.jiang@amd.com" target="_blank" rel="noreferrer">sonny.jiang@amd.com</a>><br>
> Signed-off-by: Marek Olšák <<a href="mailto:marek.olsak@amd.com" target="_blank" rel="noreferrer">marek.olsak@amd.com</a>><br>
> ---<br>
> src/gallium/drivers/radeonsi/si_clear.c | 6 ++<br>
> .../drivers/radeonsi/si_compute_blit.c | 96 +++++++++++++++++++<br>
> src/gallium/drivers/radeonsi/si_pipe.c | 4 +<br>
> src/gallium/drivers/radeonsi/si_pipe.h | 9 ++<br>
> .../drivers/radeonsi/si_shaderlib_tgsi.c | 69 +++++++++++++<br>
> 5 files changed, 184 insertions(+)<br>
> <br>
> diff --git a/src/gallium/drivers/radeonsi/si_clear.c<br>
> b/src/gallium/drivers/radeonsi/si_clear.c<br>
> index b3910a4651c..8afc01f2ccc 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_clear.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_clear.c<br>
> @@ -664,20 +664,26 @@ static void si_clear(struct pipe_context *ctx,<br>
> unsigned buffers,<br>
> }<br>
> <br>
> static void si_clear_render_target(struct pipe_context *ctx,<br>
> struct pipe_surface *dst,<br>
> const union pipe_color_union *color,<br>
> unsigned dstx, unsigned dsty,<br>
> unsigned width, unsigned height,<br>
> bool render_condition_enabled)<br>
> {<br>
> struct si_context *sctx = (struct si_context *)ctx;<br>
> + struct si_texture *sdst = (struct si_texture*)dst->texture;<br>
> +<br>
> + if (dst->texture->nr_samples <= 1 && !sdst->dcc_offset) {<br>
> + si_compute_clear_render_target(ctx, dst, color, dstx, dsty, width, <br>
> height);<br>
> + return;<br>
> + }<br>
> <br>
> si_blitter_begin(sctx, SI_CLEAR_SURFACE |<br>
> (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));<br>
> util_blitter_clear_render_target(sctx->blitter, dst, color,<br>
> dstx, dsty, width, height);<br>
> si_blitter_end(sctx);<br>
> }<br>
> <br>
> static void si_clear_depth_stencil(struct pipe_context *ctx,<br>
> struct pipe_surface *dst,<br>
> diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c<br>
> b/src/gallium/drivers/radeonsi/si_compute_blit.c<br>
> index 38c48c30be9..f06497f4dac 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_compute_blit.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c<br>
> @@ -18,20 +18,21 @@<br>
> * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT <br>
> SHALL<br>
> * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,<br>
> * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT <br>
> OR<br>
> * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE <br>
> OR THE<br>
> * USE OR OTHER DEALINGS IN THE SOFTWARE.<br>
> *<br>
> */<br>
> <br>
> #include "si_pipe.h"<br>
> #include "util/u_format.h"<br>
> +#include "util/format_srgb.h"<br>
> <br>
> /* Note: Compute shaders always use SI_COMPUTE_DST_CACHE_POLICY for <br>
> dst<br>
> * and L2_STREAM for src.<br>
> */<br>
> static enum si_cache_policy get_cache_policy(struct si_context *sctx,<br>
> enum si_coherency coher,<br>
> uint64_t size)<br>
> {<br>
> if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META ||<br>
> coher == SI_COHERENCY_CP)) ||<br>
> @@ -418,10 +419,105 @@ void si_compute_copy_image(struct si_context <br>
> *sctx,<br>
> ctx->bind_compute_state(ctx, saved_cs);<br>
> ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image);<br>
> ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);<br>
> si_compute_internal_end(sctx);<br>
> }<br>
> <br>
> void si_init_compute_blit_functions(struct si_context *sctx)<br>
> {<br>
> sctx->b.clear_buffer = si_pipe_clear_buffer;<br>
> }<br>
> +<br>
> +/* Clear a region of a color surface to a constant value. */<br>
> +void si_compute_clear_render_target(struct pipe_context *ctx,<br>
> + struct pipe_surface *dstsurf,<br>
> + const union pipe_color_union *color,<br>
> + unsigned dstx, unsigned dsty,<br>
> + unsigned width, unsigned height)<br>
> +{<br>
> + struct si_context *sctx = (struct si_context *)ctx;<br>
> + unsigned num_layers = dstsurf->u.tex.last_layer -<br>
> dstsurf->u.tex.first_layer + 1;<br>
> + unsigned data[4 + sizeof(color->ui)] = {dstx, dsty,<br>
> dstsurf->u.tex.first_layer, 0};<br>
> +<br>
> + if (width == 0 || height == 0)<br>
> + return;<br>
> +<br>
> + if (util_format_is_srgb(dstsurf->format)) {<br>
> + union pipe_color_union color_srgb;<br>
> + for (int i = 0; i < 3; i++)<br>
> + color_srgb.f[i] = util_format_linear_to_srgb_float(color->f[i]);<br>
> + color_srgb.f[3] = color->f[3];<br>
> + memcpy(data + 4, color_srgb.ui, sizeof(color->ui));<br>
> + } else {<br>
> + memcpy(data + 4, color->ui, sizeof(color->ui));<br>
> + }<br>
> +<br>
> + si_compute_internal_begin(sctx);<br>
> + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |<br>
> + si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);<br>
> + si_make_CB_shader_coherent(sctx, dstsurf->texture->nr_samples, true);<br>
> +<br>
> + struct pipe_constant_buffer saved_cb = {};<br>
> + si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);<br>
> +<br>
> + struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];<br>
> + struct pipe_image_view saved_image = {0};<br>
> + util_copy_image_view(&saved_image, &images->views[0]);<br>
> +<br>
> + void *saved_cs = sctx->cs_shader_state.program;<br>
> +<br>
> + struct pipe_constant_buffer cb = {};<br>
> + cb.buffer_size = sizeof(data);<br>
> + cb.user_buffer = data;<br>
> + ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &cb);<br>
> +<br>
> + struct pipe_image_view image = {0};<br>
> + image.resource = dstsurf->texture;<br>
> + image.shader_access = image.access = PIPE_IMAGE_ACCESS_WRITE;<br>
> + image.format = util_format_linear(dstsurf->format);<br>
> + image.u.tex.level = dstsurf->u.tex.level;<br>
> + image.u.tex.first_layer = 0; /* 3D images ignore first_layer <br>
> (BASE_ARRAY) */<br>
> + image.u.tex.last_layer = dstsurf->u.tex.last_layer;<br>
> +<br>
> + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image);<br>
> +<br>
> + struct pipe_grid_info info = {0};<br>
> +<br>
> + if (dstsurf->texture->target != PIPE_TEXTURE_1D_ARRAY) {<br>
> + if (!sctx->cs_clear_render_target)<br>
> + sctx->cs_clear_render_target = si_clear_render_target_shader(ctx);<br>
> + ctx->bind_compute_state(ctx, sctx->cs_clear_render_target);<br>
> + info.block[0] = 8;<br>
> + sctx->compute_last_block[0] = width % 8;<br>
> + info.block[1] = 8;<br>
> + sctx->compute_last_block[1] = height % 8;<br>
> + info.block[2] = 1;<br>
> + info.grid[0] = DIV_ROUND_UP(width, 8);<br>
> + info.grid[1] = DIV_ROUND_UP(height, 8);<br>
> + info.grid[2] = num_layers;<br>
> + } else {<br>
> + if (!sctx->cs_clear_render_target_1d_array)<br>
> + sctx->cs_clear_render_target_1d_array =<br>
> + si_clear_render_target_shader_1d_array(ctx);<br>
> + ctx->bind_compute_state(ctx, sctx->cs_clear_render_target_1d_array);<br>
> + info.block[0] = 64;<br>
> + sctx->compute_last_block[0] = width % 64;<br>
> + info.block[1] = 1;<br>
> + info.block[2] = 1;<br>
> + info.grid[0] = DIV_ROUND_UP(width, 64);<br>
> + info.grid[1] = num_layers;<br>
> + info.grid[2] = 1;<br>
> + }<br>
> +<br>
> + ctx->launch_grid(ctx, &info);<br>
> +<br>
> + sctx->compute_last_block[0] = 0;<br>
> + sctx->compute_last_block[1] = 0;<br>
> +<br>
> + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |<br>
> + (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) <br>
> |<br>
> + si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);<br>
> + ctx->bind_compute_state(ctx, saved_cs);<br>
> + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image);<br>
> + ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);<br>
> + si_compute_internal_end(sctx);<br>
> +}<br>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c<br>
> b/src/gallium/drivers/radeonsi/si_pipe.c<br>
> index 41d395d7d3f..439b550c4cf 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_pipe.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c<br>
> @@ -198,20 +198,24 @@ static void si_destroy_context(struct<br>
> pipe_context *context)<br>
> if (sctx->vs_blit_texcoord)<br>
> sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_texcoord);<br>
> if (sctx->cs_clear_buffer)<br>
> sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer);<br>
> if (sctx->cs_copy_buffer)<br>
> sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer);<br>
> if (sctx->cs_copy_image)<br>
> sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image);<br>
> if (sctx->cs_copy_image_1d_array)<br>
> sctx->b.delete_compute_state(&sctx->b, <br>
> sctx->cs_copy_image_1d_array);<br>
> + if (sctx->cs_clear_render_target)<br>
> + sctx->b.delete_compute_state(&sctx->b, <br>
> sctx->cs_clear_render_target);<br>
> + if (sctx->cs_clear_render_target_1d_array)<br>
> + sctx->b.delete_compute_state(&sctx->b,<br>
> sctx->cs_clear_render_target_1d_array);<br>
> <br>
> if (sctx->blitter)<br>
> util_blitter_destroy(sctx->blitter);<br>
> <br>
> /* Release DCC stats. */<br>
> for (int i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) {<br>
> assert(!sctx->dcc_stats[i].query_active);<br>
> <br>
> for (int j = 0; j < ARRAY_SIZE(sctx->dcc_stats[i].ps_stats); j++)<br>
> if (sctx->dcc_stats[i].ps_stats[j])<br>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h<br>
> b/src/gallium/drivers/radeonsi/si_pipe.h<br>
> index 3a5d9d2fbd2..437144316d0 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_pipe.h<br>
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h<br>
> @@ -805,20 +805,22 @@ struct si_context {<br>
> void *custom_blend_dcc_decompress;<br>
> void *vs_blit_pos;<br>
> void *vs_blit_pos_layered;<br>
> void *vs_blit_color;<br>
> void *vs_blit_color_layered;<br>
> void *vs_blit_texcoord;<br>
> void *cs_clear_buffer;<br>
> void *cs_copy_buffer;<br>
> void *cs_copy_image;<br>
> void *cs_copy_image_1d_array;<br>
> + void *cs_clear_render_target;<br>
> + void *cs_clear_render_target_1d_array;<br>
> struct si_screen *screen;<br>
> struct pipe_debug_callback debug;<br>
> struct ac_llvm_compiler compiler; /* only non-threaded compilation <br>
> */<br>
> struct si_shader_ctx_state fixed_func_tcs_shader;<br>
> struct si_resource *wait_mem_scratch;<br>
> unsigned wait_mem_number;<br>
> uint16_t prefetch_L2_mask;<br>
> <br>
> bool gfx_flush_in_progress:1;<br>
> bool gfx_last_ib_is_busy:1;<br>
> @@ -1182,20 +1184,25 @@ void si_clear_buffer(struct si_context *sctx,<br>
> struct pipe_resource *dst,<br>
> void si_copy_buffer(struct si_context *sctx,<br>
> struct pipe_resource *dst, struct pipe_resource *src,<br>
> uint64_t dst_offset, uint64_t src_offset, unsigned size);<br>
> void si_compute_copy_image(struct si_context *sctx,<br>
> struct pipe_resource *dst,<br>
> unsigned dst_level,<br>
> struct pipe_resource *src,<br>
> unsigned src_level,<br>
> unsigned dstx, unsigned dsty, unsigned dstz,<br>
> const struct pipe_box *src_box);<br>
> +void si_compute_clear_render_target(struct pipe_context *ctx,<br>
> + struct pipe_surface *dstsurf,<br>
> + const union pipe_color_union <br>
> *color,<br>
> + unsigned dstx, unsigned dsty,<br>
> + unsigned width, unsigned height);<br>
> void si_init_compute_blit_functions(struct si_context *sctx);<br>
> <br>
> /* si_cp_dma.c */<br>
> #define SI_CPDMA_SKIP_CHECK_CS_SPACE (1 << 0) /* don't call <br>
> need_cs_space */<br>
> #define SI_CPDMA_SKIP_SYNC_AFTER (1 << 1) /* don't wait for DMA after<br>
> the copy */<br>
> #define SI_CPDMA_SKIP_SYNC_BEFORE (1 << 2) /* don't wait for DMA<br>
> before the copy (RAW hazards) */<br>
> #define SI_CPDMA_SKIP_GFX_SYNC (1 << 3) /* don't flush caches and<br>
> don't wait for PS/CS */<br>
> #define SI_CPDMA_SKIP_BO_LIST_UPDATE (1 << 4) /* don't update the BO <br>
> list */<br>
> #define SI_CPDMA_SKIP_ALL (SI_CPDMA_SKIP_CHECK_CS_SPACE | \<br>
> SI_CPDMA_SKIP_SYNC_AFTER | \<br>
> @@ -1297,20 +1304,22 @@ void si_resume_queries(struct si_context <br>
> *sctx);<br>
> <br>
> /* si_shaderlib_tgsi.c */<br>
> void *si_get_blitter_vs(struct si_context *sctx, enum <br>
> blitter_attrib_type type,<br>
> unsigned num_layers);<br>
> void *si_create_fixed_func_tcs(struct si_context *sctx);<br>
> void *si_create_dma_compute_shader(struct pipe_context *ctx,<br>
> unsigned num_dwords_per_thread,<br>
> bool dst_stream_cache_policy, bool is_copy);<br>
> void *si_create_copy_image_compute_shader(struct pipe_context *ctx);<br>
> void *si_create_copy_image_compute_shader_1d_array(struct pipe_context <br>
> *ctx);<br>
> +void *si_clear_render_target_shader(struct pipe_context *ctx);<br>
> +void *si_clear_render_target_shader_1d_array(struct pipe_context <br>
> *ctx);<br>
> void *si_create_query_result_cs(struct si_context *sctx);<br>
> <br>
> /* si_test_dma.c */<br>
> void si_test_dma(struct si_screen *sscreen);<br>
> <br>
> /* si_test_clearbuffer.c */<br>
> void si_test_dma_perf(struct si_screen *sscreen);<br>
> <br>
> /* si_uvd.c */<br>
> struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context <br>
> *context,<br>
> diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c<br>
> b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c<br>
> index 55f96b3a25e..91a23b1d7ed 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c<br>
> @@ -509,10 +509,79 @@ void<br>
> *si_create_copy_image_compute_shader_1d_array(struct pipe_context<br>
> *ctx)<br>
> if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {<br>
> assert(false);<br>
> return NULL;<br>
> }<br>
> <br>
> state.ir_type = PIPE_SHADER_IR_TGSI;<br>
> state.prog = tokens;<br>
> <br>
> return ctx->create_compute_state(ctx, &state);<br>
> }<br>
> +<br>
> +void *si_clear_render_target_shader(struct pipe_context *ctx)<br>
> +{<br>
> + static const char text[] =<br>
> + "COMP\n"<br>
> + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"<br>
> + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"<br>
> + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"<br>
> + "DCL SV[0], THREAD_ID\n"<br>
> + "DCL SV[1], BLOCK_ID\n"<br>
> + "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"<br>
> + "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw<br>
> + "DCL TEMP[0..3], LOCAL\n"<br>
> + "IMM[0] UINT32 {8, 1, 0, 0}\n"<br>
> + "MOV TEMP[0].xyz, CONST[0][0].xyzw\n"<br>
> + "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n"<br>
> + "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n"<br>
> + "MOV TEMP[3].xyzw, CONST[0][1].xyzw\n"<br>
> + "STORE IMAGE[0], TEMP[2].xyzz, TEMP[3], 2D_ARRAY,<br>
> PIPE_FORMAT_R32G32B32A32_FLOAT\n"<br>
> + "END\n";<br>
> +<br>
> + struct tgsi_token tokens[1024];<br>
> + struct pipe_compute_state state = {0};<br>
> +<br>
> + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {<br>
> + assert(false);<br>
> + return NULL;<br>
> + }<br>
> +<br>
> + state.ir_type = PIPE_SHADER_IR_TGSI;<br>
> + state.prog = tokens;<br>
> +<br>
> + return ctx->create_compute_state(ctx, &state);<br>
> +}<br>
> +<br>
> +/* TODO: Didn't really test 1D_ARRAY */<br>
> +void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx)<br>
> +{<br>
> + static const char text[] =<br>
> + "COMP\n"<br>
> + "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"<br>
> + "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"<br>
> + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"<br>
> + "DCL SV[0], THREAD_ID\n"<br>
> + "DCL SV[1], BLOCK_ID\n"<br>
> + "DCL IMAGE[0], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"<br>
> + "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw<br>
> + "DCL TEMP[0..3], LOCAL\n"<br>
> + "IMM[0] UINT32 {64, 1, 0, 0}\n"<br>
> + "MOV TEMP[0].xy, CONST[0][0].xzzw\n"<br>
> + "UMAD TEMP[1].xy, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n"<br>
> + "UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[0].xyzx\n"<br>
> + "MOV TEMP[3].xyzw, CONST[0][1].xyzw\n"<br>
> + "STORE IMAGE[0], TEMP[2].xyzz, TEMP[3], 1D_ARRAY,<br>
> PIPE_FORMAT_R32G32B32A32_FLOAT\n"<br>
> + "END\n";<br>
> +<br>
> + struct tgsi_token tokens[1024];<br>
> + struct pipe_compute_state state = {0};<br>
> +<br>
> + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {<br>
> + assert(false);<br>
> + return NULL;<br>
> + }<br>
> +<br>
> + state.ir_type = PIPE_SHADER_IR_TGSI;<br>
> + state.prog = tokens;<br>
> +<br>
> + return ctx->create_compute_state(ctx, &state);<br>
> +}<br>
</blockquote></div>