<div dir="ltr"><div>No, the correct backport is attached.</div><div><br></div><div>Marek<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Apr 23, 2019 at 2:51 PM Dylan Baker <<a href="mailto:dylan@pnwbakers.com">dylan@pnwbakers.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Hi Marek and Samuel,<br>
<br>
I've staged this for 19.0, but I had to fix some very minor rebase conflicts.<br>
I'm getting ready to make a release, could one of you take a peek at the tip of<br>
the staging/19.0 branch and let me know if what I did looks okay?<br>
<br>
Thanks,<br>
Dylan<br>
<br>
Quoting Samuel Pitoiset (2019-04-16 08:24:01)<br>
> I don't have much context for that issue, so:<br>
> <br>
> Acked-by: Samuel Pitoiset <<a href="mailto:samuel.pitoiset@gmail.com" target="_blank">samuel.pitoiset@gmail.com</a>><br>
> <br>
> On 4/12/19 10:15 PM, Marek Olšák wrote:<br>
> <br>
> Done locally.<br>
> <br>
> Marek<br>
> <br>
> On Fri, Apr 12, 2019 at 12:20 PM Samuel Pitoiset <<a href="mailto:samuel.pitoiset@gmail.com" target="_blank">samuel.pitoiset@gmail.com</a><br>
> > wrote:<br>
> <br>
> I would suggest to document that workaround somewhere in the code.<br>
> <br>
> On 4/12/19 5:17 PM, Marek Olšák wrote:<br>
> > From: Marek Olšák <<a href="mailto:marek.olsak@amd.com" target="_blank">marek.olsak@amd.com</a>><br>
> ><br>
> > This is a workaround for a thread deadlock that I have no idea<br>
> > why it occurs.<br>
> ><br>
> > Bugzilla: <a href="https://bugs.freedesktop.org/show_bug.cgi?id=108879" rel="noreferrer" target="_blank">https://bugs.freedesktop.org/show_bug.cgi?id=108879</a><br>
> > Fixes: 9b331e462e5021d994859756d46cd2519d9c9c6e<br>
> > ---<br>
> > src/gallium/drivers/radeonsi/si_clear.c | 6 +++---<br>
> > src/gallium/drivers/radeonsi/si_compute_blit.c | 8 +++++---<br>
> > src/gallium/drivers/radeonsi/si_pipe.c | 2 +-<br>
> > src/gallium/drivers/radeonsi/si_pipe.h | 3 ++-<br>
> > src/gallium/drivers/radeonsi/si_test_dma.c | 2 +-<br>
> > 5 files changed, 12 insertions(+), 9 deletions(-)<br>
> ><br>
> > diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/<br>
> drivers/radeonsi/si_clear.c<br>
> > index e1805f2a1c9..ead680b857b 100644<br>
> > --- a/src/gallium/drivers/radeonsi/si_clear.c<br>
> > +++ b/src/gallium/drivers/radeonsi/si_clear.c<br>
> > @@ -256,21 +256,21 @@ void vi_dcc_clear_level(struct si_context<br>
> *sctx,<br>
> > * would be more efficient than separate per-layer<br>
> clear operations.<br>
> > */<br>
> > assert(tex->buffer.b.b.nr_storage_samples <= 2 ||<br>
> num_layers == 1);<br>
> > <br>
> > dcc_offset += tex->surface.u.legacy.level<br>
> [level].dcc_offset;<br>
> > clear_size = tex->surface.u.legacy.level<br>
> [level].dcc_fast_clear_size *<br>
> > num_layers;<br>
> > }<br>
> > <br>
> > si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,<br>
> > - &clear_value, 4, SI_COHERENCY_CB_META);<br>
> > + &clear_value, 4, SI_COHERENCY_CB_META, false);<br>
> > }<br>
> > <br>
> > /* Set the same micro tile mode as the destination of the last MSAA<br>
> resolve.<br>
> > * This allows hitting the MSAA resolve fast path, which requires<br>
> that both<br>
> > * src and dst micro tile modes match.<br>
> > */<br>
> > static void si_set_optimal_micro_tile_mode(struct si_screen<br>
> *sscreen,<br>
> > struct si_texture *tex)<br>
> > {<br>
> > if (tex->buffer.b.is_shared ||<br>
> > @@ -489,21 +489,21 @@ static void si_do_fast_color_clear(struct<br>
> si_context *sctx,<br>
> > <br>
> > /* DCC fast clear with MSAA should clear CMASK<br>
> to 0xC. */<br>
> > if (tex->buffer.b.b.nr_samples >= 2 && tex-><br>
> cmask_buffer) {<br>
> > /* TODO: This doesn't work with MSAA. *<br>
> /<br>
> > if (eliminate_needed)<br>
> > continue;<br>
> > <br>
> > uint32_t clear_value = 0xCCCCCCCC;<br>
> > si_clear_buffer(sctx, &tex-><br>
> cmask_buffer->b.b,<br>
> > tex->cmask_offset,<br>
> tex->surface.cmask_size,<br>
> > - &clear_value, 4,<br>
> SI_COHERENCY_CB_META);<br>
> > + &clear_value, 4,<br>
> SI_COHERENCY_CB_META, false);<br>
> > fmask_decompress_needed = true;<br>
> > }<br>
> > <br>
> > vi_dcc_clear_level(sctx, tex, 0, reset_value);<br>
> > tex->separate_dcc_dirty = true;<br>
> > } else {<br>
> > if (too_small)<br>
> > continue;<br>
> > <br>
> > /* 128-bit formats are unusupported */<br>
> > @@ -517,21 +517,21 @@ static void si_do_fast_color_clear(struct<br>
> si_context *sctx,<br>
> > <br>
> > /* ensure CMASK is enabled */<br>
> > si_alloc_separate_cmask(sctx->screen, tex);<br>
> > if (!tex->cmask_buffer)<br>
> > continue;<br>
> > <br>
> > /* Do the fast clear. */<br>
> > uint32_t clear_value = 0;<br>
> > si_clear_buffer(sctx, &tex->cmask_buffer->b.b,<br>
> > tex->cmask_offset, tex-><br>
> surface.cmask_size,<br>
> > - &clear_value, 4,<br>
> SI_COHERENCY_CB_META);<br>
> > + &clear_value, 4,<br>
> SI_COHERENCY_CB_META, false);<br>
> > eliminate_needed = true;<br>
> > }<br>
> > <br>
> > if ((eliminate_needed || fmask_decompress_needed) &&<br>
> > !(tex->dirty_level_mask & (1 << level))) {<br>
> > tex->dirty_level_mask |= 1 << level;<br>
> > p_atomic_inc(&sctx->screen-><br>
> compressed_colortex_counter);<br>
> > }<br>
> > <br>
> > /* We can change the micro tile mode before a full<br>
> clear. */<br>
> > diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/<br>
> gallium/drivers/radeonsi/si_compute_blit.c<br>
> > index 1abeac6adb0..fb0d8d2f1b6 100644<br>
> > --- a/src/gallium/drivers/radeonsi/si_compute_blit.c<br>
> > +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c<br>
> > @@ -179,21 +179,22 @@ static void si_compute_do_clear_or_copy(struct<br>
> si_context *sctx,<br>
> > <br>
> > /* Restore states. */<br>
> > ctx->bind_compute_state(ctx, saved_cs);<br>
> > ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 :<br>
> 1, saved_sb,<br>
> > saved_writable_mask);<br>
> > si_compute_internal_end(sctx);<br>
> > }<br>
> > <br>
> > void si_clear_buffer(struct si_context *sctx, struct pipe_resource<br>
> *dst,<br>
> > uint64_t offset, uint64_t size, uint32_t<br>
> *clear_value,<br>
> > - uint32_t clear_value_size, enum si_coherency<br>
> coher)<br>
> > + uint32_t clear_value_size, enum si_coherency<br>
> coher,<br>
> > + bool force_cpdma)<br>
> > {<br>
> > if (!size)<br>
> > return;<br>
> > <br>
> > unsigned clear_alignment = MIN2(clear_value_size, 4);<br>
> > <br>
> > assert(clear_value_size != 3 && clear_value_size != 6); /* 12<br>
> is allowed. */<br>
> > assert(offset % clear_alignment == 0);<br>
> > assert(size % clear_alignment == 0);<br>
> > assert(size < (UINT_MAX & ~0xf)); /* TODO: test 64-bit sizes in<br>
> all codepaths */<br>
> > @@ -243,21 +244,22 @@ void si_clear_buffer(struct si_context *sctx,<br>
> struct pipe_resource *dst,<br>
> > return;<br>
> > }<br>
> > <br>
> > uint64_t aligned_size = size & ~3ull;<br>
> > if (aligned_size >= 4) {<br>
> > /* Before GFX9, CP DMA was very slow when clearing GTT,<br>
> so never<br>
> > * use CP DMA clears on those chips, because we can't<br>
> be certain<br>
> > * about buffer placements.<br>
> > */<br>
> > if (clear_value_size > 4 ||<br>
> > - (clear_value_size == 4 &&<br>
> > + (!force_cpdma &&<br>
> > + clear_value_size == 4 &&<br>
> > offset % 4 == 0 &&<br>
> > (size > 32*1024 || sctx->chip_class <= VI))) {<br>
> > si_compute_do_clear_or_copy(sctx, dst, offset,<br>
> NULL, 0,<br>
> > aligned_size,<br>
> clear_value,<br>
> > clear_value_size,<br>
> coher);<br>
> > } else {<br>
> > assert(clear_value_size == 4);<br>
> > si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst,<br>
> offset,<br>
> > aligned_size,<br>
> *clear_value, 0, coher,<br>
> > get_cache_policy(sctx,<br>
> coher, size));<br>
> > @@ -277,21 +279,21 @@ void si_clear_buffer(struct si_context *sctx,<br>
> struct pipe_resource *dst,<br>
> > }<br>
> > }<br>
> > <br>
> > static void si_pipe_clear_buffer(struct pipe_context *ctx,<br>
> > struct pipe_resource *dst,<br>
> > unsigned offset, unsigned size,<br>
> > const void *clear_value,<br>
> > int clear_value_size)<br>
> > {<br>
> > si_clear_buffer((struct si_context*)ctx, dst, offset, size,<br>
> (uint32_t*)clear_value,<br>
> > - clear_value_size, SI_COHERENCY_SHADER);<br>
> > + clear_value_size, SI_COHERENCY_SHADER, false);<br>
> > }<br>
> > <br>
> > void si_copy_buffer(struct si_context *sctx,<br>
> > struct pipe_resource *dst, struct pipe_resource<br>
> *src,<br>
> > uint64_t dst_offset, uint64_t src_offset, unsigned<br>
> size)<br>
> > {<br>
> > if (!size)<br>
> > return;<br>
> > <br>
> > enum si_coherency coher = SI_COHERENCY_SHADER;<br>
> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/<br>
> drivers/radeonsi/si_pipe.c<br>
> > index 5caeb575623..5d376e6181a 100644<br>
> > --- a/src/gallium/drivers/radeonsi/si_pipe.c<br>
> > +++ b/src/gallium/drivers/radeonsi/si_pipe.c<br>
> > @@ -634,21 +634,21 @@ static struct pipe_context *si_create_context<br>
> (struct pipe_screen *screen,<br>
> > sizeof(sctx->sample_positions), &sctx-><br>
> sample_positions);<br>
> > <br>
> > /* this must be last */<br>
> > si_begin_new_gfx_cs(sctx);<br>
> > <br>
> > if (sctx->chip_class == CIK) {<br>
> > /* Clear the NULL constant buffer, because loads should<br>
> return zeros. */<br>
> > uint32_t clear_value = 0;<br>
> > si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,<br>
> > sctx->null_const_buf.buffer->width0,<br>
> > - &clear_value, 4, SI_COHERENCY_SHADER);<br>
> > + &clear_value, 4, SI_COHERENCY_SHADER,<br>
> true);<br>
> > }<br>
> > return &sctx->b;<br>
> > fail:<br>
> > fprintf(stderr, "radeonsi: Failed to create a context.\n");<br>
> > si_destroy_context(&sctx->b);<br>
> > return NULL;<br>
> > }<br>
> > <br>
> > static struct pipe_context *si_pipe_create_context(struct<br>
> pipe_screen *screen,<br>
> > void *priv, unsigned<br>
> flags)<br>
> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/<br>
> drivers/radeonsi/si_pipe.h<br>
> > index 301d38649bf..aaa95f32d20 100644<br>
> > --- a/src/gallium/drivers/radeonsi/si_pipe.h<br>
> > +++ b/src/gallium/drivers/radeonsi/si_pipe.h<br>
> > @@ -1182,21 +1182,22 @@ bool vi_alpha_is_on_msb(enum pipe_format<br>
> format);<br>
> > void vi_dcc_clear_level(struct si_context *sctx,<br>
> > struct si_texture *tex,<br>
> > unsigned level, unsigned clear_value);<br>
> > void si_init_clear_functions(struct si_context *sctx);<br>
> > <br>
> > /* si_compute_blit.c */<br>
> > unsigned si_get_flush_flags(struct si_context *sctx, enum<br>
> si_coherency coher,<br>
> > enum si_cache_policy cache_policy);<br>
> > void si_clear_buffer(struct si_context *sctx, struct pipe_resource<br>
> *dst,<br>
> > uint64_t offset, uint64_t size, uint32_t<br>
> *clear_value,<br>
> > - uint32_t clear_value_size, enum si_coherency<br>
> coher);<br>
> > + uint32_t clear_value_size, enum si_coherency<br>
> coher,<br>
> > + bool force_cpdma);<br>
> > void si_copy_buffer(struct si_context *sctx,<br>
> > struct pipe_resource *dst, struct pipe_resource<br>
> *src,<br>
> > uint64_t dst_offset, uint64_t src_offset, unsigned<br>
> size);<br>
> > void si_compute_copy_image(struct si_context *sctx,<br>
> > struct pipe_resource *dst,<br>
> > unsigned dst_level,<br>
> > struct pipe_resource *src,<br>
> > unsigned src_level,<br>
> > unsigned dstx, unsigned dsty, unsigned dstz,<br>
> > const struct pipe_box *src_box);<br>
> > diff --git a/src/gallium/drivers/radeonsi/si_test_dma.c b/src/gallium<br>
> /drivers/radeonsi/si_test_dma.c<br>
> > index 90a2032cd80..7e396e671be 100644<br>
> > --- a/src/gallium/drivers/radeonsi/si_test_dma.c<br>
> > +++ b/src/gallium/drivers/radeonsi/si_test_dma.c<br>
> > @@ -302,21 +302,21 @@ void si_test_dma(struct si_screen *sscreen)<br>
> > tsrc.width0, tsrc.height0, tsrc.array_size,<br>
> > array_mode_to_string(sscreen, &ssrc->surface),<br>
> bpp);<br>
> > fflush(stdout);<br>
> > <br>
> > /* set src pixels */<br>
> > set_random_pixels(ctx, src, &src_cpu);<br>
> > <br>
> > /* clear dst pixels */<br>
> > uint32_t zero = 0;<br>
> > si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size,<br>
> &zero, 4,<br>
> > - SI_COHERENCY_SHADER);<br>
> > + SI_COHERENCY_SHADER, false);<br>
> > memset(dst_cpu.ptr, 0, dst_cpu.layer_stride *<br>
> tdst.array_size);<br>
> > <br>
> > /* preparation */<br>
> > max_width = MIN2(tsrc.width0, tdst.width0);<br>
> > max_height = MIN2(tsrc.height0, tdst.height0);<br>
> > max_depth = MIN2(tsrc.array_size, tdst.array_size);<br>
> > <br>
> > num = do_partial_copies ? num_partial_copies : 1;<br>
> > for (j = 0; j < num; j++) {<br>
> > int width, height, depth;<br>
> <br>
</blockquote></div>