<div dir="ltr"><div dir="ltr"><div>I'll remove the env var.</div><div><br></div><div>Additionally, I'm amending this:</div><div><br></div><div>diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c<br>index 3f8db7cf4f0..4936eb5a5b1 100644<br>--- a/src/gallium/drivers/radeonsi/si_buffer.c<br>+++ b/src/gallium/drivers/radeonsi/si_buffer.c<br>@@ -461,10 +461,20 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,<br> si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) ||<br> !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) {<br> /* Do a wait-free write-only transfer using a temporary buffer. */<br>- unsigned offset;<br>+ struct u_upload_mgr *uploader;<br> struct si_resource *staging = NULL;<br>+ unsigned offset;<br>+<br>+ /* If we are not called from the driver thread, we have<br>+ * to use the uploader from u_threaded_context, which is<br>+ * local to the calling thread.<br>+ */<br>+ if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)<br>+ uploader = sctx->tc->base.stream_uploader;<br>+ else<br>+ uploader = sctx->b.stream_uploader;<br> <br>- u_upload_alloc(ctx->stream_uploader, 0,<br>+ u_upload_alloc(uploader, 0,<br> box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT),<br> sctx->screen->info.tcc_cache_line_size,<br> &offset, (struct pipe_resource**)&staging,<br></div><div><br></div><div>Marek<br></div><div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Mon, Feb 11, 2019 at 4:38 AM Nicolai Hähnle &lt;<a href="mailto:nhaehnle@gmail.com">nhaehnle@gmail.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">On 07.02.19 02:22, Marek Olšák wrote:<br>
> + bool use_sdma_upload = sscreen->info.has_dedicated_vram && sctx->dma_cs && debug_get_bool_option("SDMA", true);<br>
<br>
Could you please namespace the environment variable, e.g. RADEONSI_SDMA?<br>
<br>
Apart from that, the series is<br>
<br>
Reviewed-by: Nicolai Hähnle &lt;<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>&gt;<br>
<br>
<br>
> + sctx->b.const_uploader = u_upload_create(&sctx->b, 256 * 1024,<br>
> + 0, PIPE_USAGE_DEFAULT,<br>
> + SI_RESOURCE_FLAG_32BIT |<br>
> + (use_sdma_upload ?<br>
> + SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA :<br>
> + (sscreen->cpdma_prefetch_writes_memory ?<br>
> + 0 : SI_RESOURCE_FLAG_READ_ONLY)));<br>
> + if (!sctx->b.const_uploader)<br>
> + goto fail;<br>
> +<br>
> + if (use_sdma_upload)<br>
> + u_upload_enable_flush_explicit(sctx->b.const_uploader);<br>
> +<br>
> si_init_buffer_functions(sctx);<br>
> si_init_clear_functions(sctx);<br>
> si_init_blit_functions(sctx);<br>
> si_init_compute_functions(sctx);<br>
> si_init_compute_blit_functions(sctx);<br>
> si_init_debug_functions(sctx);<br>
> si_init_msaa_functions(sctx);<br>
> si_init_streamout_functions(sctx);<br>
> <br>
> if (sscreen->info.has_hw_decode) {<br>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h<br>
> index b01d5744752..b208bdeb848 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_pipe.h<br>
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h<br>
> @@ -103,20 +103,22 @@<br>
> #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024<br>
> <br>
> #define SI_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)<br>
> #define SI_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)<br>
> #define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)<br>
> #define SI_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)<br>
> #define SI_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)<br>
> #define SI_RESOURCE_FLAG_READ_ONLY (PIPE_RESOURCE_FLAG_DRV_PRIV << 5)<br>
> #define SI_RESOURCE_FLAG_32BIT (PIPE_RESOURCE_FLAG_DRV_PRIV << 6)<br>
> #define SI_RESOURCE_FLAG_CLEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 7)<br>
> +/* For const_uploader, upload data via GTT and copy to VRAM on context flush via SDMA. */<br>
> +#define SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA (PIPE_RESOURCE_FLAG_DRV_PRIV << 8)<br>
> <br>
> enum si_clear_code<br>
> {<br>
> DCC_CLEAR_COLOR_0000 = 0x00000000,<br>
> DCC_CLEAR_COLOR_0001 = 0x40404040,<br>
> DCC_CLEAR_COLOR_1110 = 0x80808080,<br>
> DCC_CLEAR_COLOR_1111 = 0xC0C0C0C0,<br>
> DCC_CLEAR_COLOR_REG = 0x20202020,<br>
> DCC_UNCOMPRESSED = 0xFFFFFFFF,<br>
> };<br>
> @@ -769,20 +771,28 @@ struct si_saved_cs {<br>
> struct si_context *ctx;<br>
> struct radeon_saved_cs gfx;<br>
> struct si_resource *trace_buf;<br>
> unsigned trace_id;<br>
> <br>
> unsigned gfx_last_dw;<br>
> bool flushed;<br>
> int64_t time_flush;<br>
> };<br>
> <br>
> +struct si_sdma_upload {<br>
> + struct si_resource *dst;<br>
> + struct si_resource *src;<br>
> + unsigned src_offset;<br>
> + unsigned dst_offset;<br>
> + unsigned size;<br>
> +};<br>
> +<br>
> struct si_context {<br>
> struct pipe_context b; /* base class */<br>
> <br>
> enum radeon_family family;<br>
> enum chip_class chip_class;<br>
> <br>
> struct radeon_winsys *ws;<br>
> struct radeon_winsys_ctx *ctx;<br>
> struct radeon_cmdbuf *gfx_cs;<br>
> struct radeon_cmdbuf *dma_cs;<br>
> @@ -1074,20 +1084,26 @@ struct si_context {<br>
> int num_perfect_occlusion_queries;<br>
> struct list_head active_queries;<br>
> unsigned num_cs_dw_queries_suspend;<br>
> <br>
> /* Render condition. */<br>
> struct pipe_query *render_cond;<br>
> unsigned render_cond_mode;<br>
> bool render_cond_invert;<br>
> bool render_cond_force_off; /* for u_blitter */<br>
> <br>
> + /* For uploading data via GTT and copy to VRAM on context flush via SDMA. */<br>
> + bool sdma_uploads_in_progress;<br>
> + struct si_sdma_upload *sdma_uploads;<br>
> + unsigned num_sdma_uploads;<br>
> + unsigned max_sdma_uploads;<br>
> +<br>
> /* Statistics gathering for the DCC enablement heuristic. It can't be<br>
> * in si_texture because si_texture can be shared by multiple<br>
> * contexts. This is for back buffers only. We shouldn't get too many<br>
> * of those.<br>
> *<br>
> * X11 DRI3 rotates among a finite set of back buffers. They should<br>
> * all fit in this array. If they don't, separate DCC might never be<br>
> * enabled by DCC stat gathering.<br>
> */<br>
> struct {<br>
> @@ -1273,20 +1289,21 @@ struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,<br>
> struct tc_unflushed_batch_token *tc_token);<br>
> <br>
> /* si_get.c */<br>
> void si_init_screen_get_functions(struct si_screen *sscreen);<br>
> <br>
> /* si_gfx_cs.c */<br>
> void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,<br>
> struct pipe_fence_handle **fence);<br>
> void si_begin_new_gfx_cs(struct si_context *ctx);<br>
> void si_need_gfx_cs_space(struct si_context *ctx);<br>
> +void si_unref_sdma_uploads(struct si_context *sctx);<br>
> <br>
> /* si_gpu_load.c */<br>
> void si_gpu_load_kill_thread(struct si_screen *sscreen);<br>
> uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type);<br>
> unsigned si_end_counter(struct si_screen *sscreen, unsigned type,<br>
> uint64_t begin);<br>
> <br>
> /* si_compute.c */<br>
> void si_init_compute_functions(struct si_context *sctx);<br>
> <br>
> <br>
<br>
-- <br>
Lerne, wie die Welt wirklich ist,<br>
Aber vergiss niemals, wie sie sein sollte.<br>
</blockquote></div></div></div></div>