[Mesa-dev] [PATCH 1/2] radeonsi: always use compute rings for clover on CI and newer (v2)
Marek Olšák
maraeo at gmail.com
Tue Feb 26 23:34:26 UTC 2019
I ran a simple test verifying that compute is working properly on the
compute ring.
When clover is using compute rings, it doesn't stall/block graphics
operations.
Marek
On Tue, Feb 26, 2019 at 4:10 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:
> Can you add a bit of background on why clover should or should not use
> other rings?
>
> I planned to test this, but my Raven system has not been able to run clover
> since the kernel 4.20 release (BZ 109649), so I need to bisect that first.
> Can this patch help address the soft lockup issue on CIK (BZ 108879)?
> Presumably, it was tested using clover on CIK, right?
>
> Jan
>
> On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák <maraeo at gmail.com> wrote:
>
>> I'll just push it.
>>
>> Marek
>>
>> On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <Dieter at nuetzel-hh.de>
>> wrote:
>>
>>> Hello Marek,
>>>
>>> This series needs a rebase (if you have some time).
>>>
>>> Dieter
>>>
>>> Am 12.02.2019 19:12, schrieb Marek Olšák:
>>> > From: Marek Olšák <marek.olsak at amd.com>
>>> >
>>> > initialize all non-compute context functions to NULL.
>>> >
>>> > v2: fix SI
>>> > ---
>>> > src/gallium/drivers/radeonsi/si_blit.c | 14 ++-
>>> > src/gallium/drivers/radeonsi/si_clear.c | 7 +-
>>> > src/gallium/drivers/radeonsi/si_compute.c | 15 +--
>>> > src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
>>> > src/gallium/drivers/radeonsi/si_gfx_cs.c | 29 +++---
>>> > src/gallium/drivers/radeonsi/si_pipe.c | 95 +++++++++++--------
>>> > src/gallium/drivers/radeonsi/si_pipe.h | 3 +-
>>> > src/gallium/drivers/radeonsi/si_state.c | 3 +-
>>> > src/gallium/drivers/radeonsi/si_state.h | 1 +
>>> > src/gallium/drivers/radeonsi/si_state_draw.c | 25 +++--
>>> > src/gallium/drivers/radeonsi/si_texture.c | 3 +
>>> > 11 files changed, 130 insertions(+), 75 deletions(-)
>>> >
>>> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>>> > b/src/gallium/drivers/radeonsi/si_blit.c
>>> > index bb8d1cbd12d..f39cb5d143f 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_blit.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_blit.c
>>> > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct
>>> > pipe_context *ctx,
>>> >
>>> > if (separate_dcc_dirty) {
>>> > tex->separate_dcc_dirty = false;
>>> > vi_separate_dcc_process_and_reset_stats(ctx,
>>> tex);
>>> > }
>>> > }
>>> > }
>>> >
>>> > void si_decompress_dcc(struct si_context *sctx, struct si_texture
>>> > *tex)
>>> > {
>>> > - if (!tex->dcc_offset)
>>> > + /* If graphics is disabled, we can't decompress DCC, but it
>>> shouldn't
>>> > + * be compressed either. The caller should simply discard it.
>>> > + */
>>> > + if (!tex->dcc_offset || !sctx->has_graphics)
>>> > return;
>>> >
>>> > si_blit_decompress_color(sctx, tex, 0,
>>> tex->buffer.b.b.last_level,
>>> > 0, util_max_layer(&tex->buffer.b.b, 0),
>>> > true);
>>> > }
>>> >
>>> > void si_init_blit_functions(struct si_context *sctx)
>>> > {
>>> > sctx->b.resource_copy_region = si_resource_copy_region;
>>> > - sctx->b.blit = si_blit;
>>> > - sctx->b.flush_resource = si_flush_resource;
>>> > - sctx->b.generate_mipmap = si_generate_mipmap;
>>> > +
>>> > + if (sctx->has_graphics) {
>>> > + sctx->b.blit = si_blit;
>>> > + sctx->b.flush_resource = si_flush_resource;
>>> > + sctx->b.generate_mipmap = si_generate_mipmap;
>>> > + }
>>> > }
>>> > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
>>> > b/src/gallium/drivers/radeonsi/si_clear.c
>>> > index 9a00bb73b94..e1805f2a1c9 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_clear.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_clear.c
>>> > @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context
>>> > *pipe,
>>> > util_clear_render_target(pipe, sf, &color,
>>> > box->x, box->y,
>>> > box->width,
>>> box->height);
>>> > }
>>> > }
>>> > pipe_surface_reference(&sf, NULL);
>>> > }
>>> >
>>> > void si_init_clear_functions(struct si_context *sctx)
>>> > {
>>> > - sctx->b.clear = si_clear;
>>> > sctx->b.clear_render_target = si_clear_render_target;
>>> > - sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>>> > sctx->b.clear_texture = si_clear_texture;
>>> > +
>>> > + if (sctx->has_graphics) {
>>> > + sctx->b.clear = si_clear;
>>> > + sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>>> > + }
>>> > }
>>> > diff --git a/src/gallium/drivers/radeonsi/si_compute.c
>>> > b/src/gallium/drivers/radeonsi/si_compute.c
>>> > index 1a62b3e0844..87addd53976 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_compute.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_compute.c
>>> > @@ -880,26 +880,28 @@ static void si_launch_grid(
>>> > info->block[0] * info->block[1] * info->block[2] > 256;
>>> >
>>> > if (cs_regalloc_hang)
>>> > sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>>> > SI_CONTEXT_CS_PARTIAL_FLUSH;
>>> >
>>> > if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
>>> > program->shader.compilation_failed)
>>> > return;
>>> >
>>> > - if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
>>> > - si_update_fb_dirtiness_after_rendering(sctx);
>>> > - sctx->last_num_draw_calls = sctx->num_draw_calls;
>>> > - }
>>> > + if (sctx->has_graphics) {
>>> > + if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
>>> > + si_update_fb_dirtiness_after_rendering(sctx);
>>> > + sctx->last_num_draw_calls = sctx->num_draw_calls;
>>> > + }
>>> >
>>> > - si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>>> > + si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>>> > + }
>>> >
>>> > /* Add buffer sizes for memory checking in need_cs_space. */
>>> > si_context_add_resource_size(sctx, &program->shader.bo->b.b);
>>> > /* TODO: add the scratch buffer */
>>> >
>>> > if (info->indirect) {
>>> > si_context_add_resource_size(sctx, info->indirect);
>>> >
>>> > /* Indirect buffers use TC L2 on GFX9, but not older hw.
>>> */
>>> > if (sctx->chip_class <= VI &&
>>> > @@ -917,21 +919,22 @@ static void si_launch_grid(
>>> > if (sctx->flags)
>>> > si_emit_cache_flush(sctx);
>>> >
>>> > if (!si_switch_compute_shader(sctx, program, &program->shader,
>>> > code_object, info->pc))
>>> > return;
>>> >
>>> > si_upload_compute_shader_descriptors(sctx);
>>> > si_emit_compute_shader_pointers(sctx);
>>> >
>>> > - if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>>> > + if (sctx->has_graphics &&
>>> > + si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>>> > sctx->atoms.s.render_cond.emit(sctx);
>>> > si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond,
>>> false);
>>> > }
>>> >
>>> > if ((program->input_size ||
>>> > program->ir_type == PIPE_SHADER_IR_NATIVE) &&
>>> > unlikely(!si_upload_compute_input(sctx, code_object,
>>> > info))) {
>>> > return;
>>> > }
>>> >
>>> > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
>>> > b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> > index 21d4ca946d3..0f22c55723c 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> > @@ -2640,22 +2640,24 @@ void
>>> > si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
>>> >
>>> > sctx->num_resident_handles += num_resident_tex_handles +
>>> > num_resident_img_handles;
>>> > }
>>> >
>>> > /* INIT/DEINIT/UPLOAD */
>>> >
>>> > void si_init_all_descriptors(struct si_context *sctx)
>>> > {
>>> > int i;
>>> > + unsigned first_shader =
>>> > + sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
>>> >
>>> > - for (i = 0; i < SI_NUM_SHADERS; i++) {
>>> > + for (i = first_shader; i < SI_NUM_SHADERS; i++) {
>>> > bool is_2nd = sctx->chip_class >= GFX9 &&
>>> > (i == PIPE_SHADER_TESS_CTRL ||
>>> > i == PIPE_SHADER_GEOMETRY);
>>> > unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
>>> SI_NUM_SAMPLERS;
>>> > unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
>>> > SI_NUM_CONST_BUFFERS;
>>> > int rel_dw_offset;
>>> > struct si_descriptors *desc;
>>> >
>>> > if (is_2nd) {
>>> > if (i == PIPE_SHADER_TESS_CTRL) {
>>> > @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct si_context
>>> > *sctx)
>>> > si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
>>> >
>>> SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
>>> > 1024);
>>> >
>>> > sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
>>> >
>>> > /* Set pipe_context functions. */
>>> > sctx->b.bind_sampler_states = si_bind_sampler_states;
>>> > sctx->b.set_shader_images = si_set_shader_images;
>>> > sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
>>> > - sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>>> > sctx->b.set_shader_buffers = si_set_shader_buffers;
>>> > sctx->b.set_sampler_views = si_set_sampler_views;
>>> > sctx->b.create_texture_handle = si_create_texture_handle;
>>> > sctx->b.delete_texture_handle = si_delete_texture_handle;
>>> > sctx->b.make_texture_handle_resident =
>>> > si_make_texture_handle_resident;
>>> > sctx->b.create_image_handle = si_create_image_handle;
>>> > sctx->b.delete_image_handle = si_delete_image_handle;
>>> > sctx->b.make_image_handle_resident =
>>> si_make_image_handle_resident;
>>> >
>>> > + if (!sctx->has_graphics)
>>> > + return;
>>> > +
>>> > + sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>>> > +
>>> > /* Shader user data. */
>>> > sctx->atoms.s.shader_pointers.emit =
>>> > si_emit_graphics_shader_pointers;
>>> >
>>> > /* Set default and immutable mappings. */
>>> > si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
>>> > R_00B130_SPI_SHADER_USER_DATA_VS_0);
>>> >
>>> > if (sctx->chip_class >= GFX9) {
>>> > si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
>>> >
>>> R_00B430_SPI_SHADER_USER_DATA_LS_0);
>>> > si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
>>> > diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
>>> > b/src/gallium/drivers/radeonsi/si_gfx_cs.c
>>> > index 3d64587fa2b..d0e7cf20b4c 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
>>> > @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context *ctx,
>>> > unsigned flags,
>>> > * This code is only needed when the driver flushes the GFX IB
>>> > * internally, and it never asks for a fence handle.
>>> > */
>>> > if (radeon_emitted(ctx->dma_cs, 0)) {
>>> > assert(fence == NULL); /* internal flushes only */
>>> > si_flush_dma_cs(ctx, flags, NULL);
>>> > }
>>> >
>>> > ctx->gfx_flush_in_progress = true;
>>> >
>>> > - if (!LIST_IS_EMPTY(&ctx->active_queries))
>>> > - si_suspend_queries(ctx);
>>> > -
>>> > - ctx->streamout.suspended = false;
>>> > - if (ctx->streamout.begin_emitted) {
>>> > - si_emit_streamout_end(ctx);
>>> > - ctx->streamout.suspended = true;
>>> > + if (ctx->has_graphics) {
>>> > + if (!LIST_IS_EMPTY(&ctx->active_queries))
>>> > + si_suspend_queries(ctx);
>>> > +
>>> > + ctx->streamout.suspended = false;
>>> > + if (ctx->streamout.begin_emitted) {
>>> > + si_emit_streamout_end(ctx);
>>> > + ctx->streamout.suspended = true;
>>> > + }
>>> > }
>>> >
>>> > /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
>>> > * because the kernel doesn't wait for it. */
>>> > if (ctx->chip_class >= CIK)
>>> > si_cp_dma_wait_for_idle(ctx);
>>> >
>>> > /* Wait for draw calls to finish if needed. */
>>> > if (wait_flags) {
>>> > ctx->flags |= wait_flags;
>>> > @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>>> > * IB starts drawing.
>>> > *
>>> > * TODO: Do we also need to invalidate CB & DB caches?
>>> > */
>>> > ctx->flags |= SI_CONTEXT_INV_ICACHE |
>>> > SI_CONTEXT_INV_SMEM_L1 |
>>> > SI_CONTEXT_INV_VMEM_L1 |
>>> > SI_CONTEXT_INV_GLOBAL_L2 |
>>> > SI_CONTEXT_START_PIPELINE_STATS;
>>> >
>>> > + ctx->cs_shader_state.initialized = false;
>>> > + si_all_descriptors_begin_new_cs(ctx);
>>> > + si_all_resident_buffers_begin_new_cs(ctx);
>>> > +
>>> > + if (!ctx->has_graphics) {
>>> > + ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
>>> > + return;
>>> > + }
>>> > +
>>> > /* set all valid group as dirty so they get reemited on
>>> > * next draw command
>>> > */
>>> > si_pm4_reset_emitted(ctx);
>>> >
>>> > /* The CS initialization should be emitted before everything
>>> else. */
>>> > si_pm4_emit(ctx, ctx->init_config);
>>> > if (ctx->init_config_gs_rings)
>>> > si_pm4_emit(ctx, ctx->init_config_gs_rings);
>>> >
>>> > @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
>>> > if (ctx->chip_class >= GFX9)
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
>>> > /* CLEAR_STATE disables all window rectangles. */
>>> > if (!has_clear_state || ctx->num_window_rectangles > 0)
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
>>> > - si_all_descriptors_begin_new_cs(ctx);
>>> > - si_all_resident_buffers_begin_new_cs(ctx);
>>> >
>>> > ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>>> > ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>>> > ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS)
>>> - 1;
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
>>> >
>>> > si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
>>> > if (ctx->scratch_buffer) {
>>> > @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>>> > ctx->last_multi_vgt_param = -1;
>>> > ctx->last_rast_prim = -1;
>>> > ctx->last_sc_line_stipple = ~0;
>>> > ctx->last_vs_state = ~0;
>>> > ctx->last_ls = NULL;
>>> > ctx->last_tcs = NULL;
>>> > ctx->last_tes_sh_base = -1;
>>> > ctx->last_num_tcs_input_cp = -1;
>>> > ctx->last_ls_hs_config = -1; /* impossible value */
>>> >
>>> > - ctx->cs_shader_state.initialized = false;
>>> > -
>>> > if (has_clear_state) {
>>> >
>>> ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] =
>>> > 0x00000000;
>>> > ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL]
>>> =
>>> > 0x00000000;
>>> >
>>> ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] =
>>> > 0x00000000;
>>> >
>>> ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] =
>>> > 0x00000000;
>>> > ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] =
>>> 0xffffffff;
>>> > ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] =
>>> 0x00000000;
>>> >
>>> ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] =
>>> > 0x00000000;
>>> >
>>> ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] =
>>> > 0x00000000;
>>> >
>>> ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] =
>>> > 0x00000000;
>>> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>>> > b/src/gallium/drivers/radeonsi/si_pipe.c
>>> > index 20767c806d2..c2ec664d5a4 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>> > @@ -381,61 +381,56 @@ static struct pipe_context
>>> > *si_create_context(struct pipe_screen *screen,
>>> > {
>>> > struct si_context *sctx = CALLOC_STRUCT(si_context);
>>> > struct si_screen* sscreen = (struct si_screen *)screen;
>>> > struct radeon_winsys *ws = sscreen->ws;
>>> > int shader, i;
>>> > bool stop_exec_on_failure = (flags &
>>> > PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
>>> >
>>> > if (!sctx)
>>> > return NULL;
>>> >
>>> > + sctx->has_graphics = sscreen->info.chip_class == SI ||
>>> > + !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
>>> > +
>>> > if (flags & PIPE_CONTEXT_DEBUG)
>>> > sscreen->record_llvm_ir = true; /* racy but not critical
>>> */
>>> >
>>> > sctx->b.screen = screen; /* this must be set first */
>>> > sctx->b.priv = NULL;
>>> > sctx->b.destroy = si_destroy_context;
>>> > - sctx->b.emit_string_marker = si_emit_string_marker;
>>> > - sctx->b.set_debug_callback = si_set_debug_callback;
>>> > - sctx->b.set_log_context = si_set_log_context;
>>> > - sctx->b.set_context_param = si_set_context_param;
>>> > sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
>>> > sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
>>> >
>>> > slab_create_child(&sctx->pool_transfers,
>>> &sscreen->pool_transfers);
>>> > slab_create_child(&sctx->pool_transfers_unsync,
>>> > &sscreen->pool_transfers);
>>> >
>>> > sctx->ws = sscreen->ws;
>>> > sctx->family = sscreen->info.family;
>>> > sctx->chip_class = sscreen->info.chip_class;
>>> >
>>> > if (sscreen->info.has_gpu_reset_counter_query) {
>>> > sctx->gpu_reset_counter =
>>> > sctx->ws->query_value(sctx->ws,
>>> RADEON_GPU_RESET_COUNTER);
>>> > }
>>> >
>>> > - sctx->b.get_device_reset_status = si_get_reset_status;
>>> > - sctx->b.set_device_reset_callback = si_set_device_reset_callback;
>>> > -
>>> > - si_init_context_texture_functions(sctx);
>>> > - si_init_query_functions(sctx);
>>> >
>>> > if (sctx->chip_class == CIK ||
>>> > sctx->chip_class == VI ||
>>> > sctx->chip_class == GFX9) {
>>> > sctx->eop_bug_scratch = si_resource(
>>> > pipe_buffer_create(&sscreen->b, 0,
>>> PIPE_USAGE_DEFAULT,
>>> > 16 *
>>> sscreen->info.num_render_backends));
>>> > if (!sctx->eop_bug_scratch)
>>> > goto fail;
>>> > }
>>> >
>>> > + /* Initialize context allocators. */
>>> > sctx->allocator_zeroed_memory =
>>> > u_suballocator_create(&sctx->b, 128 * 1024,
>>> > 0, PIPE_USAGE_DEFAULT,
>>> > SI_RESOURCE_FLAG_UNMAPPABLE |
>>> > SI_RESOURCE_FLAG_CLEAR, false);
>>> > if (!sctx->allocator_zeroed_memory)
>>> > goto fail;
>>> >
>>> > sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
>>> > 0, PIPE_USAGE_STREAM,
>>> > @@ -459,38 +454,22 @@ static struct pipe_context
>>> > *si_create_context(struct pipe_screen *screen,
>>> > sctx->ctx = sctx->ws->ctx_create(sctx->ws);
>>> > if (!sctx->ctx)
>>> > goto fail;
>>> >
>>> > if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &
>>> > DBG(NO_ASYNC_DMA))) {
>>> > sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
>>> >
>>> (void*)si_flush_dma_cs,
>>> > sctx,
>>> stop_exec_on_failure);
>>> > }
>>> >
>>> > - si_init_buffer_functions(sctx);
>>> > - si_init_clear_functions(sctx);
>>> > - si_init_blit_functions(sctx);
>>> > - si_init_compute_functions(sctx);
>>> > - si_init_compute_blit_functions(sctx);
>>> > - si_init_debug_functions(sctx);
>>> > - si_init_msaa_functions(sctx);
>>> > - si_init_streamout_functions(sctx);
>>> > -
>>> > - if (sscreen->info.has_hw_decode) {
>>> > - sctx->b.create_video_codec = si_uvd_create_decoder;
>>> > - sctx->b.create_video_buffer = si_video_buffer_create;
>>> > - } else {
>>> > - sctx->b.create_video_codec = vl_create_decoder;
>>> > - sctx->b.create_video_buffer = vl_video_buffer_create;
>>> > - }
>>> > -
>>> > - sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
>>> > + sctx->gfx_cs = ws->cs_create(sctx->ctx,
>>> > + sctx->has_graphics ? RING_GFX :
>>> RING_COMPUTE,
>>> > (void*)si_flush_gfx_cs, sctx,
>>> stop_exec_on_failure);
>>> >
>>> > /* Border colors. */
>>> > sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
>>> >
>>> sizeof(*sctx->border_color_table));
>>> > if (!sctx->border_color_table)
>>> > goto fail;
>>> >
>>> > sctx->border_color_buffer = si_resource(
>>> > pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
>>> > @@ -498,43 +477,76 @@ static struct pipe_context
>>> > *si_create_context(struct pipe_screen *screen,
>>> > sizeof(*sctx->border_color_table)));
>>> > if (!sctx->border_color_buffer)
>>> > goto fail;
>>> >
>>> > sctx->border_color_map =
>>> > ws->buffer_map(sctx->border_color_buffer->buf,
>>> > NULL, PIPE_TRANSFER_WRITE);
>>> > if (!sctx->border_color_map)
>>> > goto fail;
>>> >
>>> > + /* Initialize context functions used by graphics and compute. */
>>> > + sctx->b.emit_string_marker = si_emit_string_marker;
>>> > + sctx->b.set_debug_callback = si_set_debug_callback;
>>> > + sctx->b.set_log_context = si_set_log_context;
>>> > + sctx->b.set_context_param = si_set_context_param;
>>> > + sctx->b.get_device_reset_status = si_get_reset_status;
>>> > + sctx->b.set_device_reset_callback = si_set_device_reset_callback;
>>> > + sctx->b.memory_barrier = si_memory_barrier;
>>> > +
>>> > si_init_all_descriptors(sctx);
>>> > + si_init_buffer_functions(sctx);
>>> > + si_init_clear_functions(sctx);
>>> > + si_init_blit_functions(sctx);
>>> > + si_init_compute_functions(sctx);
>>> > + si_init_compute_blit_functions(sctx);
>>> > + si_init_debug_functions(sctx);
>>> > si_init_fence_functions(sctx);
>>> > - si_init_state_functions(sctx);
>>> > - si_init_shader_functions(sctx);
>>> > - si_init_viewport_functions(sctx);
>>> > -
>>> > - if (sctx->chip_class >= CIK)
>>> > - cik_init_sdma_functions(sctx);
>>> > - else
>>> > - si_init_dma_functions(sctx);
>>> >
>>> > if (sscreen->debug_flags & DBG(FORCE_DMA))
>>> > sctx->b.resource_copy_region = sctx->dma_copy;
>>> >
>>> > - sctx->blitter = util_blitter_create(&sctx->b);
>>> > - if (sctx->blitter == NULL)
>>> > - goto fail;
>>> > - sctx->blitter->skip_viewport_restore = true;
>>> > + /* Initialize graphics-only context functions. */
>>> > + if (sctx->has_graphics) {
>>> > + si_init_context_texture_functions(sctx);
>>> > + si_init_query_functions(sctx);
>>> > + si_init_msaa_functions(sctx);
>>> > + si_init_shader_functions(sctx);
>>> > + si_init_state_functions(sctx);
>>> > + si_init_streamout_functions(sctx);
>>> > + si_init_viewport_functions(sctx);
>>> > +
>>> > + sctx->blitter = util_blitter_create(&sctx->b);
>>> > + if (sctx->blitter == NULL)
>>> > + goto fail;
>>> > + sctx->blitter->skip_viewport_restore = true;
>>> >
>>> > - si_init_draw_functions(sctx);
>>> > + si_init_draw_functions(sctx);
>>> > + }
>>> > +
>>> > + /* Initialize SDMA functions. */
>>> > + if (sctx->chip_class >= CIK)
>>> > + cik_init_sdma_functions(sctx);
>>> > + else
>>> > + si_init_dma_functions(sctx);
>>> >
>>> > sctx->sample_mask = 0xffff;
>>> >
>>> > + /* Initialize multimedia functions. */
>>> > + if (sscreen->info.has_hw_decode) {
>>> > + sctx->b.create_video_codec = si_uvd_create_decoder;
>>> > + sctx->b.create_video_buffer = si_video_buffer_create;
>>> > + } else {
>>> > + sctx->b.create_video_codec = vl_create_decoder;
>>> > + sctx->b.create_video_buffer = vl_video_buffer_create;
>>> > + }
>>> > +
>>> > if (sctx->chip_class >= GFX9) {
>>> > sctx->wait_mem_scratch = si_resource(
>>> > pipe_buffer_create(screen, 0,
>>> PIPE_USAGE_DEFAULT, 4));
>>> > if (!sctx->wait_mem_scratch)
>>> > goto fail;
>>> >
>>> > /* Initialize the memory. */
>>> > si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
>>> > V_370_MEM, V_370_ME,
>>> &sctx->wait_mem_number);
>>> > }
>>> > @@ -544,21 +556,22 @@ static struct pipe_context
>>> > *si_create_context(struct pipe_screen *screen,
>>> > if (sctx->chip_class == CIK) {
>>> > sctx->null_const_buf.buffer =
>>> > pipe_aligned_buffer_create(screen,
>>> >
>>> SI_RESOURCE_FLAG_32BIT,
>>> > PIPE_USAGE_DEFAULT,
>>> 16,
>>> >
>>> sctx->screen->info.tcc_cache_line_size);
>>> > if (!sctx->null_const_buf.buffer)
>>> > goto fail;
>>> > sctx->null_const_buf.buffer_size =
>>> > sctx->null_const_buf.buffer->width0;
>>> >
>>> > - for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
>>> > + unsigned start_shader = sctx->has_graphics ? 0 :
>>> > PIPE_SHADER_COMPUTE;
>>> > + for (shader = start_shader; shader < SI_NUM_SHADERS;
>>> shader++) {
>>> > for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
>>> > sctx->b.set_constant_buffer(&sctx->b,
>>> shader, i,
>>> >
>>> &sctx->null_const_buf);
>>> > }
>>> > }
>>> >
>>> > si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
>>> > &sctx->null_const_buf);
>>> > si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
>>> > &sctx->null_const_buf);
>>> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>>> > b/src/gallium/drivers/radeonsi/si_pipe.h
>>> > index b01d5744752..348e8e5bd26 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_pipe.h
>>> > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>>> > @@ -777,21 +777,21 @@ struct si_saved_cs {
>>> > };
>>> >
>>> > struct si_context {
>>> > struct pipe_context b; /* base class */
>>> >
>>> > enum radeon_family family;
>>> > enum chip_class chip_class;
>>> >
>>> > struct radeon_winsys *ws;
>>> > struct radeon_winsys_ctx *ctx;
>>> > - struct radeon_cmdbuf *gfx_cs;
>>> > + struct radeon_cmdbuf *gfx_cs; /* compute IB if
>>> graphics is disabled
>>> > */
>>> > struct radeon_cmdbuf *dma_cs;
>>> > struct pipe_fence_handle *last_gfx_fence;
>>> > struct pipe_fence_handle *last_sdma_fence;
>>> > struct si_resource *eop_bug_scratch;
>>> > struct u_upload_mgr *cached_gtt_allocator;
>>> > struct threaded_context *tc;
>>> > struct u_suballocator *allocator_zeroed_memory;
>>> > struct slab_child_pool pool_transfers;
>>> > struct slab_child_pool pool_transfers_unsync; /* for
>>> > threaded_context */
>>> > struct pipe_device_reset_callback device_reset_callback;
>>> > @@ -815,20 +815,21 @@ struct si_context {
>>> > void *cs_clear_render_target;
>>> > void *cs_clear_render_target_1d_array;
>>> > struct si_screen *screen;
>>> > struct pipe_debug_callback debug;
>>> > struct ac_llvm_compiler compiler; /* only non-threaded
>>> compilation
>>> > */
>>> > struct si_shader_ctx_state fixed_func_tcs_shader;
>>> > struct si_resource *wait_mem_scratch;
>>> > unsigned wait_mem_number;
>>> > uint16_t prefetch_L2_mask;
>>> >
>>> > + bool has_graphics;
>>> > bool gfx_flush_in_progress:1;
>>> > bool gfx_last_ib_is_busy:1;
>>> > bool compute_is_busy:1;
>>> >
>>> > unsigned num_gfx_cs_flushes;
>>> > unsigned initial_gfx_cs_size;
>>> > unsigned gpu_reset_counter;
>>> > unsigned last_dirty_tex_counter;
>>> > unsigned last_compressed_colortex_counter;
>>> > unsigned last_num_draw_calls;
>>> > diff --git a/src/gallium/drivers/radeonsi/si_state.c
>>> > b/src/gallium/drivers/radeonsi/si_state.c
>>> > index b49a1b3695e..458b108a7e3 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_state.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_state.c
>>> > @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
>>> > pipe_context *ctx, unsigned flags)
>>> >
>>> > si_update_fb_dirtiness_after_rendering(sctx);
>>> >
>>> > /* Multisample surfaces are flushed in si_decompress_textures. */
>>> > if (sctx->framebuffer.uncompressed_cb_mask)
>>> > si_make_CB_shader_coherent(sctx,
>>> sctx->framebuffer.nr_samples,
>>> >
>>> sctx->framebuffer.CB_has_shader_readable_metadata);
>>> > }
>>> >
>>> > /* This only ensures coherency for shader image/buffer stores. */
>>> > -static void si_memory_barrier(struct pipe_context *ctx, unsigned
>>> > flags)
>>> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>>> > {
>>> > struct si_context *sctx = (struct si_context *)ctx;
>>> >
>>> > /* Subsequent commands must wait for all shader invocations to
>>> > * complete. */
>>> > sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>>> > SI_CONTEXT_CS_PARTIAL_FLUSH;
>>> >
>>> > if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
>>> > sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
>>> > @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context
>>> > *sctx)
>>> > sctx->b.sampler_view_destroy = si_sampler_view_destroy;
>>> >
>>> > sctx->b.set_sample_mask = si_set_sample_mask;
>>> >
>>> > sctx->b.create_vertex_elements_state = si_create_vertex_elements;
>>> > sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
>>> > sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
>>> > sctx->b.set_vertex_buffers = si_set_vertex_buffers;
>>> >
>>> > sctx->b.texture_barrier = si_texture_barrier;
>>> > - sctx->b.memory_barrier = si_memory_barrier;
>>> > sctx->b.set_min_samples = si_set_min_samples;
>>> > sctx->b.set_tess_state = si_set_tess_state;
>>> >
>>> > sctx->b.set_active_query_state = si_set_active_query_state;
>>> >
>>> > si_init_config(sctx);
>>> > }
>>> >
>>> > void si_init_screen_state_functions(struct si_screen *sscreen)
>>> > {
>>> > diff --git a/src/gallium/drivers/radeonsi/si_state.h
>>> > b/src/gallium/drivers/radeonsi/si_state.h
>>> > index 767e789276a..6faa4c511b1 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_state.h
>>> > +++ b/src/gallium/drivers/radeonsi/si_state.h
>>> > @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct
>>> > si_context *sctx,
>>> > struct si_shader_selector
>>> *sel);
>>> > bool si_bindless_descriptor_can_reclaim_slab(void *priv,
>>> > struct pb_slab_entry
>>> *entry);
>>> > struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv,
>>> unsigned
>>> > heap,
>>> > unsigned entry_size,
>>> > unsigned group_index);
>>> > void si_bindless_descriptor_slab_free(void *priv, struct pb_slab
>>> > *pslab);
>>> > void si_rebind_buffer(struct si_context *sctx, struct pipe_resource
>>> > *buf,
>>> > uint64_t old_va);
>>> > /* si_state.c */
>>> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
>>> > void si_init_state_functions(struct si_context *sctx);
>>> > void si_init_screen_state_functions(struct si_screen *sscreen);
>>> > void
>>> > si_make_buffer_descriptor(struct si_screen *screen, struct
>>> si_resource
>>> > *buf,
>>> > enum pipe_format format,
>>> > unsigned offset, unsigned size,
>>> > uint32_t *state);
>>> > void
>>> > si_make_texture_descriptor(struct si_screen *screen,
>>> > struct si_texture *tex,
>>> > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>>> > b/src/gallium/drivers/radeonsi/si_state_draw.c
>>> > index 9c968e39c2c..2a514f144b9 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>>> > @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct
>>> > si_context *sctx,
>>> >
>>> S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
>>> > }
>>> > }
>>> > }
>>> >
>>> > static void si_emit_surface_sync(struct si_context *sctx,
>>> > unsigned cp_coher_cntl)
>>> > {
>>> > struct radeon_cmdbuf *cs = sctx->gfx_cs;
>>> >
>>> > - if (sctx->chip_class >= GFX9) {
>>> > + if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
>>> > /* Flush caches and wait for the caches to assert idle.
>>> */
>>> > radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
>>> > radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
>>> > radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
>>> > radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */
>>> > radeon_emit(cs, 0); /* CP_COHER_BASE */
>>> > radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
>>> > radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
>>> > } else {
>>> > /* ACQUIRE_MEM is only required on a compute ring. */
>>> > @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct
>>> > si_context *sctx,
>>> > radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
>>> > radeon_emit(cs, 0); /* CP_COHER_BASE */
>>> > radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
>>> > }
>>> > }
>>> >
>>> > void si_emit_cache_flush(struct si_context *sctx)
>>> > {
>>> > struct radeon_cmdbuf *cs = sctx->gfx_cs;
>>> > uint32_t flags = sctx->flags;
>>> > +
>>> > + if (!sctx->has_graphics) {
>>> > + /* Only process compute flags. */
>>> > + flags &= SI_CONTEXT_INV_ICACHE |
>>> > + SI_CONTEXT_INV_SMEM_L1 |
>>> > + SI_CONTEXT_INV_VMEM_L1 |
>>> > + SI_CONTEXT_INV_GLOBAL_L2 |
>>> > + SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
>>> > + SI_CONTEXT_INV_L2_METADATA |
>>> > + SI_CONTEXT_CS_PARTIAL_FLUSH;
>>> > + }
>>> > +
>>> > uint32_t cp_coher_cntl = 0;
>>> > uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
>>> > SI_CONTEXT_FLUSH_AND_INV_DB);
>>> >
>>> > if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
>>> > sctx->num_cb_cache_flushes++;
>>> > if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
>>> > sctx->num_db_cache_flushes++;
>>> >
>>> > /* SI has a bug that it always flushes ICACHE and KCACHE if
>>> either
>>> > @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context
>>> > *sctx)
>>> > EOP_DATA_SEL_VALUE_32BIT,
>>> > sctx->wait_mem_scratch, va,
>>> > sctx->wait_mem_number, SI_NOT_QUERY);
>>> > si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number,
>>> 0xffffffff,
>>> > WAIT_REG_MEM_EQUAL);
>>> > }
>>> >
>>> > /* Make sure ME is idle (it executes most packets) before
>>> continuing.
>>> > * This prevents read-after-write hazards between PFP and ME.
>>> > */
>>> > - if (cp_coher_cntl ||
>>> > - (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>>> > - SI_CONTEXT_INV_VMEM_L1 |
>>> > - SI_CONTEXT_INV_GLOBAL_L2 |
>>> > - SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
>>> > + if (sctx->has_graphics &&
>>> > + (cp_coher_cntl ||
>>> > + (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>>> > + SI_CONTEXT_INV_VMEM_L1 |
>>> > + SI_CONTEXT_INV_GLOBAL_L2 |
>>> > + SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
>>> > radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>>> > radeon_emit(cs, 0);
>>> > }
>>> >
>>> > /* SI-CI-VI only:
>>> > * When one of the CP_COHER_CNTL.DEST_BASE flags is set,
>>> > SURFACE_SYNC
>>> > * waits for idle, so it should be last. SURFACE_SYNC is done
>>> in
>>> > PFP.
>>> > *
>>> > * cp_coher_cntl should contain all necessary flags except TC
>>> flags
>>> > * at this point.
>>> > diff --git a/src/gallium/drivers/radeonsi/si_texture.c
>>> > b/src/gallium/drivers/radeonsi/si_texture.c
>>> > index a50088d2d8f..581f90a7b2f 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_texture.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_texture.c
>>> > @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
>>> > si_screen *sscreen,
>>> > * compressed tiled
>>> > *
>>> > * \param sctx the current context if you have one, or
>>> > sscreen->aux_context
>>> > * if you don't.
>>> > */
>>> > bool si_texture_disable_dcc(struct si_context *sctx,
>>> > struct si_texture *tex)
>>> > {
>>> > struct si_screen *sscreen = sctx->screen;
>>> >
>>> > + if (!sctx->has_graphics)
>>> > + return si_texture_discard_dcc(sscreen, tex);
>>> > +
>>> > if (!si_can_disable_dcc(tex))
>>> > return false;
>>> >
>>> > if (&sctx->b == sscreen->aux_context)
>>> > mtx_lock(&sscreen->aux_context_lock);
>>> >
>>> > /* Decompress DCC. */
>>> > si_decompress_dcc(sctx, tex);
>>> > sctx->b.flush(&sctx->b, NULL, 0);
>>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190226/3539e06c/attachment-0001.html>
More information about the mesa-dev mailing list