<div dir="ltr"><div dir="ltr"><div dir="ltr">Can you add a bit of background on why clover should/should not use other rings?<div><br></div><div>I planned to test this, but my raven system can't run clover since the kernel 4.20 release (BZ 109649), so I need to bisect that first.</div><div>Can this patch help address the soft lockup issue on CIK (BZ 108879)? Presumably, it was tested using clover on CIK, right?</div><div><br></div><div>Jan</div></div></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák <<a href="mailto:maraeo@gmail.com">maraeo@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div>I'll just push it.</div><div><br></div><div>Marek<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <<a href="mailto:Dieter@nuetzel-hh.de" target="_blank">Dieter@nuetzel-hh.de</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Hello Marek,<br>
<br>
This series needs a rebase (if you have some time).<br>
<br>
Dieter<br>
<br>
Am 12.02.2019 19:12, schrieb Marek Olšák:<br>
> From: Marek Olšák <<a href="mailto:marek.olsak@amd.com" target="_blank">marek.olsak@amd.com</a>><br>
> <br>
> initialize all non-compute context functions to NULL.<br>
> <br>
> v2: fix SI<br>
> ---<br>
>  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-<br>
>  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-<br>
>  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--<br>
>  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-<br>
>  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---<br>
>  src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------<br>
>  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-<br>
>  src/gallium/drivers/radeonsi/si_state.c       |  3 +-<br>
>  src/gallium/drivers/radeonsi/si_state.h       |  1 +<br>
>  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--<br>
>  src/gallium/drivers/radeonsi/si_texture.c     |  3 +<br>
>  11 files changed, 130 insertions(+), 75 deletions(-)<br>
> <br>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c<br>
> b/src/gallium/drivers/radeonsi/si_blit.c<br>
> index bb8d1cbd12d..f39cb5d143f 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_blit.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_blit.c<br>
> @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct <br>
> pipe_context *ctx,<br>
> <br>
>               if (separate_dcc_dirty) {<br>
>                       tex->separate_dcc_dirty = false;<br>
>                       vi_separate_dcc_process_and_reset_stats(ctx, tex);<br>
>               }<br>
>       }<br>
>  }<br>
> <br>
>  void si_decompress_dcc(struct si_context *sctx, struct si_texture <br>
> *tex)<br>
>  {<br>
> -     if (!tex->dcc_offset)<br>
> +     /* If graphics is disabled, we can't decompress DCC, but it shouldn't<br>
> +      * be compressed either. The caller should simply discard it.<br>
> +      */<br>
> +     if (!tex->dcc_offset || !sctx->has_graphics)<br>
>               return;<br>
> <br>
>       si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level,<br>
>                                0, util_max_layer(&tex->buffer.b.b, 0),<br>
>                                true);<br>
>  }<br>
> <br>
>  void si_init_blit_functions(struct si_context *sctx)<br>
>  {<br>
>       sctx->b.resource_copy_region = si_resource_copy_region;<br>
> -     sctx->b.blit = si_blit;<br>
> -     sctx->b.flush_resource = si_flush_resource;<br>
> -     sctx->b.generate_mipmap = si_generate_mipmap;<br>
> +<br>
> +     if (sctx->has_graphics) {<br>
> +             sctx->b.blit = si_blit;<br>
> +             sctx->b.flush_resource = si_flush_resource;<br>
> +             sctx->b.generate_mipmap = si_generate_mipmap;<br>
> +     }<br>
>  }<br>
> diff --git a/src/gallium/drivers/radeonsi/si_clear.c<br>
> b/src/gallium/drivers/radeonsi/si_clear.c<br>
> index 9a00bb73b94..e1805f2a1c9 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_clear.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_clear.c<br>
> @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context <br>
> *pipe,<br>
>                       util_clear_render_target(pipe, sf, &color,<br>
>                                                box->x, box->y,<br>
>                                                box->width, box->height);<br>
>               }<br>
>       }<br>
>       pipe_surface_reference(&sf, NULL);<br>
>  }<br>
> <br>
>  void si_init_clear_functions(struct si_context *sctx)<br>
>  {<br>
> -     sctx->b.clear = si_clear;<br>
>       sctx->b.clear_render_target = si_clear_render_target;<br>
> -     sctx->b.clear_depth_stencil = si_clear_depth_stencil;<br>
>       sctx->b.clear_texture = si_clear_texture;<br>
> +<br>
> +     if (sctx->has_graphics) {<br>
> +             sctx->b.clear = si_clear;<br>
> +             sctx->b.clear_depth_stencil = si_clear_depth_stencil;<br>
> +     }<br>
>  }<br>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c<br>
> b/src/gallium/drivers/radeonsi/si_compute.c<br>
> index 1a62b3e0844..87addd53976 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_compute.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_compute.c<br>
> @@ -880,26 +880,28 @@ static void si_launch_grid(<br>
>               info->block[0] * info->block[1] * info->block[2] > 256;<br>
> <br>
>       if (cs_regalloc_hang)<br>
>               sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |<br>
>                                SI_CONTEXT_CS_PARTIAL_FLUSH;<br>
> <br>
>       if (program->ir_type != PIPE_SHADER_IR_NATIVE &&<br>
>           program->shader.compilation_failed)<br>
>               return;<br>
> <br>
> -     if (sctx->last_num_draw_calls != sctx->num_draw_calls) {<br>
> -             si_update_fb_dirtiness_after_rendering(sctx);<br>
> -             sctx->last_num_draw_calls = sctx->num_draw_calls;<br>
> -     }<br>
> +     if (sctx->has_graphics) {<br>
> +             if (sctx->last_num_draw_calls != sctx->num_draw_calls) {<br>
> +                     si_update_fb_dirtiness_after_rendering(sctx);<br>
> +                     sctx->last_num_draw_calls = sctx->num_draw_calls;<br>
> +             }<br>
> <br>
> -     si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);<br>
> +             si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);<br>
> +     }<br>
> <br>
>       /* Add buffer sizes for memory checking in need_cs_space. */<br>
>       si_context_add_resource_size(sctx, &program->shader.bo->b.b);<br>
>       /* TODO: add the scratch buffer */<br>
> <br>
>       if (info->indirect) {<br>
>               si_context_add_resource_size(sctx, info->indirect);<br>
> <br>
>               /* Indirect buffers use TC L2 on GFX9, but not older hw. */<br>
>               if (sctx->chip_class <= VI &&<br>
> @@ -917,21 +919,22 @@ static void si_launch_grid(<br>
>       if (sctx->flags)<br>
>               si_emit_cache_flush(sctx);<br>
> <br>
>       if (!si_switch_compute_shader(sctx, program, &program->shader,<br>
>                                       code_object, info->pc))<br>
>               return;<br>
> <br>
>       si_upload_compute_shader_descriptors(sctx);<br>
>       si_emit_compute_shader_pointers(sctx);<br>
> <br>
> -     if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {<br>
> +     if (sctx->has_graphics &&<br>
> +         si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {<br>
>               sctx->atoms.s.render_cond.emit(sctx);<br>
>               si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false);<br>
>       }<br>
> <br>
>       if ((program->input_size ||<br>
>              program->ir_type == PIPE_SHADER_IR_NATIVE) &&<br>
>             unlikely(!si_upload_compute_input(sctx, code_object, <br>
> info))) {<br>
>               return;<br>
>       }<br>
> <br>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c<br>
> b/src/gallium/drivers/radeonsi/si_descriptors.c<br>
> index 21d4ca946d3..0f22c55723c 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c<br>
> @@ -2640,22 +2640,24 @@ void<br>
> si_all_resident_buffers_begin_new_cs(struct si_context *sctx)<br>
> <br>
>       sctx->num_resident_handles += num_resident_tex_handles +<br>
>                                       num_resident_img_handles;<br>
>  }<br>
> <br>
>  /* INIT/DEINIT/UPLOAD */<br>
> <br>
>  void si_init_all_descriptors(struct si_context *sctx)<br>
>  {<br>
>       int i;<br>
> +     unsigned first_shader =<br>
> +             sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;<br>
> <br>
> -     for (i = 0; i < SI_NUM_SHADERS; i++) {<br>
> +     for (i = first_shader; i < SI_NUM_SHADERS; i++) {<br>
>               bool is_2nd = sctx->chip_class >= GFX9 &&<br>
>                                    (i == PIPE_SHADER_TESS_CTRL ||<br>
>                                     i == PIPE_SHADER_GEOMETRY);<br>
>               unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;<br>
>               unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + <br>
> SI_NUM_CONST_BUFFERS;<br>
>               int rel_dw_offset;<br>
>               struct si_descriptors *desc;<br>
> <br>
>               if (is_2nd) {<br>
>                       if (i == PIPE_SHADER_TESS_CTRL) {<br>
> @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct si_context <br>
> *sctx)<br>
>       si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,<br>
>                                    SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,<br>
>                                    1024);<br>
> <br>
>       sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);<br>
> <br>
>       /* Set pipe_context functions. */<br>
>       sctx->b.bind_sampler_states = si_bind_sampler_states;<br>
>       sctx->b.set_shader_images = si_set_shader_images;<br>
>       sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;<br>
> -     sctx->b.set_polygon_stipple = si_set_polygon_stipple;<br>
>       sctx->b.set_shader_buffers = si_set_shader_buffers;<br>
>       sctx->b.set_sampler_views = si_set_sampler_views;<br>
>       sctx->b.create_texture_handle = si_create_texture_handle;<br>
>       sctx->b.delete_texture_handle = si_delete_texture_handle;<br>
>       sctx->b.make_texture_handle_resident = <br>
> si_make_texture_handle_resident;<br>
>       sctx->b.create_image_handle = si_create_image_handle;<br>
>       sctx->b.delete_image_handle = si_delete_image_handle;<br>
>       sctx->b.make_image_handle_resident = si_make_image_handle_resident;<br>
> <br>
> +     if (!sctx->has_graphics)<br>
> +             return;<br>
> +<br>
> +     sctx->b.set_polygon_stipple = si_set_polygon_stipple;<br>
> +<br>
>       /* Shader user data. */<br>
>       sctx->atoms.s.shader_pointers.emit = <br>
> si_emit_graphics_shader_pointers;<br>
> <br>
>       /* Set default and immutable mappings. */<br>
>       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,<br>
> R_00B130_SPI_SHADER_USER_DATA_VS_0);<br>
> <br>
>       if (sctx->chip_class >= GFX9) {<br>
>               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,<br>
>                                     R_00B430_SPI_SHADER_USER_DATA_LS_0);<br>
>               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,<br>
> diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c<br>
> b/src/gallium/drivers/radeonsi/si_gfx_cs.c<br>
> index 3d64587fa2b..d0e7cf20b4c 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c<br>
> @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context *ctx,<br>
> unsigned flags,<br>
>        * This code is only needed when the driver flushes the GFX IB<br>
>        * internally, and it never asks for a fence handle.<br>
>        */<br>
>       if (radeon_emitted(ctx->dma_cs, 0)) {<br>
>               assert(fence == NULL); /* internal flushes only */<br>
>               si_flush_dma_cs(ctx, flags, NULL);<br>
>       }<br>
> <br>
>       ctx->gfx_flush_in_progress = true;<br>
> <br>
> -     if (!LIST_IS_EMPTY(&ctx->active_queries))<br>
> -             si_suspend_queries(ctx);<br>
> -<br>
> -     ctx->streamout.suspended = false;<br>
> -     if (ctx->streamout.begin_emitted) {<br>
> -             si_emit_streamout_end(ctx);<br>
> -             ctx->streamout.suspended = true;<br>
> +     if (ctx->has_graphics) {<br>
> +             if (!LIST_IS_EMPTY(&ctx->active_queries))<br>
> +                     si_suspend_queries(ctx);<br>
> +<br>
> +             ctx->streamout.suspended = false;<br>
> +             if (ctx->streamout.begin_emitted) {<br>
> +                     si_emit_streamout_end(ctx);<br>
> +                     ctx->streamout.suspended = true;<br>
> +             }<br>
>       }<br>
> <br>
>       /* Make sure CP DMA is idle at the end of IBs after L2 prefetches<br>
>        * because the kernel doesn't wait for it. */<br>
>       if (ctx->chip_class >= CIK)<br>
>               si_cp_dma_wait_for_idle(ctx);<br>
> <br>
>       /* Wait for draw calls to finish if needed. */<br>
>       if (wait_flags) {<br>
>               ctx->flags |= wait_flags;<br>
> @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context *ctx)<br>
>        * IB starts drawing.<br>
>        *<br>
>        * TODO: Do we also need to invalidate CB & DB caches?<br>
>        */<br>
>       ctx->flags |= SI_CONTEXT_INV_ICACHE |<br>
>                     SI_CONTEXT_INV_SMEM_L1 |<br>
>                     SI_CONTEXT_INV_VMEM_L1 |<br>
>                     SI_CONTEXT_INV_GLOBAL_L2 |<br>
>                     SI_CONTEXT_START_PIPELINE_STATS;<br>
> <br>
> +     ctx->cs_shader_state.initialized = false;<br>
> +     si_all_descriptors_begin_new_cs(ctx);<br>
> +     si_all_resident_buffers_begin_new_cs(ctx);<br>
> +<br>
> +     if (!ctx->has_graphics) {<br>
> +             ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;<br>
> +             return;<br>
> +     }<br>
> +<br>
>       /* set all valid group as dirty so they get reemited on<br>
>        * next draw command<br>
>        */<br>
>       si_pm4_reset_emitted(ctx);<br>
> <br>
>       /* The CS initialization should be emitted before everything else. */<br>
>       si_pm4_emit(ctx, ctx->init_config);<br>
>       if (ctx->init_config_gs_rings)<br>
>               si_pm4_emit(ctx, ctx->init_config_gs_rings);<br>
> <br>
> @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)<br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);<br>
>       if (ctx->chip_class >= GFX9)<br>
>               si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);<br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);<br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);<br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);<br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);<br>
>       /* CLEAR_STATE disables all window rectangles. */<br>
>       if (!has_clear_state || ctx->num_window_rectangles > 0)<br>
>               si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);<br>
> -     si_all_descriptors_begin_new_cs(ctx);<br>
> -     si_all_resident_buffers_begin_new_cs(ctx);<br>
> <br>
>       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;<br>
>       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;<br>
>       ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;<br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);<br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);<br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);<br>
> <br>
>       si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);<br>
>       if (ctx->scratch_buffer) {<br>
> @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)<br>
>       ctx->last_multi_vgt_param = -1;<br>
>       ctx->last_rast_prim = -1;<br>
>       ctx->last_sc_line_stipple = ~0;<br>
>       ctx->last_vs_state = ~0;<br>
>       ctx->last_ls = NULL;<br>
>       ctx->last_tcs = NULL;<br>
>       ctx->last_tes_sh_base = -1;<br>
>       ctx->last_num_tcs_input_cp = -1;<br>
>       ctx->last_ls_hs_config = -1; /* impossible value */<br>
> <br>
> -     ctx->cs_shader_state.initialized = false;<br>
> -<br>
>       if (has_clear_state) {<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = <br>
> 0x00000000;<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = <br>
> 0x00000000;<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = <br>
> 0x00000000;<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = <br>
> 0x00000000;<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0x00000000;<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = <br>
> 0x00000000;<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = <br>
> 0x00000000;<br>
>               ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = <br>
> 0x00000000;<br>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c<br>
> b/src/gallium/drivers/radeonsi/si_pipe.c<br>
> index 20767c806d2..c2ec664d5a4 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_pipe.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c<br>
> @@ -381,61 +381,56 @@ static struct pipe_context<br>
> *si_create_context(struct pipe_screen *screen,<br>
>  {<br>
>       struct si_context *sctx = CALLOC_STRUCT(si_context);<br>
>       struct si_screen* sscreen = (struct si_screen *)screen;<br>
>       struct radeon_winsys *ws = sscreen->ws;<br>
>       int shader, i;<br>
>       bool stop_exec_on_failure = (flags & <br>
> PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;<br>
> <br>
>       if (!sctx)<br>
>               return NULL;<br>
> <br>
> +     sctx->has_graphics = sscreen->info.chip_class == SI ||<br>
> +                          !(flags & PIPE_CONTEXT_COMPUTE_ONLY);<br>
> +<br>
>       if (flags & PIPE_CONTEXT_DEBUG)<br>
>               sscreen->record_llvm_ir = true; /* racy but not critical */<br>
> <br>
>       sctx->b.screen = screen; /* this must be set first */<br>
>       sctx->b.priv = NULL;<br>
>       sctx->b.destroy = si_destroy_context;<br>
> -     sctx->b.emit_string_marker = si_emit_string_marker;<br>
> -     sctx->b.set_debug_callback = si_set_debug_callback;<br>
> -     sctx->b.set_log_context = si_set_log_context;<br>
> -     sctx->b.set_context_param = si_set_context_param;<br>
>       sctx->screen = sscreen; /* Easy accessing of screen/winsys. */<br>
>       sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;<br>
> <br>
>       slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);<br>
>       slab_create_child(&sctx->pool_transfers_unsync, <br>
> &sscreen->pool_transfers);<br>
> <br>
>       sctx->ws = sscreen->ws;<br>
>       sctx->family = sscreen->info.family;<br>
>       sctx->chip_class = sscreen->info.chip_class;<br>
> <br>
>       if (sscreen->info.has_gpu_reset_counter_query) {<br>
>               sctx->gpu_reset_counter =<br>
>                       sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER);<br>
>       }<br>
> <br>
> -     sctx->b.get_device_reset_status = si_get_reset_status;<br>
> -     sctx->b.set_device_reset_callback = si_set_device_reset_callback;<br>
> -<br>
> -     si_init_context_texture_functions(sctx);<br>
> -     si_init_query_functions(sctx);<br>
> <br>
>       if (sctx->chip_class == CIK ||<br>
>           sctx->chip_class == VI ||<br>
>           sctx->chip_class == GFX9) {<br>
>               sctx->eop_bug_scratch = si_resource(<br>
>                       pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,<br>
>                                          16 * sscreen->info.num_render_backends));<br>
>               if (!sctx->eop_bug_scratch)<br>
>                       goto fail;<br>
>       }<br>
> <br>
> +     /* Initialize context allocators. */<br>
>       sctx->allocator_zeroed_memory =<br>
>               u_suballocator_create(&sctx->b, 128 * 1024,<br>
>                                     0, PIPE_USAGE_DEFAULT,<br>
>                                     SI_RESOURCE_FLAG_UNMAPPABLE |<br>
>                                     SI_RESOURCE_FLAG_CLEAR, false);<br>
>       if (!sctx->allocator_zeroed_memory)<br>
>               goto fail;<br>
> <br>
>       sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,<br>
>                                                   0, PIPE_USAGE_STREAM,<br>
> @@ -459,38 +454,22 @@ static struct pipe_context<br>
> *si_create_context(struct pipe_screen *screen,<br>
>       sctx->ctx = sctx->ws->ctx_create(sctx->ws);<br>
>       if (!sctx->ctx)<br>
>               goto fail;<br>
> <br>
>       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &<br>
> DBG(NO_ASYNC_DMA))) {<br>
>               sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,<br>
>                                                  (void*)si_flush_dma_cs,<br>
>                                                  sctx, stop_exec_on_failure);<br>
>       }<br>
> <br>
> -     si_init_buffer_functions(sctx);<br>
> -     si_init_clear_functions(sctx);<br>
> -     si_init_blit_functions(sctx);<br>
> -     si_init_compute_functions(sctx);<br>
> -     si_init_compute_blit_functions(sctx);<br>
> -     si_init_debug_functions(sctx);<br>
> -     si_init_msaa_functions(sctx);<br>
> -     si_init_streamout_functions(sctx);<br>
> -<br>
> -     if (sscreen->info.has_hw_decode) {<br>
> -             sctx->b.create_video_codec = si_uvd_create_decoder;<br>
> -             sctx->b.create_video_buffer = si_video_buffer_create;<br>
> -     } else {<br>
> -             sctx->b.create_video_codec = vl_create_decoder;<br>
> -             sctx->b.create_video_buffer = vl_video_buffer_create;<br>
> -     }<br>
> -<br>
> -     sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,<br>
> +     sctx->gfx_cs = ws->cs_create(sctx->ctx,<br>
> +                                  sctx->has_graphics ? RING_GFX : RING_COMPUTE,<br>
>                                    (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);<br>
> <br>
>       /* Border colors. */<br>
>       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *<br>
>                                         sizeof(*sctx->border_color_table));<br>
>       if (!sctx->border_color_table)<br>
>               goto fail;<br>
> <br>
>       sctx->border_color_buffer = si_resource(<br>
>               pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,<br>
> @@ -498,43 +477,76 @@ static struct pipe_context<br>
> *si_create_context(struct pipe_screen *screen,<br>
>                                  sizeof(*sctx->border_color_table)));<br>
>       if (!sctx->border_color_buffer)<br>
>               goto fail;<br>
> <br>
>       sctx->border_color_map =<br>
>               ws->buffer_map(sctx->border_color_buffer->buf,<br>
>                              NULL, PIPE_TRANSFER_WRITE);<br>
>       if (!sctx->border_color_map)<br>
>               goto fail;<br>
> <br>
> +     /* Initialize context functions used by graphics and compute. */<br>
> +     sctx->b.emit_string_marker = si_emit_string_marker;<br>
> +     sctx->b.set_debug_callback = si_set_debug_callback;<br>
> +     sctx->b.set_log_context = si_set_log_context;<br>
> +     sctx->b.set_context_param = si_set_context_param;<br>
> +     sctx->b.get_device_reset_status = si_get_reset_status;<br>
> +     sctx->b.set_device_reset_callback = si_set_device_reset_callback;<br>
> +     sctx->b.memory_barrier = si_memory_barrier;<br>
> +<br>
>       si_init_all_descriptors(sctx);<br>
> +     si_init_buffer_functions(sctx);<br>
> +     si_init_clear_functions(sctx);<br>
> +     si_init_blit_functions(sctx);<br>
> +     si_init_compute_functions(sctx);<br>
> +     si_init_compute_blit_functions(sctx);<br>
> +     si_init_debug_functions(sctx);<br>
>       si_init_fence_functions(sctx);<br>
> -     si_init_state_functions(sctx);<br>
> -     si_init_shader_functions(sctx);<br>
> -     si_init_viewport_functions(sctx);<br>
> -<br>
> -     if (sctx->chip_class >= CIK)<br>
> -             cik_init_sdma_functions(sctx);<br>
> -     else<br>
> -             si_init_dma_functions(sctx);<br>
> <br>
>       if (sscreen->debug_flags & DBG(FORCE_DMA))<br>
>               sctx->b.resource_copy_region = sctx->dma_copy;<br>
> <br>
> -     sctx->blitter = util_blitter_create(&sctx->b);<br>
> -     if (sctx->blitter == NULL)<br>
> -             goto fail;<br>
> -     sctx->blitter->skip_viewport_restore = true;<br>
> +     /* Initialize graphics-only context functions. */<br>
> +     if (sctx->has_graphics) {<br>
> +             si_init_context_texture_functions(sctx);<br>
> +             si_init_query_functions(sctx);<br>
> +             si_init_msaa_functions(sctx);<br>
> +             si_init_shader_functions(sctx);<br>
> +             si_init_state_functions(sctx);<br>
> +             si_init_streamout_functions(sctx);<br>
> +             si_init_viewport_functions(sctx);<br>
> +<br>
> +             sctx->blitter = util_blitter_create(&sctx->b);<br>
> +             if (sctx->blitter == NULL)<br>
> +                     goto fail;<br>
> +             sctx->blitter->skip_viewport_restore = true;<br>
> <br>
> -     si_init_draw_functions(sctx);<br>
> +             si_init_draw_functions(sctx);<br>
> +     }<br>
> +<br>
> +     /* Initialize SDMA functions. */<br>
> +     if (sctx->chip_class >= CIK)<br>
> +             cik_init_sdma_functions(sctx);<br>
> +     else<br>
> +             si_init_dma_functions(sctx);<br>
> <br>
>       sctx->sample_mask = 0xffff;<br>
> <br>
> +     /* Initialize multimedia functions. */<br>
> +     if (sscreen->info.has_hw_decode) {<br>
> +             sctx->b.create_video_codec = si_uvd_create_decoder;<br>
> +             sctx->b.create_video_buffer = si_video_buffer_create;<br>
> +     } else {<br>
> +             sctx->b.create_video_codec = vl_create_decoder;<br>
> +             sctx->b.create_video_buffer = vl_video_buffer_create;<br>
> +     }<br>
> +<br>
>       if (sctx->chip_class >= GFX9) {<br>
>               sctx->wait_mem_scratch = si_resource(<br>
>                       pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));<br>
>               if (!sctx->wait_mem_scratch)<br>
>                       goto fail;<br>
> <br>
>               /* Initialize the memory. */<br>
>               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,<br>
>                                V_370_MEM, V_370_ME, &sctx->wait_mem_number);<br>
>       }<br>
> @@ -544,21 +556,22 @@ static struct pipe_context<br>
> *si_create_context(struct pipe_screen *screen,<br>
>       if (sctx->chip_class == CIK) {<br>
>               sctx->null_const_buf.buffer =<br>
>                       pipe_aligned_buffer_create(screen,<br>
>                                                  SI_RESOURCE_FLAG_32BIT,<br>
>                                                  PIPE_USAGE_DEFAULT, 16,<br>
>                                                  sctx->screen->info.tcc_cache_line_size);<br>
>               if (!sctx->null_const_buf.buffer)<br>
>                       goto fail;<br>
>               sctx->null_const_buf.buffer_size = <br>
> sctx->null_const_buf.buffer->width0;<br>
> <br>
> -             for (shader = 0; shader < SI_NUM_SHADERS; shader++) {<br>
> +             unsigned start_shader = sctx->has_graphics ? 0 :  <br>
> PIPE_SHADER_COMPUTE;<br>
> +             for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {<br>
>                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {<br>
>                               sctx->b.set_constant_buffer(&sctx->b, shader, i,<br>
>                                                             &sctx->null_const_buf);<br>
>                       }<br>
>               }<br>
> <br>
>               si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,<br>
>                                &sctx->null_const_buf);<br>
>               si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,<br>
>                                &sctx->null_const_buf);<br>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h<br>
> b/src/gallium/drivers/radeonsi/si_pipe.h<br>
> index b01d5744752..348e8e5bd26 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_pipe.h<br>
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h<br>
> @@ -777,21 +777,21 @@ struct si_saved_cs {<br>
>  };<br>
> <br>
>  struct si_context {<br>
>       struct pipe_context             b; /* base class */<br>
> <br>
>       enum radeon_family              family;<br>
>       enum chip_class                 chip_class;<br>
> <br>
>       struct radeon_winsys            *ws;<br>
>       struct radeon_winsys_ctx        *ctx;<br>
> -     struct radeon_cmdbuf            *gfx_cs;<br>
> +     struct radeon_cmdbuf            *gfx_cs; /* compute IB if graphics is disabled <br>
> */<br>
>       struct radeon_cmdbuf            *dma_cs;<br>
>       struct pipe_fence_handle        *last_gfx_fence;<br>
>       struct pipe_fence_handle        *last_sdma_fence;<br>
>       struct si_resource              *eop_bug_scratch;<br>
>       struct u_upload_mgr             *cached_gtt_allocator;<br>
>       struct threaded_context         *tc;<br>
>       struct u_suballocator           *allocator_zeroed_memory;<br>
>       struct slab_child_pool          pool_transfers;<br>
>       struct slab_child_pool          pool_transfers_unsync; /* for <br>
> threaded_context */<br>
>       struct pipe_device_reset_callback device_reset_callback;<br>
> @@ -815,20 +815,21 @@ struct si_context {<br>
>       void                            *cs_clear_render_target;<br>
>       void                            *cs_clear_render_target_1d_array;<br>
>       struct si_screen                *screen;<br>
>       struct pipe_debug_callback      debug;<br>
>       struct ac_llvm_compiler         compiler; /* only non-threaded compilation <br>
> */<br>
>       struct si_shader_ctx_state      fixed_func_tcs_shader;<br>
>       struct si_resource              *wait_mem_scratch;<br>
>       unsigned                        wait_mem_number;<br>
>       uint16_t                        prefetch_L2_mask;<br>
> <br>
> +     bool                            has_graphics;<br>
>       bool                            gfx_flush_in_progress:1;<br>
>       bool                            gfx_last_ib_is_busy:1;<br>
>       bool                            compute_is_busy:1;<br>
> <br>
>       unsigned                        num_gfx_cs_flushes;<br>
>       unsigned                        initial_gfx_cs_size;<br>
>       unsigned                        gpu_reset_counter;<br>
>       unsigned                        last_dirty_tex_counter;<br>
>       unsigned                        last_compressed_colortex_counter;<br>
>       unsigned                        last_num_draw_calls;<br>
> diff --git a/src/gallium/drivers/radeonsi/si_state.c<br>
> b/src/gallium/drivers/radeonsi/si_state.c<br>
> index b49a1b3695e..458b108a7e3 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_state.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_state.c<br>
> @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct<br>
> pipe_context *ctx, unsigned flags)<br>
> <br>
>       si_update_fb_dirtiness_after_rendering(sctx);<br>
> <br>
>       /* Multisample surfaces are flushed in si_decompress_textures. */<br>
>       if (sctx->framebuffer.uncompressed_cb_mask)<br>
>               si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,<br>
>                                          sctx->framebuffer.CB_has_shader_readable_metadata);<br>
>  }<br>
> <br>
>  /* This only ensures coherency for shader image/buffer stores. */<br>
> -static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)<br>
> +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)<br>
>  {<br>
>       struct si_context *sctx = (struct si_context *)ctx;<br>
> <br>
>       /* Subsequent commands must wait for all shader invocations to<br>
>        * complete. */<br>
>       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |<br>
>                        SI_CONTEXT_CS_PARTIAL_FLUSH;<br>
> <br>
>       if (flags & PIPE_BARRIER_CONSTANT_BUFFER)<br>
>               sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |<br>
> @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context *sctx)<br>
>       sctx->b.sampler_view_destroy = si_sampler_view_destroy;<br>
> <br>
>       sctx->b.set_sample_mask = si_set_sample_mask;<br>
> <br>
>       sctx->b.create_vertex_elements_state = si_create_vertex_elements;<br>
>       sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;<br>
>       sctx->b.delete_vertex_elements_state = si_delete_vertex_element;<br>
>       sctx->b.set_vertex_buffers = si_set_vertex_buffers;<br>
> <br>
>       sctx->b.texture_barrier = si_texture_barrier;<br>
> -     sctx->b.memory_barrier = si_memory_barrier;<br>
>       sctx->b.set_min_samples = si_set_min_samples;<br>
>       sctx->b.set_tess_state = si_set_tess_state;<br>
> <br>
>       sctx->b.set_active_query_state = si_set_active_query_state;<br>
> <br>
>       si_init_config(sctx);<br>
>  }<br>
> <br>
>  void si_init_screen_state_functions(struct si_screen *sscreen)<br>
>  {<br>
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h<br>
> index 767e789276a..6faa4c511b1 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_state.h<br>
> +++ b/src/gallium/drivers/radeonsi/si_state.h<br>
> @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct si_context *sctx,<br>
>                                         struct si_shader_selector *sel);<br>
>  bool si_bindless_descriptor_can_reclaim_slab(void *priv,<br>
>                                            struct pb_slab_entry *entry);<br>
>  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,<br>
>                                                 unsigned entry_size,<br>
>                                                 unsigned group_index);<br>
>  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);<br>
>  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,<br>
>                     uint64_t old_va);<br>
>  /* si_state.c */<br>
> +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);<br>
>  void si_init_state_functions(struct si_context *sctx);<br>
>  void si_init_screen_state_functions(struct si_screen *sscreen);<br>
>  void<br>
>  si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,<br>
>                         enum pipe_format format,<br>
>                         unsigned offset, unsigned size,<br>
>                         uint32_t *state);<br>
>  void<br>
>  si_make_texture_descriptor(struct si_screen *screen,<br>
>                          struct si_texture *tex,<br>
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c<br>
> index 9c968e39c2c..2a514f144b9 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c<br>
> @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct si_context *sctx,<br>
>                                       S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));<br>
>               }<br>
>       }<br>
>  }<br>
> <br>
>  static void si_emit_surface_sync(struct si_context *sctx,<br>
>                                unsigned cp_coher_cntl)<br>
>  {<br>
>       struct radeon_cmdbuf *cs = sctx->gfx_cs;<br>
> <br>
> -     if (sctx->chip_class >= GFX9) {<br>
> +     if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {<br>
>               /* Flush caches and wait for the caches to assert idle. */<br>
>               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));<br>
>               radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */<br>
>               radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */<br>
>               radeon_emit(cs, 0xffffff);      /* CP_COHER_SIZE_HI */<br>
>               radeon_emit(cs, 0);             /* CP_COHER_BASE */<br>
>               radeon_emit(cs, 0);             /* CP_COHER_BASE_HI */<br>
>               radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */<br>
>       } else {<br>
>               /* ACQUIRE_MEM is only required on a compute ring. */<br>
> @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct si_context *sctx,<br>
>               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */<br>
>               radeon_emit(cs, 0);               /* CP_COHER_BASE */<br>
>               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */<br>
>       }<br>
>  }<br>
> <br>
>  void si_emit_cache_flush(struct si_context *sctx)<br>
>  {<br>
>       struct radeon_cmdbuf *cs = sctx->gfx_cs;<br>
>       uint32_t flags = sctx->flags;<br>
> +<br>
> +     if (!sctx->has_graphics) {<br>
> +             /* Only process compute flags. */<br>
> +             flags &= SI_CONTEXT_INV_ICACHE |<br>
> +                      SI_CONTEXT_INV_SMEM_L1 |<br>
> +                      SI_CONTEXT_INV_VMEM_L1 |<br>
> +                      SI_CONTEXT_INV_GLOBAL_L2 |<br>
> +                      SI_CONTEXT_WRITEBACK_GLOBAL_L2 |<br>
> +                      SI_CONTEXT_INV_L2_METADATA |<br>
> +                      SI_CONTEXT_CS_PARTIAL_FLUSH;<br>
> +     }<br>
> +<br>
>       uint32_t cp_coher_cntl = 0;<br>
>       uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |<br>
>                                       SI_CONTEXT_FLUSH_AND_INV_DB);<br>
> <br>
>       if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)<br>
>               sctx->num_cb_cache_flushes++;<br>
>       if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)<br>
>               sctx->num_db_cache_flushes++;<br>
> <br>
>       /* SI has a bug that it always flushes ICACHE and KCACHE if either<br>
> @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context *sctx)<br>
>                                 EOP_DATA_SEL_VALUE_32BIT,<br>
>                                 sctx->wait_mem_scratch, va,<br>
>                                 sctx->wait_mem_number, SI_NOT_QUERY);<br>
>               si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,<br>
>                              WAIT_REG_MEM_EQUAL);<br>
>       }<br>
> <br>
>       /* Make sure ME is idle (it executes most packets) before continuing.<br>
>        * This prevents read-after-write hazards between PFP and ME.<br>
>        */<br>
> -     if (cp_coher_cntl ||<br>
> -         (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |<br>
> -                         SI_CONTEXT_INV_VMEM_L1 |<br>
> -                         SI_CONTEXT_INV_GLOBAL_L2 |<br>
> -                         SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {<br>
> +     if (sctx->has_graphics &&<br>
> +         (cp_coher_cntl ||<br>
> +          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |<br>
> +                    SI_CONTEXT_INV_VMEM_L1 |<br>
> +                    SI_CONTEXT_INV_GLOBAL_L2 |<br>
> +                    SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {<br>
>               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));<br>
>               radeon_emit(cs, 0);<br>
>       }<br>
> <br>
>       /* SI-CI-VI only:<br>
>        *   When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC<br>
>        *   waits for idle, so it should be last. SURFACE_SYNC is done in PFP.<br>
>        *<br>
>        * cp_coher_cntl should contain all necessary flags except TC flags<br>
>        * at this point.<br>
> diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c<br>
> index a50088d2d8f..581f90a7b2f 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_texture.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_texture.c<br>
> @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct si_screen *sscreen,<br>
>   *   compressed tiled<br>
>   *<br>
>   * \param sctx  the current context if you have one, or sscreen->aux_context<br>
>   *              if you don't.<br>
>   */<br>
>  bool si_texture_disable_dcc(struct si_context *sctx,<br>
>                           struct si_texture *tex)<br>
>  {<br>
>       struct si_screen *sscreen = sctx->screen;<br>
> <br>
> +     if (!sctx->has_graphics)<br>
> +             return si_texture_discard_dcc(sscreen, tex);<br>
> +<br>
>       if (!si_can_disable_dcc(tex))<br>
>               return false;<br>
> <br>
>       if (&sctx->b == sscreen->aux_context)<br>
>               mtx_lock(&sscreen->aux_context_lock);<br>
> <br>
>       /* Decompress DCC. */<br>
>       si_decompress_dcc(sctx, tex);<br>
>       sctx->b.flush(&sctx->b, NULL, 0);<br>
</blockquote></div>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a></blockquote></div>