[Mesa-dev] [PATCH 1/2] radeonsi: always use compute rings for clover on CI and newer (v2)

Jan Vesely jan.vesely at rutgers.edu
Tue Feb 26 21:10:06 UTC 2019


Can you add a bit of background on why clover should or should not use
other rings?

I planned to test this, but my Raven system hasn't been able to run clover
since the kernel 4.20 release (BZ 109649), so I need to bisect that first.
Can this patch help address the soft lockup issue on CIK (BZ 108879)?
Presumably, it was tested using clover on CIK, right?

Jan

On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák <maraeo at gmail.com> wrote:

> I'll just push it.
>
> Marek
>
> On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <Dieter at nuetzel-hh.de>
> wrote:
>
>> Hello Marek,
>>
>> this series needs a rebase (if you have some time).
>>
>> Dieter
>>
>> Am 12.02.2019 19:12, schrieb Marek Olšák:
>> > From: Marek Olšák <marek.olsak at amd.com>
>> >
>> > initialize all non-compute context functions to NULL.
>> >
>> > v2: fix SI
>> > ---
>> >  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
>> >  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
>> >  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
>> >  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
>> >  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
>> >  src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
>> >  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
>> >  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
>> >  src/gallium/drivers/radeonsi/si_state.h       |  1 +
>> >  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
>> >  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
>> >  11 files changed, 130 insertions(+), 75 deletions(-)
>> >
>> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>> > b/src/gallium/drivers/radeonsi/si_blit.c
>> > index bb8d1cbd12d..f39cb5d143f 100644
>> > --- a/src/gallium/drivers/radeonsi/si_blit.c
>> > +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct
>> > pipe_context *ctx,
>> >
>> >               if (separate_dcc_dirty) {
>> >                       tex->separate_dcc_dirty = false;
>> >                       vi_separate_dcc_process_and_reset_stats(ctx, tex);
>> >               }
>> >       }
>> >  }
>> >
>> >  void si_decompress_dcc(struct si_context *sctx, struct si_texture
>> > *tex)
>> >  {
>> > -     if (!tex->dcc_offset)
>> > +     /* If graphics is disabled, we can't decompress DCC, but it
>> shouldn't
>> > +      * be compressed either. The caller should simply discard it.
>> > +      */
>> > +     if (!tex->dcc_offset || !sctx->has_graphics)
>> >               return;
>> >
>> >       si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level,
>> >                                0, util_max_layer(&tex->buffer.b.b, 0),
>> >                                true);
>> >  }
>> >
>> >  void si_init_blit_functions(struct si_context *sctx)
>> >  {
>> >       sctx->b.resource_copy_region = si_resource_copy_region;
>> > -     sctx->b.blit = si_blit;
>> > -     sctx->b.flush_resource = si_flush_resource;
>> > -     sctx->b.generate_mipmap = si_generate_mipmap;
>> > +
>> > +     if (sctx->has_graphics) {
>> > +             sctx->b.blit = si_blit;
>> > +             sctx->b.flush_resource = si_flush_resource;
>> > +             sctx->b.generate_mipmap = si_generate_mipmap;
>> > +     }
>> >  }
>> > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
>> > b/src/gallium/drivers/radeonsi/si_clear.c
>> > index 9a00bb73b94..e1805f2a1c9 100644
>> > --- a/src/gallium/drivers/radeonsi/si_clear.c
>> > +++ b/src/gallium/drivers/radeonsi/si_clear.c
>> > @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context
>> > *pipe,
>> >                       util_clear_render_target(pipe, sf, &color,
>> >                                                box->x, box->y,
>> >                                                box->width, box->height);
>> >               }
>> >       }
>> >       pipe_surface_reference(&sf, NULL);
>> >  }
>> >
>> >  void si_init_clear_functions(struct si_context *sctx)
>> >  {
>> > -     sctx->b.clear = si_clear;
>> >       sctx->b.clear_render_target = si_clear_render_target;
>> > -     sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>> >       sctx->b.clear_texture = si_clear_texture;
>> > +
>> > +     if (sctx->has_graphics) {
>> > +             sctx->b.clear = si_clear;
>> > +             sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>> > +     }
>> >  }
>> > diff --git a/src/gallium/drivers/radeonsi/si_compute.c
>> > b/src/gallium/drivers/radeonsi/si_compute.c
>> > index 1a62b3e0844..87addd53976 100644
>> > --- a/src/gallium/drivers/radeonsi/si_compute.c
>> > +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> > @@ -880,26 +880,28 @@ static void si_launch_grid(
>> >               info->block[0] * info->block[1] * info->block[2] > 256;
>> >
>> >       if (cs_regalloc_hang)
>> >               sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>> >                                SI_CONTEXT_CS_PARTIAL_FLUSH;
>> >
>> >       if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
>> >           program->shader.compilation_failed)
>> >               return;
>> >
>> > -     if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
>> > -             si_update_fb_dirtiness_after_rendering(sctx);
>> > -             sctx->last_num_draw_calls = sctx->num_draw_calls;
>> > -     }
>> > +     if (sctx->has_graphics) {
>> > +             if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
>> > +                     si_update_fb_dirtiness_after_rendering(sctx);
>> > +                     sctx->last_num_draw_calls = sctx->num_draw_calls;
>> > +             }
>> >
>> > -     si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>> > +             si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>> > +     }
>> >
>> >       /* Add buffer sizes for memory checking in need_cs_space. */
>> >       si_context_add_resource_size(sctx, &program->shader.bo->b.b);
>> >       /* TODO: add the scratch buffer */
>> >
>> >       if (info->indirect) {
>> >               si_context_add_resource_size(sctx, info->indirect);
>> >
>> >               /* Indirect buffers use TC L2 on GFX9, but not older hw.
>> */
>> >               if (sctx->chip_class <= VI &&
>> > @@ -917,21 +919,22 @@ static void si_launch_grid(
>> >       if (sctx->flags)
>> >               si_emit_cache_flush(sctx);
>> >
>> >       if (!si_switch_compute_shader(sctx, program, &program->shader,
>> >                                       code_object, info->pc))
>> >               return;
>> >
>> >       si_upload_compute_shader_descriptors(sctx);
>> >       si_emit_compute_shader_pointers(sctx);
>> >
>> > -     if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>> > +     if (sctx->has_graphics &&
>> > +         si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>> >               sctx->atoms.s.render_cond.emit(sctx);
>> >               si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond,
>> false);
>> >       }
>> >
>> >       if ((program->input_size ||
>> >              program->ir_type == PIPE_SHADER_IR_NATIVE) &&
>> >             unlikely(!si_upload_compute_input(sctx, code_object,
>> > info))) {
>> >               return;
>> >       }
>> >
>> > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
>> > b/src/gallium/drivers/radeonsi/si_descriptors.c
>> > index 21d4ca946d3..0f22c55723c 100644
>> > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> > @@ -2640,22 +2640,24 @@ void
>> > si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
>> >
>> >       sctx->num_resident_handles += num_resident_tex_handles +
>> >                                       num_resident_img_handles;
>> >  }
>> >
>> >  /* INIT/DEINIT/UPLOAD */
>> >
>> >  void si_init_all_descriptors(struct si_context *sctx)
>> >  {
>> >       int i;
>> > +     unsigned first_shader =
>> > +             sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
>> >
>> > -     for (i = 0; i < SI_NUM_SHADERS; i++) {
>> > +     for (i = first_shader; i < SI_NUM_SHADERS; i++) {
>> >               bool is_2nd = sctx->chip_class >= GFX9 &&
>> >                                    (i == PIPE_SHADER_TESS_CTRL ||
>> >                                     i == PIPE_SHADER_GEOMETRY);
>> >               unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
>> SI_NUM_SAMPLERS;
>> >               unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
>> > SI_NUM_CONST_BUFFERS;
>> >               int rel_dw_offset;
>> >               struct si_descriptors *desc;
>> >
>> >               if (is_2nd) {
>> >                       if (i == PIPE_SHADER_TESS_CTRL) {
>> > @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct si_context
>> > *sctx)
>> >       si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
>> >                                    SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
>> >                                    1024);
>> >
>> >       sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
>> >
>> >       /* Set pipe_context functions. */
>> >       sctx->b.bind_sampler_states = si_bind_sampler_states;
>> >       sctx->b.set_shader_images = si_set_shader_images;
>> >       sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
>> > -     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>> >       sctx->b.set_shader_buffers = si_set_shader_buffers;
>> >       sctx->b.set_sampler_views = si_set_sampler_views;
>> >       sctx->b.create_texture_handle = si_create_texture_handle;
>> >       sctx->b.delete_texture_handle = si_delete_texture_handle;
>> >       sctx->b.make_texture_handle_resident =
>> > si_make_texture_handle_resident;
>> >       sctx->b.create_image_handle = si_create_image_handle;
>> >       sctx->b.delete_image_handle = si_delete_image_handle;
>> >       sctx->b.make_image_handle_resident =
>> si_make_image_handle_resident;
>> >
>> > +     if (!sctx->has_graphics)
>> > +             return;
>> > +
>> > +     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>> > +
>> >       /* Shader user data. */
>> >       sctx->atoms.s.shader_pointers.emit =
>> > si_emit_graphics_shader_pointers;
>> >
>> >       /* Set default and immutable mappings. */
>> >       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
>> > R_00B130_SPI_SHADER_USER_DATA_VS_0);
>> >
>> >       if (sctx->chip_class >= GFX9) {
>> >               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
>> >                                     R_00B430_SPI_SHADER_USER_DATA_LS_0);
>> >               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
>> > diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
>> > b/src/gallium/drivers/radeonsi/si_gfx_cs.c
>> > index 3d64587fa2b..d0e7cf20b4c 100644
>> > --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
>> > +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
>> > @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context *ctx,
>> > unsigned flags,
>> >        * This code is only needed when the driver flushes the GFX IB
>> >        * internally, and it never asks for a fence handle.
>> >        */
>> >       if (radeon_emitted(ctx->dma_cs, 0)) {
>> >               assert(fence == NULL); /* internal flushes only */
>> >               si_flush_dma_cs(ctx, flags, NULL);
>> >       }
>> >
>> >       ctx->gfx_flush_in_progress = true;
>> >
>> > -     if (!LIST_IS_EMPTY(&ctx->active_queries))
>> > -             si_suspend_queries(ctx);
>> > -
>> > -     ctx->streamout.suspended = false;
>> > -     if (ctx->streamout.begin_emitted) {
>> > -             si_emit_streamout_end(ctx);
>> > -             ctx->streamout.suspended = true;
>> > +     if (ctx->has_graphics) {
>> > +             if (!LIST_IS_EMPTY(&ctx->active_queries))
>> > +                     si_suspend_queries(ctx);
>> > +
>> > +             ctx->streamout.suspended = false;
>> > +             if (ctx->streamout.begin_emitted) {
>> > +                     si_emit_streamout_end(ctx);
>> > +                     ctx->streamout.suspended = true;
>> > +             }
>> >       }
>> >
>> >       /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
>> >        * because the kernel doesn't wait for it. */
>> >       if (ctx->chip_class >= CIK)
>> >               si_cp_dma_wait_for_idle(ctx);
>> >
>> >       /* Wait for draw calls to finish if needed. */
>> >       if (wait_flags) {
>> >               ctx->flags |= wait_flags;
>> > @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>> >        * IB starts drawing.
>> >        *
>> >        * TODO: Do we also need to invalidate CB & DB caches?
>> >        */
>> >       ctx->flags |= SI_CONTEXT_INV_ICACHE |
>> >                     SI_CONTEXT_INV_SMEM_L1 |
>> >                     SI_CONTEXT_INV_VMEM_L1 |
>> >                     SI_CONTEXT_INV_GLOBAL_L2 |
>> >                     SI_CONTEXT_START_PIPELINE_STATS;
>> >
>> > +     ctx->cs_shader_state.initialized = false;
>> > +     si_all_descriptors_begin_new_cs(ctx);
>> > +     si_all_resident_buffers_begin_new_cs(ctx);
>> > +
>> > +     if (!ctx->has_graphics) {
>> > +             ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
>> > +             return;
>> > +     }
>> > +
>> >       /* set all valid group as dirty so they get reemited on
>> >        * next draw command
>> >        */
>> >       si_pm4_reset_emitted(ctx);
>> >
>> >       /* The CS initialization should be emitted before everything
>> else. */
>> >       si_pm4_emit(ctx, ctx->init_config);
>> >       if (ctx->init_config_gs_rings)
>> >               si_pm4_emit(ctx, ctx->init_config_gs_rings);
>> >
>> > @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
>> >       if (ctx->chip_class >= GFX9)
>> >               si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
>> >       /* CLEAR_STATE disables all window rectangles. */
>> >       if (!has_clear_state || ctx->num_window_rectangles > 0)
>> >               si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
>> > -     si_all_descriptors_begin_new_cs(ctx);
>> > -     si_all_resident_buffers_begin_new_cs(ctx);
>> >
>> >       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>> >       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>> >       ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) -
>> 1;
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
>> >
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
>> >       if (ctx->scratch_buffer) {
>> > @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>> >       ctx->last_multi_vgt_param = -1;
>> >       ctx->last_rast_prim = -1;
>> >       ctx->last_sc_line_stipple = ~0;
>> >       ctx->last_vs_state = ~0;
>> >       ctx->last_ls = NULL;
>> >       ctx->last_tcs = NULL;
>> >       ctx->last_tes_sh_base = -1;
>> >       ctx->last_num_tcs_input_cp = -1;
>> >       ctx->last_ls_hs_config = -1; /* impossible value */
>> >
>> > -     ctx->cs_shader_state.initialized = false;
>> > -
>> >       if (has_clear_state) {
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL]
>> =
>> > 0x00000000;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL]
>> =
>> > 0x00000000;
>> >
>>  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] =
>> > 0x00000000;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL]
>> =
>> > 0x00000000;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] =
>> 0xffffffff;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] =
>> 0x00000000;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT]
>> =
>> > 0x00000000;
>> >
>>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] =
>> > 0x00000000;
>> >
>>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] =
>> > 0x00000000;
>> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>> > b/src/gallium/drivers/radeonsi/si_pipe.c
>> > index 20767c806d2..c2ec664d5a4 100644
>> > --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> > +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> > @@ -381,61 +381,56 @@ static struct pipe_context
>> > *si_create_context(struct pipe_screen *screen,
>> >  {
>> >       struct si_context *sctx = CALLOC_STRUCT(si_context);
>> >       struct si_screen* sscreen = (struct si_screen *)screen;
>> >       struct radeon_winsys *ws = sscreen->ws;
>> >       int shader, i;
>> >       bool stop_exec_on_failure = (flags &
>> > PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
>> >
>> >       if (!sctx)
>> >               return NULL;
>> >
>> > +     sctx->has_graphics = sscreen->info.chip_class == SI ||
>> > +                          !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
>> > +
>> >       if (flags & PIPE_CONTEXT_DEBUG)
>> >               sscreen->record_llvm_ir = true; /* racy but not critical
>> */
>> >
>> >       sctx->b.screen = screen; /* this must be set first */
>> >       sctx->b.priv = NULL;
>> >       sctx->b.destroy = si_destroy_context;
>> > -     sctx->b.emit_string_marker = si_emit_string_marker;
>> > -     sctx->b.set_debug_callback = si_set_debug_callback;
>> > -     sctx->b.set_log_context = si_set_log_context;
>> > -     sctx->b.set_context_param = si_set_context_param;
>> >       sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
>> >       sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
>> >
>> >       slab_create_child(&sctx->pool_transfers,
>> &sscreen->pool_transfers);
>> >       slab_create_child(&sctx->pool_transfers_unsync,
>> > &sscreen->pool_transfers);
>> >
>> >       sctx->ws = sscreen->ws;
>> >       sctx->family = sscreen->info.family;
>> >       sctx->chip_class = sscreen->info.chip_class;
>> >
>> >       if (sscreen->info.has_gpu_reset_counter_query) {
>> >               sctx->gpu_reset_counter =
>> >                       sctx->ws->query_value(sctx->ws,
>> RADEON_GPU_RESET_COUNTER);
>> >       }
>> >
>> > -     sctx->b.get_device_reset_status = si_get_reset_status;
>> > -     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
>> > -
>> > -     si_init_context_texture_functions(sctx);
>> > -     si_init_query_functions(sctx);
>> >
>> >       if (sctx->chip_class == CIK ||
>> >           sctx->chip_class == VI ||
>> >           sctx->chip_class == GFX9) {
>> >               sctx->eop_bug_scratch = si_resource(
>> >                       pipe_buffer_create(&sscreen->b, 0,
>> PIPE_USAGE_DEFAULT,
>> >                                          16 *
>> sscreen->info.num_render_backends));
>> >               if (!sctx->eop_bug_scratch)
>> >                       goto fail;
>> >       }
>> >
>> > +     /* Initialize context allocators. */
>> >       sctx->allocator_zeroed_memory =
>> >               u_suballocator_create(&sctx->b, 128 * 1024,
>> >                                     0, PIPE_USAGE_DEFAULT,
>> >                                     SI_RESOURCE_FLAG_UNMAPPABLE |
>> >                                     SI_RESOURCE_FLAG_CLEAR, false);
>> >       if (!sctx->allocator_zeroed_memory)
>> >               goto fail;
>> >
>> >       sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
>> >                                                   0, PIPE_USAGE_STREAM,
>> > @@ -459,38 +454,22 @@ static struct pipe_context
>> > *si_create_context(struct pipe_screen *screen,
>> >       sctx->ctx = sctx->ws->ctx_create(sctx->ws);
>> >       if (!sctx->ctx)
>> >               goto fail;
>> >
>> >       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &
>> > DBG(NO_ASYNC_DMA))) {
>> >               sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
>> >                                                  (void*)si_flush_dma_cs,
>> >                                                  sctx,
>> stop_exec_on_failure);
>> >       }
>> >
>> > -     si_init_buffer_functions(sctx);
>> > -     si_init_clear_functions(sctx);
>> > -     si_init_blit_functions(sctx);
>> > -     si_init_compute_functions(sctx);
>> > -     si_init_compute_blit_functions(sctx);
>> > -     si_init_debug_functions(sctx);
>> > -     si_init_msaa_functions(sctx);
>> > -     si_init_streamout_functions(sctx);
>> > -
>> > -     if (sscreen->info.has_hw_decode) {
>> > -             sctx->b.create_video_codec = si_uvd_create_decoder;
>> > -             sctx->b.create_video_buffer = si_video_buffer_create;
>> > -     } else {
>> > -             sctx->b.create_video_codec = vl_create_decoder;
>> > -             sctx->b.create_video_buffer = vl_video_buffer_create;
>> > -     }
>> > -
>> > -     sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
>> > +     sctx->gfx_cs = ws->cs_create(sctx->ctx,
>> > +                                  sctx->has_graphics ? RING_GFX :
>> RING_COMPUTE,
>> >                                    (void*)si_flush_gfx_cs, sctx,
>> stop_exec_on_failure);
>> >
>> >       /* Border colors. */
>> >       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
>> >
>>  sizeof(*sctx->border_color_table));
>> >       if (!sctx->border_color_table)
>> >               goto fail;
>> >
>> >       sctx->border_color_buffer = si_resource(
>> >               pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
>> > @@ -498,43 +477,76 @@ static struct pipe_context
>> > *si_create_context(struct pipe_screen *screen,
>> >                                  sizeof(*sctx->border_color_table)));
>> >       if (!sctx->border_color_buffer)
>> >               goto fail;
>> >
>> >       sctx->border_color_map =
>> >               ws->buffer_map(sctx->border_color_buffer->buf,
>> >                              NULL, PIPE_TRANSFER_WRITE);
>> >       if (!sctx->border_color_map)
>> >               goto fail;
>> >
>> > +     /* Initialize context functions used by graphics and compute. */
>> > +     sctx->b.emit_string_marker = si_emit_string_marker;
>> > +     sctx->b.set_debug_callback = si_set_debug_callback;
>> > +     sctx->b.set_log_context = si_set_log_context;
>> > +     sctx->b.set_context_param = si_set_context_param;
>> > +     sctx->b.get_device_reset_status = si_get_reset_status;
>> > +     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
>> > +     sctx->b.memory_barrier = si_memory_barrier;
>> > +
>> >       si_init_all_descriptors(sctx);
>> > +     si_init_buffer_functions(sctx);
>> > +     si_init_clear_functions(sctx);
>> > +     si_init_blit_functions(sctx);
>> > +     si_init_compute_functions(sctx);
>> > +     si_init_compute_blit_functions(sctx);
>> > +     si_init_debug_functions(sctx);
>> >       si_init_fence_functions(sctx);
>> > -     si_init_state_functions(sctx);
>> > -     si_init_shader_functions(sctx);
>> > -     si_init_viewport_functions(sctx);
>> > -
>> > -     if (sctx->chip_class >= CIK)
>> > -             cik_init_sdma_functions(sctx);
>> > -     else
>> > -             si_init_dma_functions(sctx);
>> >
>> >       if (sscreen->debug_flags & DBG(FORCE_DMA))
>> >               sctx->b.resource_copy_region = sctx->dma_copy;
>> >
>> > -     sctx->blitter = util_blitter_create(&sctx->b);
>> > -     if (sctx->blitter == NULL)
>> > -             goto fail;
>> > -     sctx->blitter->skip_viewport_restore = true;
>> > +     /* Initialize graphics-only context functions. */
>> > +     if (sctx->has_graphics) {
>> > +             si_init_context_texture_functions(sctx);
>> > +             si_init_query_functions(sctx);
>> > +             si_init_msaa_functions(sctx);
>> > +             si_init_shader_functions(sctx);
>> > +             si_init_state_functions(sctx);
>> > +             si_init_streamout_functions(sctx);
>> > +             si_init_viewport_functions(sctx);
>> > +
>> > +             sctx->blitter = util_blitter_create(&sctx->b);
>> > +             if (sctx->blitter == NULL)
>> > +                     goto fail;
>> > +             sctx->blitter->skip_viewport_restore = true;
>> >
>> > -     si_init_draw_functions(sctx);
>> > +             si_init_draw_functions(sctx);
>> > +     }
>> > +
>> > +     /* Initialize SDMA functions. */
>> > +     if (sctx->chip_class >= CIK)
>> > +             cik_init_sdma_functions(sctx);
>> > +     else
>> > +             si_init_dma_functions(sctx);
>> >
>> >       sctx->sample_mask = 0xffff;
>> >
>> > +     /* Initialize multimedia functions. */
>> > +     if (sscreen->info.has_hw_decode) {
>> > +             sctx->b.create_video_codec = si_uvd_create_decoder;
>> > +             sctx->b.create_video_buffer = si_video_buffer_create;
>> > +     } else {
>> > +             sctx->b.create_video_codec = vl_create_decoder;
>> > +             sctx->b.create_video_buffer = vl_video_buffer_create;
>> > +     }
>> > +
>> >       if (sctx->chip_class >= GFX9) {
>> >               sctx->wait_mem_scratch = si_resource(
>> >                       pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
>> 4));
>> >               if (!sctx->wait_mem_scratch)
>> >                       goto fail;
>> >
>> >               /* Initialize the memory. */
>> >               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
>> >                                V_370_MEM, V_370_ME,
>> &sctx->wait_mem_number);
>> >       }
>> > @@ -544,21 +556,22 @@ static struct pipe_context
>> > *si_create_context(struct pipe_screen *screen,
>> >       if (sctx->chip_class == CIK) {
>> >               sctx->null_const_buf.buffer =
>> >                       pipe_aligned_buffer_create(screen,
>> >                                                  SI_RESOURCE_FLAG_32BIT,
>> >                                                  PIPE_USAGE_DEFAULT, 16,
>> >
>> sctx->screen->info.tcc_cache_line_size);
>> >               if (!sctx->null_const_buf.buffer)
>> >                       goto fail;
>> >               sctx->null_const_buf.buffer_size =
>> > sctx->null_const_buf.buffer->width0;
>> >
>> > -             for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
>> > +             unsigned start_shader = sctx->has_graphics ? 0 :
>> > PIPE_SHADER_COMPUTE;
>> > +             for (shader = start_shader; shader < SI_NUM_SHADERS;
>> shader++) {
>> >                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
>> >                               sctx->b.set_constant_buffer(&sctx->b,
>> shader, i,
>> >
>>  &sctx->null_const_buf);
>> >                       }
>> >               }
>> >
>> >               si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
>> >                                &sctx->null_const_buf);
>> >               si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
>> >                                &sctx->null_const_buf);
>> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>> > b/src/gallium/drivers/radeonsi/si_pipe.h
>> > index b01d5744752..348e8e5bd26 100644
>> > --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> > @@ -777,21 +777,21 @@ struct si_saved_cs {
>> >  };
>> >
>> >  struct si_context {
>> >       struct pipe_context             b; /* base class */
>> >
>> >       enum radeon_family              family;
>> >       enum chip_class                 chip_class;
>> >
>> >       struct radeon_winsys            *ws;
>> >       struct radeon_winsys_ctx        *ctx;
>> > -     struct radeon_cmdbuf            *gfx_cs;
>> > +     struct radeon_cmdbuf            *gfx_cs; /* compute IB if
>> graphics is disabled
>> > */
>> >       struct radeon_cmdbuf            *dma_cs;
>> >       struct pipe_fence_handle        *last_gfx_fence;
>> >       struct pipe_fence_handle        *last_sdma_fence;
>> >       struct si_resource              *eop_bug_scratch;
>> >       struct u_upload_mgr             *cached_gtt_allocator;
>> >       struct threaded_context         *tc;
>> >       struct u_suballocator           *allocator_zeroed_memory;
>> >       struct slab_child_pool          pool_transfers;
>> >       struct slab_child_pool          pool_transfers_unsync; /* for
>> > threaded_context */
>> >       struct pipe_device_reset_callback device_reset_callback;
>> > @@ -815,20 +815,21 @@ struct si_context {
>> >       void                            *cs_clear_render_target;
>> >       void                            *cs_clear_render_target_1d_array;
>> >       struct si_screen                *screen;
>> >       struct pipe_debug_callback      debug;
>> >       struct ac_llvm_compiler         compiler; /* only non-threaded
>> compilation
>> > */
>> >       struct si_shader_ctx_state      fixed_func_tcs_shader;
>> >       struct si_resource              *wait_mem_scratch;
>> >       unsigned                        wait_mem_number;
>> >       uint16_t                        prefetch_L2_mask;
>> >
>> > +     bool                            has_graphics;
>> >       bool                            gfx_flush_in_progress:1;
>> >       bool                            gfx_last_ib_is_busy:1;
>> >       bool                            compute_is_busy:1;
>> >
>> >       unsigned                        num_gfx_cs_flushes;
>> >       unsigned                        initial_gfx_cs_size;
>> >       unsigned                        gpu_reset_counter;
>> >       unsigned                        last_dirty_tex_counter;
>> >       unsigned                        last_compressed_colortex_counter;
>> >       unsigned                        last_num_draw_calls;
>> > diff --git a/src/gallium/drivers/radeonsi/si_state.c
>> > b/src/gallium/drivers/radeonsi/si_state.c
>> > index b49a1b3695e..458b108a7e3 100644
>> > --- a/src/gallium/drivers/radeonsi/si_state.c
>> > +++ b/src/gallium/drivers/radeonsi/si_state.c
>> > @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
>> > pipe_context *ctx, unsigned flags)
>> >
>> >       si_update_fb_dirtiness_after_rendering(sctx);
>> >
>> >       /* Multisample surfaces are flushed in si_decompress_textures. */
>> >       if (sctx->framebuffer.uncompressed_cb_mask)
>> >               si_make_CB_shader_coherent(sctx,
>> sctx->framebuffer.nr_samples,
>> >
>> sctx->framebuffer.CB_has_shader_readable_metadata);
>> >  }
>> >
>> >  /* This only ensures coherency for shader image/buffer stores. */
>> > -static void si_memory_barrier(struct pipe_context *ctx, unsigned
>> > flags)
>> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>> >  {
>> >       struct si_context *sctx = (struct si_context *)ctx;
>> >
>> >       /* Subsequent commands must wait for all shader invocations to
>> >        * complete. */
>> >       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>> >                        SI_CONTEXT_CS_PARTIAL_FLUSH;
>> >
>> >       if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
>> >               sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
>> > @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context
>> > *sctx)
>> >       sctx->b.sampler_view_destroy = si_sampler_view_destroy;
>> >
>> >       sctx->b.set_sample_mask = si_set_sample_mask;
>> >
>> >       sctx->b.create_vertex_elements_state = si_create_vertex_elements;
>> >       sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
>> >       sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
>> >       sctx->b.set_vertex_buffers = si_set_vertex_buffers;
>> >
>> >       sctx->b.texture_barrier = si_texture_barrier;
>> > -     sctx->b.memory_barrier = si_memory_barrier;
>> >       sctx->b.set_min_samples = si_set_min_samples;
>> >       sctx->b.set_tess_state = si_set_tess_state;
>> >
>> >       sctx->b.set_active_query_state = si_set_active_query_state;
>> >
>> >       si_init_config(sctx);
>> >  }
>> >
>> >  void si_init_screen_state_functions(struct si_screen *sscreen)
>> >  {
>> > diff --git a/src/gallium/drivers/radeonsi/si_state.h
>> > b/src/gallium/drivers/radeonsi/si_state.h
>> > index 767e789276a..6faa4c511b1 100644
>> > --- a/src/gallium/drivers/radeonsi/si_state.h
>> > +++ b/src/gallium/drivers/radeonsi/si_state.h
>> > @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct
>> > si_context *sctx,
>> >                                         struct si_shader_selector *sel);
>> >  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
>> >                                            struct pb_slab_entry *entry);
>> >  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned
>> > heap,
>> >                                                 unsigned entry_size,
>> >                                                 unsigned group_index);
>> >  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab
>> > *pslab);
>> >  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource
>> > *buf,
>> >                     uint64_t old_va);
>> >  /* si_state.c */
>> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
>> >  void si_init_state_functions(struct si_context *sctx);
>> >  void si_init_screen_state_functions(struct si_screen *sscreen);
>> >  void
>> >  si_make_buffer_descriptor(struct si_screen *screen, struct si_resource
>> > *buf,
>> >                         enum pipe_format format,
>> >                         unsigned offset, unsigned size,
>> >                         uint32_t *state);
>> >  void
>> >  si_make_texture_descriptor(struct si_screen *screen,
>> >                          struct si_texture *tex,
>> > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>> > b/src/gallium/drivers/radeonsi/si_state_draw.c
>> > index 9c968e39c2c..2a514f144b9 100644
>> > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>> > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>> > @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct
>> > si_context *sctx,
>> >
>>  S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
>> >               }
>> >       }
>> >  }
>> >
>> >  static void si_emit_surface_sync(struct si_context *sctx,
>> >                                unsigned cp_coher_cntl)
>> >  {
>> >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
>> >
>> > -     if (sctx->chip_class >= GFX9) {
>> > +     if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
>> >               /* Flush caches and wait for the caches to assert idle. */
>> >               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
>> >               radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
>> >               radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
>> >               radeon_emit(cs, 0xffffff);      /* CP_COHER_SIZE_HI */
>> >               radeon_emit(cs, 0);             /* CP_COHER_BASE */
>> >               radeon_emit(cs, 0);             /* CP_COHER_BASE_HI */
>> >               radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
>> >       } else {
>> >               /* ACQUIRE_MEM is only required on a compute ring. */
>> > @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct
>> > si_context *sctx,
>> >               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
>> >               radeon_emit(cs, 0);               /* CP_COHER_BASE */
>> >               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
>> >       }
>> >  }
>> >
>> >  void si_emit_cache_flush(struct si_context *sctx)
>> >  {
>> >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
>> >       uint32_t flags = sctx->flags;
>> > +
>> > +     if (!sctx->has_graphics) {
>> > +             /* Only process compute flags. */
>> > +             flags &= SI_CONTEXT_INV_ICACHE |
>> > +                      SI_CONTEXT_INV_SMEM_L1 |
>> > +                      SI_CONTEXT_INV_VMEM_L1 |
>> > +                      SI_CONTEXT_INV_GLOBAL_L2 |
>> > +                      SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
>> > +                      SI_CONTEXT_INV_L2_METADATA |
>> > +                      SI_CONTEXT_CS_PARTIAL_FLUSH;
>> > +     }
>> > +
>> >       uint32_t cp_coher_cntl = 0;
>> >       uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
>> >                                       SI_CONTEXT_FLUSH_AND_INV_DB);
>> >
>> >       if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
>> >               sctx->num_cb_cache_flushes++;
>> >       if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
>> >               sctx->num_db_cache_flushes++;
>> >
>> >       /* SI has a bug that it always flushes ICACHE and KCACHE if either
>> > @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context
>> > *sctx)
>> >                                 EOP_DATA_SEL_VALUE_32BIT,
>> >                                 sctx->wait_mem_scratch, va,
>> >                                 sctx->wait_mem_number, SI_NOT_QUERY);
>> >               si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,
>> >                              WAIT_REG_MEM_EQUAL);
>> >       }
>> >
>> >       /* Make sure ME is idle (it executes most packets) before continuing.
>> >        * This prevents read-after-write hazards between PFP and ME.
>> >        */
>> > -     if (cp_coher_cntl ||
>> > -         (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>> > -                         SI_CONTEXT_INV_VMEM_L1 |
>> > -                         SI_CONTEXT_INV_GLOBAL_L2 |
>> > -                         SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
>> > +     if (sctx->has_graphics &&
>> > +         (cp_coher_cntl ||
>> > +          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>> > +                    SI_CONTEXT_INV_VMEM_L1 |
>> > +                    SI_CONTEXT_INV_GLOBAL_L2 |
>> > +                    SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
>> >               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>> >               radeon_emit(cs, 0);
>> >       }
>> >
>> >       /* SI-CI-VI only:
>> >        *   When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC
>> >        *   waits for idle, so it should be last. SURFACE_SYNC is done in PFP.
>> >        *
>> >        * cp_coher_cntl should contain all necessary flags except TC flags
>> >        * at this point.
>> > diff --git a/src/gallium/drivers/radeonsi/si_texture.c
>> > b/src/gallium/drivers/radeonsi/si_texture.c
>> > index a50088d2d8f..581f90a7b2f 100644
>> > --- a/src/gallium/drivers/radeonsi/si_texture.c
>> > +++ b/src/gallium/drivers/radeonsi/si_texture.c
>> > @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
>> > si_screen *sscreen,
>> >   *   compressed tiled
>> >   *
>> >   * \param sctx  the current context if you have one, or sscreen->aux_context
>> >   *              if you don't.
>> >   */
>> >  bool si_texture_disable_dcc(struct si_context *sctx,
>> >                           struct si_texture *tex)
>> >  {
>> >       struct si_screen *sscreen = sctx->screen;
>> >
>> > +     if (!sctx->has_graphics)
>> > +             return si_texture_discard_dcc(sscreen, tex);
>> > +
>> >       if (!si_can_disable_dcc(tex))
>> >               return false;
>> >
>> >       if (&sctx->b == sscreen->aux_context)
>> >               mtx_lock(&sscreen->aux_context_lock);
>> >
>> >       /* Decompress DCC. */
>> >       si_decompress_dcc(sctx, tex);
>> >       sctx->b.flush(&sctx->b, NULL, 0);
>>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190226/ef55f041/attachment-0001.html>


More information about the mesa-dev mailing list