[Mesa-dev] [PATCH 1/2] radeonsi: Lock a mutex when checking scratch relocations.
Nicolai Hähnle
nhaehnle at gmail.com
Fri Apr 22 04:41:55 UTC 2016
On 21.04.2016 13:51, Bas Nieuwenhuizen wrote:
> We can use shaders from multiple contexts, and they were not
> otherwise locked yet.
Ouch... I guess this is why compute scratch buffers used to be per-program?
I'm still trying to wrap my head around the possible code paths here...
are you sure that nothing can go wrong with the
radeon_add_to_buffer_list of the scratch buffer? I'm thinking a code
sequence like
Context 1 checks that sctx(1)->scratch_buffer == shader->scratch_bo.
Context 2 allocates a new scratch buffer sctx(2)->scratch_buffer and
uploads a modified shader.
Context 1 adds the old sctx(1)->scratch_buffer to its buffer list and
submits its CS. It now runs with the new shader, and may cause a VM fault.
More generally, what if two contexts use the same shader with different
scratch buffers? Do we just ping-pong, re-uploading the shader each time?
Bonus question: Do we even need per-context scratch buffers? As long as
we idle all shaders at CS flush, we should be fine with a
per-device/per-screen scratch buffer.
(Curiously, there is an old "this is probably not needed anymore"
comment on the PS_PARTIAL_FLUSH in si_context_gfx_flush, but this may be
wrong: since shaders can write memory, and the kernel may want to swap
buffers around, we have to be extra careful about this...)
This needs a bit more thought...
Cheers,
Nicolai
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
> src/gallium/drivers/radeonsi/si_compute.c | 20 ++++++++++++++++----
> src/gallium/drivers/radeonsi/si_state_shaders.c | 12 ++++++++++--
> 2 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 7e05be5..a99a985 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -37,6 +37,7 @@
> #define MAX_GLOBAL_BUFFERS 20
>
> struct si_compute {
> + pipe_mutex mutex;
> unsigned ir_type;
> unsigned local_size;
> unsigned private_size;
> @@ -116,6 +117,7 @@ static void *si_create_compute_state(
> si_shader_binary_upload(sctx->screen, &program->shader);
> }
>
> + pipe_mutex_init(program->mutex);
> return program;
> }
>
> @@ -196,15 +198,19 @@ static void si_initialize_compute(struct si_context *sctx)
> }
>
> static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
> - struct si_shader *shader,
> + struct si_compute *program,
> struct si_shader_config *config)
> {
> + struct si_shader *shader = &program->shader;
> uint64_t scratch_bo_size, scratch_needed;
> scratch_bo_size = 0;
> scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;
> if (sctx->compute_scratch_buffer)
> scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
>
> + if (!scratch_needed)
> + return true;
> +
> if (scratch_bo_size < scratch_needed) {
> pipe_resource_reference(
> (struct pipe_resource**)&sctx->compute_scratch_buffer,
> @@ -218,18 +224,23 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
> return false;
> }
>
> - if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
> + pipe_mutex_lock(program->mutex);
> +
> + if (sctx->compute_scratch_buffer != shader->scratch_bo) {
> uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
>
> si_shader_apply_scratch_relocs(sctx, shader, config, scratch_va);
>
> - if (si_shader_binary_upload(sctx->screen, shader))
> + if (si_shader_binary_upload(sctx->screen, shader)) {
> + pipe_mutex_unlock(program->mutex);
> return false;
> + }
>
> r600_resource_reference(&shader->scratch_bo,
> sctx->compute_scratch_buffer);
> }
>
> + pipe_mutex_unlock(program->mutex);
> return true;
> }
>
> @@ -272,7 +283,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
> config->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
> }
>
> - if (!si_setup_compute_scratch_buffer(sctx, shader, config))
> + if (!si_setup_compute_scratch_buffer(sctx, program, config))
> return false;
>
> if (shader->scratch_bo) {
> @@ -503,6 +514,7 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
> sctx->cs_shader_state.emitted_program = NULL;
>
> si_shader_destroy(&program->shader);
> + pipe_mutex_destroy(program->mutex);
> FREE(program);
> }
>
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 49e688a..382481c 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -1618,6 +1618,7 @@ static int si_update_scratch_buffer(struct si_context *sctx,
> struct si_shader *shader)
> {
> uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
> + struct si_shader_selector *sel = shader->selector;
> int r;
>
> if (!shader)
> @@ -1627,10 +1628,14 @@ static int si_update_scratch_buffer(struct si_context *sctx,
> if (shader->config.scratch_bytes_per_wave == 0)
> return 0;
>
> + pipe_mutex_lock(sel->mutex);
> +
> /* This shader is already configured to use the current
> * scratch buffer. */
> - if (shader->scratch_bo == sctx->scratch_buffer)
> + if (shader->scratch_bo == sctx->scratch_buffer) {
> + pipe_mutex_unlock(sel->mutex);
> return 0;
> + }
>
> assert(sctx->scratch_buffer);
>
> @@ -1638,14 +1643,17 @@ static int si_update_scratch_buffer(struct si_context *sctx,
>
> /* Replace the shader bo with a new bo that has the relocs applied. */
> r = si_shader_binary_upload(sctx->screen, shader);
> - if (r)
> + if (r) {
> + pipe_mutex_unlock(sel->mutex);
> return r;
> + }
>
> /* Update the shader state to use the new shader bo. */
> si_shader_init_pm4_state(shader);
>
> r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
>
> + pipe_mutex_unlock(sel->mutex);
> return 1;
> }
>
>
More information about the mesa-dev
mailing list