[Mesa-dev] [PATCH 1/2] radeonsi: Lock a mutex when checking scratch relocations.

Fri Apr 22 04:41:55 UTC 2016

On 21.04.2016 13:51, Bas Nieuwenhuizen wrote:
> We can use shaders from multiple contexts, and they were not
> otherwise locked yet.

Ouch... I guess this is why compute scratch buffers used to be per-program?

I'm still trying to wrap my head around the possible code paths here... 
are you sure that nothing can go wrong with the 
radeon_add_to_buffer_list of the scratch buffer? I'm thinking a code 
sequence like

Context 1 checks that sctx(1)->scratch_buffer == shader->scratch_bo.
Context 2 allocates a new scratch buffer sctx(2)->scratch_buffer and 
uploads a modified shader.
Context 1 adds the old sctx(1)->scratch_buffer to its buffer list and 
submits its CS. It now runs with the new shader, and may cause a VM fault.

More generally, what if two contexts use the same shader with different 
scratch buffers? Do we just ping-pong, re-uploading the shader each time?

Bonus question: Do we even need per-context scratch buffers? As long as 
we idle all shaders at CS flush, we should be fine with a 
per-device/per-screen scratch buffer.

(Curiously, there is an old "this is probably not needed anymore" 
comment on the PS_PARTIAL_FLUSH in si_context_gfx_flush, but this may be 
wrong: since shaders can write memory, and the kernel may want to swap 
buffers around, we have to be extra careful about this...)

This needs a bit more thought...

Cheers,
Nicolai

>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>   src/gallium/drivers/radeonsi/si_compute.c       | 20 ++++++++++++++++----
>   src/gallium/drivers/radeonsi/si_state_shaders.c | 12 ++++++++++--
>   2 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 7e05be5..a99a985 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -37,6 +37,7 @@
>   #define MAX_GLOBAL_BUFFERS 20
>
>   struct si_compute {
> +	pipe_mutex mutex;
>   	unsigned ir_type;
>   	unsigned local_size;
>   	unsigned private_size;
> @@ -116,6 +117,7 @@ static void *si_create_compute_state(
>   		si_shader_binary_upload(sctx->screen, &program->shader);
>   	}
>
> +	pipe_mutex_init(program->mutex);
>   	return program;
>   }
>
> @@ -196,15 +198,19 @@ static void si_initialize_compute(struct si_context *sctx)
>   }
>
>   static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
> -                                            struct si_shader *shader,
> +                                            struct si_compute *program,
>                                               struct si_shader_config *config)
>   {
> +	struct si_shader *shader = &program->shader;
>   	uint64_t scratch_bo_size, scratch_needed;
>   	scratch_bo_size = 0;
>   	scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;
>   	if (sctx->compute_scratch_buffer)
>   		scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
>
> +	if (!scratch_needed)
> +		return true;
> +
>   	if (scratch_bo_size < scratch_needed) {
>   		pipe_resource_reference(
>   			(struct pipe_resource**)&sctx->compute_scratch_buffer,
> @@ -218,18 +224,23 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
>   			return false;
>   	}
>
> -	if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
> +	pipe_mutex_lock(program->mutex);
> +
> +	if (sctx->compute_scratch_buffer != shader->scratch_bo) {
>   		uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
>
>   		si_shader_apply_scratch_relocs(sctx, shader, config, scratch_va);
>
> -		if (si_shader_binary_upload(sctx->screen, shader))
> +		if (si_shader_binary_upload(sctx->screen, shader)) {
> +			pipe_mutex_unlock(program->mutex);
>   			return false;
> +		}
>
>   		r600_resource_reference(&shader->scratch_bo,
>   		                        sctx->compute_scratch_buffer);
>   	}
>
> +	pipe_mutex_unlock(program->mutex);
>   	return true;
>   }
>
> @@ -272,7 +283,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
>   		config->rsrc2 |=  S_00B84C_LDS_SIZE(lds_blocks);
>   	}
>
> -	if (!si_setup_compute_scratch_buffer(sctx, shader, config))
> +	if (!si_setup_compute_scratch_buffer(sctx, program, config))
>   		return false;
>
>   	if (shader->scratch_bo) {
> @@ -503,6 +514,7 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
>   		sctx->cs_shader_state.emitted_program = NULL;
>
>   	si_shader_destroy(&program->shader);
> +	pipe_mutex_destroy(program->mutex);
>   	FREE(program);
>   }
>
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 49e688a..382481c 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -1618,6 +1618,7 @@ static int si_update_scratch_buffer(struct si_context *sctx,
>   				    struct si_shader *shader)
>   {
>   	uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
> +	struct si_shader_selector *sel = shader->selector;
>   	int r;
>
>   	if (!shader)
> @@ -1627,10 +1628,14 @@ static int si_update_scratch_buffer(struct si_context *sctx,
>   	if (shader->config.scratch_bytes_per_wave == 0)
>   		return 0;
>
> +	pipe_mutex_lock(sel->mutex);
> +
>   	/* This shader is already configured to use the current
>   	 * scratch buffer. */
> -	if (shader->scratch_bo == sctx->scratch_buffer)
> +	if (shader->scratch_bo == sctx->scratch_buffer) {
> +		pipe_mutex_unlock(sel->mutex);
>   		return 0;
> +	}
>
>   	assert(sctx->scratch_buffer);
>
> @@ -1638,14 +1643,17 @@ static int si_update_scratch_buffer(struct si_context *sctx,
>
>   	/* Replace the shader bo with a new bo that has the relocs applied. */
>   	r = si_shader_binary_upload(sctx->screen, shader);
> -	if (r)
> +	if (r) {
> +		pipe_mutex_unlock(sel->mutex);
>   		return r;
> +	}
>
>   	/* Update the shader state to use the new shader bo. */
>   	si_shader_init_pm4_state(shader);
>
>   	r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
>
> +	pipe_mutex_unlock(sel->mutex);
>   	return 1;
>   }
>
>