[Mesa-dev] [PATCH 2/4] radeonsi/compute: Use relocs for scratch pointer rather than user sgprs

Michel Dänzer michel at daenzer.net
Thu Jan 8 01:28:33 PST 2015


On 08.01.2015 06:03, Tom Stellard wrote:
> Instead of passing a pointer to the scratch buffer via user sgprs, we
> now patch the shader with the buffer address using reloc information
> from the LLVM generated ELF.

[...]

> @@ -174,6 +183,35 @@ static unsigned compute_num_waves_for_scratch(
>  	return scratch_waves;
>  }
>  
> +static void apply_scratch_relocs(const struct si_screen *sscreen,
> +			const struct radeon_shader_binary *binary,
> +			struct si_shader *shader, uint64_t scratch_va) {
> +	unsigned i;
> +	char *ptr;
> +	uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
> +	uint32_t scratch_rsrc_dword1 =
> +		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
> +		|  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
> +
> +	if (!binary->reloc_count) {
> +		return;
> +	}
> +
> +	ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
> +					PIPE_TRANSFER_READ_WRITE);
> +	for (i = 0 ; i < binary->reloc_count; i++) {
> +		const struct radeon_shader_reloc *reloc = &binary->relocs[i];
> +		if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
> +			util_memcpy_cpu_to_le32(ptr + reloc->offset,
> +				&scratch_rsrc_dword0, 4);
> +		} else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
> +			util_memcpy_cpu_to_le32(ptr + reloc->offset,
> +				&scratch_rsrc_dword1, 4);
> +		}
> +	}
> +	sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
> +}

[...]

> @@ -273,10 +315,6 @@ static void si_launch_grid(
>  
>  	si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
>  	si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
> -	si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 8, scratch_buffer_va);
> -	si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 12,
> -		S_008F04_BASE_ADDRESS_HI(scratch_buffer_va >> 32)
> -		|  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64));
>  
>  	si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
>  	si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
> 

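Looks like this will break with older LLVM: if the binary contains no
relocs for these symbols, apply_scratch_relocs() returns early without
patching anything, and the scratch buffer address is no longer passed
via user SGPRs either.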


Patch 3 looks good to me. Haven't had time to look at patch 4 in detail.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer


More information about the mesa-dev mailing list