[Mesa-dev] [PATCH v3 2/2] radeonsi: enable TGSI support cap for compute shaders
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Tue Apr 19 14:11:52 UTC 2016
On Tue, Apr 19, 2016 at 4:03 PM, Alex Deucher <alexdeucher at gmail.com> wrote:
> On Tue, Apr 19, 2016 at 6:56 AM, Marek Olšák <maraeo at gmail.com> wrote:
>> Reviewed-by: Marek Olšák <marek.olsak at amd.com>
>>
>> Marek
>>
>> On Tue, Apr 19, 2016 at 1:39 AM, Bas Nieuwenhuizen
>> <bas at basnieuwenhuizen.nl> wrote:
>>> v2: Use chip_class instead of family.
>>>
>>> v3: Check kernel version for SI.
>>>
>>> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>>> ---
>>> docs/GL3.txt | 4 ++--
>>> docs/relnotes/11.3.0.html | 1 +
>>> src/gallium/drivers/radeon/r600_pipe_common.c | 21 ++++++++++++++++-----
>>> src/gallium/drivers/radeonsi/si_pipe.c | 15 +++++++++++++--
>>> 4 files changed, 32 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/docs/GL3.txt b/docs/GL3.txt
>>> index 3febd6e..6214f8d 100644
>>> --- a/docs/GL3.txt
>>> +++ b/docs/GL3.txt
>>> @@ -167,7 +167,7 @@ GL 4.3, GLSL 4.30:
>>> GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30)
>>> GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
>>> GL_ARB_clear_buffer_object DONE (all drivers)
>>> - GL_ARB_compute_shader DONE (i965)
>>> + GL_ARB_compute_shader DONE (i965, radeonsi)
>>> GL_ARB_copy_image DONE (i965, nv50, nvc0, r600, radeonsi)
>>> GL_KHR_debug DONE (all drivers)
>>> GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
>>> @@ -225,7 +225,7 @@ GL 4.5, GLSL 4.50:
>>> These are the extensions cherry-picked to make GLES 3.1
>>> GLES3.1, GLSL ES 3.1
>>> GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30)
>>> - GL_ARB_compute_shader DONE (i965)
>>> + GL_ARB_compute_shader DONE (i965, radeonsi)
>>> GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
>>> GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
>>> GL_ARB_framebuffer_no_attachments DONE (i965, nvc0, r600, radeonsi, softpipe)
>>> diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html
>>> index 0f9aed8..5a7083c 100644
>>> --- a/docs/relnotes/11.3.0.html
>>> +++ b/docs/relnotes/11.3.0.html
>>> @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
>>>
>>> <ul>
>>> <li>OpenGL 4.2 on radeonsi</li>
>>> +<li>GL_ARB_compute_shader on radeonsi</li>
>>> <li>GL_ARB_framebuffer_no_attachments on nvc0, r600, radeonsi, softpipe</li>
>>> <li>GL_ARB_internalformat_query2 on all drivers</li>
>>> <li>GL_ARB_robust_buffer_access_behavior on radeonsi</li>
>>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
>>> index a7477ab..64da62f 100644
>>> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
>>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
>>> @@ -645,23 +645,34 @@ static int r600_get_compute_param(struct pipe_screen *screen,
>>> uint64_t *grid_size = ret;
>>> grid_size[0] = 65535;
>>> grid_size[1] = 65535;
>>> - grid_size[2] = 1;
>>> + grid_size[2] = 65535;
>>> }
>>> return 3 * sizeof(uint64_t) ;
>>>
>>> case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
>>> if (ret) {
>>> uint64_t *block_size = ret;
>>> - block_size[0] = 256;
>>> - block_size[1] = 256;
>>> - block_size[2] = 256;
>>> + if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
>>> + ir_type == PIPE_SHADER_IR_TGSI) {
>>> + block_size[0] = 2048;
>>> + block_size[1] = 2048;
>>> + block_size[2] = 2048;
>>> + } else {
>>> + block_size[0] = 256;
>>> + block_size[1] = 256;
>>> + block_size[2] = 256;
>>> + }
>>> }
>>> return 3 * sizeof(uint64_t);
>>>
>>> case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
>>> if (ret) {
>>> uint64_t *max_threads_per_block = ret;
>>> - *max_threads_per_block = 256;
>>> + if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
>>> + ir_type == PIPE_SHADER_IR_TGSI)
>>> + *max_threads_per_block = 2048;
>>> + else
>>> + *max_threads_per_block = 256;
>>> }
>>> return sizeof(uint64_t);
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
>>> index f22cd03..7501a8f 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>> @@ -447,6 +447,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>>>
>>> static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
>>> {
>>> + struct si_screen *sscreen = (struct si_screen *)pscreen;
>>> +
>>> switch(shader)
>>> {
>>> case PIPE_SHADER_FRAGMENT:
>>> @@ -464,9 +466,18 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
>>> case PIPE_SHADER_CAP_PREFERRED_IR:
>>> return PIPE_SHADER_IR_NATIVE;
>>>
>>> - case PIPE_SHADER_CAP_SUPPORTED_IRS:
>>> - return 0;
>>> + case PIPE_SHADER_CAP_SUPPORTED_IRS: {
>>> + int ir = 1 << PIPE_SHADER_IR_NATIVE;
>>>
>>> + /* Old kernels disallowed shader register writes using
>>> + * COPY_DATA packets that are used for indirect dispatches. */
>>> + if (HAVE_LLVM >= 0x309 && (sscreen->b.chip_class >= CIK ||
>>> + (sscreen->b.info.drm_major == 2 &&
>>> + sscreen->b.info.drm_minor >= 45)))
>
> Can you hide this in the winsys for SI? If we support SI with amdgpu
> at some point, this will need to be changed.
>
> Alex
Do you mean doing this check only in the radeon winsys? Would it also
be okay to just preemptively allow SI + drm_major==3 in this if
statement?
I think that would behave the same as hiding the check in the winsys
and having the amdgpu winsys always report it as supported.
- Bas
>>> + ir |= 1 << PIPE_SHADER_IR_TGSI;
>>> +
>>> + return ir;
>>> + }
>>> case PIPE_SHADER_CAP_DOUBLES:
>>> return HAVE_LLVM >= 0x0307;
>>>
>>> --
>>> 2.8.0
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list