[Mesa-dev] [PATCH v3 2/2] radeonsi: enable TGSI support cap for compute shaders

Alex Deucher alexdeucher at gmail.com
Tue Apr 19 14:03:04 UTC 2016


On Tue, Apr 19, 2016 at 6:56 AM, Marek Olšák <maraeo at gmail.com> wrote:
> Reviewed-by: Marek Olšák <marek.olsak at amd.com>
>
> Marek
>
> On Tue, Apr 19, 2016 at 1:39 AM, Bas Nieuwenhuizen
> <bas at basnieuwenhuizen.nl> wrote:
>> v2: Use chip_class instead of family.
>>
>> v3: Check kernel version for SI.
>>
>> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>> ---
>>  docs/GL3.txt                                  |  4 ++--
>>  docs/relnotes/11.3.0.html                     |  1 +
>>  src/gallium/drivers/radeon/r600_pipe_common.c | 21 ++++++++++++++++-----
>>  src/gallium/drivers/radeonsi/si_pipe.c        | 15 +++++++++++++--
>>  4 files changed, 32 insertions(+), 9 deletions(-)
>>
>> diff --git a/docs/GL3.txt b/docs/GL3.txt
>> index 3febd6e..6214f8d 100644
>> --- a/docs/GL3.txt
>> +++ b/docs/GL3.txt
>> @@ -167,7 +167,7 @@ GL 4.3, GLSL 4.30:
>>    GL_ARB_arrays_of_arrays                               DONE (all drivers that support GLSL 1.30)
>>    GL_ARB_ES3_compatibility                              DONE (all drivers that support GLSL 3.30)
>>    GL_ARB_clear_buffer_object                            DONE (all drivers)
>> -  GL_ARB_compute_shader                                 DONE (i965)
>> +  GL_ARB_compute_shader                                 DONE (i965, radeonsi)
>>    GL_ARB_copy_image                                     DONE (i965, nv50, nvc0, r600, radeonsi)
>>    GL_KHR_debug                                          DONE (all drivers)
>>    GL_ARB_explicit_uniform_location                      DONE (all drivers that support GLSL)
>> @@ -225,7 +225,7 @@ GL 4.5, GLSL 4.50:
>>  These are the extensions cherry-picked to make GLES 3.1
>>  GLES3.1, GLSL ES 3.1
>>    GL_ARB_arrays_of_arrays                               DONE (all drivers that support GLSL 1.30)
>> -  GL_ARB_compute_shader                                 DONE (i965)
>> +  GL_ARB_compute_shader                                 DONE (i965, radeonsi)
>>    GL_ARB_draw_indirect                                  DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
>>    GL_ARB_explicit_uniform_location                      DONE (all drivers that support GLSL)
>>    GL_ARB_framebuffer_no_attachments                     DONE (i965, nvc0, r600, radeonsi, softpipe)
>> diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html
>> index 0f9aed8..5a7083c 100644
>> --- a/docs/relnotes/11.3.0.html
>> +++ b/docs/relnotes/11.3.0.html
>> @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
>>
>>  <ul>
>>  <li>OpenGL 4.2 on radeonsi</li>
>> +<li>GL_ARB_compute_shader on radeonsi</li>
>>  <li>GL_ARB_framebuffer_no_attachments on nvc0, r600, radeonsi, softpipe</li>
>>  <li>GL_ARB_internalformat_query2 on all drivers</li>
>>  <li>GL_ARB_robust_buffer_access_behavior on radeonsi</li>
>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
>> index a7477ab..64da62f 100644
>> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
>> @@ -645,23 +645,34 @@ static int r600_get_compute_param(struct pipe_screen *screen,
>>                         uint64_t *grid_size = ret;
>>                         grid_size[0] = 65535;
>>                         grid_size[1] = 65535;
>> -                       grid_size[2] = 1;
>> +                       grid_size[2] = 65535;
>>                 }
>>                 return 3 * sizeof(uint64_t) ;
>>
>>         case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
>>                 if (ret) {
>>                         uint64_t *block_size = ret;
>> -                       block_size[0] = 256;
>> -                       block_size[1] = 256;
>> -                       block_size[2] = 256;
>> +                       if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
>> +                           ir_type == PIPE_SHADER_IR_TGSI) {
>> +                               block_size[0] = 2048;
>> +                               block_size[1] = 2048;
>> +                               block_size[2] = 2048;
>> +                       } else {
>> +                               block_size[0] = 256;
>> +                               block_size[1] = 256;
>> +                               block_size[2] = 256;
>> +                       }
>>                 }
>>                 return 3 * sizeof(uint64_t);
>>
>>         case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
>>                 if (ret) {
>>                         uint64_t *max_threads_per_block = ret;
>> -                       *max_threads_per_block = 256;
>> +                       if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
>> +                           ir_type == PIPE_SHADER_IR_TGSI)
>> +                               *max_threads_per_block = 2048;
>> +                       else
>> +                               *max_threads_per_block = 256;
>>                 }
>>                 return sizeof(uint64_t);
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
>> index f22cd03..7501a8f 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> @@ -447,6 +447,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>>
>>  static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
>>  {
>> +       struct si_screen *sscreen = (struct si_screen *)pscreen;
>> +
>>         switch(shader)
>>         {
>>         case PIPE_SHADER_FRAGMENT:
>> @@ -464,9 +466,18 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
>>                 case PIPE_SHADER_CAP_PREFERRED_IR:
>>                         return PIPE_SHADER_IR_NATIVE;
>>
>> -               case PIPE_SHADER_CAP_SUPPORTED_IRS:
>> -                       return 0;
>> +               case PIPE_SHADER_CAP_SUPPORTED_IRS: {
>> +                       int ir = 1 << PIPE_SHADER_IR_NATIVE;
>>
>> +                       /* Old kernels disallowed shader register writes using
>> +                        * COPY_DATA packets that are used for indirect dispatches. */
>> +                       if (HAVE_LLVM >= 0x309 && (sscreen->b.chip_class >= CIK ||
>> +                                                  (sscreen->b.info.drm_major == 2 &&
>> +                                                   sscreen->b.info.drm_minor >= 45)))

Can you hide this DRM version check in the winsys for SI? If we support SI
with amdgpu at some point, the check will need to be changed.

Alex

>> +                               ir |= 1 << PIPE_SHADER_IR_TGSI;
>> +
>> +                       return ir;
>> +               }
>>                 case PIPE_SHADER_CAP_DOUBLES:
>>                         return HAVE_LLVM >= 0x0307;
>>
>> --
>> 2.8.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list