[Mesa-dev] [PATCH] r600g: Implement gpu_shader5 textureGather
Marek Olšák
maraeo at gmail.com
Fri Aug 1 07:20:40 PDT 2014
Pushed, thanks.
Marek
On Sun, Jul 20, 2014 at 3:59 PM, Glenn Kennard <glenn.kennard at gmail.com> wrote:
> Adds 0-3 textureGather component selection and non-constant offsets
>
> Caveat: 0 and 1 texture swizzles only work if textureGather component
> select is 3 or a component that does not exist in the sampler texture
> format. This is a hardware limitation; any other value returns
> 128/255=0.501961 for both 0 and 1.
>
> Passes all textureGather piglit tests on radeon 6670, except for those
> using 0/1 texture swizzles due to the aforementioned reason.
>
> Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
> ---
> It is possible to generate shader variants which get the existing
> textureGather 0-1 piglit tests passing, but the resulting code is not
> pretty, and it will still fail if anyone uses indirect sampler
> referencing in the shader for textureGather. I don't think it is
> worth the effort; if an app really wants a constant 0/1 value, it has
> other ways to accomplish that.
>
> docs/GL3.txt | 4 ++--
> src/gallium/drivers/r600/r600_pipe.c | 5 +++--
> src/gallium/drivers/r600/r600_shader.c | 36 +++++++++++++++++++++++++++++++---
> 3 files changed, 38 insertions(+), 7 deletions(-)
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index 0f37da4..eee0988 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -106,7 +106,7 @@ GL 4.0:
> - Implicit signed -> unsigned conversions DONE
> - Fused multiply-add DONE (i965, nvc0)
> - Packing/bitfield/conversion functions DONE (i965, nvc0)
> - - Enhanced textureGather DONE (i965, nvc0, radeonsi)
> + - Enhanced textureGather DONE (i965, nvc0, r600, radeonsi)
> - Geometry shader instancing DONE (i965, nvc0)
> - Geometry shader multiple streams DONE (i965, nvc0)
> - Enhanced per-sample shading DONE (i965)
> @@ -118,7 +118,7 @@ GL 4.0:
> GL_ARB_tessellation_shader started (Fabian)
> GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, r600, radeonsi, softpipe)
> GL_ARB_texture_cube_map_array DONE (i965, nv50, nvc0, r600, radeonsi, softpipe)
> - GL_ARB_texture_gather DONE (i965, nv50, nvc0, radeonsi, r600)
> + GL_ARB_texture_gather DONE (i965, nv50, nvc0, r600, radeonsi)
> GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, radeonsi)
> GL_ARB_transform_feedback2 DONE (i965, nv50, nvc0, r600, radeonsi)
> GL_ARB_transform_feedback3 DONE (i965, nv50, nvc0, r600, radeonsi)
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index 5bf9c00..3f07f01 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -303,8 +303,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
> case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
> case PIPE_CAP_CUBE_MAP_ARRAY:
> case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
> - case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
> + case PIPE_CAP_TEXTURE_GATHER_SM5:
> return family >= CHIP_CEDAR ? 1 : 0;
> + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
> + return family >= CHIP_CEDAR ? 4 : 0;
>
> /* Unsupported features. */
> case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
> @@ -313,7 +315,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
> case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
> case PIPE_CAP_VERTEX_COLOR_CLAMPED:
> case PIPE_CAP_USER_VERTEX_BUFFERS:
> - case PIPE_CAP_TEXTURE_GATHER_SM5:
> case PIPE_CAP_TEXTURE_QUERY_LOD:
> case PIPE_CAP_SAMPLE_SHADING:
> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index db928f3..044d67c 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -5060,6 +5060,35 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
> }
>
> opcode = ctx->inst_info->op;
> + if (opcode == FETCH_OP_GATHER4 &&
> + inst->TexOffsets[0].File != TGSI_FILE_NULL &&
> + inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) {
> + opcode = FETCH_OP_GATHER4_O;
> +
> + /* GATHER4_O/GATHER4_C_O use offset values loaded by
> + SET_TEXTURE_OFFSETS instruction. The immediate offset values
> + encoded in the instruction are ignored. */
> + memset(&tex, 0, sizeof(struct r600_bytecode_tex));
> + tex.op = FETCH_OP_SET_TEXTURE_OFFSETS;
> + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
> + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
> +
> + tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index;
> + tex.src_sel_x = inst->TexOffsets[0].SwizzleX;
> + tex.src_sel_y = inst->TexOffsets[0].SwizzleY;
> + tex.src_sel_z = inst->TexOffsets[0].SwizzleZ;
> + tex.src_sel_w = 4;
> +
> + tex.dst_sel_x = 7;
> + tex.dst_sel_y = 7;
> + tex.dst_sel_z = 7;
> + tex.dst_sel_w = 7;
> +
> + r = r600_bytecode_add_tex(ctx->bc, &tex);
> + if (r)
> + return r;
> + }
> +
> if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
> inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
> inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
> @@ -5082,10 +5111,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
> break;
> /* Texture gather variants */
> case FETCH_OP_GATHER4:
> - tex.op = FETCH_OP_GATHER4_C;
> + opcode = FETCH_OP_GATHER4_C;
> break;
> case FETCH_OP_GATHER4_O:
> - tex.op = FETCH_OP_GATHER4_C_O;
> + opcode = FETCH_OP_GATHER4_C_O;
> break;
> }
> }
> @@ -5153,7 +5182,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
> tex.offset_x = offset_x;
> tex.offset_y = offset_y;
> if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
> - inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) {
> + (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
> + inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) {
> tex.offset_z = 0;
> }
> else {
> --
> 1.8.3.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list