[Mesa-dev] [PATCH] radeonsi: add fine derivative control (v2.1)
Marek Olšák
maraeo at gmail.com
Sat Jul 25 01:02:09 PDT 2015
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Marek
On Sat, Jul 25, 2015 at 2:14 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This adds support for fine derivatives and enables
> ARB_derivative_control on radeonsi.
>
> (just fell out of my working out interpolation)
>
> v2: cleanup some bits, write a comment
> v2.1: take Michel's comment from the mailing list
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> docs/GL3.txt | 2 +-
> docs/relnotes/10.7.0.html | 1 +
> src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
> src/gallium/drivers/radeonsi/si_shader.c | 52 +++++++++++++++++++++++++++++---
> 4 files changed, 50 insertions(+), 7 deletions(-)
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index e3fa1a1..15bb57f 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -191,7 +191,7 @@ GL 4.5, GLSL 4.50:
> GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
> GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, llvmpipe, softpipe)
> GL_ARB_cull_distance in progress (Tobias)
> - GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600)
> + GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi)
> GL_ARB_direct_state_access DONE (all drivers)
> GL_ARB_get_texture_sub_image DONE (all drivers)
> GL_ARB_shader_texture_image_samples not started
> diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
> index 26615a8..afef525 100644
> --- a/docs/relnotes/10.7.0.html
> +++ b/docs/relnotes/10.7.0.html
> @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
>
> <ul>
> <li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
> +<li>GL_ARB_derivative_control on radeonsi</li>
> <li>GL_ARB_fragment_layer_viewport on radeonsi</li>
> <li>GL_ARB_framebuffer_no_attachments on i965</li>
> <li>GL_ARB_get_texture_sub_image for all drivers</li>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index c2985b8..ebe1f5a 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -249,6 +249,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
> case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
> case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
> case PIPE_CAP_TGSI_TEXCOORD:
> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
> return 1;
>
> case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
> @@ -289,7 +290,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
> case PIPE_CAP_USER_VERTEX_BUFFERS:
> case PIPE_CAP_FAKE_SW_MSAA:
> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
> - case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
> case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
> case PIPE_CAP_SAMPLER_VIEW_TARGET:
> case PIPE_CAP_VERTEXID_NOBASE:
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 81f7bdb..fee427f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2841,6 +2841,35 @@ static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
> }
> }
>
> +/*
> + * SI implements derivatives using the local data store (LDS).
> + * All writes to the LDS happen in all executing threads at
> + * the same time. TID is the Thread ID for the current
> + * thread and is a value between 0 and 63, representing
> + * the thread's position in the wavefront.
> + *
> + * For the pixel shader, threads are grouped into quads of four pixels.
> + * The TIDs of the pixels of a quad are:
> + *
> + * +------+------+
> + * |4n + 0|4n + 1|
> + * +------+------+
> + * |4n + 2|4n + 3|
> + * +------+------+
> + *
> + * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
> + * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
> + * the current pixel's column, and masking with 0xfffffffe yields the TID
> + * of the left pixel of the current pixel's row.
> + *
> + * Adding 1 yields the TID of the pixel to the right of the left pixel, and
> + * adding 2 yields the TID of the pixel below the top pixel.
> + */
> +/* masks for thread ID. */
> +#define TID_MASK_TOP_LEFT 0xfffffffc
> +#define TID_MASK_TOP 0xfffffffd
> +#define TID_MASK_LEFT 0xfffffffe
> +
> static void si_llvm_emit_ddxy(
> const struct lp_build_tgsi_action * action,
> struct lp_build_tgsi_context * bld_base,
> @@ -2857,6 +2886,8 @@ static void si_llvm_emit_ddxy(
> LLVMTypeRef i32;
> unsigned swizzle[4];
> unsigned c;
> + int idx;
> + unsigned mask;
>
> i32 = LLVMInt32TypeInContext(gallivm->context);
>
> @@ -2866,15 +2897,22 @@ static void si_llvm_emit_ddxy(
> store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
> indices, 2, "");
>
> + if (opcode == TGSI_OPCODE_DDX_FINE)
> + mask = TID_MASK_LEFT;
> + else if (opcode == TGSI_OPCODE_DDY_FINE)
> + mask = TID_MASK_TOP;
> + else
> + mask = TID_MASK_TOP_LEFT;
> +
> indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
> - lp_build_const_int32(gallivm, 0xfffffffc), "");
> + lp_build_const_int32(gallivm, mask), "");
> load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
> indices, 2, "");
>
> + /* For DDX we want the next X pixel; for DDY, the next Y pixel. */
> + idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
> indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
> - lp_build_const_int32(gallivm,
> - opcode == TGSI_OPCODE_DDX ? 1 : 2),
> - "");
> + lp_build_const_int32(gallivm, idx), "");
> load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
> indices, 2, "");
>
> @@ -3216,7 +3254,9 @@ static void create_function(struct si_shader_context *si_shader_ctx)
>
> if (bld_base->info &&
> (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
> - bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
> + bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
> + bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
> + bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0))
> si_shader_ctx->lds =
> LLVMAddGlobalInAddressSpace(gallivm->module,
> LLVMArrayType(i32, 64),
> @@ -3709,6 +3749,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
>
> bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
> bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
> + bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
> + bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
>
> bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
> bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list