[Mesa-dev] [PATCH 2/2] radeonsi: add fine derivate control (v2)
Dave Airlie
airlied at gmail.com
Thu Jul 23 21:01:15 PDT 2015
From: Dave Airlie <airlied at redhat.com>
This adds support for fine derivatives and enables
ARB_derivative_control on radeonsi.
(just fell out of my working out interpolation)
v2: cleanup some bits, and try and write an
explaination.
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
docs/GL3.txt | 2 +-
docs/relnotes/10.7.0.html | 1 +
src/gallium/drivers/radeonsi/si_pipe.c | 4 ++--
src/gallium/drivers/radeonsi/si_shader.c | 41 ++++++++++++++++++++++++++++----
4 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 9d3d5e0..186f280 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -191,7 +191,7 @@ GL 4.5, GLSL 4.50:
GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, llvmpipe, softpipe)
GL_ARB_cull_distance in progress (Tobias)
- GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600)
+ GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi)
GL_ARB_direct_state_access DONE (all drivers)
GL_ARB_get_texture_sub_image DONE (all drivers)
GL_ARB_shader_texture_image_samples not started
diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
index 26615a8..afef525 100644
--- a/docs/relnotes/10.7.0.html
+++ b/docs/relnotes/10.7.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
<ul>
<li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
+<li>GL_ARB_derivative_control on radeonsi</li>
<li>GL_ARB_fragment_layer_viewport on radeonsi</li>
<li>GL_ARB_framebuffer_no_attachments on i965</li>
<li>GL_ARB_get_texture_sub_image for all drivers</li>
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index c2985b8..b2e3a8c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -249,6 +249,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_TGSI_TEXCOORD:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -280,7 +281,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return HAVE_LLVM >= 0x0305;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return HAVE_LLVM >= 0x0305 ? 4 : 0;
-
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
@@ -289,7 +289,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5a3de96..9c3150d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2799,6 +2799,24 @@ static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
}
}
+/*
+ * SI implements derivatives using the local data store (LDS)
+ * All writes to the LDS happen in all executing threads at
+ * the same time. So all threads get a thread ID, and store
+ * the src value they want to derive into the LDS.
+ * Depending on whether it is doing coarse or fine derivatives
+ * then masks the address to a top-left, top or left value
+ * and reads back the value at that position.
+ * It then reads back the value at the position either
+ * 1 for DDX or 2 for DDY values later, and
+ * calculates the difference. This value is the
+ * derivative in that direction.
+ */
+/* masks for thread ID. */
+#define TID_MASK_TOP_LEFT 0xfffffffc
+#define TID_MASK_TOP 0xfffffffd
+#define TID_MASK_LEFT 0xfffffffe
+
static void si_llvm_emit_ddxy(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -2815,6 +2833,8 @@ static void si_llvm_emit_ddxy(
LLVMTypeRef i32;
unsigned swizzle[4];
unsigned c;
+ int idx;
+ unsigned mask;
i32 = LLVMInt32TypeInContext(gallivm->context);
@@ -2824,15 +2844,22 @@ static void si_llvm_emit_ddxy(
store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
+ if (opcode == TGSI_OPCODE_DDX_FINE)
+ mask = TID_MASK_LEFT;
+ else if (opcode == TGSI_OPCODE_DDY_FINE)
+ mask = TID_MASK_TOP;
+ else
+ mask = TID_MASK_TOP_LEFT;
+
indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm, 0xfffffffc), "");
+ lp_build_const_int32(gallivm, mask), "");
load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
+ /* for DDX we want to next X pixel, DDY next Y pixel. */
+ idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm,
- opcode == TGSI_OPCODE_DDX ? 1 : 2),
- "");
+ lp_build_const_int32(gallivm, idx), "");
load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
@@ -3174,7 +3201,9 @@ static void create_function(struct si_shader_context *si_shader_ctx)
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
+ bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0))
si_shader_ctx->lds =
LLVMAddGlobalInAddressSpace(gallivm->module,
LLVMArrayType(i32, 64),
@@ -3667,6 +3696,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
+ bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
+ bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
--
2.4.3
More information about the mesa-dev
mailing list