[Mesa-dev] [PATCH 6/9] freedreno/a5xx: SSBO support
Rob Clark
robdclark at gmail.com
Tue Apr 18 22:15:58 UTC 2017
To simplify things for now, since all the gfx shader stages share a
single SSBO state block, only advertise SSBO support for fragment shader
(and compute when we have that). We could possibly use a fixed-
partitioning of the SSBO index space to support SSBOs on other stages
without having to resort to shader variants.
Signed-off-by: Rob Clark <robdclark at gmail.com>
---
src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 69 ++++++++++++++++++++++++
src/gallium/drivers/freedreno/a5xx/fd5_program.c | 15 ++++--
src/gallium/drivers/freedreno/freedreno_screen.c | 37 ++++++++++++-
3 files changed, 114 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
index f2c9a6b..c48bcaa 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
@@ -345,6 +345,72 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
return needs_border;
}
+static void
+emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+{
+ unsigned count = util_last_bit(so->enabled_mask);
+
+ if (count == 0)
+ return;
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(count));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (unsigned i = 0; i < count; i++) {
+ struct pipe_shader_buffer *buf = &so->sb[i];
+ if (buf->buffer) {
+ struct fd_resource *rsc = fd_resource(buf->buffer);
+ OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(count));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (unsigned i = 0; i < count; i++) {
+ struct pipe_shader_buffer *buf = &so->sb[i];
+
+ // TODO maybe offset encoded somewhere here??
+ OUT_RING(ring, (buf->buffer_size << 16));
+ OUT_RING(ring, 0x00000000);
+ }
+
+ OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
+ OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+ CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+ CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE4_0_NUM_UNIT(count));
+ OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+ CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+ for (unsigned i = 0; i < count; i++) {
+ struct pipe_shader_buffer *buf = &so->sb[i];
+ if (buf->buffer) {
+ struct fd_resource *rsc = fd_resource(buf->buffer);
+ OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ }
+}
+
void
fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
{
@@ -663,6 +729,9 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (needs_border)
emit_border_color(ctx, ring);
+
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
+ emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
}
/* emit setup at begin of new cmdstream buffer (don't rely on previous
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 232b3fb..54b5d80 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -389,11 +389,16 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
OUT_RING(ring, 0x00000000);
OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
- OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen));
- OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen));
- OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen));
- OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen));
- OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen));
+ OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
+ COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
+ COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
+ COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
+ COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
+ OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
+ COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));
OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 9b3ca4d..6fd12cf 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -250,6 +250,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 120;
return is_ir3(screen) ? 140 : 120;
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ if (is_a5xx(screen))
+ return 4;
+ return 0;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
@@ -282,7 +287,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
@@ -439,7 +443,7 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
static int
fd_screen_get_shader_param(struct pipe_screen *pscreen,
- enum pipe_shader_type shader,
+ enum pipe_shader_type shader,
enum pipe_shader_cap param)
{
struct fd_screen *screen = fd_screen(pscreen);
@@ -518,6 +522,35 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ if (is_a5xx(screen)) {
+ /* a5xx (and a4xx for that matter) has one state-block
+ * for compute-shader SSBO's and another that is shared
+ * by VS/HS/DS/GS/FS.. so to simplify things for now
+ * just advertise SSBOs for FS and CS. We could possibly
+ * do what blob does, and partition the space for
+ * VS/HS/DS/GS/FS. The blob advertises:
+ *
+ * GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4
+ * GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24
+ * GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24
+ *
+ * I think that way we could avoid having to patch shaders
+ * for actual SSBO indexes by using a static partitioning.
+ */
+ switch(shader)
+ {
+ case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_COMPUTE:
+ return 24;
+ default:
+ return 0;
+ }
+ }
+ return 0;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
return 0;
--
2.9.3
More information about the mesa-dev
mailing list