[Mesa-dev] [PATCH 2/2] r600: add ARB_shader_storage_buffer_object support

Dave Airlie airlied at gmail.com
Fri Nov 17 02:05:38 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This just builds on the image support. Evergreen only has ssbo
for fragment and compute no other stages.
---
 docs/features.txt                            |   4 +-
 src/gallium/drivers/r600/evergreen_state.c   | 131 ++++++++++++++++++++++++++-
 src/gallium/drivers/r600/r600_pipe.c         |   7 +-
 src/gallium/drivers/r600/r600_pipe.h         |   4 +-
 src/gallium/drivers/r600/r600_shader.c       |  75 ++++++++++++++-
 src/gallium/drivers/r600/r600_state_common.c |  18 ++++
 6 files changed, 230 insertions(+), 9 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 29a9688..59db21b 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -179,7 +179,7 @@ GL 4.3, GLSL 4.30 -- all DONE: i965/gen8+, nvc0, radeonsi
   GL_ARB_program_interface_query                        DONE (all drivers)
   GL_ARB_robust_buffer_access_behavior                  DONE (i965)
   GL_ARB_shader_image_size                              DONE (i965, r600, softpipe)
-  GL_ARB_shader_storage_buffer_object                   DONE (i965, softpipe)
+  GL_ARB_shader_storage_buffer_object                   DONE (i965, r600, softpipe)
   GL_ARB_stencil_texturing                              DONE (i965/hsw+, nv50, r600, llvmpipe, softpipe, swr)
   GL_ARB_texture_buffer_range                           DONE (nv50, i965, r600, llvmpipe)
   GL_ARB_texture_query_levels                           DONE (all drivers that support GLSL 1.30)
@@ -249,7 +249,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
   GL_ARB_shader_atomic_counters                         DONE (i965/gen7+, r600, softpipe)
   GL_ARB_shader_image_load_store                        DONE (i965/gen7+, r600, softpipe)
   GL_ARB_shader_image_size                              DONE (i965/gen7+, r600, softpipe)
-  GL_ARB_shader_storage_buffer_object                   DONE (i965/gen7+, softpipe)
+  GL_ARB_shader_storage_buffer_object                   DONE (i965/gen7+, r600, softpipe)
   GL_ARB_shading_language_packing                       DONE (all drivers)
   GL_ARB_separate_shader_objects                        DONE (all drivers)
   GL_ARB_stencil_texturing                              DONE (nv50, r600, llvmpipe, softpipe, swr)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 5a3a851..66af679 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -614,6 +614,7 @@ struct eg_buf_res_params {
 	unsigned size;
 	unsigned char swizzle[4];
 	bool uncached;
+	bool force_swizzle;
 };
 
 static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
@@ -635,7 +636,10 @@ static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
 
 	desc = util_format_description(params->pipe_format);
 
-	swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE);
+	if (params->force_swizzle)
+		swizzle_res = r600_get_swizzle_combined(params->swizzle, NULL, TRUE);
+	else
+		swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE);
 
 	va = tmp->resource.gpu_address + params->offset;
 	*skip_mip_address_reloc = true;
@@ -1029,7 +1033,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx,
 {
 	unsigned format, swap, ntype, endian;
 	const struct util_format_description *desc;
-	unsigned block_size = align(util_format_get_blocksize(res->b.b.format), 4);
+	unsigned block_size = util_format_get_blocksize(res->b.b.format);
 	unsigned pitch_alignment =
 		MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
 	unsigned pitch = align(res->b.b.width0, pitch_alignment);
@@ -1776,6 +1780,14 @@ static void evergreen_emit_fragment_image_state(struct r600_context *rctx, struc
 				   R600_IMAGE_REAL_RESOURCE_OFFSET);
 }
 
+static void evergreen_emit_fragment_buffer_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+	int offset = util_bitcount(rctx->fragment_images.enabled_mask);
+	evergreen_emit_image_state(rctx, atom,
+				   R600_IMAGE_IMMED_RESOURCE_OFFSET + offset,
+				   R600_IMAGE_REAL_RESOURCE_OFFSET + offset);
+}
+
 static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
@@ -1852,6 +1864,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 		i++;
 	}
 	i += util_bitcount(rctx->fragment_images.enabled_mask);
+	i += util_bitcount(rctx->fragment_buffers.enabled_mask);
 	for (; i < 8 ; i++)
 		radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 	for (; i < 12; i++)
@@ -1966,7 +1979,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
 	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 	unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
 	unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
-	unsigned rat_colormask = ((1ULL << ((unsigned)a->nr_image_rats * 4)) - 1) << (a->nr_cbufs * 4);
+	unsigned rat_colormask = ((1ULL << ((unsigned)(a->nr_image_rats + a->nr_buffer_rats) * 4)) - 1) << (a->nr_cbufs * 4);
 	radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
 	radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); /* R_028238_CB_TARGET_MASK */
 	/* This must match the used export instructions exactly.
@@ -3871,6 +3884,116 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
 	}
 }
 
+static void evergreen_set_shader_buffers(struct pipe_context *ctx,
+					 enum pipe_shader_type shader, unsigned start_slot,
+					 unsigned count,
+					 const struct pipe_shader_buffer *buffers)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_screen *rscreen = (struct r600_screen *)ctx->screen;
+	struct r600_image_state *istate = NULL;
+	struct r600_image_view *rview;
+	struct r600_tex_color_info color;
+	struct eg_buf_res_params buf_params;
+	struct r600_resource *resource;
+	int i, idx;
+	unsigned old_mask;
+	bool skip_reloc = false;
+
+	if (shader != PIPE_SHADER_FRAGMENT && count == 0)
+		return;
+
+	assert(shader == PIPE_SHADER_FRAGMENT);
+	istate = &rctx->fragment_buffers;
+
+	old_mask = istate->enabled_mask;
+	for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
+		const struct pipe_shader_buffer *buf;
+		unsigned res_type;
+
+		rview = &istate->views[i];
+
+		if (!buffers || !buffers[idx].buffer) {
+			pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
+			istate->enabled_mask &= ~(1 << i);
+			continue;
+		}
+
+		buf = &buffers[idx];
+		pipe_resource_reference((struct pipe_resource **)&rview->base.resource, buf->buffer);
+
+		resource = (struct r600_resource *)rview->base.resource;
+		if (!resource->immed_buffer) {
+			int immed_size = (rscreen->b.info.max_se * 256 * 64) * util_format_get_blocksize(resource->b.b.format);
+
+			eg_resource_alloc_immed(&rscreen->b, resource, immed_size);
+		}
+
+		color.offset = 0;
+		color.view = 0;
+		evergreen_set_color_surface_buffer(rctx, resource,
+						   resource->b.b.format,
+						   buf->buffer_offset,
+						   buf->buffer_offset + buf->buffer_size,
+						   &color);
+
+		res_type = V_028C70_BUFFER;
+
+		rview->cb_color_base = color.offset;
+		rview->cb_color_dim = color.dim;
+		rview->cb_color_info = color.info |
+			S_028C70_RAT(1) |
+			S_028C70_RESOURCE_TYPE(res_type);
+		rview->cb_color_pitch = color.pitch;
+		rview->cb_color_slice = color.slice;
+		rview->cb_color_view = color.view;
+		rview->cb_color_attrib = color.attrib;
+		rview->cb_color_fmask = color.fmask;
+		rview->cb_color_fmask_slice = color.fmask_slice;
+
+		memset(&buf_params, 0, sizeof(buf_params));
+		buf_params.pipe_format = resource->b.b.format;
+		buf_params.size = resource->immed_buffer->b.b.width0;
+		buf_params.swizzle[0] = PIPE_SWIZZLE_X;
+		buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
+		buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
+		buf_params.swizzle[3] = PIPE_SWIZZLE_W;
+		buf_params.uncached = 1;
+		evergreen_fill_buffer_resource_words(rctx, &resource->immed_buffer->b.b,
+						     &buf_params, &skip_reloc,
+						     rview->immed_resource_words);
+
+		memset(&buf_params, 0, sizeof(buf_params));
+		buf_params.pipe_format = PIPE_FORMAT_R32_FLOAT;
+		buf_params.offset = buf->buffer_offset;
+		buf_params.size = buf->buffer_size;
+		buf_params.swizzle[0] = PIPE_SWIZZLE_X;
+		buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
+		buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
+		buf_params.swizzle[3] = PIPE_SWIZZLE_W;
+		buf_params.force_swizzle = true;
+		buf_params.uncached = 1;
+		evergreen_fill_buffer_resource_words(rctx, &resource->b.b,
+						     &buf_params,
+						     &rview->skip_mip_address_reloc,
+						     rview->resource_words);
+
+		istate->enabled_mask |= (1 << i);
+	}
+
+	istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46;
+
+	if (old_mask != istate->enabled_mask)
+		r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
+
+	if (rctx->cb_misc_state.nr_buffer_rats != util_bitcount(istate->enabled_mask)) {
+		rctx->cb_misc_state.nr_buffer_rats = util_bitcount(istate->enabled_mask);
+		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
+	}
+
+	r600_mark_atom_dirty(rctx, &istate->atom);
+}
+
 static void evergreen_set_shader_images(struct pipe_context *ctx,
 					enum pipe_shader_type shader, unsigned start_slot,
 					unsigned count,
@@ -4079,6 +4202,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	}
 	r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0);
 	r600_init_atom(rctx, &rctx->fragment_images.atom, id++, evergreen_emit_fragment_image_state, 0);
+	r600_init_atom(rctx, &rctx->fragment_buffers.atom, id++, evergreen_emit_fragment_buffer_state, 0);
 	/* shader const */
 	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0);
 	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, evergreen_emit_gs_constant_buffers, 0);
@@ -4148,6 +4272,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	rctx->b.b.set_tess_state = evergreen_set_tess_state;
 	rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers;
 	rctx->b.b.set_shader_images = evergreen_set_shader_images;
+	rctx->b.b.set_shader_buffers = evergreen_set_shader_buffers;
 	if (rctx->b.chip_class == EVERGREEN)
                 rctx->b.b.get_sample_position = evergreen_get_sample_position;
         else
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index a232ee4..4348f93 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -382,7 +382,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
 	case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
 	case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
-	case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
 	case PIPE_CAP_GENERATE_MIPMAP:
 	case PIPE_CAP_STRING_MARKER:
 	case PIPE_CAP_QUERY_BUFFER_OBJECT:
@@ -422,6 +421,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		    rscreen->b.family == CHIP_HEMLOCK)
 			return 1;
 		return 0;
+	case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+		if (family >= CHIP_CEDAR)
+			return 256;
+		return 0;
 
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
 		if (family >= CHIP_CEDAR)
@@ -608,10 +611,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
-	case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
 	case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
 	case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
 		return 0;
+	case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
 	case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
 		if (rscreen->b.family >= CHIP_CEDAR &&
 		    (shader == PIPE_SHADER_FRAGMENT))
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ad7e499..3f53d81 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -38,7 +38,7 @@
 
 #include "tgsi/tgsi_scan.h"
 
-#define R600_NUM_ATOMS 53
+#define R600_NUM_ATOMS 54
 
 #define R600_MAX_IMAGES 8
 /*
@@ -145,6 +145,7 @@ struct r600_cb_misc_state {
 	unsigned nr_cbufs;
 	unsigned nr_ps_color_outputs;
 	unsigned nr_image_rats;
+	unsigned nr_buffer_rats;
 	bool multiwrite;
 	bool dual_src_blend;
 };
@@ -519,6 +520,7 @@ struct r600_context {
 	struct r600_atomic_buffer_state atomic_buffer_state;
 	/* only have images on fragment shader */
 	struct r600_image_state         fragment_images;
+	struct r600_image_state         fragment_buffers;
 	/* Shaders and shader resources. */
 	struct r600_cso_state		vertex_fetch_shader;
 	struct r600_shader_state        hw_shader_stages[EG_NUM_HW_STAGES];
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 9d0e625..d042306 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -970,6 +970,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 	case TGSI_FILE_SAMPLER:
 	case TGSI_FILE_SAMPLER_VIEW:
 	case TGSI_FILE_ADDRESS:
+	case TGSI_FILE_BUFFER:
 	case TGSI_FILE_IMAGE:
 		break;
 
@@ -3037,7 +3038,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
 	shader->nsys_inputs = 0;
 
-	shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0;
+	shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0 ||
+		ctx.info.file_count[TGSI_FILE_BUFFER] > 0;
 	indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
 	tgsi_parse_init(&ctx.parse, tokens);
 	ctx.type = ctx.info.processor;
@@ -7837,6 +7839,73 @@ static int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_g
 	return 0;
 }
 
+static int tgsi_load_buffer(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	/* have to work out the offset into the RAT immediate return buffer */
+	struct r600_bytecode_vtx vtx;
+	struct r600_bytecode_cf *cf;
+	int r;
+	int temp_reg = r600_get_temp(ctx);
+	unsigned rat_index_mode;
+	unsigned base;
+
+	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+	base = R600_IMAGE_REAL_RESOURCE_OFFSET;
+
+	if (inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE) {
+		int value = (ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]);
+		r = single_alu_op2(ctx, ALU_OP1_MOV,
+				   temp_reg, 0,
+				   V_SQ_ALU_SRC_LITERAL, value >> 2,
+				   0, 0);
+		if (r)
+			return r;
+	} else {
+		struct r600_bytecode_alu alu;
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP2_LSHR_INT;
+		r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
+		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+		alu.src[1].value = 2;
+		alu.dst.sel = temp_reg;
+		alu.dst.write = 1;
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	ctx->bc->cf_last->barrier = 1;
+	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+	vtx.op = FETCH_OP_VFETCH;
+	vtx.buffer_id = inst->Src[0].Register.Index + base;
+	vtx.buffer_index_mode = rat_index_mode;
+	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+	vtx.src_gpr = temp_reg;
+	vtx.src_sel_x = 0;
+	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
+	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
+	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
+	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
+
+	if (inst->Dst[0].Register.WriteMask == 0xf) {
+		vtx.data_format = FMT_32_32_32_32_FLOAT;
+		vtx.num_format_all = 0;
+		vtx.format_comp_all = 1;
+		vtx.use_const_fields = 0;
+		vtx.srf_mode_all = 0;
+	} else
+		vtx.use_const_fields = 1;
+
+	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
+	if (r)
+		return r;
+	cf = ctx->bc->cf_last;
+	cf->barrier = 1;
+	return 0;
+}
+
 static int tgsi_load_rat(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -7917,6 +7986,8 @@ static int tgsi_load(struct r600_shader_ctx *ctx)
 		return tgsi_load_rat(ctx);
 	if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
 		return tgsi_load_gds(ctx);
+	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
+		return tgsi_load_buffer(ctx);
 	return 0;
 }
 
@@ -8196,6 +8267,8 @@ static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
 		return tgsi_atomic_op_rat(ctx);
 	if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
 		return tgsi_atomic_op_gds(ctx);
+	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
+		return tgsi_atomic_op_rat(ctx);
 	return 0;
 }
 
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index b8d4b8f..6fa7b32 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -3024,6 +3024,24 @@ static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resourc
 			r600_sampler_views_dirty(rctx, state);
 		}
 	}
+
+	/* SSBOs */
+	struct r600_image_state *istate = &rctx->fragment_buffers;
+	{
+		uint32_t mask = istate->enabled_mask;
+		bool found = false;
+		while (mask) {
+			unsigned i = u_bit_scan(&mask);
+			if (istate->views[i].base.resource == &rbuffer->b.b) {
+				found = true;
+				istate->dirty_mask |= 1 << i;
+			}
+		}
+		if (found) {
+			r600_mark_atom_dirty(rctx, &istate->atom);
+		}
+	}
+
 }
 
 static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable)
-- 
2.9.5



More information about the mesa-dev mailing list