[Mesa-dev] [PATCH v5 38/70] i965/nir/vec4: Implement nir_intrinsic_store_ssbo
Kristian Høgsberg
krh at bitplanet.net
Fri Sep 18 13:02:13 PDT 2015
On Thu, Sep 10, 2015 at 03:35:54PM +0200, Iago Toral Quiroga wrote:
> ---
> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 148 +++++++++++++++++++++++++++++
> 1 file changed, 148 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index f47b029..450441d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -23,8 +23,13 @@
>
> #include "brw_nir.h"
> #include "brw_vec4.h"
> +#include "brw_vec4_builder.h"
> +#include "brw_vec4_surface_builder.h"
> #include "glsl/ir_uniform.h"
>
> +using namespace brw;
> +using namespace brw::surface_access;
> +
> namespace brw {
>
> void
> @@ -556,6 +561,149 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> break;
> }
>
> + case nir_intrinsic_store_ssbo_indirect:
> + has_indirect = true;
> + /* fallthrough */
> + case nir_intrinsic_store_ssbo: {
> + assert(devinfo->gen >= 7);
> +
> + /* Block index */
> + src_reg surf_index;
> + nir_const_value *const_uniform_block =
> + nir_src_as_const_value(instr->src[1]);
> + if (const_uniform_block) {
> + unsigned index = prog_data->base.binding_table.ubo_start +
> + const_uniform_block->u[0];
> + surf_index = src_reg(index);
> + brw_mark_surface_used(&prog_data->base, index);
> + } else {
> + surf_index = src_reg(this, glsl_type::uint_type);
> + emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
> + src_reg(prog_data->base.binding_table.ubo_start)));
> + surf_index = emit_uniformize(surf_index);
> +
> + brw_mark_surface_used(&prog_data->base,
> + prog_data->base.binding_table.ubo_start +
> + shader_prog->NumUniformBlocks - 1);
> + }
> +
> + /* Offset */
> + src_reg offset_reg = src_reg(this, glsl_type::uint_type);
> + unsigned const_offset_bytes = 0;
> + if (has_indirect) {
> + emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
> + } else {
> + const_offset_bytes = instr->const_index[0];
> + emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
> + }
> +
> + /* Value */
> + src_reg val_reg = get_nir_src(instr->src[0], 4);
> +
> + /* Writemask */
> + unsigned write_mask = instr->const_index[1];
> +
> + /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
> + * writes will use SIMD8 mode. In order to hide this and keep symmetry across
> + * typed and untyped messages and across hardware platforms, the
> + * current implementation of the untyped messages will transparently convert
> + * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
> + * and enabling only channel X on the SEND instruction.
> + *
> + * The above, works well for full vector writes, but not for partial writes
> + * where we want to write some channels and not others, like when we have
> + * code such as v.xyw = vec3(1,2,4). Because the untyped write messages are
> + * quite restrictive with regards to the channel enables we can configure in
> + * the message descriptor (not all combinations are allowed) we cannot simply
> + * implement these scenarios with a single message while keeping the
> + * aforementioned symmetry in the implementation. For now we de decided that
> + * it is better to keep the symmetry to reduce complexity, so in situations
> + * such as the one described we end up emitting two untyped write messages
> + * (one for xy and another for w).
> + *
> + * The code below packs consecutive channels into a single write message,
> + * detects gaps in the vector write and if needed, sends a second message
> + * with the remaining channels. If in the future we decide that we want to
> + * emit a single message at the expense of losing the symmetry in the
> + * implementation we can:
> + *
> + * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
> + * message payload. In this mode we can write up to 8 offsets and dwords
> + * to the red channel only (for the two vec4s in the SIMD4x2 execution)
> + * and select which of the 8 channels carry data to write by setting the
> + * appropriate writemask in the dst register of the SEND instruction.
> + * It would require to write a new generator opcode specifically for
> + * IvyBridge since we would need to prepare a SIMD8 payload that could
> + * use any channel, not just X.
> + *
> + * 2) For Haswell+: Simply send a single write message but set the writemask
> + * on the dst of the SEND instruction to select the channels we want to
> + * write. It would require to modify the current messages to receive
> + * and honor the writemask provided.
> + */
> + const vec4_builder bld = vec4_builder(this).at_end()
> + .annotate(current_annotation, base_ir);
> +
> + int swizzle[4] = { 0, 0, 0, 0};
> + int num_channels = 0;
> + unsigned skipped_channels = 0;
> + int num_components = instr->num_components;
> + for (int i = 0; i < num_components; i++) {
> + /* Check if this channel needs to be written. If so, record the
> + * channel we need to take the data from in the swizzle array
> + */
> + int component_mask = 1 << i;
> + int write_test = write_mask & component_mask;
> + if (write_test)
> + swizzle[num_channels++] = i;
> +
> + /* If we don't have to write this channel it means we have a gap in the
> + * vector, so write the channels we accumulated until now, if any. Do
> + * the same if this was the last component in the vector.
> + */
> + if (!write_test || i == num_components - 1) {
> + if (num_channels > 0) {
> + /* We have channels to write, so update the offset we need to
> + * write at to skip the channels we skipped, if any.
> + */
> + if (skipped_channels > 0) {
> + if (!has_indirect) {
> + const_offset_bytes += 4 * skipped_channels;
> + offset_reg = src_reg(const_offset_bytes);
> + } else {
> + emit(ADD(dst_reg(offset_reg), offset_reg,
> + brw_imm_ud(4 * skipped_channels)));
> + }
> + }
> +
> + /* Swizzle the data register so we take the data from the channels
> + * we need to write and send the write message. This will write
> + * num_channels consecutive dwords starting at offset.
> + */
> + val_reg.swizzle =
> + BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
> + emit_untyped_write(bld, surf_index, offset_reg, val_reg,
> + 1 /* dims */, num_channels /* size */,
> + BRW_PREDICATE_NONE);
> +
> + /* If we have to do a second write we will have to update the
> + * offset so that we jump over the channels we have just written
> + * now.
> + */
> + skipped_channels = num_channels;
Shouldn't this be
skipped_channels += num_channels;
to handle write mask reg.yw?
> + /* Restart the count for the next write message */
> + num_channels = 0;
> + }
> +
> + /* We did not write the current channel, so increase skipped count */
> + skipped_channels++;
> + }
> + }
> +
> + break;
> + }
> +
> case nir_intrinsic_load_vertex_id:
> unreachable("should be lowered by lower_vertex_id()");
>
> --
> 1.9.1
>
More information about the mesa-dev
mailing list