[Mesa-dev] [PATCH v2 065/103] i965/vec4: Fix UBO loads for 64-bit data
Iago Toral Quiroga
itoral at igalia.com
Tue Oct 11 09:02:09 UTC 2016
We need to emit two 32-bit load messages to load a full dvec4. If only
1 or 2 double components are needed, dead-code elimination will remove
the second load message.
We also need to shuffle the results of both 32-bit messages to form
valid 64-bit SIMD4x2 data.
---
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 46 +++++++++++++++++++++---------
1 file changed, 32 insertions(+), 14 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 04e95a7..f234e65 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -829,31 +829,49 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir->info.num_ubos - 1);
}
- src_reg offset;
+ src_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset) {
- offset = brw_imm_ud(const_offset->u32[0] & ~15);
+ offset_reg = src_reg(this, glsl_type::uint_type);
+ emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset->u32[0] & ~15)));
} else {
- offset = get_nir_src(instr->src[1], nir_type_uint32, 1);
+ offset_reg = get_nir_src(instr->src[1], nir_type_uint32, 1);
}
- src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
- packed_consts.type = dest.type;
+ src_reg packed_consts;
+ if (nir_dest_bit_size(instr->dest) == 32) {
+ packed_consts = src_reg(this, glsl_type::vec4_type);
+ emit_pull_constant_load_reg(dst_reg(packed_consts),
+ surf_index,
+ offset_reg,
+ NULL, NULL /* before_block/inst */);
+ } else {
+ src_reg temp = src_reg(this, glsl_type::dvec4_type);
+ src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F);
+
+ emit_pull_constant_load_reg(dst_reg(temp_float),
+ surf_index, offset_reg, NULL, NULL);
- emit_pull_constant_load_reg(dst_reg(packed_consts),
- surf_index,
- offset,
- NULL, NULL /* before_block/inst */);
+ emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u)));
+ emit_pull_constant_load_reg(dst_reg(offset(temp_float, 1)),
+ surf_index, offset_reg, NULL, NULL);
+
+ packed_consts = src_reg(this, glsl_type::dvec4_type);
+ shuffle_64bit_data(dst_reg(packed_consts), temp, false);
+ }
packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
if (const_offset) {
- packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u32[0] % 16 / 4,
- const_offset->u32[0] % 16 / 4,
- const_offset->u32[0] % 16 / 4,
- const_offset->u32[0] % 16 / 4);
+ unsigned type_size = type_sz(dest.type);
+ packed_consts.swizzle +=
+ BRW_SWIZZLE4(const_offset->u32[0] % 16 / type_size,
+ const_offset->u32[0] % 16 / type_size,
+ const_offset->u32[0] % 16 / type_size,
+ const_offset->u32[0] % 16 / type_size);
}
- emit(MOV(dest, packed_consts));
+ emit(MOV(dest, retype(packed_consts, dest.type)));
+
break;
}
--
2.7.4
More information about the mesa-dev
mailing list