[Mesa-dev] [PATCH 2/4] st/glsl_to_tgsi: add support for 64-bit integers
Nicolai Hähnle
nhaehnle at gmail.com
Thu Jan 26 19:09:54 UTC 2017
From: Dave Airlie <airlied at redhat.com>
v2: add conversion opcodes.
v3 (idr): Rebase on replacement of TGSI_OPCODE_I2U64 with
TGSI_OPCODE_I2I64.
v4 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b. Handle these with extra i2u or u2i casts just like
uint(bool) and bool(uint) conversion is done.
v5 (nha): add clarifying comment about a subtle assumption
Signed-off-by: Dave Airlie <airlied at redhat.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 220 ++++++++++++++++++++++++++---
1 file changed, 202 insertions(+), 18 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index a437645..224789e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -894,27 +894,46 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op,
if (op == TGSI_OPCODE_MOV)
return op;
assert(src0.type != GLSL_TYPE_ARRAY);
assert(src0.type != GLSL_TYPE_STRUCT);
assert(src1.type != GLSL_TYPE_ARRAY);
assert(src1.type != GLSL_TYPE_STRUCT);
if (is_resource_instruction(op))
type = src1.type;
+ else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64)
+ type = GLSL_TYPE_INT64;
+ else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64)
+ type = GLSL_TYPE_UINT64;
else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
type = GLSL_TYPE_DOUBLE;
else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
type = GLSL_TYPE_FLOAT;
else if (native_integers)
type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
+#define case7(c, f, i, u, d, i64, ui64) \
+ case TGSI_OPCODE_##c: \
+ if (type == GLSL_TYPE_UINT64) \
+ op = TGSI_OPCODE_##ui64; \
+ else if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+ else if (type == GLSL_TYPE_DOUBLE) \
+ op = TGSI_OPCODE_##d; \
+ else if (type == GLSL_TYPE_INT) \
+ op = TGSI_OPCODE_##i; \
+ else if (type == GLSL_TYPE_UINT) \
+ op = TGSI_OPCODE_##u; \
+ else \
+ op = TGSI_OPCODE_##f; \
+ break;
#define case5(c, f, i, u, d) \
case TGSI_OPCODE_##c: \
if (type == GLSL_TYPE_DOUBLE) \
op = TGSI_OPCODE_##d; \
else if (type == GLSL_TYPE_INT) \
op = TGSI_OPCODE_##i; \
else if (type == GLSL_TYPE_UINT) \
op = TGSI_OPCODE_##u; \
else \
op = TGSI_OPCODE_##f; \
@@ -924,57 +943,66 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op,
case TGSI_OPCODE_##c: \
if (type == GLSL_TYPE_INT) \
op = TGSI_OPCODE_##i; \
else if (type == GLSL_TYPE_UINT) \
op = TGSI_OPCODE_##u; \
else \
op = TGSI_OPCODE_##f; \
break;
#define case3(f, i, u) case4(f, f, i, u)
-#define case4d(f, i, u, d) case5(f, f, i, u, d)
+#define case6d(f, i, u, d, i64, u64) case7(f, f, i, u, d, i64, u64)
#define case3fid(f, i, d) case5(f, f, i, i, d)
+#define case3fid64(f, i, d, i64) case7(f, f, i, i, d, i64, i64)
#define case2fi(f, i) case4(f, f, i, i)
#define case2iu(i, u) case4(i, LAST, i, u)
-#define casecomp(c, f, i, u, d) \
+#define case2iu64(i, i64) case7(i, LAST, i, i, LAST, i64, i64)
+#define case4iu64(i, u, i64, u64) case7(i, LAST, i, u, LAST, i64, u64)
+
+#define casecomp(c, f, i, u, d, i64, ui64) \
case TGSI_OPCODE_##c: \
- if (type == GLSL_TYPE_DOUBLE) \
+ if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+ else if (type == GLSL_TYPE_UINT64) \
+ op = TGSI_OPCODE_##ui64; \
+ else if (type == GLSL_TYPE_DOUBLE) \
op = TGSI_OPCODE_##d; \
else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \
op = TGSI_OPCODE_##i; \
else if (type == GLSL_TYPE_UINT) \
op = TGSI_OPCODE_##u; \
else if (native_integers) \
op = TGSI_OPCODE_##f; \
else \
op = TGSI_OPCODE_##c; \
break;
switch(op) {
- case3fid(ADD, UADD, DADD);
- case3fid(MUL, UMUL, DMUL);
+ case3fid64(ADD, UADD, DADD, U64ADD);
+ case3fid64(MUL, UMUL, DMUL, U64MUL);
case3fid(MAD, UMAD, DMAD);
case3fid(FMA, UMAD, DFMA);
- case4d(DIV, IDIV, UDIV, DDIV);
- case4d(MAX, IMAX, UMAX, DMAX);
- case4d(MIN, IMIN, UMIN, DMIN);
- case2iu(MOD, UMOD);
+ case6d(DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV);
+ case6d(MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX);
+ case6d(MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN);
+ case4iu64(MOD, UMOD, I64MOD, U64MOD);
- casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
- casecomp(SNE, FSNE, USNE, USNE, DSNE);
- casecomp(SGE, FSGE, ISGE, USGE, DSGE);
- casecomp(SLT, FSLT, ISLT, USLT, DSLT);
+ casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
+ casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
+ casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
+ casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);
- case2iu(ISHR, USHR);
+ case2iu64(SHL, U64SHL);
+ case4iu64(ISHR, USHR, I64SHR, U64SHR);
- case3fid(SSG, ISSG, DSSG);
+ case3fid64(SSG, ISSG, DSSG, I64SSG);
case2iu(IBFE, UBFE);
case2iu(IMSB, UMSB);
case2iu(IMUL_HI, UMUL_HI);
case3fid(SQRT, SQRT, DSQRT);
case3fid(RCP, RCP, DRCP);
case3fid(RSQ, RSQ, DRSQ);
@@ -1096,21 +1124,23 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
size, datatype, &swizzle);
if (swizzle_out)
*swizzle_out = swizzle;
return result;
}
assert(file == PROGRAM_IMMEDIATE);
int index = 0;
immediate_storage *entry;
- int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
+ int size32 = size * ((datatype == GL_DOUBLE ||
+ datatype == GL_INT64_ARB ||
+ datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1);
int i;
/* Search immediate storage to see if we already have an identical
* immediate that we can use instead of adding a duplicate entry.
*/
foreach_in_list(immediate_storage, entry, &this->immediates) {
immediate_storage *tmp = entry;
for (i = 0; i * 4 < size32; i++) {
int slot_size = MIN2(size32 - (i * 4), 4);
@@ -1587,37 +1617,41 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
/* Previously 'SEQ dst, src, 0.0' was used for this. However, many
* older GPUs implement SEQ using multiple instructions (i915 uses two
* SGE instructions and a MUL instruction). Since our logic values are
* 0.0 and 1.0, 1-x also implements !x.
*/
op[0].negate = ~op[0].negate;
emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
}
break;
case ir_unop_neg:
- if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
+ if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]);
+ else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
else if (result_dst.type == GLSL_TYPE_DOUBLE)
emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
else {
op[0].negate = ~op[0].negate;
result_src = op[0];
}
break;
case ir_unop_subroutine_to_int:
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
case ir_unop_abs:
if (result_dst.type == GLSL_TYPE_FLOAT)
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
else if (result_dst.type == GLSL_TYPE_DOUBLE)
emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
+ else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]);
else
emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
break;
case ir_unop_sign:
emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
break;
case ir_unop_rcp:
emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
break;
@@ -1950,20 +1984,22 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
}
/* fallthrough to next case otherwise */
case ir_unop_b2f:
if (native_integers) {
emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
break;
}
/* fallthrough to next case otherwise */
case ir_unop_i2u:
case ir_unop_u2i:
+ case ir_unop_i642u64:
+ case ir_unop_u642i64:
/* Converting between signed and unsigned integers is a no-op. */
result_src = op[0];
result_src.type = result_dst.type;
break;
case ir_unop_b2i:
if (native_integers) {
/* Booleans are stored as integers using ~0 for true and 0 for false.
* GLSL requires that int(bool) return 1 for true and 0 for false.
* This conversion is done with AND, but it could be done with NEG.
*/
@@ -2007,20 +2043,33 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
break;
case ir_unop_d2b:
emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
break;
case ir_unop_i2b:
if (native_integers)
emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
else
emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
break;
+ case ir_unop_bitcast_u642d:
+ case ir_unop_bitcast_i642d:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_DOUBLE;
+ break;
+ case ir_unop_bitcast_d2i64:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_INT64;
+ break;
+ case ir_unop_bitcast_d2u64:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_UINT64;
+ break;
case ir_unop_trunc:
emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
case ir_unop_ceil:
emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
break;
case ir_unop_floor:
emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
break;
case ir_unop_round_even:
@@ -2252,20 +2301,24 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
break;
case ir_unop_d2u:
emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
break;
case ir_unop_u2d:
emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
break;
case ir_unop_unpack_double_2x32:
case ir_unop_pack_double_2x32:
+ case ir_unop_unpack_int_2x32:
+ case ir_unop_pack_int_2x32:
+ case ir_unop_unpack_uint_2x32:
+ case ir_unop_pack_uint_2x32:
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
case ir_binop_ldexp:
if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
} else {
assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
}
break;
@@ -2295,21 +2348,134 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
case ir_unop_vote_any:
emit_asm(ir, TGSI_OPCODE_VOTE_ANY, result_dst, op[0]);
break;
case ir_unop_vote_all:
emit_asm(ir, TGSI_OPCODE_VOTE_ALL, result_dst, op[0]);
break;
case ir_unop_vote_eq:
emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]);
break;
-
+ case ir_unop_u2i64:
+ case ir_unop_u2u64:
+ case ir_unop_b2i64: {
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ unsigned orig_swz = op[0].swizzle;
+ /*
+ * To convert unsigned to 64-bit:
+ * zero Y channel, copy X channel.
+ */
+ temp_dst.writemask = WRITEMASK_Y;
+ if (vector_elements > 1)
+ temp_dst.writemask |= WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
+ temp_dst.writemask = WRITEMASK_X;
+ if (vector_elements > 1)
+ temp_dst.writemask |= WRITEMASK_Z;
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0),
+ GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1));
+ if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ result_src = temp;
+ result_src.type = GLSL_TYPE_UINT64;
+ if (vector_elements > 2) {
+ /* Subtle: We rely on the fact that get_temp here returns the next
+ * TGSI temporary register directly after the temp register used for
+ * the first two components, so that the result gets picked up
+ * automatically.
+ */
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ temp_dst.writemask = WRITEMASK_Y;
+ if (vector_elements > 3)
+ temp_dst.writemask |= WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
+
+ temp_dst.writemask = WRITEMASK_X;
+ if (vector_elements > 3)
+ temp_dst.writemask |= WRITEMASK_Z;
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2),
+ GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3));
+ if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ }
+ break;
+ }
+ case ir_unop_i642i:
+ case ir_unop_u642i:
+ case ir_unop_u642u:
+ case ir_unop_i642u: {
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ unsigned orig_swz = op[0].swizzle;
+ unsigned orig_idx = op[0].index;
+ int el;
+ temp_dst.writemask = WRITEMASK_X;
+
+ for (el = 0; el < vector_elements; el++) {
+ unsigned swz = GET_SWZ(orig_swz, el);
+ if (swz & 1)
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z);
+ else
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ if (swz > 2)
+ op[0].index = orig_idx + 1;
+ op[0].type = GLSL_TYPE_UINT;
+ temp_dst.writemask = WRITEMASK_X << el;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ }
+ result_src = temp;
+ if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u)
+ result_src.type = GLSL_TYPE_UINT;
+ else
+ result_src.type = GLSL_TYPE_INT;
+ break;
+ }
+ case ir_unop_i642b:
+ emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0));
+ break;
+ case ir_unop_i642f:
+ emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
+ break;
+ case ir_unop_u642f:
+ emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]);
+ break;
+ case ir_unop_i642d:
+ emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]);
+ break;
+ case ir_unop_u642d:
+ emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]);
+ break;
+ case ir_unop_i2i64:
+ emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
+ break;
+ case ir_unop_f2i64:
+ emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]);
+ break;
+ case ir_unop_d2i64:
+ emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]);
+ break;
+ case ir_unop_i2u64:
+ emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
+ break;
+ case ir_unop_f2u64:
+ emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]);
+ break;
+ case ir_unop_d2u64:
+ emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]);
+ break;
+ /* these might be needed */
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_4x8:
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_4x8:
@@ -3187,20 +3353,34 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
for (i = 0; i < ir->type->vector_elements; i++) {
values[i].f = ir->value.f[i];
}
break;
case GLSL_TYPE_DOUBLE:
gl_type = GL_DOUBLE;
for (i = 0; i < ir->type->vector_elements; i++) {
memcpy(&values[i * 2], &ir->value.d[i], sizeof(double));
}
break;
+ case GLSL_TYPE_INT64:
+ gl_type = GL_INT64_ARB;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ values[i * 2].i = *(uint32_t *)&ir->value.d[i];
+ values[i * 2 + 1].i = *(((uint32_t *)&ir->value.d[i]) + 1);
+ }
+ break;
+ case GLSL_TYPE_UINT64:
+ gl_type = GL_UNSIGNED_INT64_ARB;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ values[i * 2].i = *(uint32_t *)&ir->value.d[i];
+ values[i * 2 + 1].i = *(((uint32_t *)&ir->value.d[i]) + 1);
+ }
+ break;
case GLSL_TYPE_UINT:
gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
if (native_integers)
values[i].u = ir->value.u[i];
else
values[i].f = ir->value.u[i];
}
break;
case GLSL_TYPE_INT:
@@ -5308,20 +5488,24 @@ emit_immediate(struct st_translate *t,
int type, int size)
{
struct ureg_program *ureg = t->ureg;
switch(type)
{
case GL_FLOAT:
return ureg_DECL_immediate(ureg, &values[0].f, size);
case GL_DOUBLE:
return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
+ case GL_INT64_ARB:
+ return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size);
+ case GL_UNSIGNED_INT64_ARB:
+ return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size);
case GL_INT:
return ureg_DECL_immediate_int(ureg, &values[0].i, size);
case GL_UNSIGNED_INT:
case GL_BOOL:
return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
default:
assert(!"should not get here - type must be float, int, uint, or bool");
return ureg_src_undef();
}
}
--
2.7.4
More information about the mesa-dev
mailing list