[Mesa-dev] [PATCH 5/7] translate_sse: Preserve low bit during unsigned -> float conversion.
Andreas Hartmetz
ahartmetz at gmail.com
Sun Apr 13 13:29:36 PDT 2014
From: Andreas Hartmetz <andreas.hartmetz at kdab.com>
---
src/gallium/auxiliary/translate/translate_sse.c | 35 ++++++++++++++++++++-----
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index dace722..03d8276 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -71,7 +71,7 @@ struct translate_group
#define ELEMENT_BUFFER_INSTANCE_ID 1001
-#define NUM_CONSTS 7
+#define NUM_CONSTS 8
enum
{
@@ -81,6 +81,7 @@ enum
CONST_INV_32767,
CONST_INV_65535,
CONST_INV_2147483647,
+ CONST_INV_4294967295,
CONST_255
};
@@ -92,6 +93,7 @@ static float consts[NUM_CONSTS][4] = {
C(1.0 / 32767.0),
C(1.0 / 65535.0),
C(1.0 / 2147483647.0),
+ C(1.0 / 4294967295.0),
C(255.0)
};
@@ -515,6 +517,7 @@ translate_attr_convert(struct translate_sse *p,
|| a->output_format == PIPE_FORMAT_R32G32B32_FLOAT
|| a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT)) {
struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1);
for (i = 0; i < output_desc->nr_channels; ++i) {
if (swizzle[i] == UTIL_FORMAT_SWIZZLE_0
@@ -546,17 +549,38 @@ translate_attr_convert(struct translate_sse *p,
*/
sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
break;
case 16:
sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
break;
- case 32: /* we lose precision here */
+ case 32: /* we lose precision if value > 2^23 - 1 */
+ /* ...but try to keep all bits for value <= 2^23 - 1 */
+ /* this could be done much smarter for 1 or 2 dwords of input
+ * on X64, using sth like "cvtsi2ss xmm0, rax" (note *R*ax) */
+
+ /* save the low bit */
+ sse_movss(p->func, dataXMM2, dataXMM);
+ /* right shift & convert, losing the low bit - must clear
+ * high bit because there is no unsigned convert instruction */
sse2_psrld_imm(p->func, dataXMM, 1);
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+
+ /* convert low bit to float */
+ sse2_pslld_imm(p->func, dataXMM2, 31);
+ sse2_psrld_imm(p->func, dataXMM2, 31);
+ sse2_cvtdq2ps(p->func, dataXMM2, dataXMM2);
+
+ /* mostly undo the right-shift by 1 */
+ sse_addps(p->func, dataXMM, dataXMM);
+ /* add back the lost low bit */
+ sse_addps(p->func, dataXMM, dataXMM2);
break;
default:
return FALSE;
}
- sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+
if (input_desc->channel[0].normalized) {
struct x86_reg factor;
switch (input_desc->channel[0].size) {
@@ -567,7 +591,7 @@ translate_attr_convert(struct translate_sse *p,
factor = get_const(p, CONST_INV_65535);
break;
case 32:
- factor = get_const(p, CONST_INV_2147483647);
+ factor = get_const(p, CONST_INV_4294967295);
break;
default:
assert(0);
@@ -579,9 +603,6 @@ translate_attr_convert(struct translate_sse *p,
}
sse_mulps(p->func, dataXMM, factor);
}
- else if (input_desc->channel[0].size == 32)
- /* compensate for the bit we threw away to fit u32 into s32 */
- sse_addps(p->func, dataXMM, dataXMM);
break;
case UTIL_FORMAT_TYPE_SIGNED:
if (!(x86_target_caps(p->func) & X86_SSE2))
--
1.9.1
More information about the mesa-dev
mailing list