Mesa (gallium-0.2): cell: use 7-bit weights in sample_texture_2d_bilinear_int()
Brian Paul
brianp at kemper.freedesktop.org
Mon Oct 20 15:35:30 UTC 2008
Module: Mesa
Branch: gallium-0.2
Commit: fa7b8388066651c5cfafd4ce6461fc43c982d8c7
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fa7b8388066651c5cfafd4ce6461fc43c982d8c7
Author: Brian Paul <brian.paul at tungstengraphics.com>
Date: Thu Oct 16 15:48:04 2008 -0600
cell: use 7-bit weights in sample_texture_2d_bilinear_int()
This allows us to use 16-bit signed mul/add instructions. Previously we had
to use an unsigned mul, and there's no unsigned mul/add instruction.
---
src/gallium/drivers/cell/spu/spu_texture.c | 62 ++++++++++++++--------------
1 files changed, 31 insertions(+), 31 deletions(-)
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
index c0af05e..4e12a11 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.c
+++ b/src/gallium/drivers/cell/spu/spu_texture.c
@@ -314,19 +314,19 @@ sample_texture_2d_bilinear_int(vector float s, vector float t,
vector float ss = spu_madd(s, tlevel->scale_s, half);
vector float tt = spu_madd(t, tlevel->scale_t, half);
- /* convert float coords to fixed-pt coords with 8 fraction bits */
- vector signed int is = spu_convts(ss, 8);
- vector signed int it = spu_convts(tt, 8);
+ /* convert float coords to fixed-pt coords with 7 fraction bits */
+ vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */
+ vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */
- /* compute integer texel weights in [0, 255] */
- vector signed int sWeights0 = spu_and(is, 255);
- vector signed int tWeights0 = spu_and(it, 255);
- vector signed int sWeights1 = spu_sub(255, sWeights0);
- vector signed int tWeights1 = spu_sub(255, tWeights0);
+ /* compute integer texel weights in [0, 127] */
+ vector signed int sWeights0 = spu_and(is, 127);
+ vector signed int tWeights0 = spu_and(it, 127);
+ vector signed int sWeights1 = spu_sub(127, sWeights0);
+ vector signed int tWeights1 = spu_sub(127, tWeights0);
- /* texel coords: is0 = is / 256, it0 = is / 256 */
- vector signed int is0 = spu_rlmask(is, -8);
- vector signed int it0 = spu_rlmask(it, -8);
+ /* texel coords: is0 = is / 128, it0 = it / 128 */
+ vector signed int is0 = spu_rlmask(is, -7);
+ vector signed int it0 = spu_rlmask(it, -7);
/* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
vector signed int is1 = spu_add(is0, 1);
@@ -377,36 +377,36 @@ sample_texture_2d_bilinear_int(vector float s, vector float t,
vector unsigned int c0, c1, c2, c3, cSum;
/* red */
- c0 = (vector unsigned int) si_mpyu((qword) texel0, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpyu((qword) texel4, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpyu((qword) texel8, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpyu((qword) texel12, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[0] = spu_convtf(cSum, 24);
+ colors[0] = spu_convtf(cSum, 22);
/* green */
- c0 = (vector unsigned int) si_mpyu((qword) texel1, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpyu((qword) texel5, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpyu((qword) texel9, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpyu((qword) texel13, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[1] = spu_convtf(cSum, 24);
+ colors[1] = spu_convtf(cSum, 22);
/* blue */
- c0 = (vector unsigned int) si_mpyu((qword) texel2, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpyu((qword) texel6, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpyu((qword) texel10, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpyu((qword) texel14, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[2] = spu_convtf(cSum, 24);
+ colors[2] = spu_convtf(cSum, 22);
/* alpha */
- c0 = (vector unsigned int) si_mpyu((qword) texel3, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpyu((qword) texel7, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpyu((qword) texel11, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpyu((qword) texel15, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[3] = spu_convtf(cSum, 24);
+ colors[3] = spu_convtf(cSum, 22);
}
More information about the mesa-commit
mailing list