[Mesa-dev] [PATCH 1/3] gallivm: minor rho calculation optimization for 1 or 3 coords
sroland at vmware.com
sroland at vmware.com
Tue Apr 2 18:32:28 PDT 2013
From: Roland Scheidegger <sroland at vmware.com>
Using a different packing for the single-coord case should save a shuffle.
Also includes some minor style fixes.
---
src/gallium/auxiliary/gallivm/lp_bld_quad.c | 20 +++++++---------
src/gallium/auxiliary/gallivm/lp_bld_sample.c | 31 +++++++++++--------------
2 files changed, 22 insertions(+), 29 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index 1955add..f2a762a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -81,7 +81,8 @@ lp_build_ddy(struct lp_build_context *bld,
/*
* Helper for building packed ddx/ddy vector for one coord (scalar per quad
* values). The vector will look like this (8-wide):
- * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
+ * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____
+ * This only requires one shuffle, versus two for the more straightforward packing.
*/
LLVMValueRef
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
@@ -91,19 +92,15 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef vec1, vec2;
- /* same packing as _twocoord, but can use aos swizzle helper */
+ /* use aos swizzle helper */
- /*
- * XXX could make swizzle1 a noop swizzle by using right top/bottom
- * pair for ddy
- */
- static const unsigned char swizzle1[] = {
- LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
- LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+ static const unsigned char swizzle1[] = { /* no-op swizzle */
+ LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE,
+ LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE
};
static const unsigned char swizzle2[] = {
- LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT,
- LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+ LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE,
+ LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE
};
vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
@@ -120,6 +117,7 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
* Helper for building packed ddx/ddy vector for one coord (scalar per quad
* values). The vector will look like this (8-wide):
* ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
+ * This only needs 2 (v)shufps.
*/
LLVMValueRef
lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index fc8bae7..9a00897 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -226,7 +226,6 @@ lp_build_rho(struct lp_build_sample_context *bld,
LLVMValueRef int_size, float_size;
LLVMValueRef rho;
LLVMValueRef first_level, first_level_vec;
- LLVMValueRef abs_ddx_ddy[2];
unsigned length = coord_bld->type.length;
unsigned num_quads = length / 4;
unsigned i;
@@ -279,32 +278,28 @@ lp_build_rho(struct lp_build_sample_context *bld,
ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
}
else if (dims >= 2) {
- ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld,
- s, t);
+ ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
if (dims > 2) {
ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
}
}
- abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
+ ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
if (dims > 2) {
- abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
- }
- else {
- abs_ddx_ddy[1] = NULL;
+ ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
}
- if (dims == 1) {
- static const unsigned char swizzle1[] = {
+ if (dims < 2) {
+ static const unsigned char swizzle1[] = { /* no-op swizzle */
0, LP_BLD_SWIZZLE_DONTCARE,
LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
};
static const unsigned char swizzle2[] = {
- 1, LP_BLD_SWIZZLE_DONTCARE,
+ 2, LP_BLD_SWIZZLE_DONTCARE,
LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
};
- rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
- rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
+ rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle1);
+ rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
}
else if (dims == 2) {
static const unsigned char swizzle1[] = {
@@ -315,8 +310,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
1, 3,
LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
};
- rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
- rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
+ rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle1);
+ rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
}
else {
LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
@@ -329,12 +324,12 @@ lp_build_rho(struct lp_build_sample_context *bld,
shuffles1[4*i + 3] = i32undef;
shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
- shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1);
+ shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
shuffles2[4*i + 3] = i32undef;
}
- rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+ rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
LLVMConstVector(shuffles1, length), "");
- rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+ rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
LLVMConstVector(shuffles2, length), "");
}
--
1.7.9.5
More information about the mesa-dev mailing list