[Mesa-dev] [PATCH 15/75] st/nine: Add support for swvp constants in shaders
Axel Davy
axel.davy at ens.fr
Wed Oct 5 20:08:48 UTC 2016
swvp (software vertex processing) has relaxed limits (more nested loops, etc.).
In particular, it allows more constants.
Signed-off-by: Axel Davy <axel.davy at ens.fr>
---
src/gallium/state_trackers/nine/nine_shader.c | 156 ++++++++++++++++++------
src/gallium/state_trackers/nine/nine_shader.h | 2 +
src/gallium/state_trackers/nine/vertexshader9.c | 2 +
3 files changed, 123 insertions(+), 37 deletions(-)
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 140226e..480f096 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -432,11 +432,7 @@ struct sm1_local_const
{
INT idx;
struct ureg_src reg;
- union {
- boolean b;
- float f[4];
- int32_t i[4];
- } imm;
+ float f[4]; /* for indirect addressing of float constants */
};
struct shader_translator
@@ -507,8 +503,10 @@ struct shader_translator
struct sm1_local_const *lconstf;
unsigned num_lconstf;
- struct sm1_local_const lconsti[NINE_MAX_CONST_I];
- struct sm1_local_const lconstb[NINE_MAX_CONST_B];
+ struct sm1_local_const *lconsti;
+ unsigned num_lconsti;
+ struct sm1_local_const *lconstb;
+ unsigned num_lconstb;
boolean indirect_const_access;
boolean failure;
@@ -542,6 +540,7 @@ static boolean
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
{
INT i;
+
if (index < 0 || index >= tx->num_constf_allowed) {
tx->failure = TRUE;
return FALSE;
@@ -557,24 +556,36 @@ tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
static boolean
tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
{
+ int i;
+
if (index < 0 || index >= tx->num_consti_allowed) {
tx->failure = TRUE;
return FALSE;
}
- if (tx->lconsti[index].idx == index)
- *src = tx->lconsti[index].reg;
- return tx->lconsti[index].idx == index;
+ for (i = 0; i < tx->num_lconsti; ++i) {
+ if (tx->lconsti[i].idx == index) {
+ *src = tx->lconsti[i].reg;
+ return TRUE;
+ }
+ }
+ return FALSE;
}
static boolean
tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
{
+ int i;
+
if (index < 0 || index >= tx->num_constb_allowed) {
tx->failure = TRUE;
return FALSE;
}
- if (tx->lconstb[index].idx == index)
- *src = tx->lconstb[index].reg;
- return tx->lconstb[index].idx == index;
+ for (i = 0; i < tx->num_lconstb; ++i) {
+ if (tx->lconstb[i].idx == index) {
+ *src = tx->lconstb[i].reg;
+ return TRUE;
+ }
+ }
+ return FALSE;
}
static void
@@ -599,23 +610,55 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
tx->lconstf[n].idx = index;
tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
- memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
+ memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
}
static void
tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
{
+ unsigned n;
+
FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
- tx->lconsti[index].idx = index;
- tx->lconsti[index].reg = tx->native_integers ?
+
+ for (n = 0; n < tx->num_lconsti; ++n)
+ if (tx->lconsti[n].idx == index)
+ break;
+ if (n == tx->num_lconsti) {
+ if ((n % 8) == 0) {
+ tx->lconsti = REALLOC(tx->lconsti,
+ (n + 0) * sizeof(tx->lconsti[0]),
+ (n + 8) * sizeof(tx->lconsti[0]));
+ assert(tx->lconsti);
+ }
+ tx->num_lconsti++;
+ }
+
+ tx->lconsti[n].idx = index;
+ tx->lconsti[n].reg = tx->native_integers ?
ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
}
static void
tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
{
+ unsigned n;
+
FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
- tx->lconstb[index].idx = index;
- tx->lconstb[index].reg = tx->native_integers ?
+
+ for (n = 0; n < tx->num_lconstb; ++n)
+ if (tx->lconstb[n].idx == index)
+ break;
+ if (n == tx->num_lconstb) {
+ if ((n % 8) == 0) {
+ tx->lconstb = REALLOC(tx->lconstb,
+ (n + 0) * sizeof(tx->lconstb[0]),
+ (n + 8) * sizeof(tx->lconstb[0]));
+ assert(tx->lconstb);
+ }
+ tx->num_lconstb++;
+ }
+
+ tx->lconstb[n].idx = index;
+ tx->lconstb[n].reg = tx->native_integers ?
ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
}
@@ -942,7 +985,24 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
if (!param->rel)
nine_info_mark_const_f_used(tx->info, param->idx);
- src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
+ /* swvp constant handling: we use two buffers
+ * to fit all the float constants. No special handling
+ * is needed elsewhere, because all the instructions
+ * accessing the constants directly are VS1, and swvp
+ * is VS >= 2 */
+ if (IS_VS && tx->info->swvp_on) {
+ if (!param->rel) {
+ if (param->idx < 4096)
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
+ else {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
+ src = ureg_src_dimension(src, 1);
+ }
+ } else {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
+ }
+ } else
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
}
if (!IS_VS && tx->version.major < 2) {
/* ps 1.X clamps constants */
@@ -964,8 +1024,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
assert(!param->rel);
if (!tx_lconsti(tx, &src, param->idx)) {
nine_info_mark_const_i_used(tx->info, param->idx);
- src = ureg_src_register(TGSI_FILE_CONSTANT,
- tx->info->const_i_base + param->idx);
+ if (IS_VS && tx->info->swvp_on) {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
+ src = ureg_src_dimension(src, 2);
+ } else
+ src = ureg_src_register(TGSI_FILE_CONSTANT,
+ tx->info->const_i_base + param->idx);
}
break;
case D3DSPR_CONSTBOOL:
@@ -974,8 +1038,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
char r = param->idx / 4;
char s = param->idx & 3;
nine_info_mark_const_b_used(tx->info, param->idx);
- src = ureg_src_register(TGSI_FILE_CONSTANT,
- tx->info->const_b_base + r);
+ if (IS_VS && tx->info->swvp_on) {
+ src = ureg_src_register(TGSI_FILE_CONSTANT, r);
+ src = ureg_src_dimension(src, 3);
+ } else
+ src = ureg_src_register(TGSI_FILE_CONSTANT,
+ tx->info->const_b_base + r);
src = ureg_swizzle(src, s, s, s, s);
}
break;
@@ -3353,8 +3421,6 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
struct shader_translator *tx;
HRESULT hr = D3D_OK;
const unsigned processor = info->type;
- unsigned s, slot_max;
- unsigned max_const_f;
user_assert(processor != ~0, D3DERR_INVALIDCALL);
@@ -3411,6 +3477,12 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
tx->num_constb_allowed = NINE_MAX_CONST_B;
}
+ if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
+ tx->num_constf_allowed = 8192;
+ tx->num_consti_allowed = 2048;
+ tx->num_constb_allowed = 2048;
+ }
+
/* VS must always write position. Declare it here to make it the 1st output.
* (Some drivers like nv50 are buggy and rely on that.)
*/
@@ -3485,7 +3557,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
k = i;
}
indices[n] = tx->lconstf[k].idx;
- memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
+ memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
tx->lconstf[k].idx = INT_MAX;
}
@@ -3520,25 +3592,35 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
/* r500 */
if (info->const_float_slots > device->max_vs_const_f &&
- (info->const_int_slots || info->const_bool_slots))
+ (info->const_int_slots || info->const_bool_slots) &&
+ (!IS_VS || !info->swvp_on))
ERR("Overlapping constant slots. The shader is likely to be buggy\n");
if (tx->indirect_const_access) /* vs only */
info->const_float_slots = device->max_vs_const_f;
- max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
- slot_max = info->const_bool_slots > 0 ?
- max_const_f + NINE_MAX_CONST_I
- + DIV_ROUND_UP(info->const_bool_slots, 4) :
- info->const_int_slots > 0 ?
- max_const_f + info->const_int_slots :
- info->const_float_slots;
+ if (!IS_VS || !info->swvp_on) {
+ unsigned s, slot_max;
+ unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
+
+ slot_max = info->const_bool_slots > 0 ?
+ max_const_f + NINE_MAX_CONST_I
+ + DIV_ROUND_UP(info->const_bool_slots, 4) :
+ info->const_int_slots > 0 ?
+ max_const_f + info->const_int_slots :
+ info->const_float_slots;
- info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
+ info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
- for (s = 0; s < slot_max; s++)
- ureg_DECL_constant(tx->ureg, s);
+ for (s = 0; s < slot_max; s++)
+ ureg_DECL_constant(tx->ureg, s);
+ } else {
+ ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
+ ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
+ ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
+ ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
+ }
if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
unsigned count;
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index 7f2f57f..092ae63 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -76,6 +76,8 @@ struct nine_shader_info
struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
uint8_t bumpenvmat_needed;
+
+ boolean swvp_on;
};
static inline void
diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c
index 3113575..bc09a41 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.c
+++ b/src/gallium/state_trackers/nine/vertexshader9.c
@@ -63,6 +63,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
info.fog_enable = 0;
info.point_size_min = 0;
info.point_size_max = 0;
+ info.swvp_on = false;
hr = nine_translate_shader(device, &info);
if (FAILED(hr))
@@ -167,6 +168,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]);
info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]);
+ info.swvp_on = false;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
--
2.10.0
More information about the mesa-dev
mailing list