[Mesa-dev] [PATCH 4/6] translate: add support for 8/16-bit indices
Luca Barbieri
luca at luca-barbieri.com
Fri Aug 13 04:42:03 PDT 2010
Currently, only 32-bit indices are supported, but some use cases
translate needs support for all types.
---
src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 14 ++++
src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 +
src/gallium/auxiliary/translate/translate.h | 12 ++++
.../auxiliary/translate/translate_generic.c | 34 ++++++++++
src/gallium/auxiliary/translate/translate_sse.c | 65 ++++++++++++++------
5 files changed, 108 insertions(+), 19 deletions(-)
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 9f70b73..63007c1 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -586,6 +586,20 @@ void x86_mov( struct x86_function *p,
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb6);
+ emit_modrm(p, dst, src);
+}
+
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb7);
+ emit_modrm(p, dst, src);
+}
+
void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 6208e8f..365dec1 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -237,6 +237,8 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index eb6f2cc..a753802 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -85,6 +85,18 @@ struct translate {
unsigned instance_id,
void *output_buffer);
+ void (PIPE_CDECL *run_elts16)( struct translate *,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
+ void (PIPE_CDECL *run_elts8)( struct translate *,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index e7f5384..10706a7 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -441,6 +441,38 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
}
}
+static void PIPE_CDECL generic_run_elts16( struct translate *translate,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
+
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
+
+static void PIPE_CDECL generic_run_elts8( struct translate *translate,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
+
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
+
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
unsigned count,
@@ -498,6 +530,8 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.release = generic_release;
tg->translate.set_buffer = generic_set_buffer;
tg->translate.run_elts = generic_run_elts;
+ tg->translate.run_elts16 = generic_run_elts16;
+ tg->translate.run_elts8 = generic_run_elts8;
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 68c71f4..f9aab92 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -67,6 +67,8 @@ struct translate_sse {
struct x86_function linear_func;
struct x86_function elt_func;
+ struct x86_function elt16_func;
+ struct x86_function elt8_func;
struct x86_function *func;
boolean loaded_identity;
@@ -362,7 +364,7 @@ static boolean translate_attr( struct translate_sse *p,
static boolean init_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
unsigned i;
struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
@@ -372,7 +374,7 @@ static boolean init_inputs( struct translate_sse *p,
struct translate_buffer_varient *varient = &p->buffer_varient[i];
struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
- if (linear || varient->instance_divisor) {
+ if (!index_size || varient->instance_divisor) {
struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
get_offset(p, &buffer->stride));
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
@@ -421,7 +423,7 @@ static boolean init_inputs( struct translate_sse *p,
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
- if (linear && p->nr_buffer_varients == 1)
+ if (!index_size && p->nr_buffer_varients == 1)
x86_mov(p->func, elt, tmp_EAX);
else
x86_mov(p->func, buf_ptr, tmp_EAX);
@@ -433,7 +435,7 @@ static boolean init_inputs( struct translate_sse *p,
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
- boolean linear,
+ unsigned index_size,
unsigned var_idx,
struct x86_reg elt )
{
@@ -441,10 +443,10 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
return x86_make_disp(p->machine_EDX,
get_offset(p, &p->instance_id));
}
- if (linear && p->nr_buffer_varients == 1) {
+ if (!index_size && p->nr_buffer_varients == 1) {
return p->idx_EBX;
}
- else if (linear || p->buffer_varient[var_idx].instance_divisor) {
+ else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
struct x86_reg ptr = p->tmp_EAX;
struct x86_reg buf_ptr =
x86_make_disp(p->machine_EDX,
@@ -469,8 +471,19 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
/* Calculate pointer to current attrib:
*/
- x86_mov(p->func, ptr, buf_stride);
- x86_imul(p->func, ptr, elt);
+ switch(index_size)
+ {
+ case 1:
+ x86_movzx8(p->func, ptr, elt);
+ break;
+ case 2:
+ x86_movzx16(p->func, ptr, elt);
+ break;
+ case 4:
+ x86_mov(p->func, ptr, elt);
+ break;
+ }
+ x86_imul(p->func, ptr, buf_stride);
x86_add(p->func, ptr, buf_base_ptr);
return ptr;
}
@@ -479,9 +492,9 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
static boolean incr_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
- if (linear && p->nr_buffer_varients == 1) {
+ if (!index_size && p->nr_buffer_varients == 1) {
struct x86_reg stride = x86_make_disp(p->machine_EDX,
get_offset(p, &p->buffer[0].stride));
@@ -490,7 +503,7 @@ static boolean incr_inputs( struct translate_sse *p,
sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
}
}
- else if (linear) {
+ else if (!index_size) {
unsigned i;
/* Is this worthwhile??
@@ -511,7 +524,7 @@ static boolean incr_inputs( struct translate_sse *p,
}
}
else {
- x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
+ x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, index_size));
}
return TRUE;
@@ -536,7 +549,7 @@ static boolean incr_inputs( struct translate_sse *p,
*/
static boolean build_vertex_emit( struct translate_sse *p,
struct x86_function *func,
- boolean linear )
+ unsigned index_size )
{
int fixup, label;
unsigned j;
@@ -585,13 +598,13 @@ static boolean build_vertex_emit( struct translate_sse *p,
/* always load, needed or not:
*/
- init_inputs(p, linear);
+ init_inputs(p, index_size);
/* Note address for loop jump
*/
label = x86_get_label(p->func);
{
- struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
+ struct x86_reg elt = !index_size ? p->idx_EBX : x86_deref(p->idx_EBX);
int last_varient = -1;
struct x86_reg vb;
@@ -603,7 +616,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
*/
if (varient != last_varient) {
last_varient = varient;
- vb = get_buffer_ptr(p, linear, varient, elt);
+ vb = get_buffer_ptr(p, index_size, varient, elt);
}
if (!translate_attr( p, a,
@@ -621,7 +634,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
/* Incr index
*/
- incr_inputs( p, linear );
+ incr_inputs( p, index_size );
}
/* decr count, loop if not zero
@@ -736,10 +749,16 @@ struct translate *translate_sse2_create( const struct translate_key *key )
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
- if (!build_vertex_emit(p, &p->linear_func, TRUE))
+ if (!build_vertex_emit(p, &p->linear_func, 0))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt_func, 4))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt16_func, 2))
goto fail;
- if (!build_vertex_emit(p, &p->elt_func, FALSE))
+ if (!build_vertex_emit(p, &p->elt8_func, 1))
goto fail;
p->translate.run = (void*)x86_get_func(&p->linear_func);
@@ -750,6 +769,14 @@ struct translate *translate_sse2_create( const struct translate_key *key )
if (p->translate.run_elts == NULL)
goto fail;
+ p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func);
+ if (p->translate.run_elts16 == NULL)
+ goto fail;
+
+ p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func);
+ if (p->translate.run_elts8 == NULL)
+ goto fail;
+
return &p->translate;
fail:
--
1.7.0.4
More information about the mesa-dev
mailing list