[Liboil-commit] 2 commits - orc/orcprogram-x86.c orc/orcprogram.h
David Schleef
ds at kemper.freedesktop.org
Mon May 26 18:08:49 PDT 2008
orc/orcprogram-x86.c | 363 +++++++++++++++++++++++++++++++++++++++++++++++----
orc/orcprogram.h | 10 +
2 files changed, 344 insertions(+), 29 deletions(-)
New commits:
commit 97fbf6b517bc7c39c404e780d110328093423175
Author: David Schleef <ds at ginger.bigkitten.com>
Date: Mon May 26 18:08:23 2008 -0700
[orc] add sse rules (doesn't work yet)
diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c
index 36aab28..0778ad2 100644
--- a/orc/orcprogram-x86.c
+++ b/orc/orcprogram-x86.c
@@ -50,6 +50,7 @@ void x86_test (OrcProgram *program);
void orc_program_x86_register_rules (void);
void orc_program_mmx_register_rules (void);
+void orc_program_sse_register_rules (void);
enum {
X86_EAX = ORC_GP_REG_BASE,
@@ -77,7 +78,21 @@ enum {
X86_MM6,
X86_MM7,
X86_XMM0 = ORC_VEC2_REG_BASE,
- X86_XMM1
+ X86_XMM1,
+ X86_XMM2,
+ X86_XMM3,
+ X86_XMM4,
+ X86_XMM5,
+ X86_XMM6,
+ X86_XMM7,
+ X86_XMM8,
+ X86_XMM9,
+ X86_XMM10,
+ X86_XMM11,
+ X86_XMM12,
+ X86_XMM13,
+ X86_XMM14,
+ X86_XMM15
};
#ifdef HAVE_AMD64
@@ -178,6 +193,23 @@ x86_get_regname_mmx(int i)
}
}
+static const char *
+x86_get_regname_sse(int i)
+{
+ static const char *x86_regs[] = { "xmm0", "xmm1", "xmm2", "xmm3",
+ "xmm4", "xmm5", "xmm6", "xmm7" };
+
+ if (i>=X86_XMM0 && i<X86_XMM0 + 8) return x86_regs[i - X86_XMM0];
+ switch (i) {
+ case 0:
+ return "UNALLOCATED";
+ case 1:
+ return "direct";
+ default:
+ return "ERROR";
+ }
+}
+
int
orc_program_x86_allocate_register (OrcProgram *program, int data_reg)
{
@@ -285,6 +317,7 @@ orc_x86_init (void)
{
orc_program_x86_register_rules ();
orc_program_mmx_register_rules ();
+ orc_program_sse_register_rules ();
}
void
@@ -431,13 +464,13 @@ orc_program_assemble_x86 (OrcProgram *program)
x86_emit_mov_memoffset_reg (program, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor,n),
x86_exec_ptr, X86_ECX);
- x86_emit_sar_imm_reg (program, 4, 2, X86_ECX);
+ x86_emit_sar_imm_reg (program, 4, program->loop_shift, X86_ECX);
x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
x86_emit_mov_memoffset_reg (program, 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr, X86_ECX);
- x86_emit_and_imm_reg (program, 4, 3, X86_ECX);
+ x86_emit_and_imm_reg (program, 4, (1<<program->loop_shift)-1, X86_ECX);
x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
@@ -839,6 +872,153 @@ orc_program_mmx_register_rules (void)
}
}
+/* sse rules */
+
+static void
+sse_emit_loadi_s16 (OrcProgram *p, int reg, int value)
+{
+ if (value == 0) {
+ printf(" pxor %%%s, %%%s\n", x86_get_regname_sse(reg),
+ x86_get_regname_sse(reg));
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0xef;
+ x86_emit_modrm_reg (p, reg, reg);
+ } else {
+ x86_emit_mov_imm_reg (p, 4, value, X86_ECX);
+
+ printf(" movd %%ecx, %%%s\n", x86_get_regname_sse(reg));
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0x6e;
+ x86_emit_modrm_reg (p, X86_ECX, reg);
+
+ printf(" pshufw $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
+ x86_get_regname_sse(reg));
+
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0x70;
+ x86_emit_modrm_reg (p, reg, reg);
+ *p->codeptr++ = 0x00;
+ }
+}
+
+static void
+sse_rule_loadi_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+ sse_emit_loadi_s16 (p, p->vars[insn->args[0]].alloc,
+ p->vars[insn->args[2]].s16);
+}
+
+static void
+sse_rule_add_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+ printf(" paddw %%%s, %%%s\n",
+ x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+ x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0xfd;
+ x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
+ p->vars[insn->args[0]].alloc);
+}
+
+static void
+sse_rule_sub_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+ printf(" psubw %%%s, %%%s\n",
+ x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+ x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0xf9;
+ x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
+ p->vars[insn->args[0]].alloc);
+}
+
+static void
+sse_rule_mul_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+ printf(" pmullw %%%s, %%%s\n",
+ x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+ x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0xd5;
+ x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
+ p->vars[insn->args[0]].alloc);
+}
+
+static void
+sse_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+ if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_CONST) {
+ printf(" psllw $%d, %%%s\n",
+ p->vars[insn->args[2]].s16,
+ x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0x71;
+ x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 6);
+ *p->codeptr++ = p->vars[insn->args[2]].s16;
+ } else {
+ /* FIXME this doesn't work quite right */
+ printf(" psllw %%%s, %%%s\n",
+ x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+ x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0xf1;
+ x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
+ p->vars[insn->args[2]].alloc);
+ }
+}
+
+static void
+sse_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
+{
+ if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_CONST) {
+ printf(" psraw $%d, %%%s\n",
+ p->vars[insn->args[2]].s16,
+ x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0x71;
+ x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 4);
+ *p->codeptr++ = p->vars[insn->args[2]].s16;
+ } else {
+ /* FIXME this doesn't work quite right */
+ printf(" psraw %%%s, %%%s\n",
+ x86_get_regname_sse(p->vars[insn->args[2]].alloc),
+ x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0xe1;
+ x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
+ p->vars[insn->args[2]].alloc);
+ }
+}
+
+void
+orc_program_sse_register_rules (void)
+{
+ int i;
+
+ orc_rule_register ("_loadi_s16", ORC_RULE_SSE_4, sse_rule_loadi_s16, NULL,
+ ORC_RULE_REG_IMM);
+
+ for(i=ORC_RULE_SSE_1; i <= ORC_RULE_SSE_8; i++) {
+ orc_rule_register ("add_s16", i, sse_rule_add_s16, NULL,
+ ORC_RULE_REG_REG);
+ orc_rule_register ("sub_s16", i, sse_rule_sub_s16, NULL,
+ ORC_RULE_REG_REG);
+ orc_rule_register ("mul_s16", i, sse_rule_mul_s16, NULL,
+ ORC_RULE_REG_REG);
+ orc_rule_register ("lshift_s16", i, sse_rule_lshift_s16, NULL,
+ ORC_RULE_REG_REG);
+ orc_rule_register ("rshift_s16", i, sse_rule_rshift_s16, NULL,
+ ORC_RULE_REG_REG);
+ }
+}
+
/* code generation */
void
diff --git a/orc/orcprogram.h b/orc/orcprogram.h
index 666af63..3ae3e00 100644
--- a/orc/orcprogram.h
+++ b/orc/orcprogram.h
@@ -32,7 +32,7 @@ typedef void (*OrcRuleEmitFunc)(OrcProgram *p, void *user, OrcInstruction *insn)
#define ORC_REGCLASS_GP 1
#define ORC_OPCODE_N_ARGS 4
-#define ORC_OPCODE_N_RULES 8
+#define ORC_OPCODE_N_RULES 12
#define ORC_STRUCT_OFFSET(struct_type, member) \
((long) ((unsigned int *) &((struct_type*) 0)->member))
@@ -52,6 +52,10 @@ enum {
ORC_RULE_MMX_2,
ORC_RULE_MMX_4,
ORC_RULE_MMX_8,
+ ORC_RULE_SSE_1,
+ ORC_RULE_SSE_2,
+ ORC_RULE_SSE_4,
+ ORC_RULE_SSE_8,
ORC_RULE_ALTIVEC_1
};
commit 4df85f912cca4b14150d3883042120bd881149ab
Author: David Schleef <ds at ginger.bigkitten.com>
Date: Mon May 26 15:06:39 2008 -0700
[orc] handle end pieces in MMX_4
diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c
index 67ddd49..36aab28 100644
--- a/orc/orcprogram-x86.c
+++ b/orc/orcprogram-x86.c
@@ -29,13 +29,19 @@ void x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
void x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg);
void x86_emit_dec_memoffset (OrcProgram *program, int size, int offset, int reg);
void x86_emit_add_imm_memoffset (OrcProgram *program, int size, int value, int offset, int reg);
+void x86_emit_and_imm_memoffset (OrcProgram *program, int size, int value, int offset, int reg);
void x86_emit_add_imm_reg (OrcProgram *program, int size, int value, int reg);
+void x86_emit_and_imm_reg (OrcProgram *program, int size, int value, int reg);
+void x86_emit_cmp_imm_memoffset (OrcProgram *program, int size, int value,
+ int offset, int reg);
void x86_emit_emms (OrcProgram *program);
void x86_emit_ret (OrcProgram *program);
void x86_emit_je (OrcProgram *program, int label);
void x86_emit_jne (OrcProgram *program, int label);
void x86_emit_label (OrcProgram *program, int label);
+void x86_emit_loop (OrcProgram *program);
+
static void mmx_emit_loadi_s16 (OrcProgram *p, int reg, int value);
void x86_emit_modrm_memoffset (OrcProgram *program, int reg1, int offset, int reg2);
@@ -421,29 +427,69 @@ x86_emit_store_dest (OrcProgram *program, OrcVariable *var)
void
orc_program_assemble_x86 (OrcProgram *program)
{
- int j;
- int k;
- OrcInstruction *insn;
- OrcOpcode *opcode;
- OrcVariable *args[10];
- OrcRule *rule;
-
x86_emit_prologue (program);
x86_emit_mov_memoffset_reg (program, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor,n),
x86_exec_ptr, X86_ECX);
+ x86_emit_sar_imm_reg (program, 4, 2, X86_ECX);
+ x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
- x86_emit_sar_imm_reg (program, 4, program->loop_shift, X86_ECX);
+ x86_emit_mov_memoffset_reg (program, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr, X86_ECX);
+ x86_emit_and_imm_reg (program, 4, 3, X86_ECX);
x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
- (int)ORC_STRUCT_OFFSET(OrcExecutor,counter), x86_exec_ptr);
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
- x86_emit_test_reg_reg (program, 4, X86_ECX, X86_ECX);
+ x86_load_constants (program);
+ x86_emit_cmp_imm_memoffset (program, 4, 0,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
x86_emit_je (program, 1);
- x86_load_constants (program);
-
+ program->rule_set = ORC_RULE_MMX_1;
+ program->n_per_loop = 1;
+ program->loop_shift = 0;
x86_emit_label (program, 0);
+ x86_emit_loop (program);
+ x86_emit_dec_memoffset (program, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1),
+ x86_exec_ptr);
+ x86_emit_cmp_imm_memoffset (program, 4, 0,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1),
+ x86_exec_ptr);
+ x86_emit_jne (program, 0);
+ x86_emit_label (program, 1);
+
+ x86_emit_cmp_imm_memoffset (program, 4, 0,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
+ x86_emit_je (program, 3);
+
+ program->rule_set = ORC_RULE_MMX_4;
+ program->n_per_loop = 4;
+ program->loop_shift = 2;
+ x86_emit_label (program, 2);
+ x86_emit_loop (program);
+ x86_emit_dec_memoffset (program, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2),
+ x86_exec_ptr);
+ x86_emit_jne (program, 2);
+ x86_emit_label (program, 3);
+
+ x86_emit_epilogue (program);
+
+ x86_do_fixups (program);
+}
+
+void
+x86_emit_loop (OrcProgram *program)
+{
+ int j;
+ int k;
+ OrcInstruction *insn;
+ OrcOpcode *opcode;
+ OrcVariable *args[10];
+ OrcRule *rule;
for(j=0;j<program->n_insns;j++){
insn = program->insns + j;
@@ -514,22 +560,8 @@ orc_program_assemble_x86 (OrcProgram *program)
}
}
}
-
- x86_emit_dec_memoffset (program, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor,counter),
- x86_exec_ptr);
- x86_emit_jne (program, 0);
- x86_emit_label (program, 1);
-
- x86_emit_epilogue (program);
-
- //x86_test (program);
-
-
- x86_do_fixups (program);
}
-
/* rules */
static void
@@ -1085,6 +1117,70 @@ x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg)
}
void
+x86_emit_and_imm_memoffset (OrcProgram *program, int size, int value,
+ int offset, int reg)
+{
+ if (size == 2) {
+ printf(" andw $%d, %d(%%%s)\n", value, offset,
+ x86_get_regname_ptr(reg));
+ *program->codeptr++ = 0x66;
+ } else if (size == 4) {
+ printf(" andl $%d, %d(%%%s)\n", value, offset,
+ x86_get_regname_ptr(reg));
+ } else {
+ printf(" and $%d, %d(%%%s)\n", value, offset,
+ x86_get_regname_ptr(reg));
+ }
+
+ x86_emit_rex(program, size, 0, 0, reg);
+ if (value >= -128 && value < 128) {
+ *program->codeptr++ = 0x83;
+ /* FIXME */
+ x86_emit_modrm_memoffset (program, 0, offset, reg);
+ *program->codeptr++ = (value & 0xff);
+ } else {
+ *program->codeptr++ = 0x81;
+ /* FIXME */
+ x86_emit_modrm_memoffset (program, 0, offset, reg);
+ *program->codeptr++ = (value & 0xff);
+ *program->codeptr++ = ((value>>8) & 0xff);
+ if (size == 4) {
+ *program->codeptr++ = ((value>>16) & 0xff);
+ *program->codeptr++ = ((value>>24) & 0xff);
+ }
+ }
+}
+
+void
+x86_emit_and_imm_reg (OrcProgram *program, int size, int value, int reg)
+{
+ if (size == 2) {
+ printf(" andw $%d, %%%s\n", value, x86_get_regname_16(reg));
+ *program->codeptr++ = 0x66;
+ } else if (size == 4) {
+ printf(" andl $%d, %%%s\n", value, x86_get_regname(reg));
+ } else {
+ printf(" and $%d, %%%s\n", value, x86_get_regname_64(reg));
+ }
+
+ x86_emit_rex(program, size, 0, 0, reg);
+ if (value >= -128 && value < 128) {
+ *program->codeptr++ = 0x83;
+ x86_emit_modrm_reg (program, reg, 4);
+ *program->codeptr++ = (value & 0xff);
+ } else {
+ *program->codeptr++ = 0x81;
+ x86_emit_modrm_reg (program, reg, 4);
+ *program->codeptr++ = (value & 0xff);
+ *program->codeptr++ = ((value>>8) & 0xff);
+ if (size == 4) {
+ *program->codeptr++ = ((value>>16) & 0xff);
+ *program->codeptr++ = ((value>>24) & 0xff);
+ }
+ }
+}
+
+void
x86_emit_add_imm_memoffset (OrcProgram *program, int size, int value,
int offset, int reg)
{
@@ -1147,6 +1243,39 @@ x86_emit_add_imm_reg (OrcProgram *program, int size, int value, int reg)
}
void
+x86_emit_cmp_imm_memoffset (OrcProgram *program, int size, int value,
+ int offset, int reg)
+{
+ if (size == 2) {
+ printf(" cmpw $%d, %d(%%%s)\n", value, offset,
+ x86_get_regname_ptr(reg));
+ *program->codeptr++ = 0x66;
+ } else if (size == 4) {
+ printf(" cmpl $%d, %d(%%%s)\n", value, offset,
+ x86_get_regname_ptr(reg));
+ } else {
+ printf(" cmp $%d, %d(%%%s)\n", value, offset,
+ x86_get_regname_ptr(reg));
+ }
+
+ x86_emit_rex(program, size, 0, 0, reg);
+ if (value >= -128 && value < 128) {
+ *program->codeptr++ = 0x83;
+ x86_emit_modrm_memoffset (program, 7, offset, reg);
+ *program->codeptr++ = (value & 0xff);
+ } else {
+ *program->codeptr++ = 0x81;
+ x86_emit_modrm_memoffset (program, 7, offset, reg);
+ *program->codeptr++ = (value & 0xff);
+ *program->codeptr++ = ((value>>8) & 0xff);
+ if (size == 4) {
+ *program->codeptr++ = ((value>>16) & 0xff);
+ *program->codeptr++ = ((value>>24) & 0xff);
+ }
+ }
+}
+
+void
x86_emit_dec_memoffset (OrcProgram *program, int size,
int offset, int reg)
{
diff --git a/orc/orcprogram.h b/orc/orcprogram.h
index 5a02b83..666af63 100644
--- a/orc/orcprogram.h
+++ b/orc/orcprogram.h
@@ -181,7 +181,9 @@ struct _OrcProgram {
struct _OrcExecutor {
OrcProgram *program;
int n;
- int counter;
+ int counter1;
+ int counter2;
+ int counter3;
void *arrays[ORC_N_VARIABLES];
More information about the Liboil-commit
mailing list