[Mesa-dev] [PATCH 2/3] rtasm: add minimal x86-64 support and new instructions (v2)

Vinson Lee vlee at vmware.com
Thu Aug 19 12:36:09 PDT 2010


Luca,

The Windows x86-64 path can be enabled. I tested it by removing the '!defined(_WIN64)' check in translate.c, and translate_test passed all tests on Windows 7 x64.

Vinson



> -----Original Message-----
> From: mesa-dev-bounces+vlee=vmware.com at lists.freedesktop.org [mailto:mesa-
> dev-bounces+vlee=vmware.com at lists.freedesktop.org] On Behalf Of José
> Fonseca
> Sent: Friday, August 13, 2010 1:48 PM
> To: Luca Barbieri
> Cc: mesa-dev at lists.freedesktop.org
> Subject: Re: [Mesa-dev] [PATCH 2/3] rtasm: add minimal x86-64 support and
> new instructions (v2)
>
> Luca,
>
> This is great stuff.
>
> But one request: if Win64 is untested, please make sure it is disabled
> by default until somebody has had the opportunity to test it. Unfortunately,
> I'm really busy with other stuff ATM and don't have the time.
>
> Jose
>
> On Fri, 2010-08-13 at 06:47 -0700, Luca Barbieri wrote:
> > Changes in v2:
> > - Win64 support (untested)
> > - Use u_cpu_detect.h constants instead of #ifs
> >
> > This commit adds minimal x86-64 support: only movs between registers
> > are supported for r8-r15, and x64_rexw() must be used to ask for 64-bit
> > operations.
> >
> > It also adds several new instructions for the new translate_sse code.
> > ---
> >  src/gallium/auxiliary/rtasm/rtasm_cpu.c    |    6 +-
> >  src/gallium/auxiliary/rtasm/rtasm_x86sse.c |  455
> ++++++++++++++++++++++++++--
> >  src/gallium/auxiliary/rtasm/rtasm_x86sse.h |   69 ++++-
> >  3 files changed, 493 insertions(+), 37 deletions(-)
> >
> > diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
> b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
> > index 2e15751..0461c81 100644
> > --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
> > +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
> > @@ -30,7 +30,7 @@
> >  #include "rtasm_cpu.h"
> >
> >
> > -#if defined(PIPE_ARCH_X86)
> > +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
> >  static boolean rtasm_sse_enabled(void)
> >  {
> >     static boolean firsttime = 1;
> > @@ -49,7 +49,7 @@ static boolean rtasm_sse_enabled(void)
> >  int rtasm_cpu_has_sse(void)
> >  {
> >     /* FIXME: actually detect this at run-time */
> > -#if defined(PIPE_ARCH_X86)
> > +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
> >     return rtasm_sse_enabled();
> >  #else
> >     return 0;
> > @@ -59,7 +59,7 @@ int rtasm_cpu_has_sse(void)
> >  int rtasm_cpu_has_sse2(void)
> >  {
> >     /* FIXME: actually detect this at run-time */
> > -#if defined(PIPE_ARCH_X86)
> > +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
> >     return rtasm_sse_enabled();
> >  #else
> >     return 0;
> > diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
> b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
> > index 63007c1..88b182b 100644
> > --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
> > +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
> > @@ -22,8 +22,9 @@
> >
> **************************************************************************
> /
> >
> >  #include "pipe/p_config.h"
> > +#include "util/u_cpu_detect.h"
> >
> > -#if defined(PIPE_ARCH_X86)
> > +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
> >
> >  #include "pipe/p_compiler.h"
> >  #include "util/u_debug.h"
> > @@ -231,6 +232,10 @@ static void emit_modrm( struct x86_function *p,
> >
> >     assert(reg.mod == mod_REG);
> >
> > +   /* TODO: support extended x86-64 registers */
> > +   assert(reg.idx < 8);
> > +   assert(regmem.idx < 8);
> > +
> >     val |= regmem.mod << 6;             /* mod field */
> >     val |= reg.idx << 3;                /* reg field */
> >     val |= regmem.idx;          /* r/m field */
> > @@ -363,6 +368,12 @@ int x86_get_label( struct x86_function *p )
> >   */
> >
> >
> > +void x64_rexw(struct x86_function *p)
> > +{
> > +   if(util_cpu_arch == UTIL_CPU_ARCH_X86_64)
> > +      emit_1ub(p, 0x48);
> > +}
> > +
> >  void x86_jcc( struct x86_function *p,
> >               enum x86_cc cc,
> >               int label )
> > @@ -449,6 +460,52 @@ void x86_mov_reg_imm( struct x86_function *p,
> struct x86_reg dst, int imm )
> >     emit_1i(p, imm);
> >  }
> >
> > +void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
> > +{
> > +   DUMP_RI( dst, imm );
> > +   if(dst.mod == mod_REG)
> > +      x86_mov_reg_imm(p, dst, imm);
> > +   else
> > +   {
> > +      emit_1ub(p, 0xc7);
> > +      emit_modrm_noreg(p, 0, dst);
> > +      emit_1i(p, imm);
> > +   }
> > +}
> > +
> > +void x86_mov16_imm( struct x86_function *p, struct x86_reg dst,
> uint16_t imm )
> > +{
> > +   DUMP_RI( dst, imm );
> > +   emit_1ub(p, 0x66);
> > +   if(dst.mod == mod_REG)
> > +   {
> > +      emit_1ub(p, 0xb8 + dst.idx);
> > +      emit_2ub(p, imm & 0xff, imm >> 8);
> > +   }
> > +   else
> > +   {
> > +      emit_1ub(p, 0xc7);
> > +      emit_modrm_noreg(p, 0, dst);
> > +      emit_2ub(p, imm & 0xff, imm >> 8);
> > +   }
> > +}
> > +
> > +void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t
> imm )
> > +{
> > +   DUMP_RI( dst, imm );
> > +   if(dst.mod == mod_REG)
> > +   {
> > +      emit_1ub(p, 0xb0 + dst.idx);
> > +      emit_1ub(p, imm);
> > +   }
> > +   else
> > +   {
> > +      emit_1ub(p, 0xc6);
> > +      emit_modrm_noreg(p, 0, dst);
> > +      emit_1ub(p, imm);
> > +   }
> > +}
> > +
> >  /**
> >   * Immediate group 1 instructions.
> >   */
> > @@ -520,7 +577,7 @@ void x86_push( struct x86_function *p,
> >     }
> >
> >
> > -   p->stack_offset += 4;
> > +   p->stack_offset += sizeof(void*);
> >  }
> >
> >  void x86_push_imm32( struct x86_function *p,
> > @@ -530,7 +587,7 @@ void x86_push_imm32( struct x86_function *p,
> >     emit_1ub(p, 0x68);
> >     emit_1i(p,  imm32);
> >
> > -   p->stack_offset += 4;
> > +   p->stack_offset += sizeof(void*);
> >  }
> >
> >
> > @@ -540,23 +597,33 @@ void x86_pop( struct x86_function *p,
> >     DUMP_R( reg );
> >     assert(reg.mod == mod_REG);
> >     emit_1ub(p, 0x58 + reg.idx);
> > -   p->stack_offset -= 4;
> > +   p->stack_offset -= sizeof(void*);
> >  }
> >
> >  void x86_inc( struct x86_function *p,
> >               struct x86_reg reg )
> >  {
> >     DUMP_R( reg );
> > -   assert(reg.mod == mod_REG);
> > -   emit_1ub(p, 0x40 + reg.idx);
> > +   if(util_cpu_arch == UTIL_CPU_ARCH_X86 && reg.mod == mod_REG)
> > +   {
> > +      emit_1ub(p, 0x40 + reg.idx);
> > +      return;
> > +   }
> > +   emit_1ub(p, 0xff);
> > +   emit_modrm_noreg(p, 0, reg);
> >  }
> >
> >  void x86_dec( struct x86_function *p,
> >               struct x86_reg reg )
> >  {
> >     DUMP_R( reg );
> > -   assert(reg.mod == mod_REG);
> > -   emit_1ub(p, 0x48 + reg.idx);
> > +   if(util_cpu_arch == UTIL_CPU_ARCH_X86 && reg.mod == mod_REG)
> > +   {
> > +      emit_1ub(p, 0x48 + reg.idx);
> > +      return;
> > +   }
> > +   emit_1ub(p, 0xff);
> > +   emit_modrm_noreg(p, 1, reg);
> >  }
> >
> >  void x86_ret( struct x86_function *p )
> > @@ -583,6 +650,65 @@ void x86_mov( struct x86_function *p,
> >               struct x86_reg src )
> >  {
> >     DUMP_RR( dst, src );
> > +   /* special hack for reading arguments until we support x86-64
> registers everywhere */
> > +   if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 ||
> dst.idx >= 8))
> > +   {
> > +      uint8_t rex = 0x40;
> > +      if(dst.idx >= 8)
> > +      {
> > +         rex |= 4;
> > +         dst.idx -= 8;
> > +      }
> > +      if(src.idx >= 8)
> > +      {
> > +         rex |= 1;
> > +         src.idx -= 8;
> > +      }
> > +      emit_1ub(p, rex);
> > +   }
> > +   emit_op_modrm( p, 0x8b, 0x89, dst, src );
> > +}
> > +
> > +void x86_mov16( struct x86_function *p,
> > +             struct x86_reg dst,
> > +             struct x86_reg src )
> > +{
> > +   DUMP_RR( dst, src );
> > +   emit_1ub(p, 0x66);
> > +   emit_op_modrm( p, 0x8b, 0x89, dst, src );
> > +}
> > +
> > +void x86_mov8( struct x86_function *p,
> > +             struct x86_reg dst,
> > +             struct x86_reg src )
> > +{
> > +   DUMP_RR( dst, src );
> > +   emit_op_modrm( p, 0x8a, 0x88, dst, src );
> > +}
> > +
> > +void x64_mov64( struct x86_function *p,
> > +             struct x86_reg dst,
> > +             struct x86_reg src )
> > +{
> > +   uint8_t rex = 0x48;
> > +   DUMP_RR( dst, src );
> > +   assert(util_cpu_arch == UTIL_CPU_ARCH_X86_64);
> > +
> > +   /* special hack for reading arguments until we support x86-64
> registers everywhere */
> > +   if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 ||
> dst.idx >= 8))
> > +   {
> > +      if(dst.idx >= 8)
> > +      {
> > +         rex |= 4;
> > +         dst.idx -= 8;
> > +      }
> > +      if(src.idx >= 8)
> > +      {
> > +         rex |= 1;
> > +         src.idx -= 8;
> > +      }
> > +   }
> > +   emit_1ub(p, rex);
> >     emit_op_modrm( p, 0x8b, 0x89, dst, src );
> >  }
> >
> > @@ -694,6 +820,61 @@ void x86_div( struct x86_function *p,
> >     emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
> >  }
> >
> > +void x86_bswap( struct x86_function *p, struct x86_reg reg )
> > +{
> > +   DUMP_R(reg);
> > +   assert(reg.file == file_REG32);
> > +   assert(reg.mod == mod_REG);
> > +   emit_2ub(p, 0x0f, 0xc8 + reg.idx);
> > +}
> > +
> > +void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned
> imm )
> > +{
> > +   DUMP_RI(reg, imm);
> > +   if(imm == 1)
> > +   {
> > +      emit_1ub(p, 0xd1);
> > +      emit_modrm_noreg(p, 5, reg);
> > +   }
> > +   else
> > +   {
> > +      emit_1ub(p, 0xc1);
> > +      emit_modrm_noreg(p, 5, reg);
> > +      emit_1ub(p, imm);
> > +   }
> > +}
> > +
> > +void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned
> imm )
> > +{
> > +   DUMP_RI(reg, imm);
> > +   if(imm == 1)
> > +   {
> > +      emit_1ub(p, 0xd1);
> > +      emit_modrm_noreg(p, 7, reg);
> > +   }
> > +   else
> > +   {
> > +      emit_1ub(p, 0xc1);
> > +      emit_modrm_noreg(p, 7, reg);
> > +      emit_1ub(p, imm);
> > +   }
> > +}
> > +
> > +void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned
> imm  )
> > +{
> > +   DUMP_RI(reg, imm);
> > +   if(imm == 1)
> > +   {
> > +      emit_1ub(p, 0xd1);
> > +      emit_modrm_noreg(p, 4, reg);
> > +   }
> > +   else
> > +   {
> > +      emit_1ub(p, 0xc1);
> > +      emit_modrm_noreg(p, 4, reg);
> > +      emit_1ub(p, imm);
> > +   }
> > +}
> >
> >
> >
> /***********************************************************************
> > @@ -1027,6 +1208,70 @@ void sse_movmskps( struct x86_function *p,
> >   * SSE2 instructions
> >   */
> >
> > +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR(dst, src);
> > +   emit_2ub(p, 0x66, 0x0f);
> > +   if(dst.mod == mod_REG && dst.file == file_REG32)
> > +   {
> > +      emit_1ub(p, 0x7e);
> > +      emit_modrm(p, src, dst);
> > +   }
> > +   else
> > +   {
> > +      emit_op_modrm(p, 0x6e, 0x7e, dst, src);
> > +   }
> > +}
> > +
> > +void sse2_movq( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR(dst, src);
> > +   switch (dst.mod) {
> > +   case mod_REG:
> > +      emit_3ub(p, 0xf3, 0x0f, 0x7e);
> > +      emit_modrm(p, dst, src);
> > +      break;
> > +   case mod_INDIRECT:
> > +   case mod_DISP32:
> > +   case mod_DISP8:
> > +      assert(src.mod == mod_REG);
> > +      emit_3ub(p, 0x66, 0x0f, 0xd6);
> > +      emit_modrm(p, src, dst);
> > +      break;
> > +   default:
> > +      assert(0);
> > +      break;
> > +   }
> > +}
> > +
> > +void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR(dst, src);
> > +   emit_2ub(p, 0xf3, 0x0f);
> > +   emit_op_modrm(p, 0x6f, 0x7f, dst, src);
> > +}
> > +
> > +void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR(dst, src);
> > +   emit_2ub(p, 0xf2, 0x0f);
> > +   emit_op_modrm(p, 0x10, 0x11, dst, src);
> > +}
> > +
> > +void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR(dst, src);
> > +   emit_2ub(p, 0x66, 0x0f);
> > +   emit_op_modrm(p, 0x10, 0x11, dst, src);
> > +}
> > +
> > +void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR(dst, src);
> > +   emit_2ub(p, 0x66, 0x0f);
> > +   emit_op_modrm(p, 0x28, 0x29, dst, src);
> > +}
> > +
> >  /**
> >   * Perform a reduced swizzle:
> >   */
> > @@ -1041,6 +1286,28 @@ void sse2_pshufd( struct x86_function *p,
> >     emit_1ub(p, shuf);
> >  }
> >
> > +void sse2_pshuflw( struct x86_function *p,
> > +                  struct x86_reg dst,
> > +                  struct x86_reg src,
> > +                  unsigned char shuf)
> > +{
> > +   DUMP_RRI( dst, src, shuf );
> > +   emit_3ub(p, 0xf2, X86_TWOB, 0x70);
> > +   emit_modrm(p, dst, src);
> > +   emit_1ub(p, shuf);
> > +}
> > +
> > +void sse2_pshufhw( struct x86_function *p,
> > +                  struct x86_reg dst,
> > +                  struct x86_reg src,
> > +                  unsigned char shuf)
> > +{
> > +   DUMP_RRI( dst, src, shuf );
> > +   emit_3ub(p, 0xf3, X86_TWOB, 0x70);
> > +   emit_modrm(p, dst, src);
> > +   emit_1ub(p, shuf);
> > +}
> > +
> >  void sse2_cvttps2dq( struct x86_function *p,
> >                       struct x86_reg dst,
> >                       struct x86_reg src )
> > @@ -1059,6 +1326,24 @@ void sse2_cvtps2dq( struct x86_function *p,
> >     emit_modrm( p, dst, src );
> >  }
> >
> > +void sse2_cvtsd2ss( struct x86_function *p,
> > +                    struct x86_reg dst,
> > +                    struct x86_reg src )
> > +{
> > +   DUMP_RR( dst, src );
> > +   emit_3ub(p, 0xf2, 0x0f, 0x5a);
> > +   emit_modrm( p, dst, src );
> > +}
> > +
> > +void sse2_cvtpd2ps( struct x86_function *p,
> > +                    struct x86_reg dst,
> > +                    struct x86_reg src )
> > +{
> > +   DUMP_RR( dst, src );
> > +   emit_3ub(p, 0x66, 0x0f, 0x5a);
> > +   emit_modrm( p, dst, src );
> > +}
> > +
> >  void sse2_packssdw( struct x86_function *p,
> >                     struct x86_reg dst,
> >                     struct x86_reg src )
> > @@ -1095,6 +1380,97 @@ void sse2_punpcklbw( struct x86_function *p,
> >     emit_modrm( p, dst, src );
> >  }
> >
> > +void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR( dst, src );
> > +   emit_3ub(p, 0x66, 0x0f, 0x61);
> > +   emit_modrm( p, dst, src );
> > +}
> > +
> > +void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR( dst, src );
> > +   emit_3ub(p, 0x66, 0x0f, 0x62);
> > +   emit_modrm( p, dst, src );
> > +}
> > +
> > +void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst,
> struct x86_reg src )
> > +{
> > +   DUMP_RR( dst, src );
> > +   emit_3ub(p, 0x66, 0x0f, 0x6c);
> > +   emit_modrm( p, dst, src );
> > +}
> > +
> > +void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm )
> > +{
> > +   DUMP_RI(dst, imm);
> > +   emit_3ub(p, 0x66, 0x0f, 0x71);
> > +   emit_modrm_noreg(p, 6, dst);
> > +   emit_1ub(p, imm);
> > +}
> > +
> > +void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm )
> > +{
> > +   DUMP_RI(dst, imm);
> > +   emit_3ub(p, 0x66, 0x0f, 0x72);
> > +   emit_modrm_noreg(p, 6, dst);
> > +   emit_1ub(p, imm);
> > +}
> > +
> > +void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm )
> > +{
> > +   DUMP_RI(dst, imm);
> > +   emit_3ub(p, 0x66, 0x0f, 0x73);
> > +   emit_modrm_noreg(p, 6, dst);
> > +   emit_1ub(p, imm);
> > +}
> > +
> > +void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm )
> > +{
> > +   DUMP_RI(dst, imm);
> > +   emit_3ub(p, 0x66, 0x0f, 0x71);
> > +   emit_modrm_noreg(p, 2, dst);
> > +   emit_1ub(p, imm);
> > +}
> > +
> > +void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm )
> > +{
> > +   DUMP_RI(dst, imm);
> > +   emit_3ub(p, 0x66, 0x0f, 0x72);
> > +   emit_modrm_noreg(p, 2, dst);
> > +   emit_1ub(p, imm);
> > +}
> > +
> > +void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm )
> > +{
> > +   DUMP_RI(dst, imm);
> > +   emit_3ub(p, 0x66, 0x0f, 0x73);
> > +   emit_modrm_noreg(p, 2, dst);
> > +   emit_1ub(p, imm);
> > +}
> > +
> > +void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm )
> > +{
> > +   DUMP_RI(dst, imm);
> > +   emit_3ub(p, 0x66, 0x0f, 0x71);
> > +   emit_modrm_noreg(p, 4, dst);
> > +   emit_1ub(p, imm);
> > +}
> > +
> > +void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm )
> > +{
> > +   DUMP_RI(dst, imm);
> > +   emit_3ub(p, 0x66, 0x0f, 0x72);
> > +   emit_modrm_noreg(p, 4, dst);
> > +   emit_1ub(p, imm);
> > +}
> > +
> > +void sse2_por( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src )
> > +{
> > +   DUMP_RR(dst, src);
> > +   emit_3ub(p, 0x66, 0x0f, 0xeb);
> > +   emit_modrm(p, dst, src);
> > +}
> >
> >  void sse2_rcpps( struct x86_function *p,
> >                   struct x86_reg dst,
> > @@ -1114,18 +1490,6 @@ void sse2_rcpss( struct x86_function *p,
> >     emit_modrm( p, dst, src );
> >  }
> >
> > -void sse2_movd( struct x86_function *p,
> > -               struct x86_reg dst,
> > -               struct x86_reg src )
> > -{
> > -   DUMP_RR( dst, src );
> > -   emit_2ub(p, 0x66, X86_TWOB);
> > -   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
> > -}
> > -
> > -
> > -
> > -
> >
> /***********************************************************************
> >   * x87 instructions
> >   */
> > @@ -1716,17 +2080,56 @@ void x86_cdecl_caller_pop_regs( struct
> x86_function *p )
> >  }
> >
> >
> > -/* Retreive a reference to one of the function arguments, taking into
> > - * account any push/pop activity:
> > - */
> >  struct x86_reg x86_fn_arg( struct x86_function *p,
> > -                          unsigned arg )
> > +                           unsigned arg )
> >  {
> > -   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
> > +   if(util_cpu_arch == UTIL_CPU_ARCH_X86_64)
> > +   {
> > +      /* Microsoft uses a different calling convention than the rest of
> the world */
> > +      if(util_cpu_abi == UTIL_CPU_ABI_WIN64)
> > +      {
> > +         switch(arg)
> > +         {
> > +         case 1:
> > +            return x86_make_reg(file_REG32, reg_CX);
> > +         case 2:
> > +            return x86_make_reg(file_REG32, reg_DX);
> > +         case 3:
> > +            return x86_make_reg(file_REG32, reg_R8);
> > +         case 4:
> > +            return x86_make_reg(file_REG32, reg_R9);
> > +         default:
> > +            return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
> > +                  p->stack_offset + (arg - 4) * 8);     /* ??? */
> > +         }
> > +      }
> > +      else
> > +      {
> > +         switch(arg)
> > +         {
> > +         case 1:
> > +            return x86_make_reg(file_REG32, reg_DI);
> > +         case 2:
> > +            return x86_make_reg(file_REG32, reg_SI);
> > +         case 3:
> > +            return x86_make_reg(file_REG32, reg_DX);
> > +         case 4:
> > +            return x86_make_reg(file_REG32, reg_CX);
> > +         case 5:
> > +            return x86_make_reg(file_REG32, reg_R8);
> > +         case 6:
> > +            return x86_make_reg(file_REG32, reg_R9);
> > +         default:
> > +            return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
> > +                              p->stack_offset + (arg - 6) * 8);     /*
> ??? */
> > +         }
> > +      }
> > +   }
> > +   else
> > +      return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
> >                         p->stack_offset + arg * 4);     /* ??? */
> >  }
> >
> > -
> >  void x86_init_func( struct x86_function *p )
> >  {
> >     p->size = 0;
> > diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
> b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
> > index 365dec1..a23fc5e 100644
> > --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
> > +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
> > @@ -26,15 +26,15 @@
> >
> >  #include "pipe/p_config.h"
> >
> > -#if defined(PIPE_ARCH_X86)
> > +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
> >
> >  /* It is up to the caller to ensure that instructions issued are
> >   * suitable for the host cpu.  There are no checks made in this module
> >   * for mmx/sse/sse2 support on the cpu.
> >   */
> >  struct x86_reg {
> > -   unsigned file:3;
> > -   unsigned idx:3;
> > +   unsigned file:2;
> > +   unsigned idx:4;
> >     unsigned mod:2;             /* mod_REG if this is just a register */
> >     int      disp:24;           /* only +/- 23bits of offset - should be
> enough... */
> >  };
> > @@ -75,7 +75,15 @@ enum x86_reg_name {
> >     reg_SP,
> >     reg_BP,
> >     reg_SI,
> > -   reg_DI
> > +   reg_DI,
> > +   reg_R8,
> > +   reg_R9,
> > +   reg_R10,
> > +   reg_R11,
> > +   reg_R12,
> > +   reg_R13,
> > +   reg_R14,
> > +   reg_R15
> >  };
> >
> >
> > @@ -138,6 +146,8 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg
> );
> >   */
> >  int x86_get_label( struct x86_function *p );
> >
> > +void x64_rexw(struct x86_function *p);
> > +
> >  void x86_jcc( struct x86_function *p,
> >               enum x86_cc cc,
> >               int label );
> > @@ -178,18 +188,54 @@ void mmx_movq( struct x86_function *p, struct
> x86_reg dst, struct x86_reg src );
> >  void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >
> > +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_movq( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +
> >  void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +
> >  void sse2_movd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct
> x86_reg arg0,
> >                    unsigned char shuf );
> > +void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct
> x86_reg arg0,
> > +                  unsigned char shuf );
> > +void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct
> x86_reg arg0,
> > +                  unsigned char shuf );
> >  void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >
> > +void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst,
> struct x86_reg src );
> > +
> > +void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm );
> > +void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm );
> > +void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm );
> > +
> > +void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm );
> > +void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm );
> > +void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm );
> > +
> > +void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm );
> > +void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst,
> unsigned imm );
> > +
> > +void sse2_por( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +
> > +void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src, uint8_t imm );
> > +void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src, uint8_t imm );
> > +void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src, uint8_t imm );
> >
> >  void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
> >  void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
> > @@ -227,7 +273,6 @@ void sse_shufps( struct x86_function *p, struct
> x86_reg dest, struct x86_reg arg
> >  void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct
> x86_reg src );
> > -void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src);
> >
> >  void x86_add( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > @@ -237,8 +282,13 @@ void x86_dec( struct x86_function *p, struct
> x86_reg reg );
> >  void x86_inc( struct x86_function *p, struct x86_reg reg );
> >  void x86_lea( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void x86_mov( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > -void x86_movzx8( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > -void x86_movzx16( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void x86_mov8( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void x86_mov16( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> > +void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
> > +void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t
> imm );
> > +void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t
> imm );
> >  void x86_mul( struct x86_function *p, struct x86_reg src );
> >  void x86_imul( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg
> src );
> > @@ -252,7 +302,10 @@ void x86_test( struct x86_function *p, struct
> x86_reg dst, struct x86_reg src );
> >  void x86_xor( struct x86_function *p, struct x86_reg dst, struct
> x86_reg src );
> >  void x86_sahf( struct x86_function *p );
> >  void x86_div( struct x86_function *p, struct x86_reg src );
> > -
> > +void x86_bswap( struct x86_function *p, struct x86_reg src );
> > +void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned
> imm );
> > +void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned
> imm );
> > +void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned
> imm  );
> >
> >  void x86_cdecl_caller_push_regs( struct x86_function *p );
> >  void x86_cdecl_caller_pop_regs( struct x86_function *p );
> > --
> > 1.7.0.4
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list