[Mesa-dev] [PATCH 2/3] rtasm: add minimal x86-64 support and new instructions (v2)

José Fonseca jfonseca at vmware.com
Fri Aug 13 13:47:54 PDT 2010


Luca,

This is great stuff. 

But one request: if Win64 is untested, please make sure it is disabled
by default until somebody has had the opportunity to test it. Unfortunately
I'm really busy with other stuff ATM and don't have the time.

Jose

On Fri, 2010-08-13 at 06:47 -0700, Luca Barbieri wrote:
> Changes in v2:
> - Win64 support (untested)
> - Use u_cpu_detect.h constants instead of #ifs
> 
> This commit adds minimal x86-64 support: only movs between registers
> are supported for r8-r15, and x64_rexw() must be used to ask for 64-bit
> operations.
> 
> It also adds several new instructions for the new translate_sse code.
> ---
>  src/gallium/auxiliary/rtasm/rtasm_cpu.c    |    6 +-
>  src/gallium/auxiliary/rtasm/rtasm_x86sse.c |  455 ++++++++++++++++++++++++++--
>  src/gallium/auxiliary/rtasm/rtasm_x86sse.h |   69 ++++-
>  3 files changed, 493 insertions(+), 37 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
> index 2e15751..0461c81 100644
> --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
> +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
> @@ -30,7 +30,7 @@
>  #include "rtasm_cpu.h"
> 
> 
> -#if defined(PIPE_ARCH_X86)
> +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
>  static boolean rtasm_sse_enabled(void)
>  {
>     static boolean firsttime = 1;
> @@ -49,7 +49,7 @@ static boolean rtasm_sse_enabled(void)
>  int rtasm_cpu_has_sse(void)
>  {
>     /* FIXME: actually detect this at run-time */
> -#if defined(PIPE_ARCH_X86)
> +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
>     return rtasm_sse_enabled();
>  #else
>     return 0;
> @@ -59,7 +59,7 @@ int rtasm_cpu_has_sse(void)
>  int rtasm_cpu_has_sse2(void)
>  {
>     /* FIXME: actually detect this at run-time */
> -#if defined(PIPE_ARCH_X86)
> +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
>     return rtasm_sse_enabled();
>  #else
>     return 0;
> diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
> index 63007c1..88b182b 100644
> --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
> +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
> @@ -22,8 +22,9 @@
>   **************************************************************************/
> 
>  #include "pipe/p_config.h"
> +#include "util/u_cpu_detect.h"
> 
> -#if defined(PIPE_ARCH_X86)
> +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
> 
>  #include "pipe/p_compiler.h"
>  #include "util/u_debug.h"
> @@ -231,6 +232,10 @@ static void emit_modrm( struct x86_function *p,
> 
>     assert(reg.mod == mod_REG);
> 
> +   /* TODO: support extended x86-64 registers */
> +   assert(reg.idx < 8);
> +   assert(regmem.idx < 8);
> +
>     val |= regmem.mod << 6;             /* mod field */
>     val |= reg.idx << 3;                /* reg field */
>     val |= regmem.idx;          /* r/m field */
> @@ -363,6 +368,12 @@ int x86_get_label( struct x86_function *p )
>   */
> 
> 
> +void x64_rexw(struct x86_function *p)
> +{
> +   if(util_cpu_arch == UTIL_CPU_ARCH_X86_64)
> +      emit_1ub(p, 0x48);
> +}
> +
>  void x86_jcc( struct x86_function *p,
>               enum x86_cc cc,
>               int label )
> @@ -449,6 +460,52 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
>     emit_1i(p, imm);
>  }
> 
> +void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
> +{
> +   DUMP_RI( dst, imm );
> +   if(dst.mod == mod_REG)
> +      x86_mov_reg_imm(p, dst, imm);
> +   else
> +   {
> +      emit_1ub(p, 0xc7);
> +      emit_modrm_noreg(p, 0, dst);
> +      emit_1i(p, imm);
> +   }
> +}
> +
> +void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
> +{
> +   DUMP_RI( dst, imm );
> +   emit_1ub(p, 0x66);
> +   if(dst.mod == mod_REG)
> +   {
> +      emit_1ub(p, 0xb8 + dst.idx);
> +      emit_2ub(p, imm & 0xff, imm >> 8);
> +   }
> +   else
> +   {
> +      emit_1ub(p, 0xc7);
> +      emit_modrm_noreg(p, 0, dst);
> +      emit_2ub(p, imm & 0xff, imm >> 8);
> +   }
> +}
> +
> +void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
> +{
> +   DUMP_RI( dst, imm );
> +   if(dst.mod == mod_REG)
> +   {
> +      emit_1ub(p, 0xb0 + dst.idx);
> +      emit_1ub(p, imm);
> +   }
> +   else
> +   {
> +      emit_1ub(p, 0xc6);
> +      emit_modrm_noreg(p, 0, dst);
> +      emit_1ub(p, imm);
> +   }
> +}
> +
>  /**
>   * Immediate group 1 instructions.
>   */
> @@ -520,7 +577,7 @@ void x86_push( struct x86_function *p,
>     }
> 
> 
> -   p->stack_offset += 4;
> +   p->stack_offset += sizeof(void*);
>  }
> 
>  void x86_push_imm32( struct x86_function *p,
> @@ -530,7 +587,7 @@ void x86_push_imm32( struct x86_function *p,
>     emit_1ub(p, 0x68);
>     emit_1i(p,  imm32);
> 
> -   p->stack_offset += 4;
> +   p->stack_offset += sizeof(void*);
>  }
> 
> 
> @@ -540,23 +597,33 @@ void x86_pop( struct x86_function *p,
>     DUMP_R( reg );
>     assert(reg.mod == mod_REG);
>     emit_1ub(p, 0x58 + reg.idx);
> -   p->stack_offset -= 4;
> +   p->stack_offset -= sizeof(void*);
>  }
> 
>  void x86_inc( struct x86_function *p,
>               struct x86_reg reg )
>  {
>     DUMP_R( reg );
> -   assert(reg.mod == mod_REG);
> -   emit_1ub(p, 0x40 + reg.idx);
> +   if(util_cpu_arch == UTIL_CPU_ARCH_X86 && reg.mod == mod_REG)
> +   {
> +      emit_1ub(p, 0x40 + reg.idx);
> +      return;
> +   }
> +   emit_1ub(p, 0xff);
> +   emit_modrm_noreg(p, 0, reg);
>  }
> 
>  void x86_dec( struct x86_function *p,
>               struct x86_reg reg )
>  {
>     DUMP_R( reg );
> -   assert(reg.mod == mod_REG);
> -   emit_1ub(p, 0x48 + reg.idx);
> +   if(util_cpu_arch == UTIL_CPU_ARCH_X86 && reg.mod == mod_REG)
> +   {
> +      emit_1ub(p, 0x48 + reg.idx);
> +      return;
> +   }
> +   emit_1ub(p, 0xff);
> +   emit_modrm_noreg(p, 1, reg);
>  }
> 
>  void x86_ret( struct x86_function *p )
> @@ -583,6 +650,65 @@ void x86_mov( struct x86_function *p,
>               struct x86_reg src )
>  {
>     DUMP_RR( dst, src );
> +   /* special hack for reading arguments until we support x86-64 registers everywhere */
> +   if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
> +   {
> +      uint8_t rex = 0x40;
> +      if(dst.idx >= 8)
> +      {
> +         rex |= 4;
> +         dst.idx -= 8;
> +      }
> +      if(src.idx >= 8)
> +      {
> +         rex |= 1;
> +         src.idx -= 8;
> +      }
> +      emit_1ub(p, rex);
> +   }
> +   emit_op_modrm( p, 0x8b, 0x89, dst, src );
> +}
> +
> +void x86_mov16( struct x86_function *p,
> +             struct x86_reg dst,
> +             struct x86_reg src )
> +{
> +   DUMP_RR( dst, src );
> +   emit_1ub(p, 0x66);
> +   emit_op_modrm( p, 0x8b, 0x89, dst, src );
> +}
> +
> +void x86_mov8( struct x86_function *p,
> +             struct x86_reg dst,
> +             struct x86_reg src )
> +{
> +   DUMP_RR( dst, src );
> +   emit_op_modrm( p, 0x8a, 0x88, dst, src );
> +}
> +
> +void x64_mov64( struct x86_function *p,
> +             struct x86_reg dst,
> +             struct x86_reg src )
> +{
> +   uint8_t rex = 0x48;
> +   DUMP_RR( dst, src );
> +   assert(util_cpu_arch == UTIL_CPU_ARCH_X86_64);
> +
> +   /* special hack for reading arguments until we support x86-64 registers everywhere */
> +   if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
> +   {
> +      if(dst.idx >= 8)
> +      {
> +         rex |= 4;
> +         dst.idx -= 8;
> +      }
> +      if(src.idx >= 8)
> +      {
> +         rex |= 1;
> +         src.idx -= 8;
> +      }
> +   }
> +   emit_1ub(p, rex);
>     emit_op_modrm( p, 0x8b, 0x89, dst, src );
>  }
> 
> @@ -694,6 +820,61 @@ void x86_div( struct x86_function *p,
>     emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
>  }
> 
> +void x86_bswap( struct x86_function *p, struct x86_reg reg )
> +{
> +   DUMP_R(reg);
> +   assert(reg.file == file_REG32);
> +   assert(reg.mod == mod_REG);
> +   emit_2ub(p, 0x0f, 0xc8 + reg.idx);
> +}
> +
> +void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
> +{
> +   DUMP_RI(reg, imm);
> +   if(imm == 1)
> +   {
> +      emit_1ub(p, 0xd1);
> +      emit_modrm_noreg(p, 5, reg);
> +   }
> +   else
> +   {
> +      emit_1ub(p, 0xc1);
> +      emit_modrm_noreg(p, 5, reg);
> +      emit_1ub(p, imm);
> +   }
> +}
> +
> +void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
> +{
> +   DUMP_RI(reg, imm);
> +   if(imm == 1)
> +   {
> +      emit_1ub(p, 0xd1);
> +      emit_modrm_noreg(p, 7, reg);
> +   }
> +   else
> +   {
> +      emit_1ub(p, 0xc1);
> +      emit_modrm_noreg(p, 7, reg);
> +      emit_1ub(p, imm);
> +   }
> +}
> +
> +void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm  )
> +{
> +   DUMP_RI(reg, imm);
> +   if(imm == 1)
> +   {
> +      emit_1ub(p, 0xd1);
> +      emit_modrm_noreg(p, 4, reg);
> +   }
> +   else
> +   {
> +      emit_1ub(p, 0xc1);
> +      emit_modrm_noreg(p, 4, reg);
> +      emit_1ub(p, imm);
> +   }
> +}
> 
> 
>  /***********************************************************************
> @@ -1027,6 +1208,70 @@ void sse_movmskps( struct x86_function *p,
>   * SSE2 instructions
>   */
> 
> +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR(dst, src);
> +   emit_2ub(p, 0x66, 0x0f);
> +   if(dst.mod == mod_REG && dst.file == file_REG32)
> +   {
> +      emit_1ub(p, 0x7e);
> +      emit_modrm(p, src, dst);
> +   }
> +   else
> +   {
> +      emit_op_modrm(p, 0x6e, 0x7e, dst, src);
> +   }
> +}
> +
> +void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR(dst, src);
> +   switch (dst.mod) {
> +   case mod_REG:
> +      emit_3ub(p, 0xf3, 0x0f, 0x7e);
> +      emit_modrm(p, dst, src);
> +      break;
> +   case mod_INDIRECT:
> +   case mod_DISP32:
> +   case mod_DISP8:
> +      assert(src.mod == mod_REG);
> +      emit_3ub(p, 0x66, 0x0f, 0xd6);
> +      emit_modrm(p, src, dst);
> +      break;
> +   default:
> +      assert(0);
> +      break;
> +   }
> +}
> +
> +void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR(dst, src);
> +   emit_2ub(p, 0xf3, 0x0f);
> +   emit_op_modrm(p, 0x6f, 0x7f, dst, src);
> +}
> +
> +void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR(dst, src);
> +   emit_2ub(p, 0xf2, 0x0f);
> +   emit_op_modrm(p, 0x10, 0x11, dst, src);
> +}
> +
> +void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR(dst, src);
> +   emit_2ub(p, 0x66, 0x0f);
> +   emit_op_modrm(p, 0x10, 0x11, dst, src);
> +}
> +
> +void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR(dst, src);
> +   emit_2ub(p, 0x66, 0x0f);
> +   emit_op_modrm(p, 0x28, 0x29, dst, src);
> +}
> +
>  /**
>   * Perform a reduced swizzle:
>   */
> @@ -1041,6 +1286,28 @@ void sse2_pshufd( struct x86_function *p,
>     emit_1ub(p, shuf);
>  }
> 
> +void sse2_pshuflw( struct x86_function *p,
> +                  struct x86_reg dst,
> +                  struct x86_reg src,
> +                  unsigned char shuf)
> +{
> +   DUMP_RRI( dst, src, shuf );
> +   emit_3ub(p, 0xf2, X86_TWOB, 0x70);
> +   emit_modrm(p, dst, src);
> +   emit_1ub(p, shuf);
> +}
> +
> +void sse2_pshufhw( struct x86_function *p,
> +                  struct x86_reg dst,
> +                  struct x86_reg src,
> +                  unsigned char shuf)
> +{
> +   DUMP_RRI( dst, src, shuf );
> +   emit_3ub(p, 0xf3, X86_TWOB, 0x70);
> +   emit_modrm(p, dst, src);
> +   emit_1ub(p, shuf);
> +}
> +
>  void sse2_cvttps2dq( struct x86_function *p,
>                       struct x86_reg dst,
>                       struct x86_reg src )
> @@ -1059,6 +1326,24 @@ void sse2_cvtps2dq( struct x86_function *p,
>     emit_modrm( p, dst, src );
>  }
> 
> +void sse2_cvtsd2ss( struct x86_function *p,
> +                    struct x86_reg dst,
> +                    struct x86_reg src )
> +{
> +   DUMP_RR( dst, src );
> +   emit_3ub(p, 0xf2, 0x0f, 0x5a);
> +   emit_modrm( p, dst, src );
> +}
> +
> +void sse2_cvtpd2ps( struct x86_function *p,
> +                    struct x86_reg dst,
> +                    struct x86_reg src )
> +{
> +   DUMP_RR( dst, src );
> +   emit_3ub(p, 0x66, 0x0f, 0x5a);
> +   emit_modrm( p, dst, src );
> +}
> +
>  void sse2_packssdw( struct x86_function *p,
>                     struct x86_reg dst,
>                     struct x86_reg src )
> @@ -1095,6 +1380,97 @@ void sse2_punpcklbw( struct x86_function *p,
>     emit_modrm( p, dst, src );
>  }
> 
> +void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR( dst, src );
> +   emit_3ub(p, 0x66, 0x0f, 0x61);
> +   emit_modrm( p, dst, src );
> +}
> +
> +void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR( dst, src );
> +   emit_3ub(p, 0x66, 0x0f, 0x62);
> +   emit_modrm( p, dst, src );
> +}
> +
> +void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR( dst, src );
> +   emit_3ub(p, 0x66, 0x0f, 0x6c);
> +   emit_modrm( p, dst, src );
> +}
> +
> +void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
> +{
> +   DUMP_RI(dst, imm);
> +   emit_3ub(p, 0x66, 0x0f, 0x71);
> +   emit_modrm_noreg(p, 6, dst);
> +   emit_1ub(p, imm);
> +}
> +
> +void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
> +{
> +   DUMP_RI(dst, imm);
> +   emit_3ub(p, 0x66, 0x0f, 0x72);
> +   emit_modrm_noreg(p, 6, dst);
> +   emit_1ub(p, imm);
> +}
> +
> +void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
> +{
> +   DUMP_RI(dst, imm);
> +   emit_3ub(p, 0x66, 0x0f, 0x73);
> +   emit_modrm_noreg(p, 6, dst);
> +   emit_1ub(p, imm);
> +}
> +
> +void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
> +{
> +   DUMP_RI(dst, imm);
> +   emit_3ub(p, 0x66, 0x0f, 0x71);
> +   emit_modrm_noreg(p, 2, dst);
> +   emit_1ub(p, imm);
> +}
> +
> +void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
> +{
> +   DUMP_RI(dst, imm);
> +   emit_3ub(p, 0x66, 0x0f, 0x72);
> +   emit_modrm_noreg(p, 2, dst);
> +   emit_1ub(p, imm);
> +}
> +
> +void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
> +{
> +   DUMP_RI(dst, imm);
> +   emit_3ub(p, 0x66, 0x0f, 0x73);
> +   emit_modrm_noreg(p, 2, dst);
> +   emit_1ub(p, imm);
> +}
> +
> +void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
> +{
> +   DUMP_RI(dst, imm);
> +   emit_3ub(p, 0x66, 0x0f, 0x71);
> +   emit_modrm_noreg(p, 4, dst);
> +   emit_1ub(p, imm);
> +}
> +
> +void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
> +{
> +   DUMP_RI(dst, imm);
> +   emit_3ub(p, 0x66, 0x0f, 0x72);
> +   emit_modrm_noreg(p, 4, dst);
> +   emit_1ub(p, imm);
> +}
> +
> +void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
> +{
> +   DUMP_RR(dst, src);
> +   emit_3ub(p, 0x66, 0x0f, 0xeb);
> +   emit_modrm(p, dst, src);
> +}
> 
>  void sse2_rcpps( struct x86_function *p,
>                   struct x86_reg dst,
> @@ -1114,18 +1490,6 @@ void sse2_rcpss( struct x86_function *p,
>     emit_modrm( p, dst, src );
>  }
> 
> -void sse2_movd( struct x86_function *p,
> -               struct x86_reg dst,
> -               struct x86_reg src )
> -{
> -   DUMP_RR( dst, src );
> -   emit_2ub(p, 0x66, X86_TWOB);
> -   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
> -}
> -
> -
> -
> -
>  /***********************************************************************
>   * x87 instructions
>   */
> @@ -1716,17 +2080,56 @@ void x86_cdecl_caller_pop_regs( struct x86_function *p )
>  }
> 
> 
> -/* Retreive a reference to one of the function arguments, taking into
> - * account any push/pop activity:
> - */
>  struct x86_reg x86_fn_arg( struct x86_function *p,
> -                          unsigned arg )
> +                           unsigned arg )
>  {
> -   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
> +   if(util_cpu_arch == UTIL_CPU_ARCH_X86_64)
> +   {
> +      /* Microsoft uses a different calling convention than the rest of the world */
> +      if(util_cpu_abi == UTIL_CPU_ABI_WIN64)
> +      {
> +         switch(arg)
> +         {
> +         case 1:
> +            return x86_make_reg(file_REG32, reg_CX);
> +         case 2:
> +            return x86_make_reg(file_REG32, reg_DX);
> +         case 3:
> +            return x86_make_reg(file_REG32, reg_R8);
> +         case 4:
> +            return x86_make_reg(file_REG32, reg_R9);
> +         default:
> +            return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
> +                  p->stack_offset + (arg - 4) * 8);     /* ??? */
> +         }
> +      }
> +      else
> +      {
> +         switch(arg)
> +         {
> +         case 1:
> +            return x86_make_reg(file_REG32, reg_DI);
> +         case 2:
> +            return x86_make_reg(file_REG32, reg_SI);
> +         case 3:
> +            return x86_make_reg(file_REG32, reg_DX);
> +         case 4:
> +            return x86_make_reg(file_REG32, reg_CX);
> +         case 5:
> +            return x86_make_reg(file_REG32, reg_R8);
> +         case 6:
> +            return x86_make_reg(file_REG32, reg_R9);
> +         default:
> +            return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
> +                              p->stack_offset + (arg - 6) * 8);     /* ??? */
> +         }
> +      }
> +   }
> +   else
> +      return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
>                         p->stack_offset + arg * 4);     /* ??? */
>  }
> 
> -
>  void x86_init_func( struct x86_function *p )
>  {
>     p->size = 0;
> diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
> index 365dec1..a23fc5e 100644
> --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
> +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
> @@ -26,15 +26,15 @@
> 
>  #include "pipe/p_config.h"
> 
> -#if defined(PIPE_ARCH_X86)
> +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
> 
>  /* It is up to the caller to ensure that instructions issued are
>   * suitable for the host cpu.  There are no checks made in this module
>   * for mmx/sse/sse2 support on the cpu.
>   */
>  struct x86_reg {
> -   unsigned file:3;
> -   unsigned idx:3;
> +   unsigned file:2;
> +   unsigned idx:4;
>     unsigned mod:2;             /* mod_REG if this is just a register */
>     int      disp:24;           /* only +/- 23bits of offset - should be enough... */
>  };
> @@ -75,7 +75,15 @@ enum x86_reg_name {
>     reg_SP,
>     reg_BP,
>     reg_SI,
> -   reg_DI
> +   reg_DI,
> +   reg_R8,
> +   reg_R9,
> +   reg_R10,
> +   reg_R11,
> +   reg_R12,
> +   reg_R13,
> +   reg_R14,
> +   reg_R15
>  };
> 
> 
> @@ -138,6 +146,8 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
>   */
>  int x86_get_label( struct x86_function *p );
> 
> +void x64_rexw(struct x86_function *p);
> +
>  void x86_jcc( struct x86_function *p,
>               enum x86_cc cc,
>               int label );
> @@ -178,18 +188,54 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> 
> +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +
>  void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +
>  void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
>                    unsigned char shuf );
> +void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
> +                  unsigned char shuf );
> +void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
> +                  unsigned char shuf );
>  void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> 
> +void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +
> +void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
> +void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
> +void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
> +
> +void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
> +void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
> +void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
> +
> +void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
> +void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
> +
> +void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +
> +void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
> +void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
> +void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
> 
>  void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
>  void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
> @@ -227,7 +273,6 @@ void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg
>  void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
> -void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
> 
>  void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> @@ -237,8 +282,13 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
>  void x86_inc( struct x86_function *p, struct x86_reg reg );
>  void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> -void x86_movzx8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> -void x86_movzx16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> +void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
> +void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
> +void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
>  void x86_mul( struct x86_function *p, struct x86_reg src );
>  void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
> @@ -252,7 +302,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
>  void x86_sahf( struct x86_function *p );
>  void x86_div( struct x86_function *p, struct x86_reg src );
> -
> +void x86_bswap( struct x86_function *p, struct x86_reg src );
> +void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
> +void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
> +void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm  );
> 
>  void x86_cdecl_caller_push_regs( struct x86_function *p );
>  void x86_cdecl_caller_pop_regs( struct x86_function *p );
> --
> 1.7.0.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev




More information about the mesa-dev mailing list