[Spice-devel] [PATCH 12/12] display: use RtlCopyMemory instead of fast_memcpy_*

Alon Levy alevy at redhat.com
Wed Apr 30 07:40:36 PDT 2014


It is common practice to use RtlCopyMemory. It is currently defined by the
preprocessor to be memcpy, which outperforms our implementation (see the
previous benchmark commit).

This commit removes the benchmarks and our internal implementation in
favor of RtlCopyMemory.

RHBZ: 705785
---
 xddm/build.bat                         |   0
 xddm/display/amd64/x64.asm             | 250 ---------------------
 xddm/display/driver.c                  |   7 -
 xddm/display/res.c                     | 392 ++-------------------------------
 xddm/display/res.h                     |   3 -
 xddm/display/sources                   |   4 +-
 xddm/tests/benchmark_format_results.py |  38 ----
 xddm/tests/build_benchmark.bat         |   7 -
 8 files changed, 17 insertions(+), 684 deletions(-)
 mode change 100755 => 100644 xddm/build.bat
 delete mode 100644 xddm/display/amd64/x64.asm
 delete mode 100644 xddm/tests/benchmark_format_results.py
 delete mode 100644 xddm/tests/build_benchmark.bat

diff --git a/xddm/build.bat b/xddm/build.bat
old mode 100755
new mode 100644
diff --git a/xddm/display/amd64/x64.asm b/xddm/display/amd64/x64.asm
deleted file mode 100644
index bb45d33..0000000
--- a/xddm/display/amd64/x64.asm
+++ /dev/null
@@ -1,250 +0,0 @@
-Extern have_sse2:DWORD
-
-.code
-
-CheckAndSetSSE2 proc
-    mov eax, 0000001h
-    cpuid
-    and edx, 4000000h
-    shr edx, 26
-    mov have_sse2, edx
-    ret
-CheckAndSetSSE2 endp
-
-RestoreFPU proc
-; rcx PDev *pdev
-; rdx size_t aligned_addr
-    movdqa xmm0, [rcx]
-    movdqa xmm1, [rcx + 16]
-    movdqa xmm2, [rcx + 32]
-    movdqa xmm3, [rcx + 48]
-    ret
-RestoreFPU endp
-
-SaveFPU proc
-; rcx PDev *pdev
-; rdx size_t aligned_addr
-    movdqa [rcx], xmm0
-    movdqa [rcx + 16], xmm1
-    movdqa [rcx + 32], xmm2
-    movdqa [rcx + 48], xmm3
-    ret
-SaveFPU endp
-
-fast_memcpy_aligned proc
-; rcx void *dest
-; rdx const void *src
-; r8 size_t len
-    ; Save rsi and rdi
-    mov r9, rsi
-    mov r10, rdi
-
-    mov rsi, rdx
-    mov rdi, rcx
-    mov rcx, r8
-
-    cmp rcx, 128
-    jb try_to_copy64
-
-    prefetchnta [rsi]
-    copy_128:
-        prefetchnta [rsi + 64]
-
-        movdqa xmm0, [rsi]
-        movdqa xmm1, [rsi + 16]
-        movdqa xmm2, [rsi + 32]
-        movdqa xmm3, [rsi + 48]
-
-        prefetchnta [rsi + 128]
-
-        movntdq [rdi], xmm0
-        movntdq [rdi + 16], xmm1
-        movntdq [rdi + 32], xmm2
-        movntdq [rdi + 48], xmm3
-
-        movdqa xmm0, [rsi + 64]
-        movdqa xmm1, [rsi + 80]
-        movdqa xmm2, [rsi + 96]
-        movdqa xmm3, [rsi + 112]
-
-        movntdq [rdi + 64], xmm0
-        movntdq [rdi + 80], xmm1
-        movntdq [rdi + 96], xmm2
-        movntdq [rdi + 112], xmm3
-
-        add rdi, 128
-        add rsi, 128
-        sub rcx, 128
-        cmp rcx, 128
-        jae copy_128
-
-   try_to_copy64:
-        cmp rcx, 64
-        jb try_to_copy32
-
-         movdqa xmm0, [rsi]
-         movdqa xmm1, [rsi + 16]
-         movdqa xmm2, [rsi + 32]
-         movdqa xmm3, [rsi + 48]
-
-         movntdq [rdi], xmm0
-         movntdq [rdi + 16], xmm1
-         movntdq [rdi + 32], xmm2
-         movntdq [rdi + 48], xmm3
-
-         add rdi, 64
-         add rsi, 64
-         sub rcx, 64
-         prefetchnta [rsi]
-
-    try_to_copy32:
-         cmp rcx, 32
-         jb try_to_copy16
-
-         movdqa xmm0, [rsi]
-         movdqa xmm1, [rsi + 16]
-         movntdq [rdi], xmm0
-         movntdq [rdi + 16], xmm1
-
-         add rdi, 32
-         add rsi, 32
-         sub rcx, 32
-
-    try_to_copy16:
-         cmp rcx, 16
-         jb try_to_copy4
-
-         movdqa xmm0, [rsi]
-         movntdq [rdi], xmm0
-
-         add rdi, 16
-         add rsi, 16
-         sub rcx, 16
-
-
-    try_to_copy4:
-        cmp rcx, 4
-        jb try_to_copy_1
-        movsd
-        sub rcx, 4
-        jmp try_to_copy4
-
-    try_to_copy_1:
-        rep movsb
-
-    sfence
-    ; Save rsi and rdi
-    mov rsi, r9
-    mov rdi, r10
-    ret
-fast_memcpy_aligned endp
-
-fast_memcpy_unaligned proc
-; rcx void *dest
-; rdx const void *src
-; r8 size_t len
-    ; Save rsi and rdi
-    mov r9, rsi
-    mov r10, rdi
-
-    mov rsi, rdx
-    mov rdi, rcx
-    mov rcx, r8
-
-    cmp rcx, 128
-    jb try_to_copy64
-
-    prefetchnta [rsi]
-    copy_128:
-        prefetchnta [rsi + 64]
-
-        movdqu xmm0, [rsi]
-        movdqu xmm1, [rsi + 16]
-        movdqu xmm2, [rsi + 32]
-        movdqu xmm3, [rsi + 48]
-
-        prefetchnta [rsi + 128]
-
-        movntdq [rdi], xmm0
-        movntdq [rdi + 16], xmm1
-        movntdq [rdi + 32], xmm2
-        movntdq [rdi + 48], xmm3
-
-        movdqu xmm0, [rsi + 64]
-        movdqu xmm1, [rsi + 80]
-        movdqu xmm2, [rsi + 96]
-        movdqu xmm3, [rsi + 112]
-
-        movntdq [rdi + 64], xmm0
-        movntdq [rdi + 80], xmm1
-        movntdq [rdi + 96], xmm2
-        movntdq [rdi + 112], xmm3
-
-        add rdi, 128
-        add rsi, 128
-        sub rcx, 128
-        cmp rcx, 128
-        jae copy_128
-
-   try_to_copy64:
-        cmp rcx, 64
-        jb try_to_copy32
-
-         movdqu xmm0, [rsi]
-         movdqu xmm1, [rsi + 16]
-         movdqu xmm2, [rsi + 32]
-         movdqu xmm3, [rsi + 48]
-
-         movntdq [rdi], xmm0
-         movntdq [rdi + 16], xmm1
-         movntdq [rdi + 32], xmm2
-         movntdq [rdi + 48], xmm3
-
-         add rdi, 64
-         add rsi, 64
-         sub rcx, 64
-         prefetchnta [rsi]
-
-    try_to_copy32:
-         cmp rcx, 32
-         jb try_to_copy16
-
-         movdqu xmm0, [rsi]
-         movdqu xmm1, [rsi + 16]
-         movntdq [rdi], xmm0
-         movntdq [rdi + 16], xmm1
-
-         add rdi, 32
-         add rsi, 32
-         sub rcx, 32
-
-    try_to_copy16:
-         cmp rcx, 16
-         jb try_to_copy4
-
-         movdqu xmm0, [rsi]
-         movntdq [rdi], xmm0
-
-         add rdi, 16
-         add rsi, 16
-         sub rcx, 16
-
-
-    try_to_copy4:
-        cmp rcx, 4
-        jb try_to_copy_1
-        movsd
-        sub rcx, 4
-        jmp try_to_copy4
-
-    try_to_copy_1:
-        rep movsb
-
-    sfence
-    ; restore rsi and rdi
-    mov rsi, r9
-    mov rdi, r10
-    ret
-fast_memcpy_unaligned endp
-
-end
\ No newline at end of file
diff --git a/xddm/display/driver.c b/xddm/display/driver.c
index bed1d58..6d196fb 100644
--- a/xddm/display/driver.c
+++ b/xddm/display/driver.c
@@ -246,9 +246,6 @@ BOOL DrvEnableDriver(ULONG engine_version, ULONG enable_data_size, PDRVENABLEDAT
     mspace_set_abort_func(mspace_abort);
     mspace_set_print_func(mspace_print);
     ResInitGlobals();
-#ifndef _WIN64
-    CheckAndSetSSE2();
-#endif
     DEBUG_PRINT((NULL, 1, "%s: end\n", __FUNCTION__));
     return TRUE;
 }
@@ -903,8 +900,6 @@ VOID EnableQXLPrimarySurface(PDev *pdev)
     pdev->surf_enable = TRUE;
 }
 
-void benchmark_memcpy(PDev *pdev);
-
 HSURF DrvEnableSurface(DHPDEV in_pdev)
 {
     PDev *pdev;
@@ -943,8 +938,6 @@ HSURF DrvEnableSurface(DHPDEV in_pdev)
 
     EnableQXLPrimarySurface(pdev);
 
-    benchmark_memcpy(pdev);
-
     DEBUG_PRINT((pdev, 1, "%s: 0x%lx exit\n", __FUNCTION__, pdev));
     return surf;
 
diff --git a/xddm/display/res.c b/xddm/display/res.c
index 589218b..86ed47f 100644
--- a/xddm/display/res.c
+++ b/xddm/display/res.c
@@ -36,11 +36,6 @@
 #include "devioctl.h"
 #include "ntddvdeo.h"
 
-void SaveFPU(PDev *pdev, size_t aligned_addr);
-void RestoreFPU(PDev *pdev, size_t aligned_addr);
-void fast_memcpy_unaligned(void *dest, const void *src, size_t len);
-void fast_memcpy_aligned(void *dest, const void *src, size_t len);
-
 static _inline QXLPHYSICAL PA(PDev *pdev, PVOID virt, UINT8 slot_id)
 {
     PMemSlot *p_slot = &pdev->mem_slots[slot_id];
@@ -129,12 +124,6 @@ typedef struct QXLOutput {
     UINT8 data[0];
 } QXLOutput;
 
-#ifndef _WIN64
-static int have_sse2 = FALSE;
-#else
-int have_sse2 = FALSE;
-#endif
-
 #ifndef DBG
 static _inline void DebugShowOutput(PDev *pdev, QXLOutput* output)
 {
@@ -1077,285 +1066,22 @@ static BOOL SetClip(PDev *pdev, CLIPOBJ *clip, QXLDrawable *drawable)
     return TRUE;
 }
 
-#ifndef _WIN64
-
-static _inline void fast_memcpy_aligment(void *dest, const void *src, size_t len)
-{
-    _asm
-    {
-        mov ecx, len
-        mov esi, src
-        mov edi, dest
-
-        cmp ecx, 128
-        jb try_to_copy64
- 
-        prefetchnta [esi]
-        copy_128:
-            prefetchnta [esi + 64]
-
-            movdqa xmm0, [esi]
-            movdqa xmm1, [esi + 16]
-            movdqa xmm2, [esi + 32]
-            movdqa xmm3, [esi + 48]
-
-            prefetchnta [esi + 128]
-
-            movntdq [edi], xmm0
-            movntdq [edi + 16], xmm1
-            movntdq [edi + 32], xmm2
-            movntdq [edi + 48], xmm3
-
-            movdqa xmm0, [esi + 64]
-            movdqa xmm1, [esi + 80]
-            movdqa xmm2, [esi + 96]
-            movdqa xmm3, [esi + 112]
-
-            movntdq [edi + 64], xmm0
-            movntdq [edi + 80], xmm1
-            movntdq [edi + 96], xmm2
-            movntdq [edi + 112], xmm3
-
-            add edi, 128
-            add esi, 128
-            sub ecx, 128
-            cmp ecx, 128
-            jae copy_128
- 
-       try_to_copy64:
-            cmp ecx, 64
-            jb try_to_copy32
-
-             movdqa xmm0, [esi]
-             movdqa xmm1, [esi + 16]
-             movdqa xmm2, [esi + 32]
-             movdqa xmm3, [esi + 48]
-
-             movntdq [edi], xmm0
-             movntdq [edi + 16], xmm1
-             movntdq [edi + 32], xmm2
-             movntdq [edi + 48], xmm3
-             
-             add edi, 64
-             add esi, 64
-             sub ecx, 64
-             prefetchnta [esi]
-
-        try_to_copy32:
-             cmp ecx, 32
-             jb try_to_copy16
-
-             movdqa xmm0, [esi]
-             movdqa xmm1, [esi + 16] 
-             movntdq [edi], xmm0
-             movntdq [edi + 16], xmm1
-
-             add edi, 32 
-             add esi, 32 
-             sub ecx, 32
-
-        try_to_copy16:
-             cmp ecx, 16
-             jb try_to_copy4
-
-             movdqa xmm0, [esi]
-             movntdq [edi], xmm0
-
-             add edi, 16
-             add esi, 16
-             sub ecx, 16
-
-
-        try_to_copy4:
-            cmp ecx, 4
-            jb try_to_copy_1 
-            movsd
-            sub ecx, 4
-            jmp try_to_copy4
-
-        try_to_copy_1:     
-            rep movsb
-
-        sfence
-    }
-}
-
-static _inline void fast_memcpy_unaligment(void *dest, const void *src, size_t len)
-{
-    _asm
-    {
-        mov ecx, len
-        mov esi, src
-        mov edi, dest
-
-        cmp ecx, 128
-        jb try_to_copy64
- 
-        prefetchnta [esi]
-        copy_128:
-            prefetchnta [esi + 64]
-
-            movdqu xmm0, [esi]
-            movdqu xmm1, [esi + 16]
-            movdqu xmm2, [esi + 32]
-            movdqu xmm3, [esi + 48]
-
-            prefetchnta [esi + 128]
-
-            movntdq [edi], xmm0
-            movntdq [edi + 16], xmm1
-            movntdq [edi + 32], xmm2
-            movntdq [edi + 48], xmm3
-
-            movdqu xmm0, [esi + 64]
-            movdqu xmm1, [esi + 80]
-            movdqu xmm2, [esi + 96]
-            movdqu xmm3, [esi + 112]
-
-            movntdq [edi + 64], xmm0
-            movntdq [edi + 80], xmm1
-            movntdq [edi + 96], xmm2
-            movntdq [edi + 112], xmm3
-
-            add edi, 128
-            add esi, 128
-            sub ecx, 128
-            cmp ecx, 128
-            jae copy_128
- 
-       try_to_copy64:
-            cmp ecx, 64
-            jb try_to_copy32
-
-             movdqu xmm0, [esi]
-             movdqu xmm1, [esi + 16]
-             movdqu xmm2, [esi + 32]
-             movdqu xmm3, [esi + 48]
-
-             movntdq [edi], xmm0
-             movntdq [edi + 16], xmm1
-             movntdq [edi + 32], xmm2
-             movntdq [edi + 48], xmm3
-             
-             add edi, 64
-             add esi, 64
-             sub ecx, 64
-             prefetchnta [esi]
-
-        try_to_copy32:
-             cmp ecx, 32
-             jb try_to_copy16
-
-             movdqu xmm0, [esi]
-             movdqu xmm1, [esi + 16] 
-             movntdq [edi], xmm0
-             movntdq [edi + 16], xmm1
-
-             add edi, 32 
-             add esi, 32 
-             sub ecx, 32
-
-        try_to_copy16:
-             cmp ecx, 16
-             jb try_to_copy4
-
-             movdqu xmm0, [esi]
-             movntdq [edi], xmm0
-
-             add edi, 16
-             add esi, 16
-             sub ecx, 16
-
-
-        try_to_copy4:
-            cmp ecx, 4
-            jb try_to_copy_1 
-            movsd
-            sub ecx, 4
-            jmp try_to_copy4
-
-        try_to_copy_1:     
-            rep movsb
-
-        sfence
-    }
-}
-
-#endif
-
-uint64_t time_usecs(void)
-{
-  ENG_TIME_FIELDS systime;
-  EngQueryLocalTime(&systime);
-  return (uint64_t)(systime.usMilliseconds * 1000 + systime.usSecond * 1e6 +
-                     systime.usMinute * 60e6 + systime.usHour * 3600e6);
-}
-
-void benchmark_memcpy(PDev *pdev)
-{
-  size_t i;
-  unsigned char *src_unaligned;
-  unsigned char *dest_unaligned;
-  uint64_t start, total1, total2;
-  unsigned char *src = NULL;
-  unsigned char *dest = NULL;
-  size_t size = 1024;
-  size_t iter = 1024 * 1024;
-
-  for (size = 1024; size < 1024*1024*2; size *= 2, iter /= 2) {
-    src_unaligned = EngAllocMem(0, size + 31, ALLOC_TAG);
-    dest_unaligned = EngAllocMem(0, size + 31, ALLOC_TAG);
-    src = (unsigned char *)((size_t)(src_unaligned + 31) & ~0x1f);
-    dest = (unsigned char *)((size_t)(dest_unaligned + 31) & ~0x1f);
-
-    for (i = 0 ; i < size ; ++i)
-      src[i] = i;
-
-    start = time_usecs();
-    for (i = 0 ; i < iter ; ++i) {
-      fast_memcpy_aligned(dest, src, size);
-    }
-    total2 = time_usecs() - start;
-
-    {
-      int errors = 0;
-      for (i = 0 ; i < size ; ++i) {
-        if (dest[i] != src[i]) {
-          errors++;
-        }
-      }
-      if (errors > 0) {
-        DEBUG_PRINT((pdev, 1, "!!! copy errors %d !!!\n", errors));
-      }
-    }
-
-    start = time_usecs();
-    for (i = 0 ; i < iter ; ++i)
-      memcpy(dest, src, size);
-    total1 = time_usecs() - start;
-
-    DEBUG_PRINT((pdev, 1, "%d: %lld, %lld\n", size, total1, total2));
-    EngFreeMem(src_unaligned);
-    EngFreeMem(dest_unaligned);
-  }
-}
-
 #ifdef DBG
     #define PutBytesAlign __PutBytesAlign
-#define PutBytes(pdev, chunk, now, end, src, size, page_counter, alloc_size, use_sse)\
-    __PutBytesAlign(pdev, chunk, now, end, src, size, page_counter, alloc_size, 1, use_sse)
+#define PutBytes(pdev, chunk, now, end, src, size, page_counter, alloc_size)\
+    __PutBytesAlign(pdev, chunk, now, end, src, size, page_counter, alloc_size, 1)
 #else
-#define  PutBytesAlign(pdev, chunk, now, end, src, size, page_counter, alloc_size, alignment, use_sse)\
-    __PutBytesAlign(pdev, chunk, now, end, src, size, NULL, alloc_size, alignment, use_sse)
-#define  PutBytes(pdev, chunk, now, end, src, size, page_counter, alloc_size, use_sse)\
-    __PutBytesAlign(pdev, chunk, now, end, src, size, NULL, alloc_size, 1, use_sse)
+#define  PutBytesAlign(pdev, chunk, now, end, src, size, page_counter, alloc_size, alignment)\
+    __PutBytesAlign(pdev, chunk, now, end, src, size, NULL, alloc_size, alignment)
+#define  PutBytes(pdev, chunk, now, end, src, size, page_counter, alloc_size)\
+    __PutBytesAlign(pdev, chunk, now, end, src, size, NULL, alloc_size, 1)
 #endif
 
 #define BITS_BUF_MAX (64 * 1024)
 
 static void __PutBytesAlign(PDev *pdev, QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
                             UINT8 **end_ptr, UINT8 *src, int size, int *page_counter,
-                            size_t alloc_size, uint32_t alignment, BOOL use_sse)
+                            size_t alloc_size, uint32_t alignment)
 {
     QXLDataChunk *chunk = *chunk_ptr;
     UINT8 *now = *now_ptr;
@@ -1374,34 +1100,7 @@ static void __PutBytesAlign(PDev *pdev, QXLDataChunk **chunk_ptr, UINT8 **now_pt
             NEW_DATA_CHUNK(page_counter, aligned_size);
             cp_size = (int)MIN(end - now, size);
         }
-        if (use_sse) {
-            offset = (size_t)now & SSE_MASK;
-            if (offset) {
-                offset = SSE_ALIGN - offset;
-                if (offset >= cp_size) {
-                    RtlCopyMemory(now, src, cp_size);
-                    src += cp_size;
-                    now += cp_size;
-                    chunk->data_size += cp_size;
-                    size -= cp_size;
-                    continue;
-                }
-                RtlCopyMemory(now, src,  offset);
-                now += offset;
-                src += offset;
-                size -= offset;
-                cp_size -= offset;
-                chunk->data_size += offset;
-            }
-    
-            if (((size_t)src & SSE_MASK) == 0) {
-                fast_memcpy_aligment(now, src, cp_size);
-            } else {
-                fast_memcpy_unaligment(now, src, cp_size);
-            }
-        } else {
-            RtlCopyMemory(now, src, cp_size);
-        }
+        RtlCopyMemory(now, src, cp_size);
         src += cp_size;
         now += cp_size;
         chunk->data_size += cp_size;
@@ -1871,36 +1570,6 @@ static void FreeBitmapImage(PDev *pdev, Resource *res) // todo: defer
     DEBUG_PRINT((pdev, 13, "%s: done\n", __FUNCTION__));
 }
 
-#ifndef _WIN64
-
-static _inline void RestoreFPU(PDev *pdev, size_t aligned_addr)
-{
-    _asm
-    {
-        mov esi, aligned_addr
-
-        movdqa xmm0, [esi]
-        movdqa xmm1, [esi + 16]
-        movdqa xmm2, [esi + 32]
-        movdqa xmm3, [esi + 48]
-    }
-}
-
-static _inline void SaveFPU(PDev *pdev, size_t aligned_addr)
-{
-    _asm
-    {
-        mov edi, aligned_addr
-    
-        movdqa [edi], xmm0
-        movdqa [edi + 16], xmm1
-        movdqa [edi + 32], xmm2
-        movdqa [edi + 48], xmm3
-    }
-}
-
-#endif
-
 static void FreeSurfaceImage(PDev *pdev, Resource *res)
 {
     DEBUG_PRINT((pdev, 12, "%s\n", __FUNCTION__));
@@ -1923,9 +1592,6 @@ static _inline Resource *GetBitmapImage(PDev *pdev, SURFOBJ *surf, XLATEOBJ *col
     UINT8 *src_end;
     UINT8 *dest;
     UINT8 *dest_end;
-    UINT8 FPUSaveUnaligned[16 * 4 + 15];
-    size_t FPUSave = ALIGN((size_t)(FPUSaveUnaligned), SSE_ALIGN);
-    BOOL use_sse = FALSE;
 
     DEBUG_PRINT((pdev, 12, "%s\n", __FUNCTION__));
     ASSERT(pdev, width > 0 && height > 0);
@@ -1963,16 +1629,9 @@ static _inline Resource *GetBitmapImage(PDev *pdev, SURFOBJ *surf, XLATEOBJ *col
     dest_end = (UINT8 *)image_res + alloc_size;
     alloc_size = height * line_size;
 
-    if (have_sse2 && alloc_size >= 1024) {
-        use_sse = TRUE;
-        SaveFPU(pdev, FPUSave);
-    }
     for (; src != src_end; src -= surf->lDelta, alloc_size -= line_size) {
         PutBytesAlign(pdev, &chunk, &dest, &dest_end, src, line_size,
-                      &pdev->num_bits_pages, alloc_size, line_size, use_sse);
-    }
-    if (use_sse) {
-        RestoreFPU(pdev, FPUSave);
+                      &pdev->num_bits_pages, alloc_size, line_size);
     }
 
     GetPallette(pdev, &internal->image.bitmap, color_trans);
@@ -3104,7 +2763,7 @@ static BOOL GetCursorCommon(PDev *pdev, QXLCursorCmd *cmd, LONG hot_x, LONG hot_
     src_end = src + (local_surf->lDelta * local_surf->sizlBitmap.cy);
     for (; src != src_end; src += local_surf->lDelta) {
         PutBytes(pdev, &info->chunk, &info->now, &info->end, src, line_size,
-                 &pdev->num_cursor_pages, PAGE_SIZE, FALSE);
+                 &pdev->num_cursor_pages, PAGE_SIZE);
     }
 
     CursorCacheAdd(pdev, internal);
@@ -3223,14 +2882,14 @@ BOOL GetColorCursor(PDev *pdev, QXLCursorCmd *cmd, LONG hot_x, LONG hot_y, SURFO
 
             if (pdev->bitmap_format == BMF_32BPP) {
                 PutBytes(pdev, &info.chunk, &info.now, &info.end, (UINT8 *)color_trans->pulXlate,
-                         256 << 2, &pdev->num_cursor_pages, PAGE_SIZE, FALSE);
+                         256 << 2, &pdev->num_cursor_pages, PAGE_SIZE);
             } else {
                 int i;
 
                 for (i = 0; i < 256; i++) {
                     UINT32 ent = _16bppTo32bpp(color_trans->pulXlate[i]);
                     PutBytes(pdev, &info.chunk, &info.now, &info.end, (UINT8 *)&ent,
-                             4, &pdev->num_cursor_pages, PAGE_SIZE, FALSE);
+                             4, &pdev->num_cursor_pages, PAGE_SIZE);
                 }
             }
             info.cursor->data_size += 256 << 2;
@@ -3243,14 +2902,14 @@ BOOL GetColorCursor(PDev *pdev, QXLCursorCmd *cmd, LONG hot_x, LONG hot_y, SURFO
 
             if (pdev->bitmap_format == BMF_32BPP) {
                 PutBytes(pdev, &info.chunk, &info.now, &info.end, (UINT8 *)color_trans->pulXlate,
-                         16 << 2, &pdev->num_cursor_pages, PAGE_SIZE, FALSE);
+                         16 << 2, &pdev->num_cursor_pages, PAGE_SIZE);
             } else {
                 int i;
 
                 for (i = 0; i < 16; i++) {
                     UINT32 ent = _16bppTo32bpp(color_trans->pulXlate[i]);
                     PutBytes(pdev, &info.chunk, &info.now, &info.end, (UINT8 *)&ent,
-                             4, &pdev->num_cursor_pages, PAGE_SIZE, FALSE);
+                             4, &pdev->num_cursor_pages, PAGE_SIZE);
                 }
             }
             info.cursor->data_size += 16 << 2;
@@ -3266,7 +2925,7 @@ BOOL GetColorCursor(PDev *pdev, QXLCursorCmd *cmd, LONG hot_x, LONG hot_y, SURFO
 
         for (; src != src_end; src += mask->lDelta) {
             PutBytes(pdev, &info.chunk, &info.now, &info.end, src, line_size,
-                     &pdev->num_cursor_pages, PAGE_SIZE, FALSE);
+                     &pdev->num_cursor_pages, PAGE_SIZE);
         }
     }
 
@@ -3403,23 +3062,4 @@ void ResDestroyGlobals()
 {
     EngDeleteSemaphore(image_id_sem);
     image_id_sem = NULL;
-}
-
-#ifndef _WIN64
-
-void CheckAndSetSSE2()
-{
-    _asm
-    {
-        mov eax, 0x0000001
-        cpuid
-        and edx, 0x4000000
-        mov have_sse2, edx
-    }
-
-    if (have_sse2) {
-        have_sse2 = TRUE;
-    }
-}
-
-#endif
+}
\ No newline at end of file
diff --git a/xddm/display/res.h b/xddm/display/res.h
index 4d179c5..0263fcf 100644
--- a/xddm/display/res.h
+++ b/xddm/display/res.h
@@ -67,9 +67,6 @@ BOOL ResInit(PDev *pdev);
 void ResDestroy(PDev *pdev);
 void ResInitGlobals();
 void ResDestroyGlobals();
-#ifndef _WIN64
-void CheckAndSetSSE2();
-#endif
 void EmptyReleaseRing(PDev *pdev);
 void InitDeviceMemoryResources(PDev *pdev);
 void ReleaseCacheDeviceMemoryResources(PDev *pdev);
diff --git a/xddm/display/sources b/xddm/display/sources
index 8a4f664..a6d3eaf 100644
--- a/xddm/display/sources
+++ b/xddm/display/sources
@@ -30,6 +30,4 @@ SOURCES=driver.c        \
         mspace.c        \
         quic.c          \
         surface.c       \
-        driver.rc
-
-AMD64_SOURCES=amd64\x64.asm
+        driver.rc
\ No newline at end of file
diff --git a/xddm/tests/benchmark_format_results.py b/xddm/tests/benchmark_format_results.py
deleted file mode 100644
index 96d302b..0000000
--- a/xddm/tests/benchmark_format_results.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import sys
-
-win7_32="""qxl/guest-0: 96463384453: qxldd: 1024: 47000, 109000
-qxl/guest-0: 96591785177: qxldd: 2048: 31000, 109000
-qxl/guest-0: 96722899152: qxldd: 4096: 16000, 109000
-qxl/guest-0: 96851422238: qxldd: 8192: 31000, 94000
-qxl/guest-0: 97013842048: qxldd: 16384: 31000, 141000
-qxl/guest-0: 97167323122: qxldd: 32768: 31000, 125000
-qxl/guest-0: 97316872306: qxldd: 65536: 31000, 109000
-qxl/guest-0: 97465747407: qxldd: 131072: 47000, 109000
-qxl/guest-0: 97624668249: qxldd: 262144: 47000, 109000
-qxl/guest-0: 97785876639: qxldd: 524288: 62000, 94000
-qxl/guest-0: 97953480643: qxldd: 1048576: 62000, 110000
-"""
-
-win7_64="""
-qxl/guest-0: 2278149101498: qxldd: 1024: 78000, 109000
-qxl/guest-0: 2278288271327: qxldd: 2048: 46000, 94000
-qxl/guest-0: 2278428135167: qxldd: 4096: 47000, 94000
-qxl/guest-0: 2278575078269: qxldd: 8192: 47000, 93000
-qxl/guest-0: 2278734906600: qxldd: 16384: 47000, 109000
-qxl/guest-0: 2278896881683: qxldd: 32768: 63000, 109000
-qxl/guest-0: 2279073699223: qxldd: 65536: 46000, 125000
-qxl/guest-0: 2279250403663: qxldd: 131072: 62000, 110000
-qxl/guest-0: 2279467314681: qxldd: 262144: 93000, 125000
-qxl/guest-0: 2279693375414: qxldd: 524288: 109000, 125000
-qxl/guest-0: 2279929972847: qxldd: 1048576: 109000, 125000
-"""
-
-filt = lambda txt: filt2(filt1(txt))
-filt2 = lambda data: [(s, system, ours*100.0/system) for t, s, system, ours in data]
-filt1 = lambda txt: map(lambda a: (int(a[1][:-1]), int(a[3][:-1]), int(a[4][:-1]), int(a[5])), map(lambda l: l.strip().split(), [l for l in txt.split('\n') if l.strip() != '']))
-display = lambda txt: sys.stdout.write('\n'.join('%10s %10s' % (a, '%3.0f' % c) for a, b, c in filt(txt))+'\n')
-
-print('size [bytes]'.ljust(18) + 'our time/system time [percent]')
-display(win7_32)
-print('size [bytes]'.ljust(18) + 'our time/system time [percent]')
-display(win7_64)
diff --git a/xddm/tests/build_benchmark.bat b/xddm/tests/build_benchmark.bat
deleted file mode 100644
index a184249..0000000
--- a/xddm/tests/build_benchmark.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-cl /Zi /nologo /c /I %CRT_INC_PATH% ..\display\benchmark_memcpy.c
-if defined AMD64 (
-ml64 /c /Zd ..\display\amd64\x64.asm
-link /nologo /debug /libpath:%BASEDIR%\lib\crt\amd64\ /libpath:%DDK_LIB_DEST%\amd64 x64.obj benchmark_memcpy.obj
-) else (
-link /nologo /debug /libpath:%BASEDIR%\lib\crt\i386\ /libpath:%DDK_LIB_DEST%\i386 benchmark_memcpy.obj
-)
-- 
1.9.0



More information about the Spice-devel mailing list