[Beignet] [PATCH newRT] Rename intel dir to gen.
junyan.he at inbox.com
junyan.he at inbox.com
Thu Mar 2 08:40:26 UTC 2017
From: Junyan He <junyan.he at intel.com>
We will put all the GEM-related content into this dir.
The gen name is much clearer than the intel name.
Signed-off-by: Junyan He <junyan.he at intel.com>
---
src/CMakeLists.txt | 6 +-
src/cl_driver.cpp | 2 +-
src/cl_image.c | 2 +-
src/gen/intel_batchbuffer.c | 189 +++
src/gen/intel_batchbuffer.h | 151 ++
src/gen/intel_cl_gl_share_image_info.h | 18 +
src/gen/intel_defines.h | 351 ++++
src/gen/intel_driver.c | 1042 ++++++++++++
src/gen/intel_driver.h | 150 ++
src/gen/intel_gpgpu.c | 2581 ++++++++++++++++++++++++++++++
src/gen/intel_gpgpu.h | 98 ++
src/gen/intel_structs.h | 832 ++++++++++
src/intel/intel_batchbuffer.c | 189 ---
src/intel/intel_batchbuffer.h | 151 --
src/intel/intel_cl_gl_share_image_info.h | 18 -
src/intel/intel_defines.h | 351 ----
src/intel/intel_driver.c | 1042 ------------
src/intel/intel_driver.h | 150 --
src/intel/intel_gpgpu.c | 2581 ------------------------------
src/intel/intel_gpgpu.h | 98 --
src/intel/intel_structs.h | 832 ----------
21 files changed, 5417 insertions(+), 5417 deletions(-)
create mode 100644 src/gen/intel_batchbuffer.c
create mode 100644 src/gen/intel_batchbuffer.h
create mode 100644 src/gen/intel_cl_gl_share_image_info.h
create mode 100644 src/gen/intel_defines.h
create mode 100644 src/gen/intel_driver.c
create mode 100644 src/gen/intel_driver.h
create mode 100644 src/gen/intel_gpgpu.c
create mode 100644 src/gen/intel_gpgpu.h
create mode 100644 src/gen/intel_structs.h
delete mode 100644 src/intel/intel_batchbuffer.c
delete mode 100644 src/intel/intel_batchbuffer.h
delete mode 100644 src/intel/intel_cl_gl_share_image_info.h
delete mode 100644 src/intel/intel_defines.h
delete mode 100644 src/intel/intel_driver.c
delete mode 100644 src/intel/intel_driver.h
delete mode 100644 src/intel/intel_gpgpu.c
delete mode 100644 src/intel/intel_gpgpu.h
delete mode 100644 src/intel/intel_structs.h
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f3c4632..94e97ba 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -101,9 +101,9 @@ set(OPENCL_SRC
cl_driver.h
cl_driver.cpp
cl_driver_defs.c
- intel/intel_gpgpu.c
- intel/intel_batchbuffer.c
- intel/intel_driver.c
+ gen/intel_gpgpu.c
+ gen/intel_batchbuffer.c
+ gen/intel_driver.c
performance.c)
if (X11_FOUND)
diff --git a/src/cl_driver.cpp b/src/cl_driver.cpp
index 03b980e..e0d2ae3 100644
--- a/src/cl_driver.cpp
+++ b/src/cl_driver.cpp
@@ -18,7 +18,7 @@
*/
extern "C" {
-#include "intel/intel_driver.h"
+#include "gen/intel_driver.h"
#include "cl_utils.h"
#include <stdlib.h>
#include <string.h>
diff --git a/src/cl_image.c b/src/cl_image.c
index 5ff459a..89b5c72 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -19,7 +19,7 @@
#include "cl_image.h"
#include "cl_utils.h"
-#include "intel/intel_defines.h"
+#include "gen/intel_defines.h"
#include <assert.h>
diff --git a/src/gen/intel_batchbuffer.c b/src/gen/intel_batchbuffer.c
new file mode 100644
index 0000000..61bf363
--- /dev/null
+++ b/src/gen/intel_batchbuffer.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+LOCAL int
+intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
+{
+ if (batch->buffer != NULL) {
+ dri_bo_unreference(batch->buffer);
+ batch->buffer = NULL;
+ batch->last_bo = NULL;
+ }
+
+ batch->buffer = dri_bo_alloc(batch->intel->bufmgr,
+ "batch buffer",
+ sz,
+ 64);
+ if (!batch->buffer || (dri_bo_map(batch->buffer, 1) != 0)) {
+ if (batch->buffer)
+ dri_bo_unreference(batch->buffer);
+ batch->buffer = NULL;
+ return -1;
+ }
+ batch->map = (uint8_t*) batch->buffer->virtual;
+ batch->size = sz;
+ batch->ptr = batch->map;
+ batch->atomic = 0;
+ batch->last_bo = batch->buffer;
+ batch->enable_slm = 0;
+ return 0;
+}
+
+LOCAL void
+intel_batchbuffer_init(intel_batchbuffer_t *batch, intel_driver_t *intel)
+{
+ assert(intel);
+ batch->intel = intel;
+}
+
+LOCAL void
+intel_batchbuffer_terminate(intel_batchbuffer_t *batch)
+{
+ assert(batch->buffer);
+
+ if (batch->map) {
+ dri_bo_unmap(batch->buffer);
+ batch->map = NULL;
+ }
+
+ dri_bo_unreference(batch->buffer);
+ batch->buffer = NULL;
+}
+
+LOCAL int
+intel_batchbuffer_flush(intel_batchbuffer_t *batch)
+{
+ uint32_t used = batch->ptr - batch->map;
+ int is_locked = batch->intel->locked;
+ int err = 0;
+
+ if (used == 0)
+ return 0;
+
+ if ((used & 4) == 0) {
+ *(uint32_t*) batch->ptr = 0;
+ batch->ptr += 4;
+ }
+
+ *(uint32_t*)batch->ptr = MI_BATCH_BUFFER_END;
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+ dri_bo_unmap(batch->buffer);
+ batch->ptr = batch->map = NULL;
+
+ if (!is_locked)
+ intel_driver_lock_hardware(batch->intel);
+
+ int flag = I915_EXEC_RENDER;
+ if(batch->enable_slm) {
+ /* use the hard code here temp, must change to
+ * I915_EXEC_ENABLE_SLM when it drm accept the patch */
+ flag |= (1<<13);
+ }
+ if (drm_intel_gem_bo_context_exec(batch->buffer, batch->intel->ctx, used, flag) < 0) {
+ fprintf(stderr, "drm_intel_gem_bo_context_exec() failed: %s\n", strerror(errno));
+ err = -1;
+ }
+
+ if (!is_locked)
+ intel_driver_unlock_hardware(batch->intel);
+
+ return err;
+}
+
+LOCAL void
+intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch,
+ dri_bo *bo,
+ uint32_t read_domains,
+ uint32_t write_domains,
+ uint32_t delta)
+{
+ assert(batch->ptr - batch->map < batch->size);
+ dri_bo_emit_reloc(batch->buffer,
+ read_domains,
+ write_domains,
+ delta,
+ batch->ptr - batch->map,
+ bo);
+ intel_batchbuffer_emit_dword(batch, bo->offset + delta);
+}
+
+LOCAL intel_batchbuffer_t*
+intel_batchbuffer_new(intel_driver_t *intel)
+{
+ intel_batchbuffer_t *batch = NULL;
+ assert(intel);
+ TRY_ALLOC_NO_ERR (batch, CALLOC(intel_batchbuffer_t));
+ intel_batchbuffer_init(batch, intel);
+
+exit:
+ return batch;
+error:
+ intel_batchbuffer_delete(batch);
+ batch = NULL;
+ goto exit;
+}
+
+LOCAL void
+intel_batchbuffer_delete(intel_batchbuffer_t *batch)
+{
+ if (batch == NULL)
+ return;
+ if(batch->buffer)
+ intel_batchbuffer_terminate(batch);
+
+ cl_free(batch);
+}
diff --git a/src/gen/intel_batchbuffer.h b/src/gen/intel_batchbuffer.h
new file mode 100644
index 0000000..0544e9a
--- /dev/null
+++ b/src/gen/intel_batchbuffer.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef _INTEL_BATCHBUFFER_H_
+#define _INTEL_BATCHBUFFER_H_
+
+#include "intel_defines.h"
+#include "cl_utils.h"
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+#include <stdint.h>
+#include <memory.h>
+#include <assert.h>
+
+#define BEGIN_BATCH(b, n) do { \
+ intel_batchbuffer_require_space(b, (n) * 4); \
+} while (0)
+
+#define OUT_BATCH(b, d) do { \
+ intel_batchbuffer_emit_dword(b, d); \
+} while (0)
+
+#define OUT_RELOC(b, bo, read_domains, write_domain, delta) do { \
+ assert((delta) >= 0); \
+ intel_batchbuffer_emit_reloc(b, bo, read_domains, write_domain, delta); \
+} while (0)
+
+#define ADVANCE_BATCH(b) do { } while (0)
+
+struct intel_driver;
+
+typedef struct intel_batchbuffer
+{
+ struct intel_driver *intel;
+ drm_intel_bo *buffer;
+ /** Last bo submitted to the hardware. used for clFinish. */
+ drm_intel_bo *last_bo;
+ uint32_t size;
+ uint8_t *map;
+ uint8_t *ptr;
+ /** HSW: can't set LRI in batch buffer, set I915_EXEC_ENABLE_SLM
+ * flag when call exec. */
+ uint8_t enable_slm;
+ int atomic;
+} intel_batchbuffer_t;
+
+extern intel_batchbuffer_t* intel_batchbuffer_new(struct intel_driver*);
+extern void intel_batchbuffer_delete(intel_batchbuffer_t*);
+extern void intel_batchbuffer_emit_reloc(intel_batchbuffer_t*,
+ drm_intel_bo*,
+ uint32_t read_domains,
+ uint32_t write_domains,
+ uint32_t delta);
+extern void intel_batchbuffer_init(intel_batchbuffer_t*, struct intel_driver*);
+extern void intel_batchbuffer_terminate(intel_batchbuffer_t*);
+extern int intel_batchbuffer_flush(intel_batchbuffer_t*);
+extern int intel_batchbuffer_reset(intel_batchbuffer_t*, size_t sz);
+
+static INLINE uint32_t
+intel_batchbuffer_space(const intel_batchbuffer_t *batch)
+{
+ assert(batch->ptr);
+ return batch->size - (batch->ptr - batch->map);
+}
+
+static INLINE void
+intel_batchbuffer_emit_dword(intel_batchbuffer_t *batch, uint32_t x)
+{
+ assert(intel_batchbuffer_space(batch) >= 4);
+ *(uint32_t*)batch->ptr = x;
+ batch->ptr += 4;
+}
+
+static INLINE void
+intel_batchbuffer_require_space(intel_batchbuffer_t *batch, uint32_t size) {
+ assert(size < batch->size - 8);
+ if (intel_batchbuffer_space(batch) < size)
+ intel_batchbuffer_space(batch);
+}
+
+static INLINE uint8_t*
+intel_batchbuffer_alloc_space(intel_batchbuffer_t *batch, uint32_t size)
+{
+ assert(intel_batchbuffer_space(batch) >= size);
+ uint8_t *space_ptr = batch->ptr;
+ batch->ptr += size;
+ return space_ptr;
+}
+
+static INLINE void
+intel_batchbuffer_start_atomic(intel_batchbuffer_t *batch, uint32_t size)
+{
+ assert(!batch->atomic);
+ intel_batchbuffer_require_space(batch, size);
+ batch->atomic = 1;
+}
+
+static INLINE void
+intel_batchbuffer_end_atomic(intel_batchbuffer_t *batch)
+{
+ assert(batch->atomic);
+ batch->atomic = 0;
+}
+
+#endif /* _INTEL_BATCHBUFFER_H_ */
+
diff --git a/src/gen/intel_cl_gl_share_image_info.h b/src/gen/intel_cl_gl_share_image_info.h
new file mode 100644
index 0000000..21fbbd1
--- /dev/null
+++ b/src/gen/intel_cl_gl_share_image_info.h
@@ -0,0 +1,18 @@
+#ifndef __INTEL_CL_GL_SHARE_IMAGE_INFO_
+#define __INTEL_CL_GL_SHARE_IMAGE_INFO_
+
+struct _intel_cl_gl_share_image_info {
+ int fd;
+ size_t w;
+ size_t h;
+ size_t depth;
+ size_t pitch;
+ int tiling;
+ size_t offset;
+ size_t tile_x;
+ size_t tile_y;
+ unsigned int gl_format;
+ size_t row_pitch, slice_pitch;
+};
+
+#endif
diff --git a/src/gen/intel_defines.h b/src/gen/intel_defines.h
new file mode 100644
index 0000000..6ada30c
--- /dev/null
+++ b/src/gen/intel_defines.h
@@ -0,0 +1,351 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith at tungstengraphics.com>
+ */
+#ifndef __GENX_DEFINES_H__
+#define __GENX_DEFINES_H__
+
+#define CMD(PIPELINE,OP,SUB_OP) ((3 << 29) | \
+ ((PIPELINE) << 27) | \
+ ((OP) << 24) | \
+ ((SUB_OP) << 16))
+
+#define CMD_URB_FENCE CMD(0, 0, 0)
+#define CMD_CS_URB_STATE CMD(0, 0, 1)
+#define CMD_CONSTANT_BUFFER CMD(0, 0, 2)
+#define CMD_STATE_PREFETCH CMD(0, 0, 3)
+#define CMD_MEDIA_GATEWAY_STATE CMD(2, 0, 3)
+#define CMD_MEDIA_STATE_FLUSH CMD(2, 0, 4)
+#define CMD_GPGPU_WALKER CMD(2, 1, 5)
+#define CMD_PIPE_CONTROL CMD(3, 2, 0)
+
+#define CMD_LOAD_REGISTER_IMM (0x22 << 23)
+
+#define CMD_STATE_BASE_ADDRESS CMD(0, 1, 1)
+#define CMD_STATE_SIP CMD(0, 1, 2)
+#define CMD_PIPELINE_SELECT CMD(1, 1, 4)
+#define CMD_SAMPLER_PALETTE_LOAD CMD(3, 1, 2)
+
+#define CMD_MEDIA_STATE_POINTERS CMD(2, 0, 0)
+#define CMD_MEDIA CMD(2, 1, 0)
+#define CMD_MEDIA_EX CMD(2, 1, 1)
+
+#define CMD_PIPELINED_POINTERS CMD(3, 0, 0)
+#define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1)
+#define CMD_VERTEX_BUFFERS CMD(3, 0, 8)
+#define CMD_VERTEX_ELEMENTS CMD(3, 0, 9)
+#define CMD_DRAWING_RECTANGLE CMD(3, 1, 0)
+#define CMD_CONSTANT_COLOR CMD(3, 1, 1)
+#define CMD_3DPRIMITIVE CMD(3, 3, 0)
+
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+#define PIPELINE_SELECT_GPGPU 2
+#define PIPELINE_SELECT_MASK (3 << 8)
+
+#define UF0_CS_REALLOC (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC (1 << 11)
+#define UF0_CLIP_REALLOC (1 << 10)
+#define UF0_GS_REALLOC (1 << 9)
+#define UF0_VS_REALLOC (1 << 8)
+#define UF1_CLIP_FENCE_SHIFT 20
+#define UF1_GS_FENCE_SHIFT 10
+#define UF1_VS_FENCE_SHIFT 0
+#define UF2_CS_FENCE_SHIFT 20
+#define UF2_VFE_FENCE_SHIFT 10
+#define UF2_SF_FENCE_SHIFT 0
+
+#define FLOATING_POINT_IEEE_754 0
+#define FLOATING_POINT_NON_IEEE_754 1
+
+#define I965_SURFACE_1D 0
+#define I965_SURFACE_2D 1
+#define I965_SURFACE_3D 2
+#define I965_SURFACE_CUBE 3
+#define I965_SURFACE_BUFFER 4
+#define I965_SURFACE_NULL 7
+
+#define I965_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define I965_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define I965_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define I965_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define I965_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define I965_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define I965_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define I965_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define I965_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define I965_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define I965_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define I965_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define I965_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define I965_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define I965_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define I965_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define I965_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define I965_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define I965_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define I965_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define I965_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define I965_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define I965_SURFACEFORMAT_R32G32_SINT 0x086
+#define I965_SURFACEFORMAT_R32G32_UINT 0x087
+#define I965_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define I965_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define I965_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define I965_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define I965_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define I965_SURFACEFORMAT_R64_FLOAT 0x08D
+#define I965_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define I965_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define I965_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define I965_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define I965_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define I965_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define I965_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define I965_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define I965_SURFACEFORMAT_R32G32_USCALED 0x096
+#define I965_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define I965_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define I965_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define I965_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define I965_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define I965_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define I965_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define I965_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define I965_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define I965_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define I965_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define I965_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define I965_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define I965_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define I965_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define I965_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define I965_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define I965_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define I965_SURFACEFORMAT_R32_SINT 0x0D6
+#define I965_SURFACEFORMAT_R32_UINT 0x0D7
+#define I965_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define I965_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define I965_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define I965_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define I965_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define I965_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define I965_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define I965_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define I965_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define I965_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define I965_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define I965_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define I965_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define I965_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define I965_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define I965_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define I965_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define I965_SURFACEFORMAT_R32_UNORM 0x0F1
+#define I965_SURFACEFORMAT_R32_SNORM 0x0F2
+#define I965_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define I965_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define I965_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define I965_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define I965_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define I965_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define I965_SURFACEFORMAT_R32_USCALED 0x0F9
+#define I965_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define I965_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define I965_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define I965_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define I965_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define I965_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define I965_SURFACEFORMAT_R8G8_UNORM 0x106
+#define I965_SURFACEFORMAT_R8G8_SNORM 0x107
+#define I965_SURFACEFORMAT_R8G8_SINT 0x108
+#define I965_SURFACEFORMAT_R8G8_UINT 0x109
+#define I965_SURFACEFORMAT_R16_UNORM 0x10A
+#define I965_SURFACEFORMAT_R16_SNORM 0x10B
+#define I965_SURFACEFORMAT_R16_SINT 0x10C
+#define I965_SURFACEFORMAT_R16_UINT 0x10D
+#define I965_SURFACEFORMAT_R16_FLOAT 0x10E
+#define I965_SURFACEFORMAT_I16_UNORM 0x111
+#define I965_SURFACEFORMAT_L16_UNORM 0x112
+#define I965_SURFACEFORMAT_A16_UNORM 0x113
+#define I965_SURFACEFORMAT_L8A8_UNORM 0x114
+#define I965_SURFACEFORMAT_I16_FLOAT 0x115
+#define I965_SURFACEFORMAT_L16_FLOAT 0x116
+#define I965_SURFACEFORMAT_A16_FLOAT 0x117
+#define I965_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define I965_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define I965_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define I965_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define I965_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define I965_SURFACEFORMAT_R16_SSCALED 0x11E
+#define I965_SURFACEFORMAT_R16_USCALED 0x11F
+#define I965_SURFACEFORMAT_R8_UNORM 0x140
+#define I965_SURFACEFORMAT_R8_SNORM 0x141
+#define I965_SURFACEFORMAT_R8_SINT 0x142
+#define I965_SURFACEFORMAT_R8_UINT 0x143
+#define I965_SURFACEFORMAT_A8_UNORM 0x144
+#define I965_SURFACEFORMAT_I8_UNORM 0x145
+#define I965_SURFACEFORMAT_L8_UNORM 0x146
+#define I965_SURFACEFORMAT_P4A4_UNORM 0x147
+#define I965_SURFACEFORMAT_A4P4_UNORM 0x148
+#define I965_SURFACEFORMAT_R8_SSCALED 0x149
+#define I965_SURFACEFORMAT_R8_USCALED 0x14A
+#define I965_SURFACEFORMAT_R1_UINT 0x181
+#define I965_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define I965_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define I965_SURFACEFORMAT_BC1_UNORM 0x186
+#define I965_SURFACEFORMAT_BC2_UNORM 0x187
+#define I965_SURFACEFORMAT_BC3_UNORM 0x188
+#define I965_SURFACEFORMAT_BC4_UNORM 0x189
+#define I965_SURFACEFORMAT_BC5_UNORM 0x18A
+#define I965_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define I965_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define I965_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define I965_SURFACEFORMAT_MONO8 0x18E
+#define I965_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define I965_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define I965_SURFACEFORMAT_DXT1_RGB 0x191
+#define I965_SURFACEFORMAT_FXT1 0x192
+#define I965_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define I965_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define I965_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define I965_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define I965_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define I965_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define I965_SURFACEFORMAT_BC4_SNORM 0x199
+#define I965_SURFACEFORMAT_BC5_SNORM 0x19A
+#define I965_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define I965_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define I965_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define I965_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define I965_SURFACEFORMAT_RAW 0x1FF
+
+#define I965_MAPFILTER_NEAREST 0x0
+#define I965_MAPFILTER_LINEAR 0x1
+#define I965_MAPFILTER_ANISOTROPIC 0x2
+
+#define I965_MIPFILTER_NONE 0
+#define I965_MIPFILTER_NEAREST 1
+#define I965_MIPFILTER_LINEAR 3
+
+#define I965_TEXCOORDMODE_WRAP 0
+#define I965_TEXCOORDMODE_MIRROR 1
+#define I965_TEXCOORDMODE_CLAMP 2
+#define I965_TEXCOORDMODE_CUBE 3
+#define I965_TEXCOORDMODE_CLAMP_BORDER 4
+#define I965_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define I965_SURFACERETURNFORMAT_FLOAT32 0
+#define I965_SURFACERETURNFORMAT_S1 1
+
+#define I965_TILEWALK_XMAJOR 0
+#define I965_TILEWALK_YMAJOR 1
+
+#define GEN8_TILEMODE_LINEAR 0
+#define GEN8_TILEMODE_WMAJOR 1
+#define GEN8_TILEMODE_XMAJOR 2
+#define GEN8_TILEMODE_YMAJOR 3
+
+#define I965_SURCHAN_SELECT_ZERO 0
+#define I965_SURCHAN_SELECT_ONE 1
+#define I965_SURCHAN_SELECT_RED 4
+#define I965_SURCHAN_SELECT_GREEN 5
+#define I965_SURCHAN_SELECT_BLUE 6
+#define I965_SURCHAN_SELECT_ALPHA 7
+
+#define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
+ IS_G4X(intel->device_id) ? 384 : 256)
+// HSW
+#define HSW_SCRATCH1_OFFSET (0xB038)
+#define HSW_ROW_CHICKEN3_HDC_OFFSET (0xE49C)
+
+// L3 cache stuff
+#define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010)
+#define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET (0xB020)
+#define GEN7_L3_CNTL_REG3_ADDRESS_OFFSET (0xB024)
+
+#define GEN8_L3_CNTL_REG_ADDRESS_OFFSET (0x7034)
+
+// To issue pipe controls (reset L3 / SLM or stall)
+#define GEN7_PIPE_CONTROL_MEDIA 0x2
+#define GEN7_PIPE_CONTROL_3D 0x3
+#define GEN7_PIPE_CONTROL_INSTRUCTION_GFX 0x3
+#define GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL 0x2
+#define GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL 0x0
+#define GEN7_PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
+#define GEN7_PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+
+
+#define GEN_MAPFILTER_NEAREST 0x0
+#define GEN_MAPFILTER_LINEAR 0x1
+#define GEN_MAPFILTER_ANISOTROPIC 0x2
+
+#define GEN_MIPFILTER_NONE 0
+#define GEN_MIPFILTER_NEAREST 1
+#define GEN_MIPFILTER_LINEAR 3
+
+#define GEN_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20
+#define GEN_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10
+#define GEN_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08
+#define GEN_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04
+#define GEN_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02
+#define GEN_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01
+
+#define GEN_TEXCOORDMODE_WRAP 0
+#define GEN_TEXCOORDMODE_MIRROR 1
+#define GEN_TEXCOORDMODE_CLAMP 2
+#define GEN_TEXCOORDMODE_CUBE 3
+#define GEN_TEXCOORDMODE_CLAMP_BORDER 4
+#define GEN_TEXCOORDMODE_MIRROR_ONCE 5
+
+#endif /* __GENX_DEFINES_H__ */
+
diff --git a/src/gen/intel_driver.c b/src/gen/intel_driver.c
new file mode 100644
index 0000000..b8a1b52
--- /dev/null
+++ b/src/gen/intel_driver.c
@@ -0,0 +1,1042 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang at intel.com>
+ * Zou Nan hai <nanhai.zou at intel.com>
+ *
+ */
+
+#if defined(HAS_GL_EGL)
+#define EGL_EGLEXT_PROTOTYPES
+#include "GL/gl.h"
+#include "EGL/egl.h"
+#include <EGL/eglext.h>
+#endif
+
+#ifdef HAS_X11
+#include <X11/Xlibint.h>
+#include "x11/dricommon.h"
+#endif
+
+#include "intel_driver.h"
+#include "intel_gpgpu.h"
+#include "intel_batchbuffer.h"
+#include "intel_bufmgr.h"
+#include "cl_mem.h"
+
+#include <assert.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <xf86drm.h>
+#include <stdio.h>
+
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_context.h"
+#include "cl_driver.h"
+#include "cl_device_id.h"
+#include "cl_platform_id.h"
+
+static void
+intel_driver_delete(intel_driver_t *driver)
+{
+ if (driver == NULL)
+ return;
+
+ cl_free(driver);
+}
+
+static intel_driver_t*
+intel_driver_new(void)
+{
+ intel_driver_t *driver = NULL;
+
+ TRY_ALLOC_NO_ERR (driver, CALLOC(intel_driver_t));
+ driver->fd = -1;
+
+exit:
+ return driver;
+error:
+intel_driver_delete(driver);
+driver = NULL;
+goto exit;
+}
+
+/* just used for maximum relocation number in drm_intel */
+#define BATCH_SIZE 0x4000
+
+/* set OCL_DUMP_AUB=1 to get aub file */
+static void
+intel_driver_aub_dump(intel_driver_t *driver)
+{
+char *val;
+val = getenv("OCL_DUMP_AUB");
+if (!val)
+ return;
+if (atoi(val) != 0) {
+ drm_intel_bufmgr_gem_set_aub_filename(driver->bufmgr,
+ "beignet.aub");
+ drm_intel_bufmgr_gem_set_aub_dump(driver->bufmgr, 1);
+}
+}
+
+static int
+intel_driver_memman_init(intel_driver_t *driver)
+{
+driver->bufmgr = drm_intel_bufmgr_gem_init(driver->fd, BATCH_SIZE);
+if (!driver->bufmgr) return 0;
+drm_intel_bufmgr_gem_enable_reuse(driver->bufmgr);
+driver->device_id = drm_intel_bufmgr_gem_get_devid(driver->bufmgr);
+intel_driver_aub_dump(driver);
+return 1;
+}
+
+static int
+intel_driver_context_init(intel_driver_t *driver)
+{
+driver->ctx = drm_intel_gem_context_create(driver->bufmgr);
+if (!driver->ctx)
+ return 0;
+driver->null_bo = NULL;
+#ifdef HAS_BO_SET_SOFTPIN
+drm_intel_bo *bo = dri_bo_alloc(driver->bufmgr, "null_bo", 64*1024, 4096);
+drm_intel_bo_set_softpin_offset(bo, 0);
+// don't reuse it, that would make two bo trying to bind to same address,
+// which is un-reasonable.
+drm_intel_bo_disable_reuse(bo);
+driver->null_bo = bo;
+#endif
+return 1;
+}
+
+static void
+intel_driver_context_destroy(intel_driver_t *driver)
+{
+if (driver->null_bo)
+ drm_intel_bo_unreference(driver->null_bo);
+if(driver->ctx)
+ drm_intel_gem_context_destroy(driver->ctx);
+driver->ctx = NULL;
+}
+
+static int
+intel_driver_init(intel_driver_t *driver, int dev_fd)
+{
+driver->fd = dev_fd;
+driver->locked = 0;
+pthread_mutex_init(&driver->ctxmutex, NULL);
+
+if (!intel_driver_memman_init(driver)) return 0;
+if (!intel_driver_context_init(driver)) return 0;
+
+#if EMULATE_GEN
+driver->gen_ver = EMULATE_GEN;
+if (EMULATE_GEN == 75)
+ driver->device_id = PCI_CHIP_HASWELL_L; /* we pick L for HSW */
+else if (EMULATE_GEN == 7)
+ driver->device_id = PCI_CHIP_IVYBRIDGE_GT2; /* we pick GT2 for IVB */
+else if (EMULATE_GEN == 6)
+ driver->device_id = PCI_CHIP_SANDYBRIDGE_GT2; /* we pick GT2 for SNB */
+else
+ FATAL ("Unsupported Gen for emulation");
+#else
+if (IS_GEN9(driver->device_id))
+ driver->gen_ver = 9;
+else if (IS_GEN8(driver->device_id))
+ driver->gen_ver = 8;
+else if (IS_GEN75(driver->device_id))
+ driver->gen_ver = 75;
+else if (IS_GEN7(driver->device_id))
+ driver->gen_ver = 7;
+else if (IS_GEN6(driver->device_id))
+ driver->gen_ver = 6;
+else if(IS_IGDNG(driver->device_id))
+ driver->gen_ver = 5;
+else
+ driver->gen_ver = 4;
+#endif /* EMULATE_GEN */
+return 1;
+}
+
+static cl_int
+intel_driver_open(intel_driver_t *intel, cl_context_prop props)
+{
+int cardi;
+#ifdef HAS_X11
+char *driver_name;
+#endif
+if (props != NULL
+ && props->gl_type != CL_GL_NOSHARE
+ && props->gl_type != CL_GL_GLX_DISPLAY
+ && props->gl_type != CL_GL_EGL_DISPLAY) {
+ fprintf(stderr, "Unsupported gl share type %d.\n", props->gl_type);
+ return CL_INVALID_OPERATION;
+}
+
+#ifdef HAS_X11
+intel->x11_display = XOpenDisplay(NULL);
+
+if(intel->x11_display) {
+ if((intel->dri_ctx = getDRI2State(intel->x11_display,
+ DefaultScreen(intel->x11_display),
+ &driver_name))) {
+ intel_driver_init_shared(intel, intel->dri_ctx);
+ Xfree(driver_name);
+ }
+ else
+ fprintf(stderr, "X server found. dri2 connection failed! \n");
+}
+#endif
+
+if(!intel_driver_is_active(intel)) {
+ char card_name[20];
+ for(cardi = 0; cardi < 16; cardi++) {
+ sprintf(card_name, "/dev/dri/renderD%d", 128+cardi);
+ if (access(card_name, R_OK) != 0)
+ continue;
+ if(intel_driver_init_render(intel, card_name))
+ break;
+ }
+}
+
+if(!intel_driver_is_active(intel)) {
+ char card_name[20];
+ for(cardi = 0; cardi < 16; cardi++) {
+ sprintf(card_name, "/dev/dri/card%d", cardi);
+ if (access(card_name, R_OK) != 0)
+ continue;
+ if(intel_driver_init_master(intel, card_name))
+ break;
+ }
+}
+
+if(!intel_driver_is_active(intel)) {
+ fprintf(stderr, "Device open failed, aborting...\n");
+ return CL_DEVICE_NOT_FOUND;
+}
+
+#ifdef HAS_GL_EGL
+if (props && props->gl_type == CL_GL_EGL_DISPLAY) {
+ assert(props->egl_display);
+}
+#endif
+return CL_SUCCESS;
+}
+
+static void
+intel_driver_close(intel_driver_t *intel)
+{
+//Due to the drm change about the test userptr, we need to destroy the bufmgr
+//before the driver is closed, otherwise the test userptr will not be freed.
+if (intel->bufmgr)
+ drm_intel_bufmgr_destroy(intel->bufmgr);
+#ifdef HAS_X11
+if(intel->dri_ctx) dri_state_release(intel->dri_ctx);
+if(intel->x11_display) XCloseDisplay(intel->x11_display);
+#endif
+if(intel->need_close) {
+ close(intel->fd);
+ intel->need_close = 0;
+}
+intel->dri_ctx = NULL;
+intel->x11_display = NULL;
+intel->fd = -1;
+}
+
+LOCAL int
+intel_driver_is_active(intel_driver_t *driver) {
+return driver->fd >= 0;
+}
+
+#ifdef HAS_X11
+LOCAL int
+intel_driver_init_shared(intel_driver_t *driver, dri_state_t *state)
+{
+int ret;
+assert(state);
+if(state->driConnectedFlag != DRI2)
+ return 0;
+ret = intel_driver_init(driver, state->fd);
+driver->need_close = 0;
+return ret;
+}
+#endif
+
+LOCAL int
+intel_driver_init_master(intel_driver_t *driver, const char* dev_name)
+{
+int dev_fd, ret;
+
+drm_client_t client;
+
+// usually dev_name = "/dev/dri/card%d"
+dev_fd = open(dev_name, O_RDWR);
+if (dev_fd == -1) {
+ fprintf(stderr, "open(\"%s\", O_RDWR) failed: %s\n", dev_name, strerror(errno));
+ return 0;
+}
+
+// Check that we're authenticated
+memset(&client, 0, sizeof(drm_client_t));
+ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client);
+if (ret == -1) {
+ fprintf(stderr, "ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client) failed: %s\n", strerror(errno));
+ close(dev_fd);
+ return 0;
+}
+
+if (!client.auth) {
+ fprintf(stderr, "%s not authenticated\n", dev_name);
+ close(dev_fd);
+ return 0;
+}
+
+ret = intel_driver_init(driver, dev_fd);
+driver->need_close = 1;
+
+return ret;
+}
+
+LOCAL int
+intel_driver_init_render(intel_driver_t *driver, const char* dev_name)
+{
+int dev_fd, ret;
+
+dev_fd = open(dev_name, O_RDWR);
+if (dev_fd == -1)
+ return 0;
+
+ret = intel_driver_init(driver, dev_fd);
+driver->need_close = 1;
+
+return ret;
+}
+
+LOCAL int
+intel_driver_terminate(intel_driver_t *driver)
+{
+pthread_mutex_destroy(&driver->ctxmutex);
+
+if(driver->need_close) {
+ close(driver->fd);
+ driver->need_close = 0;
+}
+driver->fd = -1;
+return 1;
+}
+
+LOCAL void
+intel_driver_lock_hardware(intel_driver_t *driver)
+{
+
+PPTHREAD_MUTEX_LOCK(driver);
+assert(!driver->locked);
+driver->locked = 1;
+}
+
+LOCAL void
+intel_driver_unlock_hardware(intel_driver_t *driver)
+{
+driver->locked = 0;
+PPTHREAD_MUTEX_UNLOCK(driver);
+}
+
+LOCAL dri_bo*
+intel_driver_share_buffer_from_name(intel_driver_t *driver, const char *sname, uint32_t name)
+{
+dri_bo *bo = intel_bo_gem_create_from_name(driver->bufmgr,
+ sname,
+ name);
+if (bo == NULL) {
+ fprintf(stderr, "intel_bo_gem_create_from_name create \"%s\" bo from name %d failed: %s\n", sname, name, strerror(errno));
+ return NULL;
+}
+return bo;
+}
+
+LOCAL dri_bo*
+intel_driver_share_buffer_from_fd(intel_driver_t *driver, int fd, int size)
+{
+dri_bo *bo = drm_intel_bo_gem_create_from_prime(driver->bufmgr,
+ fd,
+ size);
+if (bo == NULL) {
+ fprintf(stderr, "drm_intel_bo_gem_create_from_prime create bo(size %d) from fd %d failed: %s\n", size, fd, strerror(errno));
+ return NULL;
+}
+return bo;
+}
+
+LOCAL uint32_t
+intel_driver_shared_name(intel_driver_t *driver, dri_bo *bo)
+{
+uint32_t name;
+assert(bo);
+dri_bo_flink(bo, &name);
+return name;
+}
+/* XXX a null props is ok? */
+static int
+intel_get_device_id(void)
+{
+intel_driver_t *driver = NULL;
+int intel_device_id;
+
+driver = intel_driver_new();
+assert(driver != NULL);
+if(UNLIKELY(intel_driver_open(driver, NULL) != CL_SUCCESS)) return INVALID_CHIP_ID;
+intel_device_id = driver->device_id;
+intel_driver_context_destroy(driver);
+intel_driver_close(driver);
+intel_driver_terminate(driver);
+intel_driver_delete(driver);
+
+return intel_device_id;
+}
+
+extern void intel_gpgpu_delete_all(intel_driver_t *driver);
+static void
+cl_intel_driver_delete(intel_driver_t *driver)
+{
+if (driver == NULL)
+ return;
+intel_gpgpu_delete_all(driver);
+intel_driver_context_destroy(driver);
+intel_driver_close(driver);
+intel_driver_terminate(driver);
+intel_driver_delete(driver);
+}
+
+#include "cl_gbe_loader.h"
+static intel_driver_t*
+cl_intel_driver_new(cl_context_prop props)
+{
+intel_driver_t *driver = NULL;
+TRY_ALLOC_NO_ERR (driver, intel_driver_new());
+if(UNLIKELY(intel_driver_open(driver, props) != CL_SUCCESS)) goto error;
+exit:
+return driver;
+error:
+cl_intel_driver_delete(driver);
+driver = NULL;
+goto exit;
+}
+
+static drm_intel_bufmgr*
+intel_driver_get_bufmgr(intel_driver_t *drv)
+{
+return drv->bufmgr;
+}
+
+static uint32_t
+intel_driver_get_ver(struct intel_driver *drv)
+{
+return drv->gen_ver;
+}
+
+static void
+intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t *stack_size)
+{
+ if (drv->gen_ver == 75)
+ *stack_size = *stack_size * 4;
+ else if (drv->device_id == PCI_CHIP_BROXTON_1 || drv->device_id == PCI_CHIP_BROXTON_3 ||
+ IS_CHERRYVIEW(drv->device_id))
+ *stack_size = *stack_size * 2;
+}
+
+static void
+intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag)
+{
+drv->atomic_test_result = atomic_flag;
+}
+
+static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
+static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
+
+static int get_cl_tiling(uint32_t drm_tiling)
+{
+switch(drm_tiling) {
+case I915_TILING_X: return CL_TILE_X;
+case I915_TILING_Y: return CL_TILE_Y;
+case I915_TILING_NONE: return CL_NO_TILE;
+default:
+ assert(0);
+}
+return CL_NO_TILE;
+}
+
+static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mode, uint32_t dim)
+{
+uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver;
+uint32_t ret = 0;
+
+switch (tiling_mode) {
+case CL_TILE_X:
+ if (dim == 0) { //tileX width in bytes
+ ret = 512;
+ } else if (dim == 1) { //tileX height in number of rows
+ ret = 8;
+ } else if (dim == 2) { //height to calculate slice pitch
+ if (gen_ver == 9) //SKL same as tileY height
+ ret = 8;
+ else if (gen_ver == 8) //IVB, HSW, BDW same as CL_NO_TILE vertical alignment
+ ret = 4;
+ else
+ ret = 2;
+ } else
+ assert(0);
+ break;
+
+case CL_TILE_Y:
+ if (dim == 0) { //tileY width in bytes
+ ret = 128;
+ } else if (dim == 1) { //tileY height in number of rows
+ ret = 32;
+ } else if (dim == 2) { //height to calculate slice pitch
+ if (gen_ver == 9) //SKL same as tileY height
+ ret = 32;
+ else if (gen_ver == 8) //IVB, HSW, BDW same as CL_NO_TILE vertical alignment
+ ret = 4;
+ else
+ ret = 2;
+ } else
+ assert(0);
+ break;
+
+case CL_NO_TILE:
+ if (dim == 1 || dim == 2) { //vertical alignment
+ if (gen_ver == 8 || gen_ver == 9) //SKL 1D array need 4 alignment qpitch
+ ret = 4;
+ else
+ ret = 2;
+ } else
+ assert(0);
+ break;
+}
+
+return ret;
+}
+
+#if defined(HAS_GL_EGL)
+#include "intel_cl_gl_share_image_info.h"
+#include "cl_image.h"
+
+static PFNEGLEXPORTDMABUFIMAGEMESAPROC eglExportDMABUFImageMESA_func = NULL;
+
+static int
+get_required_egl_extensions(){
+
+if(eglExportDMABUFImageMESA_func == NULL){
+ eglExportDMABUFImageMESA_func = (PFNEGLEXPORTDMABUFIMAGEMESAPROC) eglGetProcAddress("eglExportDMABUFImageMESA");
+ if(eglExportDMABUFImageMESA_func == NULL){
+ fprintf(stderr, "Failed to get EGL extension function eglExportDMABUFImageMESA\n");
+ return -1;
+ }
+}
+return 0;
+}
+
+
+static int cl_get_clformat_from_texture(GLint tex_format, cl_image_format * cl_format)
+{
+cl_int ret = CL_SUCCESS;
+
+switch (tex_format) {
+case GL_RGBA8:
+case GL_RGBA:
+case GL_RGBA16:
+case GL_RGBA8I:
+case GL_RGBA16I:
+case GL_RGBA32I:
+case GL_RGBA8UI:
+case GL_RGBA16UI:
+case GL_RGBA32UI:
+case GL_RGBA16F:
+case GL_RGBA32F:
+ cl_format->image_channel_order = CL_RGBA;
+ break;
+case GL_BGRA:
+ cl_format->image_channel_order = CL_BGRA;
+ break;
+default:
+ ret = -1;
+ goto error;
+}
+
+switch (tex_format) {
+case GL_RGBA8:
+case GL_RGBA:
+case GL_BGRA:
+ cl_format->image_channel_data_type = CL_UNORM_INT8;
+ break;
+case GL_RGBA16:
+ cl_format->image_channel_data_type = CL_UNORM_INT16;
+ break;
+case GL_RGBA8I:
+ cl_format->image_channel_data_type = CL_SIGNED_INT8;
+ break;
+case GL_RGBA16I:
+ cl_format->image_channel_data_type = CL_SIGNED_INT16;
+ break;
+case GL_RGBA32I:
+ cl_format->image_channel_data_type = CL_SIGNED_INT32;
+ break;
+case GL_RGBA8UI:
+ cl_format->image_channel_data_type = CL_UNSIGNED_INT8;
+ break;
+case GL_RGBA16UI:
+ cl_format->image_channel_data_type = CL_UNSIGNED_INT16;
+ break;
+case GL_RGBA32UI:
+ cl_format->image_channel_data_type = CL_UNSIGNED_INT32;
+ break;
+case GL_RGBA16F:
+ cl_format->image_channel_data_type = CL_HALF_FLOAT;
+ break;
+case GL_RGBA32F:
+  cl_format->image_channel_data_type = CL_FLOAT;
+ break;
+default:
+ ret = -1;
+ goto error;
+}
+
+error:
+return ret;
+}
+
+static int
+get_mem_type_from_target(GLenum texture_target, cl_mem_object_type *type)
+{
+switch(texture_target) {
+case GL_TEXTURE_1D: *type = CL_MEM_OBJECT_IMAGE1D; break;
+case GL_TEXTURE_2D: *type = CL_MEM_OBJECT_IMAGE2D; break;
+case GL_TEXTURE_3D: *type = CL_MEM_OBJECT_IMAGE3D; break;
+case GL_TEXTURE_1D_ARRAY: *type = CL_MEM_OBJECT_IMAGE1D_ARRAY; break;
+case GL_TEXTURE_2D_ARRAY: *type = CL_MEM_OBJECT_IMAGE2D_ARRAY; break;
+default:
+ return -1;
+}
+return CL_SUCCESS;
+}
+
+static cl_buffer
+intel_alloc_buffer_from_texture_egl(cl_context ctx, unsigned int target,
+ int miplevel, unsigned int texture,
+ struct _cl_mem_image *image)
+{
+drm_intel_bo *intel_bo = NULL;
+struct _intel_cl_gl_share_image_info info;
+unsigned int bpp, intel_fmt;
+cl_image_format cl_format;
+EGLBoolean ret;
+
+EGLenum e_target;
+//We just support GL_TEXTURE_2D because we can't query info like slice_pitch now.
+if(target == GL_TEXTURE_2D)
+ e_target = EGL_GL_TEXTURE_2D;
+else
+ return NULL;
+
+if(get_required_egl_extensions() != 0)
+ return NULL;
+
+EGLAttrib attrib_list[] = {EGL_GL_TEXTURE_LEVEL, miplevel,
+ EGL_NONE};
+EGLImage e_image = eglCreateImage(EGL_DISP(ctx), EGL_CTX(ctx), e_target,
+ (EGLClientBuffer)texture, &attrib_list[0]);
+if(e_image == EGL_NO_IMAGE)
+ return NULL;
+
+int fd, stride, offset;
+ret = eglExportDMABUFImageMESA_func(EGL_DISP(ctx), e_image, &fd, &stride, &offset);
+if(ret != EGL_TRUE){
+ eglDestroyImage(EGL_DISP(ctx), e_image);
+ return NULL;
+}
+info.fd = fd;
+
+/* The size argument just takes effect in intel_driver_share_buffer_from_fd when
+ * Linux kernel is older than 3.12, so it doesn't matter we set to 0 here.
+ */
+int size = 0;
+intel_bo = intel_driver_share_buffer_from_fd((intel_driver_t *)ctx->drv, fd, size);
+
+if (intel_bo == NULL) {
+ eglDestroyImage(EGL_DISP(ctx), e_image);
+ return NULL;
+}
+
+GLint param_value;
+glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_WIDTH, &param_value);
+info.w = param_value;
+glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_HEIGHT, &param_value);
+info.h = param_value;
+glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_DEPTH, &param_value);
+info.depth = 1;
+info.pitch = stride;
+uint32_t tiling_mode, swizzle_mode;
+drm_intel_bo_get_tiling(intel_bo, &tiling_mode, &swizzle_mode);
+info.offset = offset;
+info.tile_x = 0;
+info.tile_y = 0;
+glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, &param_value);
+info.gl_format = param_value;
+info.row_pitch = stride;
+info.slice_pitch = 0;
+
+info.tiling = get_cl_tiling(tiling_mode);
+if (cl_get_clformat_from_texture(info.gl_format, &cl_format) != 0)
+ goto error;
+
+if (cl_image_byte_per_pixel(&cl_format, &bpp) != CL_SUCCESS)
+ goto error;
+intel_fmt = cl_image_get_intel_format(&cl_format);
+if (intel_fmt == INTEL_UNSUPPORTED_FORMAT)
+ goto error;
+cl_mem_object_type image_type;
+if (get_mem_type_from_target(target, &image_type) != 0)
+ goto error;
+
+cl_mem_image_init(image, info.w, info.h,
+ image_type, info.depth, cl_format,
+ intel_fmt, bpp, info.row_pitch,
+ info.slice_pitch, info.tiling,
+ info.tile_x, info.tile_y, info.offset);
+
+struct _cl_mem_gl_image *gl_image = (struct _cl_mem_gl_image*)image;
+gl_image->fd = fd;
+gl_image->egl_image = e_image;
+
+return (cl_buffer) intel_bo;
+
+error:
+drm_intel_bo_unreference(intel_bo);
+close(fd);
+eglDestroyImage(EGL_DISP(ctx), e_image);
+return NULL;
+}
+
+static cl_buffer
+intel_alloc_buffer_from_texture(cl_context ctx, unsigned int target,
+ int miplevel, unsigned int texture,
+ struct _cl_mem_image *image)
+{
+
+if (IS_EGL_CONTEXT(ctx))
+ return intel_alloc_buffer_from_texture_egl(ctx, target, miplevel, texture, image);
+
+return NULL;
+}
+
+static int
+intel_release_buffer_from_texture(cl_context ctx, struct _cl_mem_gl_image *gl_image)
+{
+if (IS_EGL_CONTEXT(ctx)) {
+ close(gl_image->fd);
+ eglDestroyImage(EGL_DISP(ctx), gl_image->egl_image);
+ return CL_SUCCESS;
+}
+return -1;
+}
+#endif
+
+cl_buffer intel_share_buffer_from_libva(cl_context ctx,
+ unsigned int bo_name,
+ size_t *sz)
+{
+drm_intel_bo *intel_bo;
+
+intel_bo = intel_driver_share_buffer_from_name((intel_driver_t *)ctx->drv, "shared from libva", bo_name);
+
+if (intel_bo == NULL)
+ return NULL;
+
+if (sz)
+ *sz = intel_bo->size;
+
+return (cl_buffer)intel_bo;
+}
+
+cl_buffer intel_share_image_from_libva(cl_context ctx,
+ unsigned int bo_name,
+ struct _cl_mem_image *image)
+{
+drm_intel_bo *intel_bo;
+uint32_t intel_tiling, intel_swizzle_mode;
+
+intel_bo = intel_driver_share_buffer_from_name((intel_driver_t *)ctx->drv, "shared from libva", bo_name);
+
+if (intel_bo == NULL)
+ return NULL;
+
+drm_intel_bo_get_tiling(intel_bo, &intel_tiling, &intel_swizzle_mode);
+image->tiling = get_cl_tiling(intel_tiling);
+
+return (cl_buffer)intel_bo;
+}
+
+cl_buffer intel_share_buffer_from_fd(cl_context ctx,
+ int fd,
+ int buffer_size)
+{
+drm_intel_bo *intel_bo;
+
+intel_bo = intel_driver_share_buffer_from_fd((intel_driver_t *)ctx->drv, fd, buffer_size);
+
+if (intel_bo == NULL)
+ return NULL;
+
+return (cl_buffer)intel_bo;
+}
+
+cl_buffer intel_share_image_from_fd(cl_context ctx,
+ int fd,
+ int image_size,
+ struct _cl_mem_image *image)
+{
+drm_intel_bo *intel_bo;
+uint32_t intel_tiling, intel_swizzle_mode;
+
+intel_bo = intel_driver_share_buffer_from_fd((intel_driver_t *)ctx->drv, fd, image_size);
+
+if (intel_bo == NULL)
+ return NULL;
+
+drm_intel_bo_get_tiling(intel_bo, &intel_tiling, &intel_swizzle_mode);
+image->tiling = get_cl_tiling(intel_tiling);
+
+return (cl_buffer)intel_bo;
+}
+
+static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const char* name, void *data,size_t size, unsigned long flags)
+{
+#ifdef HAS_USERPTR
+drm_intel_bo *bo;
+bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags);
+/* Fallback to unsynchronized userptr allocation if kernel has no MMU notifier enabled. */
+if (bo == NULL)
+ bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED);
+return (cl_buffer)bo;
+#else
+return NULL;
+#endif
+}
+
+static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling)
+{
+switch (tiling) {
+ case CL_NO_TILE:
+ *intel_tiling = I915_TILING_NONE;
+ break;
+ case CL_TILE_X:
+ *intel_tiling = I915_TILING_X;
+ break;
+ case CL_TILE_Y:
+ *intel_tiling = I915_TILING_Y;
+ break;
+ default:
+ assert(0);
+ return -1;
+}
+return 0;
+}
+
+static int intel_buffer_set_tiling(cl_buffer bo,
+ cl_image_tiling_t tiling, size_t stride)
+{
+uint32_t intel_tiling;
+int ret;
+if (UNLIKELY((get_intel_tiling(tiling, &intel_tiling)) < 0))
+ return -1;
+#ifndef NDEBUG
+uint32_t required_tiling;
+required_tiling = intel_tiling;
+#endif
+ret = drm_intel_bo_set_tiling((drm_intel_bo*)bo, &intel_tiling, stride);
+assert(intel_tiling == required_tiling);
+return ret;
+}
+
+#define CHV_CONFIG_WARNING \
+ "Warning: can't get GPU's configurations, will use the minimal one. Please update your drm to 2.4.59+ and linux kernel to 4.0.0+.\n"
+static void
+intel_update_device_info(cl_device_id device)
+{
+intel_driver_t *driver;
+
+driver = intel_driver_new();
+assert(driver != NULL);
+if (intel_driver_open(driver, NULL) != CL_SUCCESS) {
+ intel_driver_delete(driver);
+ return;
+}
+
+#ifdef HAS_USERPTR
+const size_t sz = 4096;
+void *host_ptr;
+
+host_ptr = cl_aligned_malloc(sz, 4096);
+if (host_ptr != NULL) {
+ cl_buffer bo = intel_buffer_alloc_userptr((cl_buffer_mgr)driver->bufmgr,
+ "CL memory object", host_ptr, sz, 0);
+ if (bo == NULL)
+ device->host_unified_memory = CL_FALSE;
+ else
+ drm_intel_bo_unreference((drm_intel_bo*)bo);
+ cl_free(host_ptr);
+}
+else
+ device->host_unified_memory = CL_FALSE;
+#endif
+
+#ifdef HAS_EU_TOTAL
+unsigned int eu_total;
+
+/* Prefer driver-queried max compute units if supported */
+if (!drm_intel_get_eu_total(driver->fd, &eu_total))
+ device->max_compute_unit = eu_total;
+else if (IS_CHERRYVIEW(device->device_id))
+ printf(CHV_CONFIG_WARNING);
+#else
+if (IS_CHERRYVIEW(device->device_id)) {
+#if defined(__ANDROID__)
+ device->max_compute_unit = 12;
+#else
+ printf(CHV_CONFIG_WARNING);
+#endif
+}
+#endif
+
+#ifdef HAS_SUBSLICE_TOTAL
+unsigned int subslice_total;
+
+/* Prefer driver-queried subslice count if supported */
+if (!drm_intel_get_subslice_total(driver->fd, &subslice_total))
+ device->sub_slice_count = subslice_total;
+else if (IS_CHERRYVIEW(device->device_id))
+ printf(CHV_CONFIG_WARNING);
+#else
+if (IS_CHERRYVIEW(device->device_id)) {
+#if defined(__ANDROID__)
+ device->sub_slice_count = 2;
+#else
+ printf(CHV_CONFIG_WARNING);
+#endif
+}
+#endif
+
+#ifdef HAS_POOLED_EU
+/* BXT pooled eu, 3*6 to 2*9, like sub slice count is 2 */
+int has_pooled_eu;
+if((has_pooled_eu = drm_intel_get_pooled_eu(driver->fd)) > 0)
+ device->sub_slice_count = 2;
+
+#ifdef HAS_MIN_EU_IN_POOL
+int min_eu;
+/* for fused down 2x6 devices, beignet don't support. */
+if (has_pooled_eu > 0 && (min_eu = drm_intel_get_min_eu_in_pool(driver->fd)) > 0) {
+ assert(min_eu == 9); //don't support fuse down device.
+}
+#endif //HAS_MIN_EU_IN_POOL
+#endif //HAS_POOLED_EU
+//We should get the device memory dynamically, but the
+//mappable mem size usage is unknown. Just ignore it.
+size_t total_mem,map_mem;
+if(drm_intel_get_aperture_sizes(driver->fd,&map_mem,&total_mem) == 0)
+ device->global_mem_size = (cl_ulong)total_mem;
+
+intel_driver_context_destroy(driver);
+intel_driver_close(driver);
+intel_driver_terminate(driver);
+intel_driver_delete(driver);
+}
+
+LOCAL void
+intel_setup_callbacks(void)
+{
+cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
+cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
+cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
+cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *) intel_driver_enlarge_stack_size;
+cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag;
+cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
+cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
+cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info;
+cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
+cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr;
+#ifdef HAS_BO_SET_SOFTPIN
+cl_buffer_set_softpin_offset = (cl_buffer_set_softpin_offset_cb *) drm_intel_bo_set_softpin_offset;
+cl_buffer_set_bo_use_full_range = (cl_buffer_set_bo_use_full_range_cb *) drm_intel_bo_use_48b_address_range;
+#endif
+ cl_buffer_disable_reuse = (cl_buffer_disable_reuse_cb *) drm_intel_bo_disable_reuse;
+ cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
+#if defined(HAS_GL_EGL)
+ cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;
+ cl_buffer_release_from_texture = (cl_buffer_release_from_texture_cb *) intel_release_buffer_from_texture;
+#endif
+ cl_buffer_get_buffer_from_libva = (cl_buffer_get_buffer_from_libva_cb *) intel_share_buffer_from_libva;
+ cl_buffer_get_image_from_libva = (cl_buffer_get_image_from_libva_cb *) intel_share_image_from_libva;
+ cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference;
+ cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference;
+ cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map;
+ cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
+ cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt;
+ cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt;
+ cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized;
+ cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
+ cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
+ cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
+ cl_buffer_unpin = (cl_buffer_unpin_cb *) drm_intel_bo_unpin;
+ cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
+ cl_buffer_get_subdata = (cl_buffer_get_subdata_cb *) drm_intel_bo_get_subdata;
+ cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
+ cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
+ cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb *)intel_buffer_get_tiling_align;
+ cl_buffer_get_buffer_from_fd = (cl_buffer_get_buffer_from_fd_cb *) intel_share_buffer_from_fd;
+ cl_buffer_get_image_from_fd = (cl_buffer_get_image_from_fd_cb *) intel_share_image_from_fd;
+ intel_set_gpgpu_callbacks(intel_get_device_id());
+}
diff --git a/src/gen/intel_driver.h b/src/gen/intel_driver.h
new file mode 100644
index 0000000..adf9e36
--- /dev/null
+++ b/src/gen/intel_driver.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef _INTEL_DRIVER_H_
+#define _INTEL_DRIVER_H_
+
+#include "cl_device_data.h"
+
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+#include "intel_gpgpu.h"
+
+#define CMD_MI (0x0 << 29)
+#define CMD_2D (0x2 << 29)
+
+#define MI_NOOP (CMD_MI | 0)
+#define MI_BATCH_BUFFER_END (CMD_MI | (0xA << 23))
+
+#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x04)
+#define XY_COLOR_BLT_WRITE_ALPHA (1 << 21)
+#define XY_COLOR_BLT_WRITE_RGB (1 << 20)
+#define XY_COLOR_BLT_DST_TILED (1 << 11)
+
+/* BR13 */
+#define BR13_565 (0x1 << 24)
+#define BR13_8888 (0x3 << 24)
+
+struct dri_state;
+struct intel_gpgpu_node;
+typedef struct _XDisplay Display;
+
+typedef struct intel_driver
+{
+ dri_bufmgr *bufmgr;
+ drm_intel_context *ctx;
+ drm_intel_bo *null_bo;
+ int fd;
+ int device_id;
+ int gen_ver;
+ sigset_t sa_mask;
+ pthread_mutex_t ctxmutex;
+ int locked;
+ int need_close;
+ Display *x11_display;
+ struct dri_state *dri_ctx;
+ struct intel_gpgpu_node *gpgpu_list;
+ int atomic_test_result;
+} intel_driver_t;
+
+#define SET_BLOCKED_SIGSET(DRIVER) do { \
+ sigset_t bl_mask; \
+ sigfillset(&bl_mask); \
+ sigdelset(&bl_mask, SIGFPE); \
+ sigdelset(&bl_mask, SIGILL); \
+ sigdelset(&bl_mask, SIGSEGV); \
+ sigdelset(&bl_mask, SIGBUS); \
+ sigdelset(&bl_mask, SIGKILL); \
+ pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
+} while (0)
+
+#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \
+ pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \
+} while (0)
+
+#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \
+ SET_BLOCKED_SIGSET(DRIVER); \
+ pthread_mutex_lock(&(DRIVER)->ctxmutex); \
+} while (0)
+
+#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \
+ pthread_mutex_unlock(&(DRIVER)->ctxmutex); \
+ RESTORE_BLOCKED_SIGSET(DRIVER); \
+} while (0)
+
+/* device control */
+extern void intel_driver_lock_hardware(intel_driver_t*);
+extern void intel_driver_unlock_hardware(intel_driver_t*);
+
+/* methods working in shared mode */
+extern dri_bo* intel_driver_share_buffer(intel_driver_t*, const char *sname, uint32_t name);
+extern uint32_t intel_driver_shared_name(intel_driver_t*, dri_bo*);
+
+/* init driver shared with X using dri state, acquired from X Display */
+extern int intel_driver_init_shared(intel_driver_t*, struct dri_state*);
+
+/* init driver in master mode (when X is not using the card)
+ * usually dev_name = "/dev/dri/card0"
+ */
+extern int intel_driver_init_master(intel_driver_t*, const char* dev_name);
+
+/* init driver for render node */
+extern int intel_driver_init_render(intel_driver_t*, const char* dev_name);
+
+/* terminate driver and all underlying structures */
+extern int intel_driver_terminate(intel_driver_t*);
+
+/* simple check if driver was initialized (checking fd should suffice) */
+extern int intel_driver_is_active(intel_driver_t*);
+
+/* init the call backs used by the ocl driver */
+extern void intel_setup_callbacks(void);
+
+#endif /* _INTEL_DRIVER_H_ */
+
diff --git a/src/gen/intel_gpgpu.c b/src/gen/intel_gpgpu.c
new file mode 100644
index 0000000..c35b618
--- /dev/null
+++ b/src/gen/intel_gpgpu.c
@@ -0,0 +1,2581 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Alexei Soupikov <alexei.soupikov at intel.com>
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "intel_gpgpu.h"
+#include "intel_defines.h"
+#include "intel_structs.h"
+#include "program.h" // for BTI_RESERVED_NUM
+
+#include "cl_alloc.h"
+#include "cl_utils.h"
+#include "cl_sampler.h"
+#include "cl_accelerator_intel.h"
+
+/* Image object type tokens, provided locally when building against
+ * pre-OpenCL-1.2 headers that lack them. */
+#ifndef CL_VERSION_1_2
+#define CL_MEM_OBJECT_IMAGE1D 0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
+#endif
+
+/* MEDIA_OBJECT command opcode and its thread-synchronization/retain bits */
+#define GEN_CMD_MEDIA_OBJECT (0x71000000)
+#define MO_TS_BIT (1 << 24)
+#define MO_RETAIN_BIT (1 << 28)
+/* size in bytes of one SAMPLER_STATE entry */
+#define SAMPLER_STATE_SIZE (16)
+
+/* MMIO offset of the GPU timestamp register */
+#define TIMESTAMP_ADDR 0x2358
+
+/* Stores both binding tables and surface states */
+/* Stores both binding tables and surface states */
+typedef struct surface_heap {
+  uint32_t binding_table[256];   /* byte offsets into 'surface' below */
+  char surface[256*sizeof(gen_surface_state_t)]; /* up to 256 surface states */
+} surface_heap_t;
+
+/* Event bookkeeping: the batch buffer it waits on, an optional timestamp
+ * buffer, and the current execution status. */
+typedef struct intel_event {
+  drm_intel_bo *buffer;
+  drm_intel_bo *ts_buf;
+  int status;
+} intel_event_t;
+
+/* max number of interface descriptors in the IDRT */
+#define MAX_IF_DESC    32
+
+typedef struct intel_gpgpu intel_gpgpu_t;
+
+/* Per-generation dispatch table. Each pointer below is set once at driver
+ * init to the gen7/gen75/gen8/gen9 variant that matches the detected
+ * device; callers then invoke them without knowing the generation. */
+typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t use_slm);
+intel_gpgpu_set_L3_t *intel_gpgpu_set_L3 = NULL;
+
+typedef uint32_t (intel_gpgpu_get_scratch_index_t)(uint32_t size);
+intel_gpgpu_get_scratch_index_t *intel_gpgpu_get_scratch_index = NULL;
+
+typedef void (intel_gpgpu_post_action_t)(intel_gpgpu_t *gpgpu, int32_t flush_mode);
+intel_gpgpu_post_action_t *intel_gpgpu_post_action = NULL;
+
+typedef uint64_t (intel_gpgpu_read_ts_reg_t)(drm_intel_bufmgr *bufmgr);
+intel_gpgpu_read_ts_reg_t *intel_gpgpu_read_ts_reg = NULL;
+
+
+typedef void (intel_gpgpu_set_base_address_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_set_base_address_t *intel_gpgpu_set_base_address = NULL;
+
+typedef void (intel_gpgpu_setup_bti_t)(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
+                                  size_t size, unsigned char index, uint32_t format);
+intel_gpgpu_setup_bti_t *intel_gpgpu_setup_bti = NULL;
+
+
+typedef void (intel_gpgpu_load_vfe_state_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_load_vfe_state_t *intel_gpgpu_load_vfe_state = NULL;
+
+typedef void (intel_gpgpu_build_idrt_t)(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel);
+intel_gpgpu_build_idrt_t *intel_gpgpu_build_idrt = NULL;
+
+
+typedef void (intel_gpgpu_load_curbe_buffer_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_load_curbe_buffer_t *intel_gpgpu_load_curbe_buffer = NULL;
+
+
+typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL;
+
+typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL;
+
+typedef void (intel_gpgpu_select_pipeline_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_select_pipeline_t *intel_gpgpu_select_pipeline = NULL;
+
+/* Block until all rendering on 'buf' (a drm_intel_bo) has finished. */
+static void
+intel_gpgpu_sync(void *buf)
+{
+  if (buf)
+    drm_intel_bo_wait_rendering((drm_intel_bo *)buf);
+}
+
+/* Take a reference on the last-flushed batch buffer and return it (may be
+ * NULL if nothing has been flushed yet). */
+static void *intel_gpgpu_ref_batch_buf(intel_gpgpu_t *gpgpu)
+{
+  if (gpgpu->batch->last_bo)
+    drm_intel_bo_reference(gpgpu->batch->last_bo);
+
+  return gpgpu->batch->last_bo;
+}
+
+/* Drop a reference previously taken by intel_gpgpu_ref_batch_buf. */
+static void intel_gpgpu_unref_batch_buf(void *buf)
+{
+  if (buf)
+    drm_intel_bo_unreference((drm_intel_bo *)buf);
+}
+
+/* Release every buffer object owned by 'gpgpu' plus its batch buffer, then
+ * free the struct itself. Callers must ensure the GPU is done with it. */
+static void
+intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
+{
+  if (gpgpu == NULL)
+    return;
+  if(gpgpu->time_stamp_b.bo)
+    drm_intel_bo_unreference(gpgpu->time_stamp_b.bo);
+  if(gpgpu->printf_b.bo)
+    drm_intel_bo_unreference(gpgpu->printf_b.bo);
+  if (gpgpu->aux_buf.bo)
+    drm_intel_bo_unreference(gpgpu->aux_buf.bo);
+  if (gpgpu->perf_b.bo)
+    drm_intel_bo_unreference(gpgpu->perf_b.bo);
+  if (gpgpu->stack_b.bo)
+    drm_intel_bo_unreference(gpgpu->stack_b.bo);
+  if (gpgpu->scratch_b.bo)
+    drm_intel_bo_unreference(gpgpu->scratch_b.bo);
+  if (gpgpu->profiling_b.bo)
+    drm_intel_bo_unreference(gpgpu->profiling_b.bo);
+
+  if(gpgpu->constant_b.bo)
+    drm_intel_bo_unreference(gpgpu->constant_b.bo);
+
+  intel_batchbuffer_delete(gpgpu->batch);
+  cl_free(gpgpu);
+}
+
+/* Destroy the all intel_gpgpu, no matter finish or not, when driver destroy */
+void intel_gpgpu_delete_all(intel_driver_t *drv)
+{
+  struct intel_gpgpu_node *p;
+  if(drv->gpgpu_list == NULL)
+    return;
+
+  /* pop and destroy every deferred gpgpu node under the context lock */
+  PPTHREAD_MUTEX_LOCK(drv);
+  while(drv->gpgpu_list) {
+    p = drv->gpgpu_list;
+    drv->gpgpu_list = p->next;
+    intel_gpgpu_delete_finished(p->gpgpu);
+    cl_free(p);
+  }
+  PPTHREAD_MUTEX_UNLOCK(drv);
+}
+
+/* Delete a gpgpu context. While holding the driver lock this first reaps any
+ * previously deferred contexts whose batch is no longer busy; then, if this
+ * context's own batch is still busy on the GPU, it is queued on
+ * drv->gpgpu_list for later reaping instead of being freed immediately. */
+static void
+intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
+{
+  if (gpgpu == NULL)
+    return;
+
+  intel_driver_t *drv = gpgpu->drv;
+  struct intel_gpgpu_node *p, *node;
+
+  PPTHREAD_MUTEX_LOCK(drv);
+  /* Reap finished entries after the list head... */
+  p = drv->gpgpu_list;
+  if(p) {
+    node = p->next;
+    while(node) {
+      if(node->gpgpu->batch && node->gpgpu->batch->buffer &&
+         !drm_intel_bo_busy(node->gpgpu->batch->buffer)) {
+        p->next = node->next;
+        intel_gpgpu_delete_finished(node->gpgpu);
+        cl_free(node);
+        node = p->next;
+      } else {
+        p = node;
+        node = node->next;
+      }
+    }
+    /* ...then reap the head itself if it is idle. */
+    node = drv->gpgpu_list;
+    if(node->gpgpu->batch && node->gpgpu->batch->buffer &&
+       !drm_intel_bo_busy(node->gpgpu->batch->buffer)) {
+      drv->gpgpu_list = drv->gpgpu_list->next;
+      intel_gpgpu_delete_finished(node->gpgpu);
+      cl_free(node);
+    }
+  }
+  /* NOTE(review): dead code — gpgpu was already checked for NULL at entry
+   * and is never reassigned; if this return were ever taken it would leave
+   * the mutex locked. */
+  if (gpgpu == NULL)
+    return;
+
+  if(gpgpu->batch && gpgpu->batch->buffer &&
+     drm_intel_bo_busy(gpgpu->batch->buffer)) {
+    /* GPU still owns the batch: defer destruction by appending to the list.
+     * On allocation failure TRY_ALLOC_NO_ERR jumps to 'error' below. */
+    TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node));
+    node->gpgpu = gpgpu;
+    node->next = NULL;
+    p = drv->gpgpu_list;
+    if(p == NULL)
+      drv->gpgpu_list= node;
+    else {
+      while(p->next)
+        p = p->next;
+      p->next = node;
+    }
+  } else
+    intel_gpgpu_delete_finished(gpgpu);
+
+error:
+  PPTHREAD_MUTEX_UNLOCK(drv);
+}
+
+/* Allocate a zeroed gpgpu context bound to 'drv' with a fresh batch buffer.
+ * Returns NULL on allocation failure. */
+static intel_gpgpu_t*
+intel_gpgpu_new(intel_driver_t *drv)
+{
+  intel_gpgpu_t *state = NULL;
+
+  TRY_ALLOC_NO_ERR (state, CALLOC(intel_gpgpu_t));
+  state->drv = drv;
+  state->batch = intel_batchbuffer_new(state->drv);
+  assert(state->batch);
+
+exit:
+  return state;
+error:
+  intel_gpgpu_delete(state);
+  state = NULL;
+  goto exit;
+}
+
+/* Emit PIPELINE_SELECT to switch the hardware to the GPGPU pipeline. */
+static void
+intel_gpgpu_select_pipeline_gen7(intel_gpgpu_t *gpgpu)
+{
+  BEGIN_BATCH(gpgpu->batch, 1);
+  OUT_BATCH(gpgpu->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Gen9 variant: also sets the mask bits required on SKL+. */
+static void
+intel_gpgpu_select_pipeline_gen9(intel_gpgpu_t *gpgpu)
+{
+  BEGIN_BATCH(gpgpu->batch, 1);
+  OUT_BATCH(gpgpu->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MASK | PIPELINE_SELECT_GPGPU);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Per-generation default cache-control bits for surface states. */
+static uint32_t
+intel_gpgpu_get_cache_ctrl_gen7()
+{
+  return cc_llc_l3;
+}
+
+static uint32_t
+intel_gpgpu_get_cache_ctrl_gen75()
+{
+  return llccc_ec | l3cc_ec;
+}
+static uint32_t
+intel_gpgpu_get_cache_ctrl_gen8()
+{
+  return tcc_llc_ec_l3 | mtllc_wb;
+}
+static uint32_t
+intel_gpgpu_get_cache_ctrl_gen9()
+{
+  //Kernel-defined cache control registers 2:
+  //L3CC: WB; LeCC: WB; TC: LLC/eLLC;
+  int major = 0, minor = 0;
+  int mocs_index = 0x2;
+
+  struct utsname buf;
+  uname(&buf);
+  sscanf(buf.release, "%d.%d", &major, &minor);
+  //From linux 4.3, kernel redefined the mocs table's value,
+  //But before 4.3, still used the hw default value.
+  if(strcmp(buf.sysname, "Linux") == 0 &&
+     major == 4 && minor < 3) { /* linux kernel support skl from 4.x, so check from 4 */
+    mocs_index = 0x9;
+  }
+
+  /* MOCS index lives in bits [6:1] of the surface-state field */
+  return (mocs_index << 1);
+}
+
+/* Program STATE_BASE_ADDRESS (gen7, 10 dwords): points the surface state
+ * base at the aux buffer's surface heap and applies the default cache
+ * control to every base. */
+static void
+intel_gpgpu_set_base_address_gen7(intel_gpgpu_t *gpgpu)
+{
+  const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */
+  BEGIN_BATCH(gpgpu->batch, 10);
+  OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 8);
+  /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY); /* General State Base Addr */
+  /* 0, State Mem Obj CC */
+  /* We use a state base address for the surface heap since IVB clamp the
+   * binding table pointer at 11 bits. So, we cannot use pointers directly while
+   * using the surface heap
+   */
+  assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0);
+  OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
+            I915_GEM_DOMAIN_INSTRUCTION,
+            I915_GEM_DOMAIN_INSTRUCTION,
+            gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY));
+
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Dynamic State Base Addr */
+
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */
+  OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
+  /* According to mesa i965 driver code, we must set the dynamic state access upper bound
+   * to a valid bound value, otherwise, the border color pointer may be rejected and you
+   * may get incorrect border color. This is a known hardware bug. */
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
+  OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Program STATE_BASE_ADDRESS (gen8, 16 dwords): 48-bit addresses take two
+ * dwords each; instruction base is relocated to the kernel's bo. */
+static void
+intel_gpgpu_set_base_address_gen8(intel_gpgpu_t *gpgpu)
+{
+  const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */
+  BEGIN_BATCH(gpgpu->batch, 16);
+  OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 14);
+  /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY); /* General State Base Addr */
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 16));
+  /* 0, State Mem Obj CC */
+  /* We use a state base address for the surface heap since IVB clamp the
+   * binding table pointer at 11 bits. So, we cannot use pointers directly while
+   * using the surface heap
+   */
+  assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0);
+  OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
+            I915_GEM_DOMAIN_SAMPLER,
+            I915_GEM_DOMAIN_SAMPLER,
+            gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
+            I915_GEM_DOMAIN_RENDER,
+            I915_GEM_DOMAIN_RENDER,
+            (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); /* Dynamic State Base Addr */
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
+  OUT_BATCH(gpgpu->batch, 0);
+  //OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */
+  OUT_RELOC(gpgpu->batch, (drm_intel_bo *)gpgpu->ker->bo,
+            I915_GEM_DOMAIN_INSTRUCTION,
+            I915_GEM_DOMAIN_INSTRUCTION,
+            0 + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
+  OUT_BATCH(gpgpu->batch, 0);
+
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  /* According to mesa i965 driver code, we must set the dynamic state access upper bound
+   * to a valid bound value, otherwise, the border color pointer may be rejected and you
+   * may get incorrect border color. This is a known hardware bug. */
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Program STATE_BASE_ADDRESS (gen9, 19 dwords): same layout as gen8 plus
+ * the trailing bindless surface state base address fields. */
+static void
+intel_gpgpu_set_base_address_gen9(intel_gpgpu_t *gpgpu)
+{
+  const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */
+  BEGIN_BATCH(gpgpu->batch, 19);
+  OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 17);
+  /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY); /* General State Base Addr */
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 16));
+  /* 0, State Mem Obj CC */
+  /* We use a state base address for the surface heap since IVB clamp the
+   * binding table pointer at 11 bits. So, we cannot use pointers directly while
+   * using the surface heap
+   */
+  assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0);
+  OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
+            I915_GEM_DOMAIN_SAMPLER,
+            I915_GEM_DOMAIN_SAMPLER,
+            gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
+            I915_GEM_DOMAIN_RENDER,
+            I915_GEM_DOMAIN_RENDER,
+            (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); /* Dynamic State Base Addr */
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
+  OUT_BATCH(gpgpu->batch, 0);
+  //OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */
+  OUT_RELOC(gpgpu->batch, (drm_intel_bo *)gpgpu->ker->bo,
+            I915_GEM_DOMAIN_INSTRUCTION,
+            I915_GEM_DOMAIN_INSTRUCTION,
+            0 + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
+  OUT_BATCH(gpgpu->batch, 0);
+
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  /* According to mesa i965 driver code, we must set the dynamic state access upper bound
+   * to a valid bound value, otherwise, the border color pointer may be rejected and you
+   * may get incorrect border color. This is a known hardware bug. */
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
+  /* Bindless surface state base address */
+  OUT_BATCH(gpgpu->batch, (def_cc << 4) | BASE_ADDRESS_MODIFY);
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0xfffff000);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Encode a per-thread scratch size into the hardware index field.
+ * gen7: linear in 1KB units. */
+uint32_t intel_gpgpu_get_scratch_index_gen7(uint32_t size) {
+  return size / 1024 - 1;
+}
+
+/* gen75: log2 encoding with a 2KB granule; size must be a power of two. */
+uint32_t intel_gpgpu_get_scratch_index_gen75(uint32_t size) {
+    //align in backend, if non pow2, must align when alloc scratch bo.
+    assert((size & (size - 1)) == 0);
+    size = size >> 11;
+    uint32_t index = 0;
+    while((size >>= 1) > 0)
+      index++;   //get leading one
+
+    return index;
+}
+
+/* gen8: log2 encoding with a 1KB granule; size must be a power of two. */
+uint32_t intel_gpgpu_get_scratch_index_gen8(uint32_t size) {
+    //align in backend, if non pow2, must align when alloc scratch bo.
+    assert((size & (size - 1)) == 0);
+    size = size >> 10;
+    uint32_t index = 0;
+    while((size >>= 1) > 0)
+      index++;   //get leading one
+
+    return index;
+}
+
+
+/* Hardware limit on CURBE size in 256-bit units; smaller on Baytrail-T and
+ * IVB GT1. */
+static cl_int
+intel_gpgpu_get_max_curbe_size(uint32_t device_id)
+{
+  if (IS_BAYTRAIL_T(device_id) ||
+      IS_IVB_GT1(device_id))
+    return 992;
+  else
+    return 2016;
+}
+
+/* Requested CURBE size (entries * entry size), clamped to the device
+ * maximum with a warning when it overflows. */
+static cl_int
+intel_gpgpu_get_curbe_size(intel_gpgpu_t *gpgpu)
+{
+  int curbe_size = gpgpu->curb.size_cs_entry * gpgpu->curb.num_cs_entries;
+  int max_curbe_size = intel_gpgpu_get_max_curbe_size(gpgpu->drv->device_id);
+
+  if (curbe_size > max_curbe_size) {
+    fprintf(stderr, "warning, curbe size exceed limitation.\n");
+    return max_curbe_size;
+  } else
+    return curbe_size;
+}
+
+/* Emit MEDIA_VFE_STATE (gen7, 8 dwords): scratch space, max thread count,
+ * GPGPU mode and CURBE allocation. */
+static void
+intel_gpgpu_load_vfe_state_gen7(intel_gpgpu_t *gpgpu)
+{
+  int32_t scratch_index;
+  BEGIN_BATCH(gpgpu->batch, 8);
+  OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_POINTERS | (8-2));
+
+  if(gpgpu->per_thread_scratch > 0) {
+    /* scratch bo relocation carries the encoded per-thread size */
+    scratch_index = intel_gpgpu_get_scratch_index(gpgpu->per_thread_scratch);
+    OUT_RELOC(gpgpu->batch, gpgpu->scratch_b.bo,
+              I915_GEM_DOMAIN_RENDER,
+              I915_GEM_DOMAIN_RENDER,
+              scratch_index);
+  }
+  else {
+    OUT_BATCH(gpgpu->batch, 0);
+  }
+  /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */
+  OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (0 << 8) | 0xc4);
+  OUT_BATCH(gpgpu->batch, 0);
+  /* curbe_size */
+  OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu));
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Emit MEDIA_VFE_STATE (gen8, 9 dwords): scratch address is 64-bit and the
+ * URB entry count must be non-zero. */
+static void
+intel_gpgpu_load_vfe_state_gen8(intel_gpgpu_t *gpgpu)
+{
+  int32_t scratch_index;
+  BEGIN_BATCH(gpgpu->batch, 9);
+  OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_POINTERS | (9-2));
+
+  if(gpgpu->per_thread_scratch > 0) {
+    scratch_index = intel_gpgpu_get_scratch_index(gpgpu->per_thread_scratch);
+    OUT_RELOC(gpgpu->batch, gpgpu->scratch_b.bo,
+              I915_GEM_DOMAIN_RENDER,
+              I915_GEM_DOMAIN_RENDER,
+              scratch_index);
+  }
+  else {
+    OUT_BATCH(gpgpu->batch, 0);
+  }
+  OUT_BATCH(gpgpu->batch, 0);
+
+  /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */
+  OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (2 << 8) | 0xc0); //urb entries can't be 0
+  OUT_BATCH(gpgpu->batch, 0);
+  /* urb entries size | curbe_size */
+  OUT_BATCH(gpgpu->batch, 2<<16 | intel_gpgpu_get_curbe_size(gpgpu));
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0);
+
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Emit MEDIA_CURBE_LOAD (gen7): CURBE lives in the aux buffer, addressed
+ * via relocation. Size is in 32-byte units. */
+static void
+intel_gpgpu_load_curbe_buffer_gen7(intel_gpgpu_t *gpgpu)
+{
+  BEGIN_BATCH(gpgpu->batch, 4);
+  OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2));  /* length-2 */
+  OUT_BATCH(gpgpu->batch, 0);                     /* mbz */
+  OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu) * 32);
+  OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gpgpu->aux_offset.curbe_offset);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* gen8 variant: address is an offset relative to the dynamic state base,
+ * so no relocation is needed. */
+static void
+intel_gpgpu_load_curbe_buffer_gen8(intel_gpgpu_t *gpgpu)
+{
+  BEGIN_BATCH(gpgpu->batch, 4);
+  OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2));  /* length-2 */
+  OUT_BATCH(gpgpu->batch, 0);                     /* mbz */
+  OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu) * 32);
+  OUT_BATCH(gpgpu->batch, gpgpu->aux_offset.curbe_offset);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (gen7): IDRT is in the aux buffer,
+ * addressed via relocation. */
+static void
+intel_gpgpu_load_idrt_gen7(intel_gpgpu_t *gpgpu)
+{
+  BEGIN_BATCH(gpgpu->batch, 4);
+  OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */
+  OUT_BATCH(gpgpu->batch, 0);                    /* mbz */
+  OUT_BATCH(gpgpu->batch, 1 << 5);
+  OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gpgpu->aux_offset.idrt_offset);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* gen8 variant: offset relative to dynamic state base, no relocation. */
+static void
+intel_gpgpu_load_idrt_gen8(intel_gpgpu_t *gpgpu)
+{
+  BEGIN_BATCH(gpgpu->batch, 4);
+  OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */
+  OUT_BATCH(gpgpu->batch, 0);                    /* mbz */
+  OUT_BATCH(gpgpu->batch, 1 << 5);
+  OUT_BATCH(gpgpu->batch, gpgpu->aux_offset.idrt_offset);
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+
+/* L3 partitioning tables written to the L3 CNTL registers by the
+ * intel_gpgpu_set_L3_* functions below; entry [12] is the configuration
+ * used when SLM is enabled, entry [4] when it is not. */
+static const uint32_t gpgpu_l3_config_reg1[] = {
+  0x00080040, 0x02040040, 0x00800040, 0x01000038,
+  0x02000030, 0x01000038, 0x00000038, 0x00000040,
+  0x0A140091, 0x09100091, 0x08900091, 0x08900091,
+  0x010000a1
+};
+
+static const uint32_t gpgpu_l3_config_reg2[] = {
+  0x00000000, 0x00000000, 0x00080410, 0x00080410,
+  0x00040410, 0x00040420, 0x00080420, 0x00080020,
+  0x00204080, 0x00244890, 0x00284490, 0x002444A0,
+  0x00040810
+};
+
+/* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer. */
+/* 'idx' selects the uint64_t slot in time_stamp_b (0 = start, 1 = end). */
+static void
+intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx)
+{
+  BEGIN_BATCH(gpgpu->batch, 5);
+  OUT_BATCH(gpgpu->batch, CMD_PIPE_CONTROL | (5-2));
+  OUT_BATCH(gpgpu->batch, GEN7_PIPE_CONTROL_WRITE_TIMESTAMP);
+  OUT_RELOC(gpgpu->batch, gpgpu->time_stamp_b.bo,
+          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+          GEN7_PIPE_CONTROL_GLOBAL_GTT_WRITE | idx * sizeof(uint64_t));
+  OUT_BATCH(gpgpu->batch, 0);
+  OUT_BATCH(gpgpu->batch, 0);
+  /* NOTE(review): ADVANCE_BATCH is called without the batch argument here,
+   * unlike every other call site — confirm the macro tolerates this. */
+  ADVANCE_BATCH();
+}
+
+/* Emit a PIPE_CONTROL (gen7) that flushes the render target cache and DC,
+ * invalidates the texture cache, and stalls the command streamer. */
+static void
+intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu)
+{
+  gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.render_target_cache_flush_enable = 1;
+  pc->dw1.texture_cache_invalidation_enable = 1;
+  pc->dw1.cs_stall = 1;
+  pc->dw1.dc_flush_enable = 1;
+  //pc->dw1.instruction_cache_invalidate_enable = 1;
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* gen75 variant: split into two PIPE_CONTROLs — first a DC flush with CS
+ * stall, then the render-target flush and texture-cache invalidate. */
+static void
+intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu)
+{
+  gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.cs_stall = 1;
+  pc->dw1.dc_flush_enable = 1;
+
+  pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.render_target_cache_flush_enable = 1;
+  pc->dw1.texture_cache_invalidation_enable = 1;
+  pc->dw1.cs_stall = 1;
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* gen8 variant: same flags as gen7 but using the wider gen8 command. */
+static void
+intel_gpgpu_pipe_control_gen8(intel_gpgpu_t *gpgpu)
+{
+  gen8_pipe_control_t* pc = (gen8_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen8_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen8_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.render_target_cache_flush_enable = 1;
+  pc->dw1.texture_cache_invalidation_enable = 1;
+  pc->dw1.cs_stall = 1;
+  pc->dw1.dc_flush_enable = 1;
+  //pc->dw1.instruction_cache_invalidate_enable = 1;
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+/* Program the L3 partitioning (gen7) via LOAD_REGISTER_IMM, picking the
+ * SLM or non-SLM table entry, then flush with a pipe control. */
+static void
+intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm)
+{
+  BEGIN_BATCH(gpgpu->batch, 9);
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
+  OUT_BATCH(gpgpu->batch, 0x00A00000);
+
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
+
+  if (use_slm)
+    OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[12]);
+  else
+    OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]);
+
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
+  if (use_slm)
+    OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[12]);
+  else
+    OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]);
+  ADVANCE_BATCH(gpgpu->batch);
+
+  intel_gpgpu_pipe_control(gpgpu);
+}
+
+/* Baytrail variant: hard-coded partition values instead of the tables. */
+static void
+intel_gpgpu_set_L3_baytrail(intel_gpgpu_t *gpgpu, uint32_t use_slm)
+{
+  BEGIN_BATCH(gpgpu->batch, 9);
+
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
+  OUT_BATCH(gpgpu->batch, 0x00D30000);    /* General credit : High credit = 26 : 6 */
+
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
+  if (use_slm)
+    OUT_BATCH(gpgpu->batch, 0x01020021);  /* {SLM=64, URB=96, DC=16, RO=16, Sum=192} */
+  else
+    OUT_BATCH(gpgpu->batch, 0x02040040);  /* {SLM=0, URB=128, DC=32, RO=32, Sum=192} */
+
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
+  OUT_BATCH(gpgpu->batch, 0x0);           /* {I/S=0, Const=0, Tex=0} */
+
+  ADVANCE_BATCH(gpgpu->batch);
+
+  intel_gpgpu_pipe_control(gpgpu);
+}
+
+/* gen75 (Haswell) variant: additionally enables L3 atomics via SCRATCH1 and
+ * ROW_CHICKEN3 when the atomic self-test passed. */
+static void
+intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
+{
+  /* still set L3 in batch buffer for fulsim. */
+  if(gpgpu->drv->atomic_test_result != SELF_TEST_ATOMIC_FAIL)
+  {
+    BEGIN_BATCH(gpgpu->batch, 15);
+    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+    /* FIXME: KMD always disable the atomic in L3 for some reason.
+       I checked the spec, and don't think we need that workaround now.
+       Before I send a patch to kernel, let's just enable it here. */
+    OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
+    OUT_BATCH(gpgpu->batch, 0);                         /* enable atomic in L3 */
+    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+    OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
+    OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16);          /* enable atomic in L3 */
+  }
+  else
+  {
+    BEGIN_BATCH(gpgpu->batch, 9);
+  }
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
+  OUT_BATCH(gpgpu->batch, 0x08800000);
+
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
+
+  if (use_slm)
+    OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[12]);
+  else
+    OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]);
+
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
+  if (use_slm)
+    OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[12]);
+  else
+    OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]);
+  ADVANCE_BATCH(gpgpu->batch);
+
+  //if(use_slm)
+  //  gpgpu->batch->enable_slm = 1;
+  intel_gpgpu_pipe_control(gpgpu);
+}
+
+/* gen8 variant: a single L3 CNTL register holds the whole partitioning. */
+static void
+intel_gpgpu_set_L3_gen8(intel_gpgpu_t *gpgpu, uint32_t use_slm)
+{
+  BEGIN_BATCH(gpgpu->batch, 3);
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN8_L3_CNTL_REG_ADDRESS_OFFSET);
+  // FIXME, this is a workaround for switch SLM enable and disable random hang
+  if(use_slm)
+    OUT_BATCH(gpgpu->batch, 0x60000121);  /* {SLM=192, URB=128, Rest=384} */
+  else
+    OUT_BATCH(gpgpu->batch, 0x60000160);  /* {SLM=0, URB=384, Rest=384, Sum=768} */
+
+  //if(use_slm)
+  //  gpgpu->batch->enable_slm = 1;
+  intel_gpgpu_pipe_control(gpgpu);
+}
+
+/* Begin a kernel dispatch batch: emit the full GPGPU state setup sequence
+ * (L3 config, pipeline select, base addresses, VFE/CURBE/IDRT), plus
+ * optional perf-counter and start-timestamp commands. */
+static void
+intel_gpgpu_batch_start(intel_gpgpu_t *gpgpu)
+{
+  intel_batchbuffer_start_atomic(gpgpu->batch, 256);
+  intel_gpgpu_pipe_control(gpgpu);
+  assert(intel_gpgpu_set_L3);
+  intel_gpgpu_set_L3(gpgpu, gpgpu->ker->use_slm);
+  intel_gpgpu_select_pipeline(gpgpu);
+  intel_gpgpu_set_base_address(gpgpu);
+  intel_gpgpu_load_vfe_state(gpgpu);
+  intel_gpgpu_load_curbe_buffer(gpgpu);
+  intel_gpgpu_load_idrt(gpgpu);
+
+  if (gpgpu->perf_b.bo) {
+    BEGIN_BATCH(gpgpu->batch, 3);
+    OUT_BATCH(gpgpu->batch,
+              (0x28 << 23) | /* MI_REPORT_PERF_COUNT */
+              (3 - 2));      /* length-2 */
+    OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo,
+              I915_GEM_DOMAIN_RENDER,
+              I915_GEM_DOMAIN_RENDER,
+              0 |  /* Offset for the start "counters" */
+              1);  /* Use GTT and not PGTT */
+    OUT_BATCH(gpgpu->batch, 0);
+    ADVANCE_BATCH(gpgpu->batch);
+  }
+
+  /* Insert PIPE_CONTROL for time stamp of start*/
+  if (gpgpu->time_stamp_b.bo)
+    intel_gpgpu_write_timestamp(gpgpu, 0);
+}
+
+/* End-of-batch hook (gen7): flush only when the caller requests it. */
+static void
+intel_gpgpu_post_action_gen7(intel_gpgpu_t *gpgpu, int32_t flush_mode)
+{
+  if(flush_mode)
+    intel_gpgpu_pipe_control(gpgpu);
+}
+
+/* End-of-batch hook (gen75): always flush and restore the non-SLM L3
+ * configuration so the 3D pipeline is not left in SLM mode. */
+static void
+intel_gpgpu_post_action_gen75(intel_gpgpu_t *gpgpu, int32_t flush_mode)
+{
+  /* flush force for set L3 */
+  intel_gpgpu_pipe_control(gpgpu);
+
+  /* Restore L3 control to disable SLM mode,
+     otherwise, may affect 3D pipeline */
+  intel_gpgpu_set_L3(gpgpu, 0);
+}
+
+/* Close a kernel dispatch batch: write the end timestamp and end perf
+ * counters (if enabled), run the per-gen post action, and end the atomic
+ * batch section. */
+static void
+intel_gpgpu_batch_end(intel_gpgpu_t *gpgpu, int32_t flush_mode)
+{
+  /* Insert PIPE_CONTROL for time stamp of end*/
+  if (gpgpu->time_stamp_b.bo)
+    intel_gpgpu_write_timestamp(gpgpu, 1);
+
+  /* Insert the performance counter command */
+  if (gpgpu->perf_b.bo) {
+    BEGIN_BATCH(gpgpu->batch, 3);
+    OUT_BATCH(gpgpu->batch,
+              (0x28 << 23) | /* MI_REPORT_PERF_COUNT */
+              (3 - 2));      /* length-2 */
+    OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo,
+              I915_GEM_DOMAIN_RENDER,
+              I915_GEM_DOMAIN_RENDER,
+              512 |/* Offset for the end "counters" */
+              1);  /* Use GTT and not PGTT */
+    OUT_BATCH(gpgpu->batch, 0);
+    ADVANCE_BATCH(gpgpu->batch);
+  }
+
+  intel_gpgpu_post_action(gpgpu, flush_mode);
+  intel_batchbuffer_end_atomic(gpgpu->batch);
+}
+
+/* Reset the batch buffer to a fresh buffer of 'sz' bytes. */
+static int
+intel_gpgpu_batch_reset(intel_gpgpu_t *gpgpu, size_t sz)
+{
+  return intel_batchbuffer_reset(gpgpu->batch, sz);
+}
+
+/* Submit the batch to the kernel; returns 0 when there is nothing to flush. */
+static int
+intel_gpgpu_flush(intel_gpgpu_t *gpgpu)
+{
+  if (!gpgpu->batch || !gpgpu->batch->buffer)
+    return 0;
+  return intel_batchbuffer_flush(gpgpu->batch);
+  /* FIXME:
+     Remove old assert here for binded buffer offset 0 which
+     tried to guard possible NULL buffer pointer check in kernel, as
+     in case like "runtime_null_kernel_arg", but that's wrong to just
+     take buffer offset 0 as NULL, and cause failure for normal
+     kernels which has no such NULL ptr check but with buffer offset 0
+     (which is possible now and will be normal if full PPGTT is on).
+
+     Need to fix NULL ptr check otherwise.
+  */
+}
+
+/* (Re)initialize per-dispatch state: reset bound-buffer tracking, drop stale
+ * buffers, optionally allocate a timestamp buffer for profiling, and build
+ * the aux buffer that packs the surface heap, CURBE, IDRT, sampler states
+ * and border colors. Returns 0 on success, -1 on allocation/map failure. */
+static int
+intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
+                       uint32_t max_threads,
+                       uint32_t size_cs_entry,
+                       int profiling)
+{
+  drm_intel_bo *bo;
+
+  /* Bound buffers */
+  gpgpu->binded_n = 0;
+  gpgpu->img_bitmap = 0;
+  gpgpu->img_index_base = 3;
+  gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1);
+
+  /* URB */
+  gpgpu->curb.num_cs_entries = 64;
+  gpgpu->curb.size_cs_entry = size_cs_entry;
+  gpgpu->max_threads = max_threads;
+
+  if (gpgpu->printf_b.bo)
+    dri_bo_unreference(gpgpu->printf_b.bo);
+  gpgpu->printf_b.bo = NULL;
+
+  if (gpgpu->profiling_b.bo)
+    dri_bo_unreference(gpgpu->profiling_b.bo);
+  gpgpu->profiling_b.bo = NULL;
+
+  /* Set the profile buffer*/
+  if(gpgpu->time_stamp_b.bo)
+    dri_bo_unreference(gpgpu->time_stamp_b.bo);
+  gpgpu->time_stamp_b.bo = NULL;
+  if (profiling) {
+    bo = dri_bo_alloc(gpgpu->drv->bufmgr, "timestamp query", 4096, 4096);
+    gpgpu->time_stamp_b.bo = bo;
+    if (!bo)
+      fprintf(stderr, "Could not allocate buffer for profiling.\n");
+  }
+
+  /* stack */
+  if (gpgpu->stack_b.bo)
+    dri_bo_unreference(gpgpu->stack_b.bo);
+  gpgpu->stack_b.bo = NULL;
+
+  /* Set the auxiliary buffer*/
+  uint32_t size_aux = 0;
+  if(gpgpu->aux_buf.bo)
+    dri_bo_unreference(gpgpu->aux_buf.bo);
+  gpgpu->aux_buf.bo = NULL;
+
+  /* begin with surface heap to make sure it's page aligned,
+     because state base address use 20bit for the address */
+  gpgpu->aux_offset.surface_heap_offset = size_aux;
+  size_aux += sizeof(surface_heap_t);
+
+  //curbe must be 32 bytes aligned (64 used here, which is stricter)
+  size_aux = ALIGN(size_aux, 64);
+  gpgpu->aux_offset.curbe_offset = size_aux;
+  size_aux += gpgpu->curb.num_cs_entries * gpgpu->curb.size_cs_entry * 32;
+
+  //idrt must be 32 bytes aligned
+  size_aux = ALIGN(size_aux, 32);
+  gpgpu->aux_offset.idrt_offset = size_aux;
+  size_aux += MAX_IF_DESC * sizeof(struct gen6_interface_descriptor);
+
+  //must be 32 bytes aligned
+  //sampler state and vme state share the same buffer,
+  size_aux = ALIGN(size_aux, 32);
+  gpgpu->aux_offset.sampler_state_offset = size_aux;
+  size_aux += MAX(GEN_MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
+                  GEN_MAX_VME_STATES * sizeof(gen7_vme_state_t));
+
+  //sampler border color state must be 32 bytes aligned
+  size_aux = ALIGN(size_aux, 32);
+  gpgpu->aux_offset.sampler_border_color_state_offset = size_aux;
+  size_aux += GEN_MAX_SAMPLERS * sizeof(gen7_sampler_border_color_t);
+
+  /* make sure aux buffer is page aligned */
+  size_aux = ALIGN(size_aux, 4096);
+
+  bo = dri_bo_alloc(gpgpu->drv->bufmgr, "AUX_BUFFER", size_aux, 4096);
+
+  if (!bo || dri_bo_map(bo, 1) != 0) {
+    fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
+    if (bo)
+      dri_bo_unreference(bo);
+    if (profiling && gpgpu->time_stamp_b.bo)
+      dri_bo_unreference(gpgpu->time_stamp_b.bo);
+    gpgpu->time_stamp_b.bo = NULL;
+    return -1;
+  }
+  memset(bo->virtual, 0, size_aux);
+  gpgpu->aux_buf.bo = bo;
+  return 0;
+}
+
+/* Point binding-table slot `index` at its gen7 surface state inside the
+ * surface heap, then emit a GEM relocation so the kernel patches the
+ * surface state's base-address dword (ss1) once obj_bo gets its final
+ * GPU address. obj_bo_offset is the delta applied inside obj_bo. */
+static void
+intel_gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+ index * sizeof(gen7_surface_state_t);
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ obj_bo_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen7_surface_state_t, ss1),
+ obj_bo);
+}
+
+/* Same as intel_gpgpu_set_buf_reloc_gen7, but the relocation target is the
+ * base-address field (ss0) of a gen7 *media* surface state, as used by the
+ * VME (video motion estimation) path. Note the binding-table stride is
+ * still sizeof(gen7_surface_state_t), matching the heap layout. */
+static void
+intel_gpgpu_set_buf_reloc_for_vme_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+ index * sizeof(gen7_surface_state_t);
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ obj_bo_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen7_media_surface_state_t, ss0),
+ obj_bo);
+}
+
+/* (Re)allocate the per-context constant buffer and bind it at binding-table
+ * index `bti` as an R32G32B32A32_UINT buffer surface. Any previous constant
+ * BO is unreferenced first. Returns the new BO, or NULL on allocation
+ * failure. intel_gpgpu_setup_bti is presumably a per-gen dispatch pointer
+ * set up elsewhere in this file — TODO confirm. */
+static dri_bo*
+intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
+{
+ if(gpgpu->constant_b.bo)
+ dri_bo_unreference(gpgpu->constant_b.bo);
+ gpgpu->constant_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "CONSTANT_BUFFER", size, 64);
+ if (gpgpu->constant_b.bo == NULL)
+ return NULL;
+
+ intel_gpgpu_setup_bti(gpgpu, gpgpu->constant_b.bo, 0, size, bti, I965_SURFACEFORMAT_R32G32B32A32_UINT);
+ return gpgpu->constant_b.bo;
+}
+
+/* Fill a gen7 BUFFER surface state for `buf` at binding-table slot `index`.
+ * The buffer size is encoded as (size - 1) split across three fields:
+ * width = bits 6:0, height = bits 20:7, depth = bits 30:21 — hence the
+ * 2GB (2ul<<30 with size-1 fitting 31 bits) assert. A relocation is
+ * emitted so ss1.base_addr is fixed up at exec time. */
+static void
+intel_gpgpu_setup_bti_gen7(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
+ size_t size, unsigned char index, uint32_t format)
+{
+ assert(size <= (2ul<<30));
+ size_t s = size - 1;
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen7_surface_state_t *ss0 = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
+ memset(ss0, 0, sizeof(gen7_surface_state_t));
+ ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+ ss0->ss0.surface_format = format;
+ ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
+ // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte.
+ // (s = size - 1, so a multiple-of-4 size leaves the low two bits set.)
+ if(format == I965_SURFACEFORMAT_RAW)
+ assert((ss0->ss2.width & 0x03) == 3);
+ ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
+ ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
+ ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t);
+
+ /* Tentative base address; the reloc below is authoritative at exec time. */
+ ss0->ss1.base_addr = buf->offset + internal_offset;
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ internal_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen7_surface_state_t, ss1),
+ buf);
+}
+
+/* Gen7.5 (Haswell) variant of setup_bti: identical to the gen7 one except
+ * that non-RAW formats must also program the shader channel selects
+ * (introduced on HSW) to an identity RGBA swizzle. */
+static void
+intel_gpgpu_setup_bti_gen75(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
+ size_t size, unsigned char index, uint32_t format)
+{
+ assert(size <= (2ul<<30));
+ size_t s = size - 1;
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen7_surface_state_t *ss0 = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
+ memset(ss0, 0, sizeof(gen7_surface_state_t));
+ ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+ ss0->ss0.surface_format = format;
+ if(format != I965_SURFACEFORMAT_RAW) {
+ /* Identity channel swizzle (R->R, G->G, B->B, A->A). */
+ ss0->ss7.shader_r = I965_SURCHAN_SELECT_RED;
+ ss0->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
+ ss0->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
+ ss0->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
+ }
+ ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
+ // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte.
+ if(format == I965_SURFACEFORMAT_RAW)
+ assert((ss0->ss2.width & 0x03) == 3);
+ ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
+ ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
+ ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t);
+
+ ss0->ss1.base_addr = buf->offset + internal_offset;
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ internal_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen7_surface_state_t, ss1),
+ buf);
+}
+
+/* Gen8 (Broadwell) variant of setup_bti. Differences from gen7/gen75:
+ * the surface state is the larger gen8 layout, cache control lives in
+ * ss1.mem_obj_ctrl_state, and the base address is a 48-bit split value
+ * (ss8 = low 32 bits, ss9 = high bits) using the BO's offset64; the
+ * relocation therefore targets ss8. */
+static void
+intel_gpgpu_setup_bti_gen8(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
+ size_t size, unsigned char index, uint32_t format)
+{
+ assert(size <= (2ul<<30));
+ size_t s = size - 1;
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen8_surface_state_t *ss0 = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
+ memset(ss0, 0, sizeof(gen8_surface_state_t));
+ ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+ ss0->ss0.surface_format = format;
+ if(format != I965_SURFACEFORMAT_RAW) {
+ /* Identity channel swizzle. */
+ ss0->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
+ ss0->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
+ ss0->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
+ ss0->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
+ }
+ ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
+ // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte.
+ if(format == I965_SURFACEFORMAT_RAW)
+ assert((ss0->ss2.width & 0x03) == 3);
+ ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
+ ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
+ ss0->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen8_surface_state_t);
+ ss0->ss8.surface_base_addr_lo = (buf->offset64 + internal_offset) & 0xffffffff;
+ ss0->ss9.surface_base_addr_hi = ((buf->offset64 + internal_offset) >> 32) & 0xffffffff;
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ internal_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen8_surface_state_t, ss8),
+ buf);
+}
+
+/* Gen9 (Skylake) variant of setup_bti. Same layout as gen8 but the depth
+ * field gains one bit (bits 31:21 of size-1), raising the maximum buffer
+ * size from 2GB to 4GB — hence the different assert and depth mask. */
+static void
+intel_gpgpu_setup_bti_gen9(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
+ size_t size, unsigned char index, uint32_t format)
+{
+ assert(size <= (4ul<<30));
+ size_t s = size - 1;
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen8_surface_state_t *ss0 = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
+ memset(ss0, 0, sizeof(gen8_surface_state_t));
+ ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+ ss0->ss0.surface_format = format;
+ if(format != I965_SURFACEFORMAT_RAW) {
+ /* Identity channel swizzle. */
+ ss0->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
+ ss0->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
+ ss0->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
+ ss0->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
+ }
+ ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
+ // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte.
+ if(format == I965_SURFACEFORMAT_RAW)
+ assert((ss0->ss2.width & 0x03) == 3);
+ ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
+ ss0->ss3.depth = (s >> 21) & 0x7ff; /* bits 31:21 of sz, from bespec only gen 9 support that*/
+ ss0->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen8_surface_state_t);
+ ss0->ss8.surface_base_addr_lo = (buf->offset64 + internal_offset) & 0xffffffff;
+ ss0->ss9.surface_base_addr_hi = ((buf->offset64 + internal_offset) >> 32) & 0xffffffff;
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ internal_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen8_surface_state_t, ss8),
+ buf);
+}
+
+/* Return 1 if the CL image type is an arrayed surface (1D or 2D array),
+ * 0 otherwise. */
+static int
+intel_is_surface_array(cl_mem_object_type type)
+{
+ if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
+ type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ return 1;
+
+ return 0;
+}
+
+/* Map an OpenCL image object type to the hardware surface type
+ * (I965_SURFACE_1D/2D/3D). 1D buffers and 2D arrays map to 2D.
+ * Asserts on any unknown type; the trailing return only quiets
+ * the compiler on NDEBUG builds. */
+static int
+intel_get_surface_type(cl_mem_object_type type)
+{
+ switch (type) {
+ case CL_MEM_OBJECT_IMAGE1D:
+ case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+ return I965_SURFACE_1D;
+
+ case CL_MEM_OBJECT_IMAGE1D_BUFFER:
+ case CL_MEM_OBJECT_IMAGE2D:
+ case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+ return I965_SURFACE_2D;
+
+ case CL_MEM_OBJECT_IMAGE3D:
+ return I965_SURFACE_3D;
+
+ default:
+ assert(0);
+ }
+ return 0;
+}
+
+/* Get fixed surface type. If it is a 1D array image with a large index,
+ we need to fixup it to 2D type due to a Gen7/Gen75's sampler issue
+ on a integer type surface with clamp address mode and nearest filter mode.
+*/
+static uint32_t get_surface_type(intel_gpgpu_t *gpgpu, int index, cl_mem_object_type type)
+{
+ uint32_t surface_type;
+ //Now all platforms need it, so disable platform, re-enable it
+ //when some platform don't need this workaround
+ //NOTE(review): the disabled condition below contains a typo
+ //(`drv_device_id` instead of `drv->device_id`) — fix before re-enabling.
+ if (/*((IS_IVYBRIDGE(gpgpu->drv->device_id) ||
+ IS_HASWELL(gpgpu->drv->device_id) ||
+ IS_BROADWELL(gpgpu->drv->device_id) ||
+ IS_CHERRYVIEW(gpgpu->drv->device_id) ||
+ IS_SKYLAKE(gpgpu->drv->device_id) ||
+ IS_BROXTON(gpgpu->drv->device_id) ||
+ IS_KABYLAKE(gpgpu->drv_device_id))) && */
+ index >= BTI_WORKAROUND_IMAGE_OFFSET + BTI_RESERVED_NUM &&
+ type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ surface_type = I965_SURFACE_2D;
+ else
+ surface_type = intel_get_surface_type(type);
+ return surface_type;
+}
+
+/* Build a gen7 surface state for an image and bind it at binding-table
+ * slot `index`. Dimensions/pitch are written as value-1 per hardware
+ * convention; X/Y tiling is encoded via tiled_surface + tile_walk.
+ * A relocation is emitted for the base address (ss1). */
+static void
+intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
+ uint32_t index,
+ dri_bo* obj_bo,
+ uint32_t obj_bo_offset,
+ uint32_t format,
+ cl_mem_object_type type,
+ uint32_t bpp,
+ int32_t w,
+ int32_t h,
+ int32_t depth,
+ int32_t pitch,
+ int32_t slice_pitch,
+ int32_t tiling)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen7_surface_state_t *ss = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
+
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
+ ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
+ if (intel_is_surface_array(type)) {
+ ss->ss0.surface_array = 1;
+ ss->ss0.surface_array_spacing = 1;
+ }
+ ss->ss0.surface_format = format;
+ /* Tentative address; the reloc emitted below is authoritative. */
+ ss->ss1.base_addr = obj_bo->offset + obj_bo_offset;
+ ss->ss2.width = w - 1;
+
+ ss->ss2.height = h - 1;
+ ss->ss3.depth = depth - 1;
+ ss->ss4.not_str_buf.rt_view_extent = depth - 1;
+ ss->ss4.not_str_buf.min_array_element = 0;
+ ss->ss3.pitch = pitch - 1;
+ ss->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
+ if (tiling == GPGPU_TILE_X) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+ } else if (tiling == GPGPU_TILE_Y) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+ }
+ ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
+
+ assert(index < GEN_MAX_SURFACES);
+}
+
+/* Bind an image for the gen7 VME path using a *media* surface state
+ * instead of the regular render surface state. The surface format is
+ * hard-coded to Y8_UNORM (value 12) and the U/V plane offsets are
+ * zeroed (single-plane luma only). bpp/depth/slice_pitch are unused
+ * for this layout. */
+static void
+intel_gpgpu_bind_image_for_vme_gen7(intel_gpgpu_t *gpgpu,
+ uint32_t index,
+ dri_bo* obj_bo,
+ uint32_t obj_bo_offset,
+ uint32_t format,
+ cl_mem_object_type type,
+ uint32_t bpp,
+ int32_t w,
+ int32_t h,
+ int32_t depth,
+ int32_t pitch,
+ int32_t slice_pitch,
+ int32_t tiling)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen7_media_surface_state_t *ss = (gen7_media_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
+
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.base_addr = obj_bo->offset + obj_bo_offset;
+ ss->ss1.uv_offset_v_direction = 0;
+ ss->ss1.pic_struct = 0;
+ ss->ss1.width = w - 1;
+ ss->ss1.height = h - 1;
+ /* Media surface tile_mode encoding: 0 = linear, 2 = X-major, 3 = Y-major. */
+ if (tiling == GPGPU_NO_TILE) {
+ ss->ss2.tile_mode = 0;
+ }
+ else if (tiling == GPGPU_TILE_X){
+ ss->ss2.tile_mode = 2;
+ }
+ else if (tiling == GPGPU_TILE_Y){
+ ss->ss2.tile_mode = 3;
+ }
+ ss->ss2.half_pitch_for_chroma = 0;
+ ss->ss2.surface_pitch = pitch - 1;
+ ss->ss2.surface_object_control_state = cl_gpgpu_get_cache_ctrl();
+ ss->ss2.interleave_chroma = 0;
+ ss->ss2.surface_format = 12; //Y8_UNORM
+ ss->ss3.y_offset_for_u = 0;
+ ss->ss3.x_offset_for_u = 0;
+ ss->ss4.y_offset_for_v = 0;
+ ss->ss4.x_offset_for_v = 0;
+
+ intel_gpgpu_set_buf_reloc_for_vme_gen7(gpgpu, index, obj_bo, obj_bo_offset);
+
+ assert(index < GEN_MAX_SURFACES);
+}
+
+
+/* Gen7.5 (Haswell) image bind: identical to the gen7 version except it
+ * additionally programs the HSW shader channel selects to an identity
+ * RGBA swizzle. */
+static void
+intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
+ uint32_t index,
+ dri_bo* obj_bo,
+ uint32_t obj_bo_offset,
+ uint32_t format,
+ cl_mem_object_type type,
+ uint32_t bpp,
+ int32_t w,
+ int32_t h,
+ int32_t depth,
+ int32_t pitch,
+ int32_t slice_pitch,
+ int32_t tiling)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen7_surface_state_t *ss = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
+ ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
+ if (intel_is_surface_array(type)) {
+ ss->ss0.surface_array = 1;
+ ss->ss0.surface_array_spacing = 1;
+ }
+ ss->ss0.surface_format = format;
+ ss->ss1.base_addr = obj_bo->offset + obj_bo_offset;
+ ss->ss2.width = w - 1;
+ ss->ss2.height = h - 1;
+ ss->ss3.depth = depth - 1;
+ ss->ss4.not_str_buf.rt_view_extent = depth - 1;
+ ss->ss4.not_str_buf.min_array_element = 0;
+ ss->ss3.pitch = pitch - 1;
+ ss->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
+ /* Identity channel swizzle (HSW-only fields). */
+ ss->ss7.shader_r = I965_SURCHAN_SELECT_RED;
+ ss->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
+ ss->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
+ ss->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
+ if (tiling == GPGPU_TILE_X) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+ } else if (tiling == GPGPU_TILE_Y) {
+ ss->ss0.tiled_surface = 1;
+ ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+ }
+ ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
+
+ assert(index < GEN_MAX_SURFACES);
+}
+
+/* Gen8 (Broadwell) image bind. Uses the gen8 surface-state layout:
+ * qpitch for arrays, tile_mode enum instead of tiled_surface/tile_walk,
+ * 48-bit base address split across ss8/ss9, and cache control in
+ * ss1.mem_obj_ctrl_state. The binding-table entry and relocation are
+ * emitted inline (no gen7 reloc helper). `surface_state_sz` is defined
+ * elsewhere in this file — presumably sizeof(gen8_surface_state_t) here;
+ * TODO confirm. */
+static void
+intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
+ uint32_t index,
+ dri_bo* obj_bo,
+ uint32_t obj_bo_offset,
+ uint32_t format,
+ cl_mem_object_type type,
+ uint32_t bpp,
+ int32_t w,
+ int32_t h,
+ int32_t depth,
+ int32_t pitch,
+ int32_t slice_pitch,
+ int32_t tiling)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen8_surface_state_t *ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
+ ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
+ ss->ss0.surface_format = format;
+ if (intel_is_surface_array(type)) {
+ ss->ss0.surface_array = 1;
+ ss->ss1.surface_qpitch = (h + 3)/4;
+ }
+ ss->ss0.horizontal_alignment = 1;
+ ss->ss0.vertical_alignment = 1;
+
+ if (tiling == GPGPU_TILE_X) {
+ ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR;
+ } else if (tiling == GPGPU_TILE_Y) {
+ ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR;
+ } else
+ assert(tiling == GPGPU_NO_TILE);// W mode is not supported now.
+
+ ss->ss2.width = w - 1;
+ ss->ss2.height = h - 1;
+ ss->ss3.depth = depth - 1;
+
+ ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
+ ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
+
+ ss->ss4.render_target_view_ext = depth - 1;
+ ss->ss4.min_array_elt = 0;
+ ss->ss3.surface_pitch = pitch - 1;
+
+ ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
+ /* Identity channel swizzle. */
+ ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
+ ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
+ ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
+ ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
+ ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+ index * surface_state_sz;
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ obj_bo_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen8_surface_state_t, ss8),
+ obj_bo);
+
+ assert(index < GEN_MAX_SURFACES);
+}
+
+/* Gen9 (Skylake) image bind. Same layout as gen8 but the qpitch
+ * computation differs per surface type: 1D arrays derive it from
+ * slice_pitch/bpp, 2D arrays and 3D surfaces from slice_pitch/pitch
+ * (number of rows per slice), each rounded up to a multiple of 4. */
+static void
+intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu,
+ uint32_t index,
+ dri_bo* obj_bo,
+ uint32_t obj_bo_offset,
+ uint32_t format,
+ cl_mem_object_type type,
+ uint32_t bpp,
+ int32_t w,
+ int32_t h,
+ int32_t depth,
+ int32_t pitch,
+ int32_t slice_pitch,
+ int32_t tiling)
+{
+ surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+ gen8_surface_state_t *ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
+ ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
+ ss->ss0.surface_format = format;
+ if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_1D) {
+ ss->ss0.surface_array = 1;
+ ss->ss1.surface_qpitch = (slice_pitch/bpp + 3)/4; //align_h
+ }
+
+ if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_2D) {
+ ss->ss0.surface_array = 1;
+ ss->ss1.surface_qpitch = (slice_pitch/pitch + 3)/4;
+ }
+
+ if(ss->ss0.surface_type == I965_SURFACE_3D)
+ ss->ss1.surface_qpitch = (slice_pitch/pitch + 3)/4;
+
+ ss->ss0.horizontal_alignment = 1;
+ ss->ss0.vertical_alignment = 1;
+
+ if (tiling == GPGPU_TILE_X) {
+ ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR;
+ } else if (tiling == GPGPU_TILE_Y) {
+ ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR;
+ } else
+ assert(tiling == GPGPU_NO_TILE);// W mode is not supported now.
+
+ ss->ss2.width = w - 1;
+ ss->ss2.height = h - 1;
+ ss->ss3.depth = depth - 1;
+
+ ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
+ ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
+
+ ss->ss4.render_target_view_ext = depth - 1;
+ ss->ss4.min_array_elt = 0;
+ ss->ss3.surface_pitch = pitch - 1;
+
+ ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
+ /* Identity channel swizzle. */
+ ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
+ ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
+ ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
+ ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
+ ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+ index * surface_state_sz;
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ obj_bo_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen8_surface_state_t, ss8),
+ obj_bo);
+
+ assert(index < GEN_MAX_SURFACES);
+}
+
+/* Bind a raw buffer at binding-table index `bti` and, when `offset` is
+ * valid (not -1, i.e. the kernel actually takes the pointer in its curbe),
+ * record it in the binded_* arrays so upload_curbes can patch the
+ * pointer value and emit the relocations later. Note `offset` is
+ * uint32_t, so the -1 sentinel compares as 0xffffffff. */
+static void
+intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset,
+ uint32_t internal_offset, size_t size, uint8_t bti)
+{
+ assert(gpgpu->binded_n < max_buf_n);
+ if(offset != -1) {
+ gpgpu->binded_buf[gpgpu->binded_n] = buf;
+ gpgpu->target_buf_offset[gpgpu->binded_n] = internal_offset;
+ gpgpu->binded_offset[gpgpu->binded_n] = offset;
+ gpgpu->binded_n++;
+ }
+ intel_gpgpu_setup_bti(gpgpu, buf, internal_offset, size, bti, I965_SURFACEFORMAT_RAW);
+}
+
+/* Ensure the scratch BO can hold per_thread_size bytes for every hardware
+ * thread. On HSW/CHV/BXT the allocation is doubled because EU indices are
+ * not contiguous (per bspec). An existing BO is reused when large enough,
+ * otherwise dropped and reallocated. Returns 0 on success, -1 on
+ * allocation failure. */
+static int
+intel_gpgpu_set_scratch(intel_gpgpu_t * gpgpu, uint32_t per_thread_size)
+{
+ drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
+ drm_intel_bo* old = gpgpu->scratch_b.bo;
+ uint32_t total = per_thread_size * gpgpu->max_threads;
+ /* Per Bspec, scratch should 2X the desired size when EU index is not continuous */
+ if (IS_HASWELL(gpgpu->drv->device_id) || IS_CHERRYVIEW(gpgpu->drv->device_id) ||
+ PCI_CHIP_BROXTON_1 == gpgpu->drv->device_id || PCI_CHIP_BROXTON_3 == gpgpu->drv->device_id)
+ total *= 2;
+
+ gpgpu->per_thread_scratch = per_thread_size;
+
+ if(old && old->size < total) {
+ /* Too small: release it; scratch_b.bo still holds the stale pointer
+ * until the realloc below replaces it. */
+ drm_intel_bo_unreference(old);
+ old = NULL;
+ }
+
+ if(!old && total) {
+ gpgpu->scratch_b.bo = drm_intel_bo_alloc(bufmgr, "SCRATCH_BO", total, 4096);
+ if (gpgpu->scratch_b.bo == NULL)
+ return -1;
+ }
+ return 0;
+}
+/* Allocate the software stack BO and bind it as a raw buffer at `bti`,
+ * recording its curbe offset via the generic bind-buf entry point.
+ * NOTE(review): the allocation result is not checked here; a failed
+ * alloc would pass NULL into cl_gpgpu_bind_buf. */
+static void
+intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint8_t bti)
+{
+ drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
+ gpgpu->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64);
+
+ cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)gpgpu->stack_b.bo, offset, 0, size, bti);
+}
+
+/* Write the gen7 interface descriptor (IDRT entry) for `kernel` into the
+ * aux buffer: kernel start pointer, sampler-state pointer, curbe read
+ * length, and SLM/barrier configuration. Two relocations are emitted —
+ * one for the kernel BO (desc0) and one for the sampler state living in
+ * the aux buffer itself (desc2). */
+static void
+intel_gpgpu_build_idrt_gen7(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
+{
+ gen6_interface_descriptor_t *desc;
+ drm_intel_bo *ker_bo = NULL;
+
+ desc = (gen6_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset);
+
+ memset(desc, 0, sizeof(*desc));
+ ker_bo = (drm_intel_bo *) kernel->bo;
+ desc->desc0.kernel_start_pointer = ker_bo->offset >> 6; /* reloc */
+ desc->desc1.single_program_flow = 0;
+ desc->desc1.floating_point_mode = 0; /* use IEEE-754 rule */
+ desc->desc5.rounding_mode = 0; /* round to nearest even */
+
+ /* Sampler state pointer is stored in 32-byte units, hence the alignment check. */
+ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0);
+ desc->desc2.sampler_state_pointer = (gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) >> 5;
+ desc->desc3.binding_table_entry_count = 0; /* no prefetch */
+ desc->desc3.binding_table_pointer = 0;
+ desc->desc4.curbe_read_len = kernel->curbe_sz / 32;
+ desc->desc4.curbe_read_offset = 0;
+
+ /* Barriers / SLM are automatically handled on Gen7+ */
+ if (gpgpu->drv->gen_ver == 7 || gpgpu->drv->gen_ver == 75) {
+ /* SLM is granted in power-of-two KB chunks (4K..64K); the field
+ * holds the size in 4KB units. */
+ size_t slm_sz = kernel->slm_sz;
+ desc->desc5.group_threads_num = kernel->use_slm ? kernel->thread_n : 0;
+ desc->desc5.barrier_enable = kernel->use_slm;
+ if (slm_sz <= 4*KB)
+ slm_sz = 4*KB;
+ else if (slm_sz <= 8*KB)
+ slm_sz = 8*KB;
+ else if (slm_sz <= 16*KB)
+ slm_sz = 16*KB;
+ else if (slm_sz <= 32*KB)
+ slm_sz = 32*KB;
+ else
+ slm_sz = 64*KB;
+ slm_sz = slm_sz >> 12;
+ desc->desc5.slm_sz = slm_sz;
+ }
+ else
+ desc->desc5.group_threads_num = kernel->barrierID; /* BarrierID on GEN6 */
+
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ gpgpu->aux_offset.idrt_offset + offsetof(gen6_interface_descriptor_t, desc0),
+ ker_bo);
+
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ gpgpu->aux_offset.sampler_state_offset,
+ gpgpu->aux_offset.idrt_offset + offsetof(gen6_interface_descriptor_t, desc2),
+ gpgpu->aux_buf.bo);
+}
+
+/* Gen8 interface descriptor. Unlike gen7, the kernel start pointer and
+ * sampler-state pointer are offsets relative to the instruction /
+ * dynamic base addresses, so no relocations are emitted here. The SLM
+ * field still holds the size in 4KB units (bytes >> 12). */
+static void
+intel_gpgpu_build_idrt_gen8(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
+{
+ gen8_interface_descriptor_t *desc;
+
+ desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset);
+
+ memset(desc, 0, sizeof(*desc));
+ desc->desc0.kernel_start_pointer = 0; /* reloc */
+ desc->desc2.single_program_flow = 0;
+ desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */
+ desc->desc6.rounding_mode = 0; /* round to nearest even */
+
+ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0);
+ desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5;
+ desc->desc4.binding_table_entry_count = 0; /* no prefetch */
+ desc->desc4.binding_table_pointer = 0;
+ desc->desc5.curbe_read_len = kernel->curbe_sz / 32;
+ desc->desc5.curbe_read_offset = 0;
+
+ /* Barriers / SLM are automatically handled on Gen7+ */
+ size_t slm_sz = kernel->slm_sz;
+ /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */
+ desc->desc6.group_threads_num = kernel->thread_n;
+ desc->desc6.barrier_enable = kernel->use_slm;
+ /* NOTE(review): the `slm_sz == 0` branch is a no-op kept for symmetry
+ * with the non-zero rounding chain below. */
+ if (slm_sz == 0)
+ slm_sz = 0;
+ else if (slm_sz <= 4*KB)
+ slm_sz = 4*KB;
+ else if (slm_sz <= 8*KB)
+ slm_sz = 8*KB;
+ else if (slm_sz <= 16*KB)
+ slm_sz = 16*KB;
+ else if (slm_sz <= 32*KB)
+ slm_sz = 32*KB;
+ else
+ slm_sz = 64*KB;
+ slm_sz = slm_sz >> 12;
+ desc->desc6.slm_sz = slm_sz;
+}
+
+/* Gen9 interface descriptor. Same as gen8 except the SLM field uses an
+ * encoded value (1..7 mapping to 1KB..64KB in powers of two) rather than
+ * the raw size-in-4KB-units encoding. */
+static void
+intel_gpgpu_build_idrt_gen9(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
+{
+ gen8_interface_descriptor_t *desc;
+
+ desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset);
+
+ memset(desc, 0, sizeof(*desc));
+ desc->desc0.kernel_start_pointer = 0; /* reloc */
+ desc->desc2.single_program_flow = 0;
+ desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */
+ desc->desc6.rounding_mode = 0; /* round to nearest even */
+
+ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0);
+ desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5;
+ desc->desc4.binding_table_entry_count = 0; /* no prefetch */
+ desc->desc4.binding_table_pointer = 0;
+ desc->desc5.curbe_read_len = kernel->curbe_sz / 32;
+ desc->desc5.curbe_read_offset = 0;
+
+ /* Barriers / SLM are automatically handled on Gen7+ */
+ size_t slm_sz = kernel->slm_sz;
+ /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */
+ desc->desc6.group_threads_num = kernel->thread_n;
+ desc->desc6.barrier_enable = kernel->use_slm;
+ /* Encoded SLM size: 0 = none, 1 = 1KB, 2 = 2KB, ... 7 = 64KB. */
+ if (slm_sz == 0)
+ slm_sz = 0;
+ else if (slm_sz <= 1*KB)
+ slm_sz = 1;
+ else if (slm_sz <= 2*KB)
+ slm_sz = 2;
+ else if (slm_sz <= 4*KB)
+ slm_sz = 3;
+ else if (slm_sz <= 8*KB)
+ slm_sz = 4;
+ else if (slm_sz <= 16*KB)
+ slm_sz = 5;
+ else if (slm_sz <= 32*KB)
+ slm_sz = 6;
+ else
+ slm_sz = 7;
+ desc->desc6.slm_sz = slm_sz;
+}
+
+/* Copy the curbe payload into the aux buffer, then patch every bound
+ * buffer's GPU address into each thread's copy of the curbe and emit a
+ * relocation per (thread, buffer) pair so the kernel fixes the addresses
+ * at exec time. Gen7 writes 32-bit pointers (contrast with the gen8
+ * variant, which writes size_t-wide ones). Returns 0 on success, -1 if
+ * the aux buffer cannot be mapped. */
+static int
+intel_gpgpu_upload_curbes_gen7(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
+{
+ unsigned char *curbe = NULL;
+ cl_gpgpu_kernel *k = gpgpu->ker;
+ uint32_t i, j;
+
+ /* Upload the data first */
+ if (dri_bo_map(gpgpu->aux_buf.bo, 1) != 0) {
+ fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
+ return -1;
+ }
+ assert(gpgpu->aux_buf.bo->virtual);
+ curbe = (unsigned char *) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.curbe_offset);
+ memcpy(curbe, data, size);
+
+ /* Now put all the relocations for our flat address space */
+ for (i = 0; i < k->thread_n; ++i)
+ for (j = 0; j < gpgpu->binded_n; ++j) {
+ *(uint32_t *)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset64 + gpgpu->target_buf_offset[j];
+ drm_intel_bo_emit_reloc(gpgpu->aux_buf.bo,
+ gpgpu->aux_offset.curbe_offset + gpgpu->binded_offset[j]+i*k->curbe_sz,
+ gpgpu->binded_buf[j],
+ gpgpu->target_buf_offset[j],
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
+ }
+ dri_bo_unmap(gpgpu->aux_buf.bo);
+ return 0;
+}
+
+/* Gen8+ curbe upload: identical to the gen7 version except pointer slots
+ * in the curbe are written as size_t (64-bit on LP64) to carry the full
+ * 48-bit PPGTT address. Returns 0 on success, -1 if the aux buffer
+ * cannot be mapped. */
+static int
+intel_gpgpu_upload_curbes_gen8(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
+{
+ unsigned char *curbe = NULL;
+ cl_gpgpu_kernel *k = gpgpu->ker;
+ uint32_t i, j;
+
+ /* Upload the data first */
+ if (dri_bo_map(gpgpu->aux_buf.bo, 1) != 0) {
+ fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
+ return -1;
+ }
+ assert(gpgpu->aux_buf.bo->virtual);
+ curbe = (unsigned char *) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.curbe_offset);
+ memcpy(curbe, data, size);
+
+ /* Now put all the relocations for our flat address space */
+ for (i = 0; i < k->thread_n; ++i)
+ for (j = 0; j < gpgpu->binded_n; ++j) {
+ *(size_t *)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset64 + gpgpu->target_buf_offset[j];
+ drm_intel_bo_emit_reloc(gpgpu->aux_buf.bo,
+ gpgpu->aux_offset.curbe_offset + gpgpu->binded_offset[j]+i*k->curbe_sz,
+ gpgpu->binded_buf[j],
+ gpgpu->target_buf_offset[j],
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
+ }
+ dri_bo_unmap(gpgpu->aux_buf.bo);
+ return 0;
+}
+
+/* Copy `n` prebuilt gen6 sampler states into the sampler-state region of
+ * the aux buffer. Assumes the aux BO is currently mapped (virtual valid). */
+static void
+intel_gpgpu_upload_samplers(intel_gpgpu_t *gpgpu, const void *data, uint32_t n)
+{
+ if (n) {
+ const size_t sz = n * sizeof(gen6_sampler_state_t);
+ memcpy(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset, data, sz);
+ }
+}
+
+/* Translate an OpenCL sampler addressing mode (CLK_ADDRESS_*) to the
+ * hardware GEN_TEXCOORDMODE_* value. Unknown modes fall back to WRAP.
+ * NOTE(review): `using_nearest` is currently unused here — presumably
+ * kept for parity with the mesa i965 helper this mirrors. */
+int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest)
+{
+ switch( cl_address_mode ) {
+ case CLK_ADDRESS_NONE:
+ case CLK_ADDRESS_REPEAT:
+ return GEN_TEXCOORDMODE_WRAP;
+ case CLK_ADDRESS_CLAMP:
+ return GEN_TEXCOORDMODE_CLAMP_BORDER;
+ case CLK_ADDRESS_CLAMP_TO_EDGE:
+ return GEN_TEXCOORDMODE_CLAMP;
+ case CLK_ADDRESS_MIRRORED_REPEAT:
+ return GEN_TEXCOORDMODE_MIRROR;
+ default:
+ return GEN_TEXCOORDMODE_WRAP;
+ }
+}
+
+/* Program the gen7 VME state at slot `index` in the sampler-state area
+ * of the aux buffer. The bulk of the state is the motion-estimation
+ * search path: a table of (x,y) step deltas selected by the
+ * accelerator's search_path_type. RADIUS_2_2 uses a null path,
+ * RADIUS_4_4 a small diamond, and RADIUS_16_12 fills twelve dwords
+ * (sp[0..11]) with a raster-like walk. All other state is zeroed. */
+static void intel_gpgpu_insert_vme_state_gen7(intel_gpgpu_t *gpgpu, cl_accelerator_intel accel, uint32_t index)
+{
+ gen7_vme_state_t* vme = (gen7_vme_state_t*)(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset) + index;
+ memset(vme, 0, sizeof(*vme));
+ gen7_vme_search_path_state_t* sp = vme->sp;
+
+ if(accel->desc.me.search_path_type == CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL){
+ /* 2x2 radius: no search steps beyond the origin. */
+ sp[0].dw0.SPD_0_X = 0;
+ sp[0].dw0.SPD_0_Y = 0;
+ sp[0].dw0.SPD_1_X = 0;
+ sp[0].dw0.SPD_1_Y = 0;
+ sp[0].dw0.SPD_2_X = 0;
+ sp[0].dw0.SPD_2_Y = 0;
+ sp[0].dw0.SPD_3_X = 0;
+ sp[0].dw0.SPD_3_Y = 0;
+ }
+ else if(accel->desc.me.search_path_type == CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL){
+ /* 4x4 radius: right, down, left — a short diamond walk. */
+ sp[0].dw0.SPD_0_X = 1;
+ sp[0].dw0.SPD_0_Y = 0;
+ sp[0].dw0.SPD_1_X = 0;
+ sp[0].dw0.SPD_1_Y = 1;
+ sp[0].dw0.SPD_2_X = -1;
+ sp[0].dw0.SPD_2_Y = 0;
+ sp[0].dw0.SPD_3_X = 0;
+ sp[0].dw0.SPD_3_Y = 0;
+ }
+ else if(accel->desc.me.search_path_type == CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL){
+ /* 16x12 radius: serpentine walk — runs of +x steps, a +y step,
+ * runs of -x steps, repeated across sp[0..11]. */
+ sp[0].dw0.SPD_0_X = 1;
+ sp[0].dw0.SPD_0_Y = 0;
+ sp[0].dw0.SPD_1_X = 1;
+ sp[0].dw0.SPD_1_Y = 0;
+ sp[0].dw0.SPD_2_X = 1;
+ sp[0].dw0.SPD_2_Y = 0;
+ sp[0].dw0.SPD_3_X = 1;
+ sp[0].dw0.SPD_3_Y = 0;
+
+ sp[1].dw0.SPD_0_X = 1;
+ sp[1].dw0.SPD_0_Y = 0;
+ sp[1].dw0.SPD_1_X = 1;
+ sp[1].dw0.SPD_1_Y = 0;
+ sp[1].dw0.SPD_2_X = 1;
+ sp[1].dw0.SPD_2_Y = 0;
+ sp[1].dw0.SPD_3_X = 0;
+ sp[1].dw0.SPD_3_Y = 1;
+
+ sp[2].dw0.SPD_0_X = -1;
+ sp[2].dw0.SPD_0_Y = 0;
+ sp[2].dw0.SPD_1_X = -1;
+ sp[2].dw0.SPD_1_Y = 0;
+ sp[2].dw0.SPD_2_X = -1;
+ sp[2].dw0.SPD_2_Y = 0;
+ sp[2].dw0.SPD_3_X = -1;
+ sp[2].dw0.SPD_3_Y = 0;
+
+ sp[3].dw0.SPD_0_X = -1;
+ sp[3].dw0.SPD_0_Y = 0;
+ sp[3].dw0.SPD_1_X = -1;
+ sp[3].dw0.SPD_1_Y = 0;
+ sp[3].dw0.SPD_2_X = -1;
+ sp[3].dw0.SPD_2_Y = 0;
+ sp[3].dw0.SPD_3_X = 0;
+ sp[3].dw0.SPD_3_Y = 1;
+
+ sp[4].dw0.SPD_0_X = 1;
+ sp[4].dw0.SPD_0_Y = 0;
+ sp[4].dw0.SPD_1_X = 1;
+ sp[4].dw0.SPD_1_Y = 0;
+ sp[4].dw0.SPD_2_X = 1;
+ sp[4].dw0.SPD_2_Y = 0;
+ sp[4].dw0.SPD_3_X = 1;
+ sp[4].dw0.SPD_3_Y = 0;
+
+ sp[5].dw0.SPD_0_X = 1;
+ sp[5].dw0.SPD_0_Y = 0;
+ sp[5].dw0.SPD_1_X = 1;
+ sp[5].dw0.SPD_1_Y = 0;
+ sp[5].dw0.SPD_2_X = 1;
+ sp[5].dw0.SPD_2_Y = 0;
+ sp[5].dw0.SPD_3_X = 0;
+ sp[5].dw0.SPD_3_Y = 1;
+
+ sp[6].dw0.SPD_0_X = -1;
+ sp[6].dw0.SPD_0_Y = 0;
+ sp[6].dw0.SPD_1_X = -1;
+ sp[6].dw0.SPD_1_Y = 0;
+ sp[6].dw0.SPD_2_X = -1;
+ sp[6].dw0.SPD_2_Y = 0;
+ sp[6].dw0.SPD_3_X = -1;
+ sp[6].dw0.SPD_3_Y = 0;
+
+ sp[7].dw0.SPD_0_X = -1;
+ sp[7].dw0.SPD_0_Y = 0;
+ sp[7].dw0.SPD_1_X = -1;
+ sp[7].dw0.SPD_1_Y = 0;
+ sp[7].dw0.SPD_2_X = -1;
+ sp[7].dw0.SPD_2_Y = 0;
+ sp[7].dw0.SPD_3_X = 0;
+ sp[7].dw0.SPD_3_Y = 1;
+
+ sp[8].dw0.SPD_0_X = 1;
+ sp[8].dw0.SPD_0_Y = 0;
+ sp[8].dw0.SPD_1_X = 1;
+ sp[8].dw0.SPD_1_Y = 0;
+ sp[8].dw0.SPD_2_X = 1;
+ sp[8].dw0.SPD_2_Y = 0;
+ sp[8].dw0.SPD_3_X = 1;
+ sp[8].dw0.SPD_3_Y = 0;
+
+ sp[9].dw0.SPD_0_X = 1;
+ sp[9].dw0.SPD_0_Y = 0;
+ sp[9].dw0.SPD_1_X = 1;
+ sp[9].dw0.SPD_1_Y = 0;
+ sp[9].dw0.SPD_2_X = 1;
+ sp[9].dw0.SPD_2_Y = 0;
+ sp[9].dw0.SPD_3_X = 0;
+ sp[9].dw0.SPD_3_Y = 1;
+
+ sp[10].dw0.SPD_0_X = -1;
+ sp[10].dw0.SPD_0_Y = 0;
+ sp[10].dw0.SPD_1_X = -1;
+ sp[10].dw0.SPD_1_Y = 0;
+ sp[10].dw0.SPD_2_X = -1;
+ sp[10].dw0.SPD_2_Y = 0;
+ sp[10].dw0.SPD_3_X = -1;
+ sp[10].dw0.SPD_3_Y = 0;
+
+ sp[11].dw0.SPD_0_X = -1;
+ sp[11].dw0.SPD_0_Y = 0;
+ sp[11].dw0.SPD_1_X = -1;
+ sp[11].dw0.SPD_1_Y = 0;
+ sp[11].dw0.SPD_2_X = -1;
+ sp[11].dw0.SPD_2_Y = 0;
+ sp[11].dw0.SPD_3_X = 0;
+ sp[11].dw0.SPD_3_Y = 0;
+ }
+}
+
+static void
+intel_gpgpu_bind_vme_state_gen7(intel_gpgpu_t *gpgpu, cl_accelerator_intel accel)
+{
+ intel_gpgpu_insert_vme_state_gen7(gpgpu, accel, 0);
+}
+
+static void
+intel_gpgpu_insert_sampler_gen7(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler)
+{
+ int using_nearest = 0;
+ uint32_t wrap_mode;
+ gen7_sampler_state_t *sampler;
+
+ sampler = (gen7_sampler_state_t *)(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset) + index;
+ memset(sampler, 0, sizeof(*sampler));
+ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) % 32 == 0);
+ sampler->ss2.default_color_pointer = (gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) >> 5;
+ if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE)
+ sampler->ss3.non_normalized_coord = 1;
+ else
+ sampler->ss3.non_normalized_coord = 0;
+
+ switch (clk_sampler & __CLK_FILTER_MASK) {
+ case CLK_FILTER_NEAREST:
+ sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST;
+ sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
+ sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST;
+ using_nearest = 1;
+ break;
+ case CLK_FILTER_LINEAR:
+ sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR;
+ sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
+ sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR;
+ break;
+ }
+
+ wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest);
+ sampler->ss3.s_wrap_mode = wrap_mode;
+ /* XXX mesa i965 driver code point out that if the surface is a 1D surface, we may need
+ * to set t_wrap_mode to GEN_TEXCOORDMODE_WRAP. */
+ sampler->ss3.t_wrap_mode = wrap_mode;
+ sampler->ss3.r_wrap_mode = wrap_mode;
+
+ sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+ sampler->ss0.base_level = 0;
+
+ sampler->ss1.max_lod = 0;
+ sampler->ss1.min_lod = 0;
+
+ if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST)
+ sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN |
+ GEN_ADDRESS_ROUNDING_ENABLE_V_MIN |
+ GEN_ADDRESS_ROUNDING_ENABLE_R_MIN;
+ if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST)
+ sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG |
+ GEN_ADDRESS_ROUNDING_ENABLE_V_MAG |
+ GEN_ADDRESS_ROUNDING_ENABLE_R_MAG;
+
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ gpgpu->aux_offset.sampler_border_color_state_offset,
+ gpgpu->aux_offset.sampler_state_offset +
+ index * sizeof(gen7_sampler_state_t) +
+ offsetof(gen7_sampler_state_t, ss2),
+ gpgpu->aux_buf.bo);
+
+}
+
+
+static void
+intel_gpgpu_insert_sampler_gen8(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler)
+{
+ int using_nearest = 0;
+ uint32_t wrap_mode;
+ gen8_sampler_state_t *sampler;
+
+ sampler = (gen8_sampler_state_t *)(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset) + index;
+ memset(sampler, 0, sizeof(*sampler));
+ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) % 32 == 0);
+ if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE)
+ sampler->ss3.non_normalized_coord = 1;
+ else
+ sampler->ss3.non_normalized_coord = 0;
+
+ switch (clk_sampler & __CLK_FILTER_MASK) {
+ case CLK_FILTER_NEAREST:
+ sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST;
+ sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
+ sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST;
+ using_nearest = 1;
+ break;
+ case CLK_FILTER_LINEAR:
+ sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR;
+ sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
+ sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR;
+ break;
+ }
+
+ wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest);
+ sampler->ss3.s_wrap_mode = wrap_mode;
+ /* XXX mesa i965 driver code point out that if the surface is a 1D surface, we may need
+ * to set t_wrap_mode to GEN_TEXCOORDMODE_WRAP. */
+ sampler->ss3.t_wrap_mode = wrap_mode;
+ sampler->ss3.r_wrap_mode = wrap_mode;
+
+ sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+ sampler->ss0.base_level = 0;
+
+ sampler->ss1.max_lod = 0;
+ sampler->ss1.min_lod = 0;
+
+ if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST)
+ sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN |
+ GEN_ADDRESS_ROUNDING_ENABLE_V_MIN |
+ GEN_ADDRESS_ROUNDING_ENABLE_R_MIN;
+ if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST)
+ sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG |
+ GEN_ADDRESS_ROUNDING_ENABLE_V_MAG |
+ GEN_ADDRESS_ROUNDING_ENABLE_R_MAG;
+}
+
+static void
+intel_gpgpu_bind_sampler_gen7(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz)
+{
+ int index;
+ assert(sampler_sz <= GEN_MAX_SAMPLERS);
+ for(index = 0; index < sampler_sz; index++)
+ intel_gpgpu_insert_sampler_gen7(gpgpu, index, samplers[index]);
+}
+
+static void
+intel_gpgpu_bind_sampler_gen8(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz)
+{
+ int index;
+ assert(sampler_sz <= GEN_MAX_SAMPLERS);
+ for(index = 0; index < sampler_sz; index++)
+ intel_gpgpu_insert_sampler_gen8(gpgpu, index, samplers[index]);
+}
+
+static void
+intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
+{
+ gpgpu->ker = kernel;
+ if (gpgpu->drv->null_bo)
+ intel_gpgpu_setup_bti(gpgpu, gpgpu->drv->null_bo, 0, 64*1024, 0xfe, I965_SURFACEFORMAT_RAW);
+
+ intel_gpgpu_build_idrt(gpgpu, kernel);
+ dri_bo_unmap(gpgpu->aux_buf.bo);
+}
+
+static void
+intel_gpgpu_set_perf_counters(intel_gpgpu_t *gpgpu, cl_buffer *perf)
+{
+ if (gpgpu->perf_b.bo)
+ drm_intel_bo_unreference(gpgpu->perf_b.bo);
+ drm_intel_bo_reference((drm_intel_bo*) perf);
+ gpgpu->perf_b.bo = (drm_intel_bo*) perf;
+}
+
+static void
+intel_gpgpu_walker_gen7(intel_gpgpu_t *gpgpu,
+ uint32_t simd_sz,
+ uint32_t thread_n,
+ const size_t global_wk_off[3],
+ const size_t global_dim_off[3],
+ const size_t global_wk_sz[3],
+ const size_t local_wk_sz[3])
+{
+ const uint32_t global_wk_dim[3] = {
+ global_wk_sz[0] / local_wk_sz[0],
+ global_wk_sz[1] / local_wk_sz[1],
+ global_wk_sz[2] / local_wk_sz[2]
+ };
+ uint32_t right_mask = ~0x0;
+ size_t group_sz = local_wk_sz[0] * local_wk_sz[1] * local_wk_sz[2];
+
+ assert(simd_sz == 8 || simd_sz == 16);
+
+ uint32_t shift = (group_sz & (simd_sz - 1));
+ shift = (shift == 0) ? simd_sz : shift;
+ right_mask = (1 << shift) - 1;
+
+ BEGIN_BATCH(gpgpu->batch, 11);
+ OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 9);
+ OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
+ assert(thread_n <= 64);
+ if (simd_sz == 16)
+ OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
+ else
+ OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[0]);
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[1]);
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[2]);
+ OUT_BATCH(gpgpu->batch, right_mask);
+ OUT_BATCH(gpgpu->batch, ~0x0); /* we always set height as 1, so set bottom mask as all 1*/
+ ADVANCE_BATCH(gpgpu->batch);
+
+ BEGIN_BATCH(gpgpu->batch, 2);
+ OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0);
+ OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
+ ADVANCE_BATCH(gpgpu->batch);
+
+ if (IS_IVYBRIDGE(gpgpu->drv->device_id))
+ intel_gpgpu_pipe_control(gpgpu);
+}
+
+static void
+intel_gpgpu_walker_gen8(intel_gpgpu_t *gpgpu,
+ uint32_t simd_sz,
+ uint32_t thread_n,
+ const size_t global_wk_off[3],
+ const size_t global_dim_off[3],
+ const size_t global_wk_sz[3],
+ const size_t local_wk_sz[3])
+{
+ const uint32_t global_wk_dim[3] = {
+ global_wk_sz[0] / local_wk_sz[0],
+ global_wk_sz[1] / local_wk_sz[1],
+ global_wk_sz[2] / local_wk_sz[2]
+ };
+ uint32_t right_mask = ~0x0;
+ size_t group_sz = local_wk_sz[0] * local_wk_sz[1] * local_wk_sz[2];
+
+ assert(simd_sz == 8 || simd_sz == 16);
+
+ uint32_t shift = (group_sz & (simd_sz - 1));
+ shift = (shift == 0) ? simd_sz : shift;
+ right_mask = (1 << shift) - 1;
+
+ BEGIN_BATCH(gpgpu->batch, 15);
+ OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 13);
+ OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
+ OUT_BATCH(gpgpu->batch, 0); /* Indirect Data Length */
+ OUT_BATCH(gpgpu->batch, 0); /* Indirect Data Start Address */
+ assert(thread_n <= 64);
+ if (simd_sz == 16)
+ OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
+ else
+ OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
+ OUT_BATCH(gpgpu->batch, global_dim_off[0]);
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[0]+global_dim_off[0]);
+ OUT_BATCH(gpgpu->batch, global_dim_off[1]);
+ OUT_BATCH(gpgpu->batch, 0);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[1]+global_dim_off[1]);
+ OUT_BATCH(gpgpu->batch, global_dim_off[2]);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[2]+global_dim_off[2]);
+ OUT_BATCH(gpgpu->batch, right_mask);
+ OUT_BATCH(gpgpu->batch, ~0x0); /* we always set height as 1, so set bottom mask as all 1*/
+ ADVANCE_BATCH(gpgpu->batch);
+
+ BEGIN_BATCH(gpgpu->batch, 2);
+ OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0);
+ OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
+ ADVANCE_BATCH(gpgpu->batch);
+
+ intel_gpgpu_pipe_control(gpgpu);
+}
+
+static intel_event_t*
+intel_gpgpu_event_new(intel_gpgpu_t *gpgpu)
+{
+ intel_event_t *event = NULL;
+ TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t));
+
+ event->buffer = gpgpu->batch->buffer;
+ if (event->buffer)
+ drm_intel_bo_reference(event->buffer);
+ event->status = command_queued;
+
+ if(gpgpu->time_stamp_b.bo) {
+ event->ts_buf = gpgpu->time_stamp_b.bo;
+ drm_intel_bo_reference(event->ts_buf);
+ }
+
+exit:
+ return event;
+error:
+ cl_free(event);
+ event = NULL;
+ goto exit;
+}
+
+/*
+ The upper layer already flushed the batch buffer, just update
+ internal status to command_running.
+*/
+static void
+intel_gpgpu_event_flush(intel_event_t *event)
+{
+ assert(event->status == command_queued);
+ event->status = command_running;
+}
+
+static int
+intel_gpgpu_event_update_status(intel_event_t *event, int wait)
+{
+ if(event->status == command_complete)
+ return event->status;
+
+ if (event->buffer &&
+ event->status == command_running &&
+ !drm_intel_bo_busy(event->buffer)) {
+ event->status = command_complete;
+ drm_intel_bo_unreference(event->buffer);
+ event->buffer = NULL;
+ return event->status;
+ }
+
+ if(wait == 0)
+ return event->status;
+
+ if (event->buffer) {
+ drm_intel_bo_wait_rendering(event->buffer);
+ event->status = command_complete;
+ drm_intel_bo_unreference(event->buffer);
+ event->buffer = NULL;
+ }
+ return event->status;
+}
+
+static void
+intel_gpgpu_event_delete(intel_event_t *event)
+{
+ if(event->buffer)
+ drm_intel_bo_unreference(event->buffer);
+ if(event->ts_buf)
+ drm_intel_bo_unreference(event->ts_buf);
+ cl_free(event);
+}
+
+/* IVB and HSW's result MUST shift in x86_64 system */
+static uint64_t
+intel_gpgpu_read_ts_reg_gen7(drm_intel_bufmgr *bufmgr)
+{
+ uint64_t result = 0;
+ drm_intel_reg_read(bufmgr, TIMESTAMP_ADDR, &result);
+ /* In x86_64 system, the low 32bits of timestamp count are stored in the high 32 bits of
+ result which we get from drm_intel_reg_read, and bits 32-35 are lost; but it matches the bspec on
+ i386 systems. This seems to be a kernel readq bug. So shift by 32 bits on x86_64, and only keep
+ 32 bits of data on i386.
+ */
+ struct utsname buf;
+ uname(&buf);
+ /* In some systems, the user space is 32 bit, but kernel is 64 bit, so can't use the
+ * compiler's flag to determine the kernel's architecture, use uname to get it. */
+ /* x86_64 in linux, amd64 in bsd */
+ if(strcmp(buf.machine, "x86_64") == 0 || strcmp(buf.machine, "amd64") == 0)
+ return result >> 32;
+ else
+ return result & 0x0ffffffff;
+}
+
+/* baytrail's result should clear high 4 bits */
+static uint64_t
+intel_gpgpu_read_ts_reg_baytrail(drm_intel_bufmgr *bufmgr)
+{
+ uint64_t result = 0;
+ drm_intel_reg_read(bufmgr, TIMESTAMP_ADDR, &result);
+ return result & 0x0ffffffff;
+}
+
+/* We want to get the current time of GPU. */
+static void
+intel_gpgpu_event_get_gpu_cur_timestamp(intel_driver_t* gen_driver, uint64_t* ret_ts)
+{
+ uint64_t result = 0;
+ drm_intel_bufmgr *bufmgr = gen_driver->bufmgr;
+
+ /* Get the ts that match the bspec */
+ result = intel_gpgpu_read_ts_reg(bufmgr);
+ result *= 80;
+
+ *ret_ts = result;
+ return;
+}
+
+/* Get the GPU execute time. */
+static void
+intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, int index, uint64_t* ret_ts)
+{
+ uint64_t result = 0;
+ assert(gpgpu->time_stamp_b.bo);
+ assert(index == 0 || index == 1);
+ drm_intel_gem_bo_map_gtt(gpgpu->time_stamp_b.bo);
+ uint64_t* ptr = gpgpu->time_stamp_b.bo->virtual;
+ result = ptr[index];
+
+ /* According to BSpec, the timestamp counter should be 36 bits,
+ but comparing to the timestamp counter from IO control reading,
+ we find the first 4 bits seems to be fake. In order to keep the
+ timestamp counter conformable, we just skip the first 4 bits.
+ */
+ result = (result & 0x0FFFFFFFF) * 80; //convert to nanoseconds
+ *ret_ts = result;
+
+ drm_intel_gem_bo_unmap_gtt(gpgpu->time_stamp_b.bo);
+}
+
+static int
+intel_gpgpu_set_profiling_buf(intel_gpgpu_t *gpgpu, uint32_t size, uint32_t offset, uint8_t bti)
+{
+ drm_intel_bo *bo = NULL;
+
+ gpgpu->profiling_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "Profiling buffer", size, 64);
+ bo = gpgpu->profiling_b.bo;
+ if (!bo || (drm_intel_bo_map(bo, 1) != 0)) {
+ fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
+ return -1;
+ }
+ memset(bo->virtual, 0, size);
+ drm_intel_bo_unmap(bo);
+ cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)bo, offset, 0, size, bti);
+ return 0;
+}
+
+static void
+intel_gpgpu_set_profiling_info(intel_gpgpu_t *gpgpu, void* profiling_info)
+{
+ gpgpu->profiling_info = profiling_info;
+}
+
+static void*
+intel_gpgpu_get_profiling_info(intel_gpgpu_t *gpgpu)
+{
+ return gpgpu->profiling_info;
+}
+
+static int
+intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
+{
+ if (gpgpu->printf_b.bo)
+ dri_bo_unreference(gpgpu->printf_b.bo);
+ gpgpu->printf_b.bo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf buffer", size, 4096);
+
+ if (!gpgpu->printf_b.bo || (drm_intel_bo_map(gpgpu->printf_b.bo, 1) != 0)) {
+ fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
+ return -1;
+ }
+
+ memset(gpgpu->printf_b.bo->virtual, 0, size);
+ *(uint32_t *)(gpgpu->printf_b.bo->virtual) = 4; // first four is for the length.
+ drm_intel_bo_unmap(gpgpu->printf_b.bo);
+ /* No need to bind, we do not need to emit reloc. */
+ intel_gpgpu_setup_bti(gpgpu, gpgpu->printf_b.bo, 0, size, bti, I965_SURFACEFORMAT_RAW);
+ return 0;
+}
+
+static void*
+intel_gpgpu_map_profiling_buf(intel_gpgpu_t *gpgpu)
+{
+ drm_intel_bo *bo = NULL;
+ bo = gpgpu->profiling_b.bo;
+ drm_intel_bo_map(bo, 1);
+ return bo->virtual;
+}
+
+static void
+intel_gpgpu_unmap_profiling_buf_addr(intel_gpgpu_t *gpgpu)
+{
+ drm_intel_bo *bo = NULL;
+ bo = gpgpu->profiling_b.bo;
+ drm_intel_bo_unmap(bo);
+}
+
+
+static void*
+intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu)
+{
+ drm_intel_bo *bo = NULL;
+ bo = gpgpu->printf_b.bo;
+ drm_intel_bo_map(bo, 1);
+ return bo->virtual;
+}
+
+static void
+intel_gpgpu_unmap_printf_buf_addr(intel_gpgpu_t *gpgpu)
+{
+ drm_intel_bo *bo = NULL;
+ bo = gpgpu->printf_b.bo;
+ drm_intel_bo_unmap(bo);
+}
+
+static void
+intel_gpgpu_release_printf_buf(intel_gpgpu_t *gpgpu)
+{
+ drm_intel_bo_unreference(gpgpu->printf_b.bo);
+ gpgpu->printf_b.bo = NULL;
+}
+
+static void
+intel_gpgpu_set_printf_info(intel_gpgpu_t *gpgpu, void* printf_info)
+{
+ gpgpu->printf_info = printf_info;
+}
+
+static void*
+intel_gpgpu_get_printf_info(intel_gpgpu_t *gpgpu)
+{
+ return gpgpu->printf_info;
+}
+
+static void
+intel_gpgpu_set_kernel(intel_gpgpu_t *gpgpu, void * kernel)
+{
+ gpgpu->kernel = kernel;
+}
+
+static void*
+intel_gpgpu_get_kernel(intel_gpgpu_t *gpgpu)
+{
+ return gpgpu->kernel;
+}
+
+LOCAL void
+intel_set_gpgpu_callbacks(int device_id)
+{
+ cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
+ cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
+ cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync;
+ cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
+ cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
+ cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
+ cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
+ cl_gpgpu_alloc_constant_buffer = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer;
+ cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
+ cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
+ cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
+ cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) intel_gpgpu_batch_start;
+ cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end;
+ cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush;
+ cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen7;
+ cl_gpgpu_bind_vme_state = (cl_gpgpu_bind_vme_state_cb *) intel_gpgpu_bind_vme_state_gen7;
+ cl_gpgpu_set_scratch = (cl_gpgpu_set_scratch_cb *) intel_gpgpu_set_scratch;
+ cl_gpgpu_event_new = (cl_gpgpu_event_new_cb *)intel_gpgpu_event_new;
+ cl_gpgpu_event_flush = (cl_gpgpu_event_flush_cb *)intel_gpgpu_event_flush;
+ cl_gpgpu_event_update_status = (cl_gpgpu_event_update_status_cb *)intel_gpgpu_event_update_status;
+ cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete;
+ cl_gpgpu_event_get_exec_timestamp = (cl_gpgpu_event_get_exec_timestamp_cb *)intel_gpgpu_event_get_exec_timestamp;
+ cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
+ cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
+ cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+ cl_gpgpu_set_profiling_buffer = (cl_gpgpu_set_profiling_buffer_cb *)intel_gpgpu_set_profiling_buf;
+ cl_gpgpu_set_profiling_info = (cl_gpgpu_set_profiling_info_cb *)intel_gpgpu_set_profiling_info;
+ cl_gpgpu_get_profiling_info = (cl_gpgpu_get_profiling_info_cb *)intel_gpgpu_get_profiling_info;
+ cl_gpgpu_map_profiling_buffer = (cl_gpgpu_map_profiling_buffer_cb *)intel_gpgpu_map_profiling_buf;
+ cl_gpgpu_unmap_profiling_buffer = (cl_gpgpu_unmap_profiling_buffer_cb *)intel_gpgpu_unmap_profiling_buf_addr;
+ cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf;
+ cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf;
+ cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr;
+ cl_gpgpu_release_printf_buffer = (cl_gpgpu_release_printf_buffer_cb *)intel_gpgpu_release_printf_buf;
+ cl_gpgpu_set_printf_info = (cl_gpgpu_set_printf_info_cb *)intel_gpgpu_set_printf_info;
+ cl_gpgpu_get_printf_info = (cl_gpgpu_get_printf_info_cb *)intel_gpgpu_get_printf_info;
+ cl_gpgpu_set_kernel = (cl_gpgpu_set_kernel_cb *)intel_gpgpu_set_kernel;
+ cl_gpgpu_get_kernel = (cl_gpgpu_get_kernel_cb *)intel_gpgpu_get_kernel;
+
+ if (IS_BROADWELL(device_id) || IS_CHERRYVIEW(device_id)) {
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen8;
+ intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
+ cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen8;
+ intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;
+ intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; //BDW need not restore SLM, same as gen7
+ intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7;
+ if(IS_CHERRYVIEW(device_id))
+ intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_baytrail;
+ intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen8;
+ intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen8;
+ intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8;
+ cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8;
+ intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen8;
+ intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
+ intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
+ cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
+ intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen8;
+ intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7;
+ cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen8;
+ return;
+ }
+ if (IS_GEN9(device_id)) {
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9;
+ intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
+ cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9;
+ intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;
+ intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; //SKL need not restore SLM, same as gen7
+ intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7;
+ intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen9;
+ intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen9;
+ intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8;
+ cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8;
+ intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen9;
+ intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
+ intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
+ cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
+ intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen8;
+ intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen9;
+ cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen8;
+ return;
+ }
+
+ cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen7;
+ intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen7;
+ intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen7;
+ cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen7;
+ intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen7;
+ intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen7;
+ intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen7;
+ intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7;
+
+ if (IS_HASWELL(device_id)) {
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
+ intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen75;
+ cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen75;
+ intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen75;
+ intel_gpgpu_post_action = intel_gpgpu_post_action_gen75;
+ intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb
+ intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75;
+ intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75;
+ }
+ else if (IS_IVYBRIDGE(device_id)) {
+ cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
+ cl_gpgpu_bind_image_for_vme = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_for_vme_gen7;
+ if (IS_BAYTRAIL_T(device_id)) {
+ intel_gpgpu_set_L3 = intel_gpgpu_set_L3_baytrail;
+ intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_baytrail;
+ } else {
+ intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen7;
+ intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7;
+ }
+ cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen7;
+ intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7;
+ intel_gpgpu_post_action = intel_gpgpu_post_action_gen7;
+ intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7;
+ intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
+ }
+}
diff --git a/src/gen/intel_gpgpu.h b/src/gen/intel_gpgpu.h
new file mode 100644
index 0000000..2a67118
--- /dev/null
+++ b/src/gen/intel_gpgpu.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Alexei Soupikov <alexei.soupikov at intel.com>
+ */
+
+#ifndef __INTEL_GPGPU_H__
+#define __INTEL_GPGPU_H__
+
+#include "cl_utils.h"
+#include "cl_driver.h"
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+
+
+/* We can bind only a limited number of buffers */
+enum { max_buf_n = 128 };
+
+enum { max_img_n = 128};
+
+enum {max_sampler_n = 16 };
+
+struct intel_driver;
+struct intel_batchbuffer;
+
+/* Handle GPGPU state */
+struct intel_gpgpu
+{
+ void* ker_opaque;
+ void* printf_info;
+ void* profiling_info;
+ struct intel_driver *drv;
+ struct intel_batchbuffer *batch;
+ cl_gpgpu_kernel *ker;
+ drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the call */
+ uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */
+ uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe buffer */
+ uint32_t binded_n; /* number of buffers binded */
+ void *kernel; /* cl_kernel with this gpgpu */
+
+ unsigned long img_bitmap; /* image usage bitmap. */
+ unsigned int img_index_base; /* base index for image surface.*/
+
+ unsigned long sampler_bitmap; /* sampler usage bitmap. */
+
+ struct { drm_intel_bo *bo; } stack_b;
+ struct { drm_intel_bo *bo; } perf_b;
+ struct { drm_intel_bo *bo; } scratch_b;
+ struct { drm_intel_bo *bo; } constant_b;
+ struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */
+ struct { drm_intel_bo *bo; } printf_b; /* the printf buf and index buf*/
+ struct { drm_intel_bo *bo; } profiling_b; /* the buf for profiling*/
+ struct { drm_intel_bo *bo; } aux_buf;
+ struct {
+ uint32_t surface_heap_offset;
+ uint32_t curbe_offset;
+ uint32_t idrt_offset;
+ uint32_t sampler_state_offset;
+ uint32_t sampler_border_color_state_offset;
+ } aux_offset;
+
+ uint32_t per_thread_scratch;
+ struct {
+ uint32_t num_cs_entries;
+ uint32_t size_cs_entry; /* size of one entry in 512bit elements */
+ } curb;
+
+ uint32_t max_threads; /* max threads requested by the user */
+};
+
+struct intel_gpgpu_node {
+ struct intel_gpgpu *gpgpu;
+ struct intel_gpgpu_node *next;
+};
+
+
+/* Set the gpgpu related call backs */
+extern void intel_set_gpgpu_callbacks(int device_id);
+
+#endif /* __INTEL_GPGPU_H__ */
+
diff --git a/src/gen/intel_structs.h b/src/gen/intel_structs.h
new file mode 100644
index 0000000..c112a16
--- /dev/null
+++ b/src/gen/intel_structs.h
@@ -0,0 +1,832 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __INTEL_STRUCTS_H__
+#define __INTEL_STRUCTS_H__
+
+#include <stdint.h>
+
+typedef struct gen6_interface_descriptor
+{
+ struct {
+ uint32_t pad6:6;
+ uint32_t kernel_start_pointer:26;
+ } desc0;
+
+ struct {
+ uint32_t pad:7;
+ uint32_t software_exception:1;
+ uint32_t pad2:3;
+ uint32_t maskstack_exception:1;
+ uint32_t pad3:1;
+ uint32_t illegal_opcode_exception:1;
+ uint32_t pad4:2;
+ uint32_t floating_point_mode:1;
+ uint32_t thread_priority:1;
+ uint32_t single_program_flow:1;
+ uint32_t pad5:1;
+ uint32_t pad6:6;
+ uint32_t pad7:6;
+ } desc1;
+
+ struct {
+ uint32_t pad:2;
+ uint32_t sampler_count:3;
+ uint32_t sampler_state_pointer:27;
+ } desc2;
+
+ struct {
+ uint32_t binding_table_entry_count:5; /* prefetch entries only */
+ uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */
+ } desc3;
+
+ struct {
+ uint32_t curbe_read_offset:16; /* in GRFs */
+ uint32_t curbe_read_len:16; /* in GRFs */
+ } desc4;
+
+ struct {
+ uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */
+ uint32_t barrier_return_byte:8;
+ uint32_t slm_sz:5; /* 0..16 - 0K..64K */
+ uint32_t barrier_enable:1;
+ uint32_t rounding_mode:2;
+ uint32_t barrier_return_grf_offset:8;
+ } desc5;
+
+ uint32_t desc6; /* unused */
+ uint32_t desc7; /* unused */
+} gen6_interface_descriptor_t;
+
+typedef struct gen8_interface_descriptor
+{
+ struct {
+ uint32_t pad6:6;
+ uint32_t kernel_start_pointer:26;
+ } desc0;
+ struct {
+ uint32_t kernel_start_pointer_high:16;
+ uint32_t pad6:16;
+ } desc1;
+
+ struct {
+ uint32_t pad:7;
+ uint32_t software_exception:1;
+ uint32_t pad2:3;
+ uint32_t maskstack_exception:1;
+ uint32_t pad3:1;
+ uint32_t illegal_opcode_exception:1;
+ uint32_t pad4:2;
+ uint32_t floating_point_mode:1;
+ uint32_t thread_priority:1;
+ uint32_t single_program_flow:1;
+ uint32_t denorm_mode:1;
+ uint32_t thread_preemption_disable:1;
+ uint32_t pad5:11;
+ } desc2;
+
+ struct {
+ uint32_t pad:2;
+ uint32_t sampler_count:3;
+ uint32_t sampler_state_pointer:27;
+ } desc3;
+
+ struct {
+ uint32_t binding_table_entry_count:5; /* prefetch entries only */
+ uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */
+ } desc4;
+
+ struct {
+ uint32_t curbe_read_offset:16; /* in GRFs */
+ uint32_t curbe_read_len:16; /* in GRFs */
+ } desc5;
+
+ struct {
+ uint32_t group_threads_num:10; /* 0..64, 0 - no barrier use */
+ uint32_t pad:5;
+ uint32_t global_barrier_enable:1;
+ uint32_t slm_sz:5; /* 0..16 - 0K..64K */
+ uint32_t barrier_enable:1;
+ uint32_t rounding_mode:2;
+ uint32_t barrier_return_grf_offset:8;
+ } desc6;
+
+ uint32_t desc7; /* unused */
+} gen8_interface_descriptor_t;
+
+typedef struct gen7_surface_state
+{
+ struct {
+ uint32_t cube_pos_z:1;
+ uint32_t cube_neg_z:1;
+ uint32_t cube_pos_y:1;
+ uint32_t cube_neg_y:1;
+ uint32_t cube_pos_x:1;
+ uint32_t cube_neg_x:1;
+ uint32_t media_boundary_pixel_mode:2;
+ uint32_t render_cache_rw_mode:1;
+ uint32_t pad1:1;
+ uint32_t surface_array_spacing:1;
+ uint32_t vertical_line_stride_offset:1;
+ uint32_t vertical_line_stride:1;
+ uint32_t tile_walk:1;
+ uint32_t tiled_surface:1;
+ uint32_t horizontal_alignment:1;
+ uint32_t vertical_alignment:2;
+ uint32_t surface_format:9;
+ uint32_t pad0:1;
+ uint32_t surface_array:1;
+ uint32_t surface_type:3;
+ } ss0;
+
+ struct {
+ uint32_t base_addr;
+ } ss1;
+
+ struct {
+ uint32_t width:14;
+ uint32_t pad1:2;
+ uint32_t height:14;
+ uint32_t pad0:2;
+ } ss2;
+
+ struct {
+ uint32_t pitch:18;
+ uint32_t pad0:3;
+ uint32_t depth:11;
+ } ss3;
+
+ union {
+ struct {
+ uint32_t mulsample_pal_idx:3;
+ uint32_t numer_mulsample:3;
+ uint32_t mss_fmt:1;
+ uint32_t rt_view_extent:11;
+ uint32_t min_array_element:11;
+ uint32_t rt_rotate:2;
+ uint32_t pad0:1;
+ } not_str_buf;
+ } ss4;
+
+ struct {
+ uint32_t mip_count:4;
+ uint32_t surface_min_load:4;
+ uint32_t pad2:6;
+ uint32_t coherence_type:1;
+ uint32_t stateless_force_write_thru:1;
+ uint32_t cache_control:4;
+ uint32_t y_offset:4;
+ uint32_t pad0:1;
+ uint32_t x_offset:7;
+ } ss5;
+
+ uint32_t ss6; /* unused */
+
+ struct {
+ uint32_t min_lod:12;
+ uint32_t pad0:4;
+ uint32_t shader_a:3;
+ uint32_t shader_b:3;
+ uint32_t shader_g:3;
+ uint32_t shader_r:3;
+ uint32_t pad1:4;
+ } ss7;
+} gen7_surface_state_t;
+
+typedef struct gen8_surface_state
+{
+ struct {
+ uint32_t cube_pos_z:1;
+ uint32_t cube_neg_z:1;
+ uint32_t cube_pos_y:1;
+ uint32_t cube_neg_y:1;
+ uint32_t cube_pos_x:1;
+ uint32_t cube_neg_x:1;
+ uint32_t media_boundary_pixel_mode:2;
+ uint32_t render_cache_rw_mode:1;
+ uint32_t sampler_L2_bypass_mode:1;
+ uint32_t vertical_line_stride_offset:1;
+ uint32_t vertical_line_stride:1;
+ uint32_t tile_mode:2;
+ uint32_t horizontal_alignment:2;
+ uint32_t vertical_alignment:2;
+ uint32_t surface_format:9;
+ uint32_t pad0:1;
+ uint32_t surface_array:1;
+ uint32_t surface_type:3;
+ } ss0;
+
+ struct {
+ uint32_t surface_qpitch:15;
+ uint32_t pad0:3;
+ uint32_t pad1:1;
+ uint32_t base_mip_level:5;
+ uint32_t mem_obj_ctrl_state:7;
+ uint32_t pad2:1;
+ } ss1;
+
+ struct {
+ uint32_t width:14;
+ uint32_t pad1:2;
+ uint32_t height:14;
+ uint32_t pad0:2;
+ } ss2;
+
+ struct {
+ uint32_t surface_pitch:18;
+ uint32_t pad1:2;
+ uint32_t pad0:1;
+ uint32_t depth:11;
+ } ss3;
+
+ struct {
+ union {
+ struct {
+ uint32_t multisample_pos_palette_idx:3;
+ uint32_t multisample_num:3;
+ uint32_t multisample_format:1;
+ uint32_t render_target_view_ext:11;
+ uint32_t min_array_elt:11;
+ uint32_t render_target_and_sample_rotation:2;
+ uint32_t pad1:1;
+ };
+
+ uint32_t pad0;
+ };
+ } ss4;
+
+ struct {
+ uint32_t mip_count:4;
+ uint32_t surface_min_lod:4;
+ uint32_t pad5:4;
+ uint32_t pad4:2;
+ uint32_t conherency_type:1;
+ uint32_t pad3:3;
+ uint32_t pad2:2;
+ uint32_t cube_ewa:1;
+ uint32_t y_offset:3;
+ uint32_t pad0:1;
+ uint32_t x_offset:7;
+ } ss5;
+
+ struct {
+ union {
+ union {
+ struct {
+ uint32_t aux_surface_mode:3;
+ uint32_t aux_surface_pitch:9;
+ uint32_t pad3:4;
+ };
+ struct {
+ uint32_t uv_plane_y_offset:14;
+ uint32_t pad2:2;
+ };
+ };
+
+ struct {
+ uint32_t uv_plane_x_offset:14;
+ uint32_t pad1:1;
+ uint32_t seperate_uv_plane_enable:1;
+ };
+ struct {
+ uint32_t aux_sruface_qpitch:15;
+ uint32_t pad0:1;
+ };
+ };
+ } ss6;
+
+ struct {
+ uint32_t resource_min_lod:12;
+ uint32_t pad0:4;
+ uint32_t shader_channel_select_alpha:3;
+ uint32_t shader_channel_select_blue:3;
+ uint32_t shader_channel_select_green:3;
+ uint32_t shader_channel_select_red:3;
+ uint32_t alpha_clear_color:1;
+ uint32_t blue_clear_color:1;
+ uint32_t green_clear_color:1;
+ uint32_t red_clear_color:1;
+ } ss7;
+
+ struct {
+ uint32_t surface_base_addr_lo;
+ } ss8;
+
+ struct {
+ uint32_t surface_base_addr_hi;
+ } ss9;
+
+ struct {
+ uint32_t pad0:12;
+ uint32_t aux_base_addr_lo:20;
+ } ss10;
+
+ struct {
+ uint32_t aux_base_addr_hi:32;
+ } ss11;
+
+ struct {
+ uint32_t pad0;
+ } ss12;
+
+ /* 13~15 have meaning only when aux surface mode == AUX_HIZ */
+ struct {
+ uint32_t pad0;
+ } ss13;
+ struct {
+ uint32_t pad0;
+ } ss14;
+ struct {
+ uint32_t pad0;
+ } ss15;
+} gen8_surface_state_t;
+
+typedef struct gen7_media_surface_state
+{
+ struct {
+ uint32_t base_addr;
+ } ss0;
+
+ struct {
+ uint32_t uv_offset_v_direction:2;
+ uint32_t pic_struct:2;
+ uint32_t width:14;
+ uint32_t height:14;
+ } ss1;
+
+ struct {
+ uint32_t tile_mode:2;
+ uint32_t half_pitch_for_chroma:1;
+ uint32_t surface_pitch:18;
+ uint32_t pad1:1;
+ uint32_t surface_object_control_state:4;
+ uint32_t pad0:1;
+ uint32_t interleave_chroma:1;
+ uint32_t surface_format:4;
+ } ss2;
+
+ struct {
+ uint32_t y_offset_for_u:14;
+ uint32_t pad1:2;
+ uint32_t x_offset_for_u:14;
+ uint32_t pad0:2;
+ } ss3;
+
+ struct {
+ uint32_t y_offset_for_v:15;
+ uint32_t pad1:1;
+ uint32_t x_offset_for_v:14;
+ uint32_t pad0:2;
+ } ss4;
+
+ struct {
+ uint32_t pad0;
+ } ss5;
+
+ struct {
+ uint32_t pad0;
+ } ss6;
+
+ struct {
+ uint32_t pad0;
+ } ss7;
+} gen7_media_surface_state_t;
+
+typedef union gen_surface_state
+{
+ gen7_surface_state_t gen7_surface_state;
+ gen8_surface_state_t gen8_surface_state;
+} gen_surface_state_t;
+
+static const size_t surface_state_sz = sizeof(gen_surface_state_t);
+
+typedef struct gen6_vfe_state_inline
+{
+ struct {
+ uint32_t per_thread_scratch_space:4;
+ uint32_t pad3:3;
+ uint32_t extend_vfe_state_present:1;
+ uint32_t pad2:2;
+ uint32_t scratch_base:22;
+ } vfe0;
+
+ struct {
+ uint32_t debug_counter_control:2;
+ uint32_t gpgpu_mode:1; /* 0 for SNB!!! */
+ uint32_t gateway_mmio_access:2;
+ uint32_t fast_preempt:1;
+ uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */
+ uint32_t reset_gateway_timer:1;
+ uint32_t urb_entries:8;
+ uint32_t max_threads:16;
+ } vfe1;
+
+ struct {
+ uint32_t pad8:8;
+ uint32_t debug_object_id:24;
+ } vfe2;
+
+ struct {
+ uint32_t curbe_size:16; /* in GRFs */
+ uint32_t urb_size:16; /* in GRFs */
+ } vfe3;
+
+ struct {
+ uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */
+ } vfe4;
+
+ struct {
+ uint32_t scoreboard0_dx:4;
+ uint32_t scoreboard0_dy:4;
+ uint32_t scoreboard1_dx:4;
+ uint32_t scoreboard1_dy:4;
+ uint32_t scoreboard2_dx:4;
+ uint32_t scoreboard2_dy:4;
+ uint32_t scoreboard3_dx:4;
+ uint32_t scoreboard3_dy:4;
+ } vfe5;
+
+ struct {
+ uint32_t scoreboard4_dx:4;
+ uint32_t scoreboard4_dy:4;
+ uint32_t scoreboard5_dx:4;
+ uint32_t scoreboard5_dy:4;
+ uint32_t scoreboard6_dx:4;
+ uint32_t scoreboard6_dy:4;
+ uint32_t scoreboard7_dx:4;
+ uint32_t scoreboard7_dy:4;
+ } vfe6;
+} gen6_vfe_state_inline_t;
+
+typedef struct gen6_pipe_control
+{
+ struct {
+ uint32_t length : BITFIELD_RANGE(0, 7);
+ uint32_t reserved : BITFIELD_RANGE(8, 15);
+ uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23);
+ uint32_t instruction_opcode : BITFIELD_RANGE(24, 26);
+ uint32_t instruction_pipeline : BITFIELD_RANGE(27, 28);
+ uint32_t instruction_type : BITFIELD_RANGE(29, 31);
+ } dw0;
+
+ struct {
+ uint32_t depth_cache_flush_enable : BITFIELD_BIT(0);
+ uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1);
+ uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2);
+ uint32_t constant_cache_invalidation_enable : BITFIELD_BIT(3);
+ uint32_t vf_cache_invalidation_enable : BITFIELD_BIT(4);
+ uint32_t dc_flush_enable : BITFIELD_BIT(5);
+ uint32_t protected_memory_app_id : BITFIELD_BIT(6);
+ uint32_t pipe_control_flush_enable : BITFIELD_BIT(7);
+ uint32_t notify_enable : BITFIELD_BIT(8);
+ uint32_t indirect_state_pointers_disable : BITFIELD_BIT(9);
+ uint32_t texture_cache_invalidation_enable : BITFIELD_BIT(10);
+ uint32_t instruction_cache_invalidate_enable : BITFIELD_BIT(11);
+ uint32_t render_target_cache_flush_enable : BITFIELD_BIT(12);
+ uint32_t depth_stall_enable : BITFIELD_BIT(13);
+ uint32_t post_sync_operation : BITFIELD_RANGE(14, 15);
+ uint32_t generic_media_state_clear : BITFIELD_BIT(16);
+ uint32_t synchronize_gfdt_surface : BITFIELD_BIT(17);
+ uint32_t tlb_invalidate : BITFIELD_BIT(18);
+ uint32_t global_snapshot_count_reset : BITFIELD_BIT(19);
+ uint32_t cs_stall : BITFIELD_BIT(20);
+ uint32_t store_data_index : BITFIELD_BIT(21);
+ uint32_t protected_memory_enable : BITFIELD_BIT(22);
+ uint32_t reserved : BITFIELD_RANGE(23, 31);
+ } dw1;
+
+ struct {
+ uint32_t reserved : BITFIELD_RANGE(0, 1);
+ uint32_t destination_address_type : BITFIELD_BIT(2);
+ uint32_t address : BITFIELD_RANGE(3, 31);
+ } dw2;
+
+ struct {
+ uint32_t data;
+ } dw3;
+
+ struct {
+ uint32_t data;
+ } dw4;
+} gen6_pipe_control_t;
+
+typedef struct gen8_pipe_control
+{
+ struct {
+ uint32_t length : BITFIELD_RANGE(0, 7);
+ uint32_t reserved : BITFIELD_RANGE(8, 15);
+ uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23);
+ uint32_t instruction_opcode : BITFIELD_RANGE(24, 26);
+ uint32_t instruction_pipeline : BITFIELD_RANGE(27, 28);
+ uint32_t instruction_type : BITFIELD_RANGE(29, 31);
+ } dw0;
+
+ struct {
+ uint32_t depth_cache_flush_enable : BITFIELD_BIT(0);
+ uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1);
+ uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2);
+ uint32_t constant_cache_invalidation_enable : BITFIELD_BIT(3);
+ uint32_t vf_cache_invalidation_enable : BITFIELD_BIT(4);
+ uint32_t dc_flush_enable : BITFIELD_BIT(5);
+ uint32_t protected_memory_app_id : BITFIELD_BIT(6);
+ uint32_t pipe_control_flush_enable : BITFIELD_BIT(7);
+ uint32_t notify_enable : BITFIELD_BIT(8);
+ uint32_t indirect_state_pointers_disable : BITFIELD_BIT(9);
+ uint32_t texture_cache_invalidation_enable : BITFIELD_BIT(10);
+ uint32_t instruction_cache_invalidate_enable : BITFIELD_BIT(11);
+ uint32_t render_target_cache_flush_enable : BITFIELD_BIT(12);
+ uint32_t depth_stall_enable : BITFIELD_BIT(13);
+ uint32_t post_sync_operation : BITFIELD_RANGE(14, 15);
+ uint32_t generic_media_state_clear : BITFIELD_BIT(16);
+ uint32_t synchronize_gfdt_surface : BITFIELD_BIT(17);
+ uint32_t tlb_invalidate : BITFIELD_BIT(18);
+ uint32_t global_snapshot_count_reset : BITFIELD_BIT(19);
+ uint32_t cs_stall : BITFIELD_BIT(20);
+ uint32_t store_data_index : BITFIELD_BIT(21);
+ uint32_t protected_memory_enable : BITFIELD_BIT(22);
+ uint32_t reserved : BITFIELD_RANGE(23, 31);
+ } dw1;
+
+ struct {
+ uint32_t reserved : BITFIELD_RANGE(0, 1);
+ uint32_t destination_address_type : BITFIELD_BIT(2);
+ uint32_t address : BITFIELD_RANGE(3, 31);
+ } dw2;
+
+ struct {
+ uint32_t data;
+ } dw3;
+
+ struct {
+ uint32_t data;
+ } dw4;
+
+ struct {
+ uint32_t data;
+ } dw5;
+} gen8_pipe_control_t;
+
+#define GEN7_NUM_VME_SEARCH_PATH_STATES 14
+#define GEN7_NUM_VME_RD_LUT_SETS 4
+
+typedef struct gen7_vme_search_path_state
+{
+ struct {
+ uint32_t SPD_0_X : BITFIELD_RANGE(0, 3); //search path distance
+ uint32_t SPD_0_Y : BITFIELD_RANGE(4, 7);
+ uint32_t SPD_1_X : BITFIELD_RANGE(8, 11);
+ uint32_t SPD_1_Y : BITFIELD_RANGE(12, 15);
+ uint32_t SPD_2_X : BITFIELD_RANGE(16, 19);
+ uint32_t SPD_2_Y : BITFIELD_RANGE(20, 23);
+ uint32_t SPD_3_X : BITFIELD_RANGE(24, 27);
+ uint32_t SPD_3_Y : BITFIELD_RANGE(28, 31);
+ }dw0;
+}gen7_vme_search_path_state_t;
+
+typedef struct gen7_vme_rd_lut_set
+{
+ struct {
+ uint32_t LUT_MbMode_0 : BITFIELD_RANGE(0, 7);
+ uint32_t LUT_MbMode_1 : BITFIELD_RANGE(8, 15);
+ uint32_t LUT_MbMode_2 : BITFIELD_RANGE(16, 23);
+ uint32_t LUT_MbMode_3 : BITFIELD_RANGE(24, 31);
+ }dw0;
+
+ struct {
+ uint32_t LUT_MbMode_4 : BITFIELD_RANGE(0, 7);
+ uint32_t LUT_MbMode_5 : BITFIELD_RANGE(8, 15);
+ uint32_t LUT_MbMode_6 : BITFIELD_RANGE(16, 23);
+ uint32_t LUT_MbMode_7 : BITFIELD_RANGE(24, 31);
+ }dw1;
+
+ struct {
+ uint32_t LUT_MV_0 : BITFIELD_RANGE(0, 7);
+ uint32_t LUT_MV_1 : BITFIELD_RANGE(8, 15);
+ uint32_t LUT_MV_2 : BITFIELD_RANGE(16, 23);
+ uint32_t LUT_MV_3 : BITFIELD_RANGE(24, 31);
+ }dw2;
+
+ struct {
+ uint32_t LUT_MV_4 : BITFIELD_RANGE(0, 7);
+ uint32_t LUT_MV_5 : BITFIELD_RANGE(8, 15);
+ uint32_t LUT_MV_6 : BITFIELD_RANGE(16, 23);
+ uint32_t LUT_MV_7 : BITFIELD_RANGE(24, 31);
+ }dw3;
+}gen7_vme_rd_lut_set_t;
+
+typedef struct gen7_vme_state
+{
+ gen7_vme_search_path_state_t sp[GEN7_NUM_VME_SEARCH_PATH_STATES];
+
+ struct {
+ uint32_t LUT_MbMode_8_0 : BITFIELD_RANGE(0, 7);
+ uint32_t LUT_MbMode_9_0 : BITFIELD_RANGE(8, 15);
+ uint32_t LUT_MbMode_8_1 : BITFIELD_RANGE(16, 23);
+ uint32_t LUT_MbMode_9_1 : BITFIELD_RANGE(24, 31);
+ }dw14;
+
+ struct {
+ uint32_t LUT_MbMode_8_2 : BITFIELD_RANGE(0, 7);
+ uint32_t LUT_MbMode_9_2 : BITFIELD_RANGE(8, 15);
+ uint32_t LUT_MbMode_8_3 : BITFIELD_RANGE(16, 23);
+ uint32_t LUT_MbMode_9_3 : BITFIELD_RANGE(24, 31);
+ }dw15;
+
+ gen7_vme_rd_lut_set_t lut[GEN7_NUM_VME_RD_LUT_SETS];
+}gen7_vme_state_t;
+
+typedef struct gen6_sampler_state
+{
+ struct {
+ uint32_t shadow_function:3;
+ uint32_t lod_bias:11;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t min_mag_neq:1;
+ uint32_t lod_preclamp:1;
+ uint32_t default_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t cube_control_mode:1;
+ uint32_t pad:2;
+ uint32_t max_lod:10;
+ uint32_t min_lod:10;
+ } ss1;
+
+ struct {
+ uint32_t pad:5;
+ uint32_t default_color_pointer:27;
+ } ss2;
+
+ struct {
+ uint32_t non_normalized_coord:1;
+ uint32_t pad:12;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t monochrome_filter_width:3;
+ uint32_t monochrome_filter_height:3;
+ } ss3;
+} gen6_sampler_state_t;
+
+typedef struct gen7_sampler_border_color {
+ float r,g,b,a;
+} gen7_sampler_border_color_t;
+
+typedef struct gen7_sampler_state
+{
+ struct {
+ uint32_t aniso_algorithm:1;
+ uint32_t lod_bias:13;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t pad1:1;
+ uint32_t lod_preclamp:1;
+ uint32_t default_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct {
+ uint32_t cube_control_mode:1;
+ uint32_t shadow_function:3;
+ uint32_t pad:4;
+ uint32_t max_lod:12;
+ uint32_t min_lod:12;
+ } ss1;
+
+ struct {
+ uint32_t pad:5;
+ uint32_t default_color_pointer:27;
+ } ss2;
+
+ struct {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t pad:1;
+ uint32_t non_normalized_coord:1;
+ uint32_t trilinear_quality:2;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t pad0:6;
+ } ss3;
+} gen7_sampler_state_t;
+
+STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen7_sampler_state_t));
+
+typedef struct gen8_sampler_state
+{
+ struct {
+ uint32_t aniso_algorithm:1;
+ uint32_t lod_bias:13;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t lod_preclamp:2;
+ uint32_t default_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct {
+ uint32_t cube_control_mode:1;
+ uint32_t shadow_function:3;
+ uint32_t chromakey_mode:1;
+ uint32_t chromakey_index:2;
+ uint32_t chromakey_enable:1;
+ uint32_t max_lod:12;
+ uint32_t min_lod:12;
+ } ss1;
+
+ struct {
+ uint32_t lod_clamp_mag_mode:1;
+ uint32_t flexible_filter_valign:1;
+ uint32_t flexible_filter_halign:1;
+ uint32_t flexible_filter_coeff_size:1;
+ uint32_t flexible_filter_mode:1;
+ uint32_t pad1:1;
+ uint32_t indirect_state_ptr:18;
+ uint32_t pad0:2;
+ uint32_t sep_filter_height:2;
+ uint32_t sep_filter_width:2;
+ uint32_t sep_filter_coeff_table_size:2;
+ } ss2;
+
+ struct {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t pad:1;
+ uint32_t non_normalized_coord:1;
+ uint32_t trilinear_quality:2;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t pad0:2;
+ uint32_t non_sep_filter_footprint_mask:8;
+ } ss3;
+} gen8_sampler_state_t;
+
+STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen8_sampler_state_t));
+
+#undef BITFIELD_BIT
+#undef BITFIELD_RANGE
+
+#endif /* __INTEL_STRUCTS_H__ */
+
diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
deleted file mode 100644
index be104bb..0000000
--- a/src/intel/intel_batchbuffer.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- */
-
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "intel/intel_batchbuffer.h"
-#include "intel/intel_driver.h"
-#include "cl_alloc.h"
-#include "cl_utils.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <errno.h>
-
-LOCAL int
-intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
-{
- if (batch->buffer != NULL) {
- dri_bo_unreference(batch->buffer);
- batch->buffer = NULL;
- batch->last_bo = NULL;
- }
-
- batch->buffer = dri_bo_alloc(batch->intel->bufmgr,
- "batch buffer",
- sz,
- 64);
- if (!batch->buffer || (dri_bo_map(batch->buffer, 1) != 0)) {
- if (batch->buffer)
- dri_bo_unreference(batch->buffer);
- batch->buffer = NULL;
- return -1;
- }
- batch->map = (uint8_t*) batch->buffer->virtual;
- batch->size = sz;
- batch->ptr = batch->map;
- batch->atomic = 0;
- batch->last_bo = batch->buffer;
- batch->enable_slm = 0;
- return 0;
-}
-
-LOCAL void
-intel_batchbuffer_init(intel_batchbuffer_t *batch, intel_driver_t *intel)
-{
- assert(intel);
- batch->intel = intel;
-}
-
-LOCAL void
-intel_batchbuffer_terminate(intel_batchbuffer_t *batch)
-{
- assert(batch->buffer);
-
- if (batch->map) {
- dri_bo_unmap(batch->buffer);
- batch->map = NULL;
- }
-
- dri_bo_unreference(batch->buffer);
- batch->buffer = NULL;
-}
-
-LOCAL int
-intel_batchbuffer_flush(intel_batchbuffer_t *batch)
-{
- uint32_t used = batch->ptr - batch->map;
- int is_locked = batch->intel->locked;
- int err = 0;
-
- if (used == 0)
- return 0;
-
- if ((used & 4) == 0) {
- *(uint32_t*) batch->ptr = 0;
- batch->ptr += 4;
- }
-
- *(uint32_t*)batch->ptr = MI_BATCH_BUFFER_END;
- batch->ptr += 4;
- used = batch->ptr - batch->map;
- dri_bo_unmap(batch->buffer);
- batch->ptr = batch->map = NULL;
-
- if (!is_locked)
- intel_driver_lock_hardware(batch->intel);
-
- int flag = I915_EXEC_RENDER;
- if(batch->enable_slm) {
- /* use the hard code here temp, must change to
- * I915_EXEC_ENABLE_SLM when it drm accept the patch */
- flag |= (1<<13);
- }
- if (drm_intel_gem_bo_context_exec(batch->buffer, batch->intel->ctx, used, flag) < 0) {
- fprintf(stderr, "drm_intel_gem_bo_context_exec() failed: %s\n", strerror(errno));
- err = -1;
- }
-
- if (!is_locked)
- intel_driver_unlock_hardware(batch->intel);
-
- return err;
-}
-
-LOCAL void
-intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch,
- dri_bo *bo,
- uint32_t read_domains,
- uint32_t write_domains,
- uint32_t delta)
-{
- assert(batch->ptr - batch->map < batch->size);
- dri_bo_emit_reloc(batch->buffer,
- read_domains,
- write_domains,
- delta,
- batch->ptr - batch->map,
- bo);
- intel_batchbuffer_emit_dword(batch, bo->offset + delta);
-}
-
-LOCAL intel_batchbuffer_t*
-intel_batchbuffer_new(intel_driver_t *intel)
-{
- intel_batchbuffer_t *batch = NULL;
- assert(intel);
- TRY_ALLOC_NO_ERR (batch, CALLOC(intel_batchbuffer_t));
- intel_batchbuffer_init(batch, intel);
-
-exit:
- return batch;
-error:
- intel_batchbuffer_delete(batch);
- batch = NULL;
- goto exit;
-}
-
-LOCAL void
-intel_batchbuffer_delete(intel_batchbuffer_t *batch)
-{
- if (batch == NULL)
- return;
- if(batch->buffer)
- intel_batchbuffer_terminate(batch);
-
- cl_free(batch);
-}
diff --git a/src/intel/intel_batchbuffer.h b/src/intel/intel_batchbuffer.h
deleted file mode 100644
index 0544e9a..0000000
--- a/src/intel/intel_batchbuffer.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- */
-
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-#ifndef _INTEL_BATCHBUFFER_H_
-#define _INTEL_BATCHBUFFER_H_
-
-#include "intel_defines.h"
-#include "cl_utils.h"
-
-#include <xf86drm.h>
-#include <drm.h>
-#include <i915_drm.h>
-#include <intel_bufmgr.h>
-#include <stdint.h>
-#include <memory.h>
-#include <assert.h>
-
-#define BEGIN_BATCH(b, n) do { \
- intel_batchbuffer_require_space(b, (n) * 4); \
-} while (0)
-
-#define OUT_BATCH(b, d) do { \
- intel_batchbuffer_emit_dword(b, d); \
-} while (0)
-
-#define OUT_RELOC(b, bo, read_domains, write_domain, delta) do { \
- assert((delta) >= 0); \
- intel_batchbuffer_emit_reloc(b, bo, read_domains, write_domain, delta); \
-} while (0)
-
-#define ADVANCE_BATCH(b) do { } while (0)
-
-struct intel_driver;
-
-typedef struct intel_batchbuffer
-{
- struct intel_driver *intel;
- drm_intel_bo *buffer;
- /** Last bo submitted to the hardware. used for clFinish. */
- drm_intel_bo *last_bo;
- uint32_t size;
- uint8_t *map;
- uint8_t *ptr;
- /** HSW: can't set LRI in batch buffer, set I915_EXEC_ENABLE_SLM
- * flag when call exec. */
- uint8_t enable_slm;
- int atomic;
-} intel_batchbuffer_t;
-
-extern intel_batchbuffer_t* intel_batchbuffer_new(struct intel_driver*);
-extern void intel_batchbuffer_delete(intel_batchbuffer_t*);
-extern void intel_batchbuffer_emit_reloc(intel_batchbuffer_t*,
- drm_intel_bo*,
- uint32_t read_domains,
- uint32_t write_domains,
- uint32_t delta);
-extern void intel_batchbuffer_init(intel_batchbuffer_t*, struct intel_driver*);
-extern void intel_batchbuffer_terminate(intel_batchbuffer_t*);
-extern int intel_batchbuffer_flush(intel_batchbuffer_t*);
-extern int intel_batchbuffer_reset(intel_batchbuffer_t*, size_t sz);
-
-static INLINE uint32_t
-intel_batchbuffer_space(const intel_batchbuffer_t *batch)
-{
- assert(batch->ptr);
- return batch->size - (batch->ptr - batch->map);
-}
-
-static INLINE void
-intel_batchbuffer_emit_dword(intel_batchbuffer_t *batch, uint32_t x)
-{
- assert(intel_batchbuffer_space(batch) >= 4);
- *(uint32_t*)batch->ptr = x;
- batch->ptr += 4;
-}
-
-static INLINE void
-intel_batchbuffer_require_space(intel_batchbuffer_t *batch, uint32_t size) {
- assert(size < batch->size - 8);
- if (intel_batchbuffer_space(batch) < size)
- intel_batchbuffer_space(batch);
-}
-
-static INLINE uint8_t*
-intel_batchbuffer_alloc_space(intel_batchbuffer_t *batch, uint32_t size)
-{
- assert(intel_batchbuffer_space(batch) >= size);
- uint8_t *space_ptr = batch->ptr;
- batch->ptr += size;
- return space_ptr;
-}
-
-static INLINE void
-intel_batchbuffer_start_atomic(intel_batchbuffer_t *batch, uint32_t size)
-{
- assert(!batch->atomic);
- intel_batchbuffer_require_space(batch, size);
- batch->atomic = 1;
-}
-
-static INLINE void
-intel_batchbuffer_end_atomic(intel_batchbuffer_t *batch)
-{
- assert(batch->atomic);
- batch->atomic = 0;
-}
-
-#endif /* _INTEL_BATCHBUFFER_H_ */
-
diff --git a/src/intel/intel_cl_gl_share_image_info.h b/src/intel/intel_cl_gl_share_image_info.h
deleted file mode 100644
index 21fbbd1..0000000
--- a/src/intel/intel_cl_gl_share_image_info.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef __INTEL_CL_GL_SHARE_IMAGE_INFO_
-#define __INTEL_CL_GL_SHARE_IMAGE_INFO_
-
-struct _intel_cl_gl_share_image_info {
- int fd;
- size_t w;
- size_t h;
- size_t depth;
- size_t pitch;
- int tiling;
- size_t offset;
- size_t tile_x;
- size_t tile_y;
- unsigned int gl_format;
- size_t row_pitch, slice_pitch;
-};
-
-#endif
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
deleted file mode 100644
index 6ada30c..0000000
--- a/src/intel/intel_defines.h
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- */
-
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keith at tungstengraphics.com>
- */
-#ifndef __GENX_DEFINES_H__
-#define __GENX_DEFINES_H__
-
-#define CMD(PIPELINE,OP,SUB_OP) ((3 << 29) | \
- ((PIPELINE) << 27) | \
- ((OP) << 24) | \
- ((SUB_OP) << 16))
-
-#define CMD_URB_FENCE CMD(0, 0, 0)
-#define CMD_CS_URB_STATE CMD(0, 0, 1)
-#define CMD_CONSTANT_BUFFER CMD(0, 0, 2)
-#define CMD_STATE_PREFETCH CMD(0, 0, 3)
-#define CMD_MEDIA_GATEWAY_STATE CMD(2, 0, 3)
-#define CMD_MEDIA_STATE_FLUSH CMD(2, 0, 4)
-#define CMD_GPGPU_WALKER CMD(2, 1, 5)
-#define CMD_PIPE_CONTROL CMD(3, 2, 0)
-
-#define CMD_LOAD_REGISTER_IMM (0x22 << 23)
-
-#define CMD_STATE_BASE_ADDRESS CMD(0, 1, 1)
-#define CMD_STATE_SIP CMD(0, 1, 2)
-#define CMD_PIPELINE_SELECT CMD(1, 1, 4)
-#define CMD_SAMPLER_PALETTE_LOAD CMD(3, 1, 2)
-
-#define CMD_MEDIA_STATE_POINTERS CMD(2, 0, 0)
-#define CMD_MEDIA CMD(2, 1, 0)
-#define CMD_MEDIA_EX CMD(2, 1, 1)
-
-#define CMD_PIPELINED_POINTERS CMD(3, 0, 0)
-#define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1)
-#define CMD_VERTEX_BUFFERS CMD(3, 0, 8)
-#define CMD_VERTEX_ELEMENTS CMD(3, 0, 9)
-#define CMD_DRAWING_RECTANGLE CMD(3, 1, 0)
-#define CMD_CONSTANT_COLOR CMD(3, 1, 1)
-#define CMD_3DPRIMITIVE CMD(3, 3, 0)
-
-#define BASE_ADDRESS_MODIFY (1 << 0)
-
-#define PIPELINE_SELECT_3D 0
-#define PIPELINE_SELECT_MEDIA 1
-#define PIPELINE_SELECT_GPGPU 2
-#define PIPELINE_SELECT_MASK (3 << 8)
-
-#define UF0_CS_REALLOC (1 << 13)
-#define UF0_VFE_REALLOC (1 << 12)
-#define UF0_SF_REALLOC (1 << 11)
-#define UF0_CLIP_REALLOC (1 << 10)
-#define UF0_GS_REALLOC (1 << 9)
-#define UF0_VS_REALLOC (1 << 8)
-#define UF1_CLIP_FENCE_SHIFT 20
-#define UF1_GS_FENCE_SHIFT 10
-#define UF1_VS_FENCE_SHIFT 0
-#define UF2_CS_FENCE_SHIFT 20
-#define UF2_VFE_FENCE_SHIFT 10
-#define UF2_SF_FENCE_SHIFT 0
-
-#define FLOATING_POINT_IEEE_754 0
-#define FLOATING_POINT_NON_IEEE_754 1
-
-#define I965_SURFACE_1D 0
-#define I965_SURFACE_2D 1
-#define I965_SURFACE_3D 2
-#define I965_SURFACE_CUBE 3
-#define I965_SURFACE_BUFFER 4
-#define I965_SURFACE_NULL 7
-
-#define I965_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
-#define I965_SURFACEFORMAT_R32G32B32A32_SINT 0x001
-#define I965_SURFACEFORMAT_R32G32B32A32_UINT 0x002
-#define I965_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
-#define I965_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
-#define I965_SURFACEFORMAT_R64G64_FLOAT 0x005
-#define I965_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
-#define I965_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
-#define I965_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
-#define I965_SURFACEFORMAT_R32G32B32_FLOAT 0x040
-#define I965_SURFACEFORMAT_R32G32B32_SINT 0x041
-#define I965_SURFACEFORMAT_R32G32B32_UINT 0x042
-#define I965_SURFACEFORMAT_R32G32B32_UNORM 0x043
-#define I965_SURFACEFORMAT_R32G32B32_SNORM 0x044
-#define I965_SURFACEFORMAT_R32G32B32_SSCALED 0x045
-#define I965_SURFACEFORMAT_R32G32B32_USCALED 0x046
-#define I965_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
-#define I965_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
-#define I965_SURFACEFORMAT_R16G16B16A16_SINT 0x082
-#define I965_SURFACEFORMAT_R16G16B16A16_UINT 0x083
-#define I965_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
-#define I965_SURFACEFORMAT_R32G32_FLOAT 0x085
-#define I965_SURFACEFORMAT_R32G32_SINT 0x086
-#define I965_SURFACEFORMAT_R32G32_UINT 0x087
-#define I965_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
-#define I965_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
-#define I965_SURFACEFORMAT_L32A32_FLOAT 0x08A
-#define I965_SURFACEFORMAT_R32G32_UNORM 0x08B
-#define I965_SURFACEFORMAT_R32G32_SNORM 0x08C
-#define I965_SURFACEFORMAT_R64_FLOAT 0x08D
-#define I965_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
-#define I965_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
-#define I965_SURFACEFORMAT_A32X32_FLOAT 0x090
-#define I965_SURFACEFORMAT_L32X32_FLOAT 0x091
-#define I965_SURFACEFORMAT_I32X32_FLOAT 0x092
-#define I965_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
-#define I965_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
-#define I965_SURFACEFORMAT_R32G32_SSCALED 0x095
-#define I965_SURFACEFORMAT_R32G32_USCALED 0x096
-#define I965_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
-#define I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
-#define I965_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
-#define I965_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
-#define I965_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
-#define I965_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
-#define I965_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
-#define I965_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
-#define I965_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
-#define I965_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
-#define I965_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
-#define I965_SURFACEFORMAT_R16G16_UNORM 0x0CC
-#define I965_SURFACEFORMAT_R16G16_SNORM 0x0CD
-#define I965_SURFACEFORMAT_R16G16_SINT 0x0CE
-#define I965_SURFACEFORMAT_R16G16_UINT 0x0CF
-#define I965_SURFACEFORMAT_R16G16_FLOAT 0x0D0
-#define I965_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
-#define I965_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
-#define I965_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
-#define I965_SURFACEFORMAT_R32_SINT 0x0D6
-#define I965_SURFACEFORMAT_R32_UINT 0x0D7
-#define I965_SURFACEFORMAT_R32_FLOAT 0x0D8
-#define I965_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
-#define I965_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
-#define I965_SURFACEFORMAT_L16A16_UNORM 0x0DF
-#define I965_SURFACEFORMAT_I24X8_UNORM 0x0E0
-#define I965_SURFACEFORMAT_L24X8_UNORM 0x0E1
-#define I965_SURFACEFORMAT_A24X8_UNORM 0x0E2
-#define I965_SURFACEFORMAT_I32_FLOAT 0x0E3
-#define I965_SURFACEFORMAT_L32_FLOAT 0x0E4
-#define I965_SURFACEFORMAT_A32_FLOAT 0x0E5
-#define I965_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
-#define I965_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
-#define I965_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
-#define I965_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
-#define I965_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
-#define I965_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
-#define I965_SURFACEFORMAT_L16A16_FLOAT 0x0F0
-#define I965_SURFACEFORMAT_R32_UNORM 0x0F1
-#define I965_SURFACEFORMAT_R32_SNORM 0x0F2
-#define I965_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
-#define I965_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
-#define I965_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
-#define I965_SURFACEFORMAT_R16G16_SSCALED 0x0F6
-#define I965_SURFACEFORMAT_R16G16_USCALED 0x0F7
-#define I965_SURFACEFORMAT_R32_SSCALED 0x0F8
-#define I965_SURFACEFORMAT_R32_USCALED 0x0F9
-#define I965_SURFACEFORMAT_B5G6R5_UNORM 0x100
-#define I965_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
-#define I965_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
-#define I965_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
-#define I965_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
-#define I965_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
-#define I965_SURFACEFORMAT_R8G8_UNORM 0x106
-#define I965_SURFACEFORMAT_R8G8_SNORM 0x107
-#define I965_SURFACEFORMAT_R8G8_SINT 0x108
-#define I965_SURFACEFORMAT_R8G8_UINT 0x109
-#define I965_SURFACEFORMAT_R16_UNORM 0x10A
-#define I965_SURFACEFORMAT_R16_SNORM 0x10B
-#define I965_SURFACEFORMAT_R16_SINT 0x10C
-#define I965_SURFACEFORMAT_R16_UINT 0x10D
-#define I965_SURFACEFORMAT_R16_FLOAT 0x10E
-#define I965_SURFACEFORMAT_I16_UNORM 0x111
-#define I965_SURFACEFORMAT_L16_UNORM 0x112
-#define I965_SURFACEFORMAT_A16_UNORM 0x113
-#define I965_SURFACEFORMAT_L8A8_UNORM 0x114
-#define I965_SURFACEFORMAT_I16_FLOAT 0x115
-#define I965_SURFACEFORMAT_L16_FLOAT 0x116
-#define I965_SURFACEFORMAT_A16_FLOAT 0x117
-#define I965_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
-#define I965_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
-#define I965_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
-#define I965_SURFACEFORMAT_R8G8_SSCALED 0x11C
-#define I965_SURFACEFORMAT_R8G8_USCALED 0x11D
-#define I965_SURFACEFORMAT_R16_SSCALED 0x11E
-#define I965_SURFACEFORMAT_R16_USCALED 0x11F
-#define I965_SURFACEFORMAT_R8_UNORM 0x140
-#define I965_SURFACEFORMAT_R8_SNORM 0x141
-#define I965_SURFACEFORMAT_R8_SINT 0x142
-#define I965_SURFACEFORMAT_R8_UINT 0x143
-#define I965_SURFACEFORMAT_A8_UNORM 0x144
-#define I965_SURFACEFORMAT_I8_UNORM 0x145
-#define I965_SURFACEFORMAT_L8_UNORM 0x146
-#define I965_SURFACEFORMAT_P4A4_UNORM 0x147
-#define I965_SURFACEFORMAT_A4P4_UNORM 0x148
-#define I965_SURFACEFORMAT_R8_SSCALED 0x149
-#define I965_SURFACEFORMAT_R8_USCALED 0x14A
-#define I965_SURFACEFORMAT_R1_UINT 0x181
-#define I965_SURFACEFORMAT_YCRCB_NORMAL 0x182
-#define I965_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
-#define I965_SURFACEFORMAT_BC1_UNORM 0x186
-#define I965_SURFACEFORMAT_BC2_UNORM 0x187
-#define I965_SURFACEFORMAT_BC3_UNORM 0x188
-#define I965_SURFACEFORMAT_BC4_UNORM 0x189
-#define I965_SURFACEFORMAT_BC5_UNORM 0x18A
-#define I965_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
-#define I965_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
-#define I965_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
-#define I965_SURFACEFORMAT_MONO8 0x18E
-#define I965_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
-#define I965_SURFACEFORMAT_YCRCB_SWAPY 0x190
-#define I965_SURFACEFORMAT_DXT1_RGB 0x191
-#define I965_SURFACEFORMAT_FXT1 0x192
-#define I965_SURFACEFORMAT_R8G8B8_UNORM 0x193
-#define I965_SURFACEFORMAT_R8G8B8_SNORM 0x194
-#define I965_SURFACEFORMAT_R8G8B8_SSCALED 0x195
-#define I965_SURFACEFORMAT_R8G8B8_USCALED 0x196
-#define I965_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
-#define I965_SURFACEFORMAT_R64G64B64_FLOAT 0x198
-#define I965_SURFACEFORMAT_BC4_SNORM 0x199
-#define I965_SURFACEFORMAT_BC5_SNORM 0x19A
-#define I965_SURFACEFORMAT_R16G16B16_UNORM 0x19C
-#define I965_SURFACEFORMAT_R16G16B16_SNORM 0x19D
-#define I965_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
-#define I965_SURFACEFORMAT_R16G16B16_USCALED 0x19F
-#define I965_SURFACEFORMAT_RAW 0x1FF
-
-#define I965_MAPFILTER_NEAREST 0x0
-#define I965_MAPFILTER_LINEAR 0x1
-#define I965_MAPFILTER_ANISOTROPIC 0x2
-
-#define I965_MIPFILTER_NONE 0
-#define I965_MIPFILTER_NEAREST 1
-#define I965_MIPFILTER_LINEAR 3
-
-#define I965_TEXCOORDMODE_WRAP 0
-#define I965_TEXCOORDMODE_MIRROR 1
-#define I965_TEXCOORDMODE_CLAMP 2
-#define I965_TEXCOORDMODE_CUBE 3
-#define I965_TEXCOORDMODE_CLAMP_BORDER 4
-#define I965_TEXCOORDMODE_MIRROR_ONCE 5
-
-#define I965_SURFACERETURNFORMAT_FLOAT32 0
-#define I965_SURFACERETURNFORMAT_S1 1
-
-#define I965_TILEWALK_XMAJOR 0
-#define I965_TILEWALK_YMAJOR 1
-
-#define GEN8_TILEMODE_LINEAR 0
-#define GEN8_TILEMODE_WMAJOR 1
-#define GEN8_TILEMODE_XMAJOR 2
-#define GEN8_TILEMODE_YMAJOR 3
-
-#define I965_SURCHAN_SELECT_ZERO 0
-#define I965_SURCHAN_SELECT_ONE 1
-#define I965_SURCHAN_SELECT_RED 4
-#define I965_SURCHAN_SELECT_GREEN 5
-#define I965_SURCHAN_SELECT_BLUE 6
-#define I965_SURCHAN_SELECT_ALPHA 7
-
-#define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
- IS_G4X(intel->device_id) ? 384 : 256)
-// HSW
-#define HSW_SCRATCH1_OFFSET (0xB038)
-#define HSW_ROW_CHICKEN3_HDC_OFFSET (0xE49C)
-
-// L3 cache stuff
-#define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010)
-#define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET (0xB020)
-#define GEN7_L3_CNTL_REG3_ADDRESS_OFFSET (0xB024)
-
-#define GEN8_L3_CNTL_REG_ADDRESS_OFFSET (0x7034)
-
-// To issue pipe controls (reset L3 / SLM or stall)
-#define GEN7_PIPE_CONTROL_MEDIA 0x2
-#define GEN7_PIPE_CONTROL_3D 0x3
-#define GEN7_PIPE_CONTROL_INSTRUCTION_GFX 0x3
-#define GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL 0x2
-#define GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL 0x0
-#define GEN7_PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
-#define GEN7_PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
-
-
-#define GEN_MAPFILTER_NEAREST 0x0
-#define GEN_MAPFILTER_LINEAR 0x1
-#define GEN_MAPFILTER_ANISOTROPIC 0x2
-
-#define GEN_MIPFILTER_NONE 0
-#define GEN_MIPFILTER_NEAREST 1
-#define GEN_MIPFILTER_LINEAR 3
-
-#define GEN_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20
-#define GEN_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10
-#define GEN_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08
-#define GEN_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04
-#define GEN_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02
-#define GEN_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01
-
-#define GEN_TEXCOORDMODE_WRAP 0
-#define GEN_TEXCOORDMODE_MIRROR 1
-#define GEN_TEXCOORDMODE_CLAMP 2
-#define GEN_TEXCOORDMODE_CUBE 3
-#define GEN_TEXCOORDMODE_CLAMP_BORDER 4
-#define GEN_TEXCOORDMODE_MIRROR_ONCE 5
-
-#endif /* __GENX_DEFINES_H__ */
-
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
deleted file mode 100644
index b8a1b52..0000000
--- a/src/intel/intel_driver.c
+++ /dev/null
@@ -1,1042 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- */
-
-/*
- * Copyright 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Xiang Haihao <haihao.xiang at intel.com>
- * Zou Nan hai <nanhai.zou at intel.com>
- *
- */
-
-#if defined(HAS_GL_EGL)
-#define EGL_EGLEXT_PROTOTYPES
-#include "GL/gl.h"
-#include "EGL/egl.h"
-#include <EGL/eglext.h>
-#endif
-
-#ifdef HAS_X11
-#include <X11/Xlibint.h>
-#include "x11/dricommon.h"
-#endif
-
-#include "intel_driver.h"
-#include "intel_gpgpu.h"
-#include "intel_batchbuffer.h"
-#include "intel_bufmgr.h"
-#include "cl_mem.h"
-
-#include <assert.h>
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
-#include <xf86drm.h>
-#include <stdio.h>
-
-#include "cl_utils.h"
-#include "cl_alloc.h"
-#include "cl_context.h"
-#include "cl_driver.h"
-#include "cl_device_id.h"
-#include "cl_platform_id.h"
-
-static void
-intel_driver_delete(intel_driver_t *driver)
-{
- if (driver == NULL)
- return;
-
- cl_free(driver);
-}
-
-static intel_driver_t*
-intel_driver_new(void)
-{
- intel_driver_t *driver = NULL;
-
- TRY_ALLOC_NO_ERR (driver, CALLOC(intel_driver_t));
- driver->fd = -1;
-
-exit:
- return driver;
-error:
-intel_driver_delete(driver);
-driver = NULL;
-goto exit;
-}
-
-/* just used for maximum relocation number in drm_intel */
-#define BATCH_SIZE 0x4000
-
-/* set OCL_DUMP_AUB=1 to get aub file */
-static void
-intel_driver_aub_dump(intel_driver_t *driver)
-{
-char *val;
-val = getenv("OCL_DUMP_AUB");
-if (!val)
- return;
-if (atoi(val) != 0) {
- drm_intel_bufmgr_gem_set_aub_filename(driver->bufmgr,
- "beignet.aub");
- drm_intel_bufmgr_gem_set_aub_dump(driver->bufmgr, 1);
-}
-}
-
-static int
-intel_driver_memman_init(intel_driver_t *driver)
-{
-driver->bufmgr = drm_intel_bufmgr_gem_init(driver->fd, BATCH_SIZE);
-if (!driver->bufmgr) return 0;
-drm_intel_bufmgr_gem_enable_reuse(driver->bufmgr);
-driver->device_id = drm_intel_bufmgr_gem_get_devid(driver->bufmgr);
-intel_driver_aub_dump(driver);
-return 1;
-}
-
-static int
-intel_driver_context_init(intel_driver_t *driver)
-{
-driver->ctx = drm_intel_gem_context_create(driver->bufmgr);
-if (!driver->ctx)
- return 0;
-driver->null_bo = NULL;
-#ifdef HAS_BO_SET_SOFTPIN
-drm_intel_bo *bo = dri_bo_alloc(driver->bufmgr, "null_bo", 64*1024, 4096);
-drm_intel_bo_set_softpin_offset(bo, 0);
-// don't reuse it, that would make two bo trying to bind to same address,
-// which is un-reasonable.
-drm_intel_bo_disable_reuse(bo);
-driver->null_bo = bo;
-#endif
-return 1;
-}
-
-static void
-intel_driver_context_destroy(intel_driver_t *driver)
-{
-if (driver->null_bo)
- drm_intel_bo_unreference(driver->null_bo);
-if(driver->ctx)
- drm_intel_gem_context_destroy(driver->ctx);
-driver->ctx = NULL;
-}
-
-static int
-intel_driver_init(intel_driver_t *driver, int dev_fd)
-{
-driver->fd = dev_fd;
-driver->locked = 0;
-pthread_mutex_init(&driver->ctxmutex, NULL);
-
-if (!intel_driver_memman_init(driver)) return 0;
-if (!intel_driver_context_init(driver)) return 0;
-
-#if EMULATE_GEN
-driver->gen_ver = EMULATE_GEN;
-if (EMULATE_GEN == 75)
- driver->device_id = PCI_CHIP_HASWELL_L; /* we pick L for HSW */
-else if (EMULATE_GEN == 7)
- driver->device_id = PCI_CHIP_IVYBRIDGE_GT2; /* we pick GT2 for IVB */
-else if (EMULATE_GEN == 6)
- driver->device_id = PCI_CHIP_SANDYBRIDGE_GT2; /* we pick GT2 for SNB */
-else
- FATAL ("Unsupported Gen for emulation");
-#else
-if (IS_GEN9(driver->device_id))
- driver->gen_ver = 9;
-else if (IS_GEN8(driver->device_id))
- driver->gen_ver = 8;
-else if (IS_GEN75(driver->device_id))
- driver->gen_ver = 75;
-else if (IS_GEN7(driver->device_id))
- driver->gen_ver = 7;
-else if (IS_GEN6(driver->device_id))
- driver->gen_ver = 6;
-else if(IS_IGDNG(driver->device_id))
- driver->gen_ver = 5;
-else
- driver->gen_ver = 4;
-#endif /* EMULATE_GEN */
-return 1;
-}
-
-static cl_int
-intel_driver_open(intel_driver_t *intel, cl_context_prop props)
-{
-int cardi;
-#ifdef HAS_X11
-char *driver_name;
-#endif
-if (props != NULL
- && props->gl_type != CL_GL_NOSHARE
- && props->gl_type != CL_GL_GLX_DISPLAY
- && props->gl_type != CL_GL_EGL_DISPLAY) {
- fprintf(stderr, "Unsupported gl share type %d.\n", props->gl_type);
- return CL_INVALID_OPERATION;
-}
-
-#ifdef HAS_X11
-intel->x11_display = XOpenDisplay(NULL);
-
-if(intel->x11_display) {
- if((intel->dri_ctx = getDRI2State(intel->x11_display,
- DefaultScreen(intel->x11_display),
- &driver_name))) {
- intel_driver_init_shared(intel, intel->dri_ctx);
- Xfree(driver_name);
- }
- else
- fprintf(stderr, "X server found. dri2 connection failed! \n");
-}
-#endif
-
-if(!intel_driver_is_active(intel)) {
- char card_name[20];
- for(cardi = 0; cardi < 16; cardi++) {
- sprintf(card_name, "/dev/dri/renderD%d", 128+cardi);
- if (access(card_name, R_OK) != 0)
- continue;
- if(intel_driver_init_render(intel, card_name))
- break;
- }
-}
-
-if(!intel_driver_is_active(intel)) {
- char card_name[20];
- for(cardi = 0; cardi < 16; cardi++) {
- sprintf(card_name, "/dev/dri/card%d", cardi);
- if (access(card_name, R_OK) != 0)
- continue;
- if(intel_driver_init_master(intel, card_name))
- break;
- }
-}
-
-if(!intel_driver_is_active(intel)) {
- fprintf(stderr, "Device open failed, aborting...\n");
- return CL_DEVICE_NOT_FOUND;
-}
-
-#ifdef HAS_GL_EGL
-if (props && props->gl_type == CL_GL_EGL_DISPLAY) {
- assert(props->egl_display);
-}
-#endif
-return CL_SUCCESS;
-}
-
-static void
-intel_driver_close(intel_driver_t *intel)
-{
-//Due to the drm change about the test usrptr, we need to destroy the bufmgr
-//befor the driver was closed, otherwise the test usrptr will not be freed.
-if (intel->bufmgr)
- drm_intel_bufmgr_destroy(intel->bufmgr);
-#ifdef HAS_X11
-if(intel->dri_ctx) dri_state_release(intel->dri_ctx);
-if(intel->x11_display) XCloseDisplay(intel->x11_display);
-#endif
-if(intel->need_close) {
- close(intel->fd);
- intel->need_close = 0;
-}
-intel->dri_ctx = NULL;
-intel->x11_display = NULL;
-intel->fd = -1;
-}
-
-LOCAL int
-intel_driver_is_active(intel_driver_t *driver) {
-return driver->fd >= 0;
-}
-
-#ifdef HAS_X11
-LOCAL int
-intel_driver_init_shared(intel_driver_t *driver, dri_state_t *state)
-{
-int ret;
-assert(state);
-if(state->driConnectedFlag != DRI2)
- return 0;
-ret = intel_driver_init(driver, state->fd);
-driver->need_close = 0;
-return ret;
-}
-#endif
-
-LOCAL int
-intel_driver_init_master(intel_driver_t *driver, const char* dev_name)
-{
-int dev_fd, ret;
-
-drm_client_t client;
-
-// usually dev_name = "/dev/dri/card%d"
-dev_fd = open(dev_name, O_RDWR);
-if (dev_fd == -1) {
- fprintf(stderr, "open(\"%s\", O_RDWR) failed: %s\n", dev_name, strerror(errno));
- return 0;
-}
-
-// Check that we're authenticated
-memset(&client, 0, sizeof(drm_client_t));
-ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client);
-if (ret == -1) {
- fprintf(stderr, "ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client) failed: %s\n", strerror(errno));
- close(dev_fd);
- return 0;
-}
-
-if (!client.auth) {
- fprintf(stderr, "%s not authenticated\n", dev_name);
- close(dev_fd);
- return 0;
-}
-
-ret = intel_driver_init(driver, dev_fd);
-driver->need_close = 1;
-
-return ret;
-}
-
-LOCAL int
-intel_driver_init_render(intel_driver_t *driver, const char* dev_name)
-{
-int dev_fd, ret;
-
-dev_fd = open(dev_name, O_RDWR);
-if (dev_fd == -1)
- return 0;
-
-ret = intel_driver_init(driver, dev_fd);
-driver->need_close = 1;
-
-return ret;
-}
-
-LOCAL int
-intel_driver_terminate(intel_driver_t *driver)
-{
-pthread_mutex_destroy(&driver->ctxmutex);
-
-if(driver->need_close) {
- close(driver->fd);
- driver->need_close = 0;
-}
-driver->fd = -1;
-return 1;
-}
-
-LOCAL void
-intel_driver_lock_hardware(intel_driver_t *driver)
-{
-
-PPTHREAD_MUTEX_LOCK(driver);
-assert(!driver->locked);
-driver->locked = 1;
-}
-
-LOCAL void
-intel_driver_unlock_hardware(intel_driver_t *driver)
-{
-driver->locked = 0;
-PPTHREAD_MUTEX_UNLOCK(driver);
-}
-
-LOCAL dri_bo*
-intel_driver_share_buffer_from_name(intel_driver_t *driver, const char *sname, uint32_t name)
-{
-dri_bo *bo = intel_bo_gem_create_from_name(driver->bufmgr,
- sname,
- name);
-if (bo == NULL) {
- fprintf(stderr, "intel_bo_gem_create_from_name create \"%s\" bo from name %d failed: %s\n", sname, name, strerror(errno));
- return NULL;
-}
-return bo;
-}
-
-LOCAL dri_bo*
-intel_driver_share_buffer_from_fd(intel_driver_t *driver, int fd, int size)
-{
-dri_bo *bo = drm_intel_bo_gem_create_from_prime(driver->bufmgr,
- fd,
- size);
-if (bo == NULL) {
- fprintf(stderr, "drm_intel_bo_gem_create_from_prime create bo(size %d) from fd %d failed: %s\n", size, fd, strerror(errno));
- return NULL;
-}
-return bo;
-}
-
-LOCAL uint32_t
-intel_driver_shared_name(intel_driver_t *driver, dri_bo *bo)
-{
-uint32_t name;
-assert(bo);
-dri_bo_flink(bo, &name);
-return name;
-}
-/* XXX a null props is ok? */
-static int
-intel_get_device_id(void)
-{
-intel_driver_t *driver = NULL;
-int intel_device_id;
-
-driver = intel_driver_new();
-assert(driver != NULL);
-if(UNLIKELY(intel_driver_open(driver, NULL) != CL_SUCCESS)) return INVALID_CHIP_ID;
-intel_device_id = driver->device_id;
-intel_driver_context_destroy(driver);
-intel_driver_close(driver);
-intel_driver_terminate(driver);
-intel_driver_delete(driver);
-
-return intel_device_id;
-}
-
-extern void intel_gpgpu_delete_all(intel_driver_t *driver);
-static void
-cl_intel_driver_delete(intel_driver_t *driver)
-{
-if (driver == NULL)
- return;
-intel_gpgpu_delete_all(driver);
-intel_driver_context_destroy(driver);
-intel_driver_close(driver);
-intel_driver_terminate(driver);
-intel_driver_delete(driver);
-}
-
-#include "cl_gbe_loader.h"
-static intel_driver_t*
-cl_intel_driver_new(cl_context_prop props)
-{
-intel_driver_t *driver = NULL;
-TRY_ALLOC_NO_ERR (driver, intel_driver_new());
-if(UNLIKELY(intel_driver_open(driver, props) != CL_SUCCESS)) goto error;
-exit:
-return driver;
-error:
-cl_intel_driver_delete(driver);
-driver = NULL;
-goto exit;
-}
-
-static drm_intel_bufmgr*
-intel_driver_get_bufmgr(intel_driver_t *drv)
-{
-return drv->bufmgr;
-}
-
-static uint32_t
-intel_driver_get_ver(struct intel_driver *drv)
-{
-return drv->gen_ver;
-}
-
-static void
-intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t *stack_size)
-{
- if (drv->gen_ver == 75)
- *stack_size = *stack_size * 4;
- else if (drv->device_id == PCI_CHIP_BROXTON_1 || drv->device_id == PCI_CHIP_BROXTON_3 ||
- IS_CHERRYVIEW(drv->device_id))
- *stack_size = *stack_size * 2;
-}
-
-static void
-intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag)
-{
-drv->atomic_test_result = atomic_flag;
-}
-
-static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
-static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
-
-static int get_cl_tiling(uint32_t drm_tiling)
-{
-switch(drm_tiling) {
-case I915_TILING_X: return CL_TILE_X;
-case I915_TILING_Y: return CL_TILE_Y;
-case I915_TILING_NONE: return CL_NO_TILE;
-default:
- assert(0);
-}
-return CL_NO_TILE;
-}
-
-static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mode, uint32_t dim)
-{
-uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver;
-uint32_t ret = 0;
-
-switch (tiling_mode) {
-case CL_TILE_X:
- if (dim == 0) { //tileX width in bytes
- ret = 512;
- } else if (dim == 1) { //tileX height in number of rows
- ret = 8;
- } else if (dim == 2) { //height to calculate slice pitch
- if (gen_ver == 9) //SKL same as tileY height
- ret = 8;
- else if (gen_ver == 8) //IVB, HSW, BDW same as CL_NO_TILE vertical alignment
- ret = 4;
- else
- ret = 2;
- } else
- assert(0);
- break;
-
-case CL_TILE_Y:
- if (dim == 0) { //tileY width in bytes
- ret = 128;
- } else if (dim == 1) { //tileY height in number of rows
- ret = 32;
- } else if (dim == 2) { //height to calculate slice pitch
- if (gen_ver == 9) //SKL same as tileY height
- ret = 32;
- else if (gen_ver == 8) //IVB, HSW, BDW same as CL_NO_TILE vertical alignment
- ret = 4;
- else
- ret = 2;
- } else
- assert(0);
- break;
-
-case CL_NO_TILE:
- if (dim == 1 || dim == 2) { //vertical alignment
- if (gen_ver == 8 || gen_ver == 9) //SKL 1D array need 4 alignment qpitch
- ret = 4;
- else
- ret = 2;
- } else
- assert(0);
- break;
-}
-
-return ret;
-}
-
-#if defined(HAS_GL_EGL)
-#include "intel_cl_gl_share_image_info.h"
-#include "cl_image.h"
-
-static PFNEGLEXPORTDMABUFIMAGEMESAPROC eglExportDMABUFImageMESA_func = NULL;
-
-static int
-get_required_egl_extensions(){
-
-if(eglExportDMABUFImageMESA_func == NULL){
- eglExportDMABUFImageMESA_func = (PFNEGLEXPORTDMABUFIMAGEMESAPROC) eglGetProcAddress("eglExportDMABUFImageMESA");
- if(eglExportDMABUFImageMESA_func == NULL){
- fprintf(stderr, "Failed to get EGL extension function eglExportDMABUFImageMESA\n");
- return -1;
- }
-}
-return 0;
-}
-
-
-static int cl_get_clformat_from_texture(GLint tex_format, cl_image_format * cl_format)
-{
-cl_int ret = CL_SUCCESS;
-
-switch (tex_format) {
-case GL_RGBA8:
-case GL_RGBA:
-case GL_RGBA16:
-case GL_RGBA8I:
-case GL_RGBA16I:
-case GL_RGBA32I:
-case GL_RGBA8UI:
-case GL_RGBA16UI:
-case GL_RGBA32UI:
-case GL_RGBA16F:
-case GL_RGBA32F:
- cl_format->image_channel_order = CL_RGBA;
- break;
-case GL_BGRA:
- cl_format->image_channel_order = CL_BGRA;
- break;
-default:
- ret = -1;
- goto error;
-}
-
-switch (tex_format) {
-case GL_RGBA8:
-case GL_RGBA:
-case GL_BGRA:
- cl_format->image_channel_data_type = CL_UNORM_INT8;
- break;
-case GL_RGBA16:
- cl_format->image_channel_data_type = CL_UNORM_INT16;
- break;
-case GL_RGBA8I:
- cl_format->image_channel_data_type = CL_SIGNED_INT8;
- break;
-case GL_RGBA16I:
- cl_format->image_channel_data_type = CL_SIGNED_INT16;
- break;
-case GL_RGBA32I:
- cl_format->image_channel_data_type = CL_SIGNED_INT32;
- break;
-case GL_RGBA8UI:
- cl_format->image_channel_data_type = CL_UNSIGNED_INT8;
- break;
-case GL_RGBA16UI:
- cl_format->image_channel_data_type = CL_UNSIGNED_INT16;
- break;
-case GL_RGBA32UI:
- cl_format->image_channel_data_type = CL_UNSIGNED_INT32;
- break;
-case GL_RGBA16F:
- cl_format->image_channel_data_type = CL_HALF_FLOAT;
- break;
-case GL_RGBA32F:
- cl_format->image_channel_order = CL_FLOAT;
- break;
-default:
- ret = -1;
- goto error;
-}
-
-error:
-return ret;
-}
-
-static int
-get_mem_type_from_target(GLenum texture_target, cl_mem_object_type *type)
-{
-switch(texture_target) {
-case GL_TEXTURE_1D: *type = CL_MEM_OBJECT_IMAGE1D; break;
-case GL_TEXTURE_2D: *type = CL_MEM_OBJECT_IMAGE2D; break;
-case GL_TEXTURE_3D: *type = CL_MEM_OBJECT_IMAGE3D; break;
-case GL_TEXTURE_1D_ARRAY: *type = CL_MEM_OBJECT_IMAGE1D_ARRAY; break;
-case GL_TEXTURE_2D_ARRAY: *type = CL_MEM_OBJECT_IMAGE2D_ARRAY; break;
-default:
- return -1;
-}
-return CL_SUCCESS;
-}
-
-static cl_buffer
-intel_alloc_buffer_from_texture_egl(cl_context ctx, unsigned int target,
- int miplevel, unsigned int texture,
- struct _cl_mem_image *image)
-{
-drm_intel_bo *intel_bo = NULL;
-struct _intel_cl_gl_share_image_info info;
-unsigned int bpp, intel_fmt;
-cl_image_format cl_format;
-EGLBoolean ret;
-
-EGLenum e_target;
-//We just support GL_TEXTURE_2D because we can't query info like slice_pitch now.
-if(target == GL_TEXTURE_2D)
- e_target = EGL_GL_TEXTURE_2D;
-else
- return NULL;
-
-if(get_required_egl_extensions() != 0)
- return NULL;
-
-EGLAttrib attrib_list[] = {EGL_GL_TEXTURE_LEVEL, miplevel,
- EGL_NONE};
-EGLImage e_image = eglCreateImage(EGL_DISP(ctx), EGL_CTX(ctx), e_target,
- (EGLClientBuffer)texture, &attrib_list[0]);
-if(e_image == EGL_NO_IMAGE)
- return NULL;
-
-int fd, stride, offset;
-ret = eglExportDMABUFImageMESA_func(EGL_DISP(ctx), e_image, &fd, &stride, &offset);
-if(ret != EGL_TRUE){
- eglDestroyImage(EGL_DISP(ctx), e_image);
- return NULL;
-}
-info.fd = fd;
-
-/* The size argument just takes effect in intel_driver_share_buffer_from_fd when
- * Linux kernel is older than 3.12, so it doesn't matter we set to 0 here.
- */
-int size = 0;
-intel_bo = intel_driver_share_buffer_from_fd((intel_driver_t *)ctx->drv, fd, size);
-
-if (intel_bo == NULL) {
- eglDestroyImage(EGL_DISP(ctx), e_image);
- return NULL;
-}
-
-GLint param_value;
-glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_WIDTH, &param_value);
-info.w = param_value;
-glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_HEIGHT, &param_value);
-info.h = param_value;
-glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_DEPTH, &param_value);
-info.depth = 1;
-info.pitch = stride;
-uint32_t tiling_mode, swizzle_mode;
-drm_intel_bo_get_tiling(intel_bo, &tiling_mode, &swizzle_mode);
-info.offset = offset;
-info.tile_x = 0;
-info.tile_y = 0;
-glGetTexLevelParameteriv(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, &param_value);
-info.gl_format = param_value;
-info.row_pitch = stride;
-info.slice_pitch = 0;
-
-info.tiling = get_cl_tiling(tiling_mode);
-if (cl_get_clformat_from_texture(info.gl_format, &cl_format) != 0)
- goto error;
-
-if (cl_image_byte_per_pixel(&cl_format, &bpp) != CL_SUCCESS)
- goto error;
-intel_fmt = cl_image_get_intel_format(&cl_format);
-if (intel_fmt == INTEL_UNSUPPORTED_FORMAT)
- goto error;
-cl_mem_object_type image_type;
-if (get_mem_type_from_target(target, &image_type) != 0)
- goto error;
-
-cl_mem_image_init(image, info.w, info.h,
- image_type, info.depth, cl_format,
- intel_fmt, bpp, info.row_pitch,
- info.slice_pitch, info.tiling,
- info.tile_x, info.tile_y, info.offset);
-
-struct _cl_mem_gl_image *gl_image = (struct _cl_mem_gl_image*)image;
-gl_image->fd = fd;
-gl_image->egl_image = e_image;
-
-return (cl_buffer) intel_bo;
-
-error:
-drm_intel_bo_unreference(intel_bo);
-close(fd);
-eglDestroyImage(EGL_DISP(ctx), e_image);
-return NULL;
-}
-
-static cl_buffer
-intel_alloc_buffer_from_texture(cl_context ctx, unsigned int target,
- int miplevel, unsigned int texture,
- struct _cl_mem_image *image)
-{
-
-if (IS_EGL_CONTEXT(ctx))
- return intel_alloc_buffer_from_texture_egl(ctx, target, miplevel, texture, image);
-
-return NULL;
-}
-
-static int
-intel_release_buffer_from_texture(cl_context ctx, struct _cl_mem_gl_image *gl_image)
-{
-if (IS_EGL_CONTEXT(ctx)) {
- close(gl_image->fd);
- eglDestroyImage(EGL_DISP(ctx), gl_image->egl_image);
- return CL_SUCCESS;
-}
-return -1;
-}
-#endif
-
-cl_buffer intel_share_buffer_from_libva(cl_context ctx,
- unsigned int bo_name,
- size_t *sz)
-{
-drm_intel_bo *intel_bo;
-
-intel_bo = intel_driver_share_buffer_from_name((intel_driver_t *)ctx->drv, "shared from libva", bo_name);
-
-if (intel_bo == NULL)
- return NULL;
-
-if (sz)
- *sz = intel_bo->size;
-
-return (cl_buffer)intel_bo;
-}
-
-cl_buffer intel_share_image_from_libva(cl_context ctx,
- unsigned int bo_name,
- struct _cl_mem_image *image)
-{
-drm_intel_bo *intel_bo;
-uint32_t intel_tiling, intel_swizzle_mode;
-
-intel_bo = intel_driver_share_buffer_from_name((intel_driver_t *)ctx->drv, "shared from libva", bo_name);
-
-if (intel_bo == NULL)
- return NULL;
-
-drm_intel_bo_get_tiling(intel_bo, &intel_tiling, &intel_swizzle_mode);
-image->tiling = get_cl_tiling(intel_tiling);
-
-return (cl_buffer)intel_bo;
-}
-
-cl_buffer intel_share_buffer_from_fd(cl_context ctx,
- int fd,
- int buffer_size)
-{
-drm_intel_bo *intel_bo;
-
-intel_bo = intel_driver_share_buffer_from_fd((intel_driver_t *)ctx->drv, fd, buffer_size);
-
-if (intel_bo == NULL)
- return NULL;
-
-return (cl_buffer)intel_bo;
-}
-
-cl_buffer intel_share_image_from_fd(cl_context ctx,
- int fd,
- int image_size,
- struct _cl_mem_image *image)
-{
-drm_intel_bo *intel_bo;
-uint32_t intel_tiling, intel_swizzle_mode;
-
-intel_bo = intel_driver_share_buffer_from_fd((intel_driver_t *)ctx->drv, fd, image_size);
-
-if (intel_bo == NULL)
- return NULL;
-
-drm_intel_bo_get_tiling(intel_bo, &intel_tiling, &intel_swizzle_mode);
-image->tiling = get_cl_tiling(intel_tiling);
-
-return (cl_buffer)intel_bo;
-}
-
-static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const char* name, void *data,size_t size, unsigned long flags)
-{
-#ifdef HAS_USERPTR
-drm_intel_bo *bo;
-bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags);
-/* Fallback to unsynchronized userptr allocation if kernel has no MMU notifier enabled. */
-if (bo == NULL)
- bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED);
-return (cl_buffer)bo;
-#else
-return NULL;
-#endif
-}
-
-static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling)
-{
-switch (tiling) {
- case CL_NO_TILE:
- *intel_tiling = I915_TILING_NONE;
- break;
- case CL_TILE_X:
- *intel_tiling = I915_TILING_X;
- break;
- case CL_TILE_Y:
- *intel_tiling = I915_TILING_Y;
- break;
- default:
- assert(0);
- return -1;
-}
-return 0;
-}
-
-static int intel_buffer_set_tiling(cl_buffer bo,
- cl_image_tiling_t tiling, size_t stride)
-{
-uint32_t intel_tiling;
-int ret;
-if (UNLIKELY((get_intel_tiling(tiling, &intel_tiling)) < 0))
- return -1;
-#ifndef NDEBUG
-uint32_t required_tiling;
-required_tiling = intel_tiling;
-#endif
-ret = drm_intel_bo_set_tiling((drm_intel_bo*)bo, &intel_tiling, stride);
-assert(intel_tiling == required_tiling);
-return ret;
-}
-
-#define CHV_CONFIG_WARNING \
- "Warning: can't get GPU's configurations, will use the minimal one. Please update your drm to 2.4.59+ and linux kernel to 4.0.0+.\n"
-static void
-intel_update_device_info(cl_device_id device)
-{
-intel_driver_t *driver;
-
-driver = intel_driver_new();
-assert(driver != NULL);
-if (intel_driver_open(driver, NULL) != CL_SUCCESS) {
- intel_driver_delete(driver);
- return;
-}
-
-#ifdef HAS_USERPTR
-const size_t sz = 4096;
-void *host_ptr;
-
-host_ptr = cl_aligned_malloc(sz, 4096);
-if (host_ptr != NULL) {
- cl_buffer bo = intel_buffer_alloc_userptr((cl_buffer_mgr)driver->bufmgr,
- "CL memory object", host_ptr, sz, 0);
- if (bo == NULL)
- device->host_unified_memory = CL_FALSE;
- else
- drm_intel_bo_unreference((drm_intel_bo*)bo);
- cl_free(host_ptr);
-}
-else
- device->host_unified_memory = CL_FALSE;
-#endif
-
-#ifdef HAS_EU_TOTAL
-unsigned int eu_total;
-
-/* Prefer driver-queried max compute units if supported */
-if (!drm_intel_get_eu_total(driver->fd, &eu_total))
- device->max_compute_unit = eu_total;
-else if (IS_CHERRYVIEW(device->device_id))
- printf(CHV_CONFIG_WARNING);
-#else
-if (IS_CHERRYVIEW(device->device_id)) {
-#if defined(__ANDROID__)
- device->max_compute_unit = 12;
-#else
- printf(CHV_CONFIG_WARNING);
-#endif
-}
-#endif
-
-#ifdef HAS_SUBSLICE_TOTAL
-unsigned int subslice_total;
-
-/* Prefer driver-queried subslice count if supported */
-if (!drm_intel_get_subslice_total(driver->fd, &subslice_total))
- device->sub_slice_count = subslice_total;
-else if (IS_CHERRYVIEW(device->device_id))
- printf(CHV_CONFIG_WARNING);
-#else
-if (IS_CHERRYVIEW(device->device_id)) {
-#if defined(__ANDROID__)
- device->sub_slice_count = 2;
-#else
- printf(CHV_CONFIG_WARNING);
-#endif
-}
-#endif
-
-#ifdef HAS_POOLED_EU
-/* BXT pooled eu, 3*6 to 2*9, like sub slice count is 2 */
-int has_pooled_eu;
-if((has_pooled_eu = drm_intel_get_pooled_eu(driver->fd)) > 0)
- device->sub_slice_count = 2;
-
-#ifdef HAS_MIN_EU_IN_POOL
-int min_eu;
-/* for fused down 2x6 devices, beignet don't support. */
-if (has_pooled_eu > 0 && (min_eu = drm_intel_get_min_eu_in_pool(driver->fd)) > 0) {
- assert(min_eu == 9); //don't support fuse down device.
-}
-#endif //HAS_MIN_EU_IN_POOL
-#endif //HAS_POOLED_EU
-//We should get the device memory dynamically, but the
-//mapablce mem size usage is unknown. Just ignore it.
-size_t total_mem,map_mem;
-if(drm_intel_get_aperture_sizes(driver->fd,&map_mem,&total_mem) == 0)
- device->global_mem_size = (cl_ulong)total_mem;
-
-intel_driver_context_destroy(driver);
-intel_driver_close(driver);
-intel_driver_terminate(driver);
-intel_driver_delete(driver);
-}
-
-LOCAL void
-intel_setup_callbacks(void)
-{
-cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
-cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
-cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
-cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *) intel_driver_enlarge_stack_size;
-cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag;
-cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
-cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
-cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info;
-cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
-cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr;
-#ifdef HAS_BO_SET_SOFTPIN
-cl_buffer_set_softpin_offset = (cl_buffer_set_softpin_offset_cb *) drm_intel_bo_set_softpin_offset;
-cl_buffer_set_bo_use_full_range = (cl_buffer_set_bo_use_full_range_cb *) drm_intel_bo_use_48b_address_range;
-#endif
- cl_buffer_disable_reuse = (cl_buffer_disable_reuse_cb *) drm_intel_bo_disable_reuse;
- cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
-#if defined(HAS_GL_EGL)
- cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;
- cl_buffer_release_from_texture = (cl_buffer_release_from_texture_cb *) intel_release_buffer_from_texture;
-#endif
- cl_buffer_get_buffer_from_libva = (cl_buffer_get_buffer_from_libva_cb *) intel_share_buffer_from_libva;
- cl_buffer_get_image_from_libva = (cl_buffer_get_image_from_libva_cb *) intel_share_image_from_libva;
- cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference;
- cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference;
- cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map;
- cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
- cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt;
- cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt;
- cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized;
- cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
- cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
- cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
- cl_buffer_unpin = (cl_buffer_unpin_cb *) drm_intel_bo_unpin;
- cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
- cl_buffer_get_subdata = (cl_buffer_get_subdata_cb *) drm_intel_bo_get_subdata;
- cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
- cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
- cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb *)intel_buffer_get_tiling_align;
- cl_buffer_get_buffer_from_fd = (cl_buffer_get_buffer_from_fd_cb *) intel_share_buffer_from_fd;
- cl_buffer_get_image_from_fd = (cl_buffer_get_image_from_fd_cb *) intel_share_image_from_fd;
- intel_set_gpgpu_callbacks(intel_get_device_id());
-}
diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h
deleted file mode 100644
index 3be93c2..0000000
--- a/src/intel/intel_driver.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- */
-
-/*
- * Copyright 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#ifndef _INTEL_DRIVER_H_
-#define _INTEL_DRIVER_H_
-
-#include "cl_device_data.h"
-
-#include <stdint.h>
-#include <pthread.h>
-#include <signal.h>
-
-#include <xf86drm.h>
-#include <drm.h>
-#include <i915_drm.h>
-#include <intel_bufmgr.h>
-#include <intel/intel_gpgpu.h>
-
-#define CMD_MI (0x0 << 29)
-#define CMD_2D (0x2 << 29)
-
-#define MI_NOOP (CMD_MI | 0)
-#define MI_BATCH_BUFFER_END (CMD_MI | (0xA << 23))
-
-#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x04)
-#define XY_COLOR_BLT_WRITE_ALPHA (1 << 21)
-#define XY_COLOR_BLT_WRITE_RGB (1 << 20)
-#define XY_COLOR_BLT_DST_TILED (1 << 11)
-
-/* BR13 */
-#define BR13_565 (0x1 << 24)
-#define BR13_8888 (0x3 << 24)
-
-struct dri_state;
-struct intel_gpgpu_node;
-typedef struct _XDisplay Display;
-
-typedef struct intel_driver
-{
- dri_bufmgr *bufmgr;
- drm_intel_context *ctx;
- drm_intel_bo *null_bo;
- int fd;
- int device_id;
- int gen_ver;
- sigset_t sa_mask;
- pthread_mutex_t ctxmutex;
- int locked;
- int need_close;
- Display *x11_display;
- struct dri_state *dri_ctx;
- struct intel_gpgpu_node *gpgpu_list;
- int atomic_test_result;
-} intel_driver_t;
-
-#define SET_BLOCKED_SIGSET(DRIVER) do { \
- sigset_t bl_mask; \
- sigfillset(&bl_mask); \
- sigdelset(&bl_mask, SIGFPE); \
- sigdelset(&bl_mask, SIGILL); \
- sigdelset(&bl_mask, SIGSEGV); \
- sigdelset(&bl_mask, SIGBUS); \
- sigdelset(&bl_mask, SIGKILL); \
- pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
-} while (0)
-
-#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \
- pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \
-} while (0)
-
-#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \
- SET_BLOCKED_SIGSET(DRIVER); \
- pthread_mutex_lock(&(DRIVER)->ctxmutex); \
-} while (0)
-
-#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \
- pthread_mutex_unlock(&(DRIVER)->ctxmutex); \
- RESTORE_BLOCKED_SIGSET(DRIVER); \
-} while (0)
-
-/* device control */
-extern void intel_driver_lock_hardware(intel_driver_t*);
-extern void intel_driver_unlock_hardware(intel_driver_t*);
-
-/* methods working in shared mode */
-extern dri_bo* intel_driver_share_buffer(intel_driver_t*, const char *sname, uint32_t name);
-extern uint32_t intel_driver_shared_name(intel_driver_t*, dri_bo*);
-
-/* init driver shared with X using dri state, acquired from X Display */
-extern int intel_driver_init_shared(intel_driver_t*, struct dri_state*);
-
-/* init driver in master mode (when X is not using the card)
- * usually dev_name = "/dev/dri/card0"
- */
-extern int intel_driver_init_master(intel_driver_t*, const char* dev_name);
-
-/* init driver for render node */
-extern int intel_driver_init_render(intel_driver_t*, const char* dev_name);
-
-/* terminate driver and all underlying structures */
-extern int intel_driver_terminate(intel_driver_t*);
-
-/* simple check if driver was initialized (checking fd should suffice) */
-extern int intel_driver_is_active(intel_driver_t*);
-
-/* init the call backs used by the ocl driver */
-extern void intel_setup_callbacks(void);
-
-#endif /* _INTEL_DRIVER_H_ */
-
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
deleted file mode 100644
index 283b07a..0000000
--- a/src/intel/intel_gpgpu.c
+++ /dev/null
@@ -1,2581 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- * Alexei Soupikov <alexei.soupikov at intel.com>
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/utsname.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <errno.h>
-
-#include "intel/intel_gpgpu.h"
-#include "intel/intel_defines.h"
-#include "intel/intel_structs.h"
-#include "program.h" // for BTI_RESERVED_NUM
-
-#include "cl_alloc.h"
-#include "cl_utils.h"
-#include "cl_sampler.h"
-#include "cl_accelerator_intel.h"
-
-#ifndef CL_VERSION_1_2
-#define CL_MEM_OBJECT_IMAGE1D 0x10F4
-#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
-#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
-#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
-#endif
-
-#define GEN_CMD_MEDIA_OBJECT (0x71000000)
-#define MO_TS_BIT (1 << 24)
-#define MO_RETAIN_BIT (1 << 28)
-#define SAMPLER_STATE_SIZE (16)
-
-#define TIMESTAMP_ADDR 0x2358
-
-/* Stores both binding tables and surface states */
-typedef struct surface_heap {
- uint32_t binding_table[256];
- char surface[256*sizeof(gen_surface_state_t)];
-} surface_heap_t;
-
-typedef struct intel_event {
- drm_intel_bo *buffer;
- drm_intel_bo *ts_buf;
- int status;
-} intel_event_t;
-
-#define MAX_IF_DESC 32
-
-typedef struct intel_gpgpu intel_gpgpu_t;
-
-typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t use_slm);
-intel_gpgpu_set_L3_t *intel_gpgpu_set_L3 = NULL;
-
-typedef uint32_t (intel_gpgpu_get_scratch_index_t)(uint32_t size);
-intel_gpgpu_get_scratch_index_t *intel_gpgpu_get_scratch_index = NULL;
-
-typedef void (intel_gpgpu_post_action_t)(intel_gpgpu_t *gpgpu, int32_t flush_mode);
-intel_gpgpu_post_action_t *intel_gpgpu_post_action = NULL;
-
-typedef uint64_t (intel_gpgpu_read_ts_reg_t)(drm_intel_bufmgr *bufmgr);
-intel_gpgpu_read_ts_reg_t *intel_gpgpu_read_ts_reg = NULL;
-
-
-typedef void (intel_gpgpu_set_base_address_t)(intel_gpgpu_t *gpgpu);
-intel_gpgpu_set_base_address_t *intel_gpgpu_set_base_address = NULL;
-
-typedef void (intel_gpgpu_setup_bti_t)(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
- size_t size, unsigned char index, uint32_t format);
-intel_gpgpu_setup_bti_t *intel_gpgpu_setup_bti = NULL;
-
-
-typedef void (intel_gpgpu_load_vfe_state_t)(intel_gpgpu_t *gpgpu);
-intel_gpgpu_load_vfe_state_t *intel_gpgpu_load_vfe_state = NULL;
-
-typedef void (intel_gpgpu_build_idrt_t)(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel);
-intel_gpgpu_build_idrt_t *intel_gpgpu_build_idrt = NULL;
-
-
-typedef void (intel_gpgpu_load_curbe_buffer_t)(intel_gpgpu_t *gpgpu);
-intel_gpgpu_load_curbe_buffer_t *intel_gpgpu_load_curbe_buffer = NULL;
-
-
-typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu);
-intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL;
-
-typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu);
-intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL;
-
-typedef void (intel_gpgpu_select_pipeline_t)(intel_gpgpu_t *gpgpu);
-intel_gpgpu_select_pipeline_t *intel_gpgpu_select_pipeline = NULL;
-
-static void
-intel_gpgpu_sync(void *buf)
-{
- if (buf)
- drm_intel_bo_wait_rendering((drm_intel_bo *)buf);
-}
-
-static void *intel_gpgpu_ref_batch_buf(intel_gpgpu_t *gpgpu)
-{
- if (gpgpu->batch->last_bo)
- drm_intel_bo_reference(gpgpu->batch->last_bo);
-
- return gpgpu->batch->last_bo;
-}
-
-static void intel_gpgpu_unref_batch_buf(void *buf)
-{
- if (buf)
- drm_intel_bo_unreference((drm_intel_bo *)buf);
-}
-
-static void
-intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
-{
- if (gpgpu == NULL)
- return;
- if(gpgpu->time_stamp_b.bo)
- drm_intel_bo_unreference(gpgpu->time_stamp_b.bo);
- if(gpgpu->printf_b.bo)
- drm_intel_bo_unreference(gpgpu->printf_b.bo);
- if (gpgpu->aux_buf.bo)
- drm_intel_bo_unreference(gpgpu->aux_buf.bo);
- if (gpgpu->perf_b.bo)
- drm_intel_bo_unreference(gpgpu->perf_b.bo);
- if (gpgpu->stack_b.bo)
- drm_intel_bo_unreference(gpgpu->stack_b.bo);
- if (gpgpu->scratch_b.bo)
- drm_intel_bo_unreference(gpgpu->scratch_b.bo);
- if (gpgpu->profiling_b.bo)
- drm_intel_bo_unreference(gpgpu->profiling_b.bo);
-
- if(gpgpu->constant_b.bo)
- drm_intel_bo_unreference(gpgpu->constant_b.bo);
-
- intel_batchbuffer_delete(gpgpu->batch);
- cl_free(gpgpu);
-}
-
-/* Destroy the all intel_gpgpu, no matter finish or not, when driver destroy */
-void intel_gpgpu_delete_all(intel_driver_t *drv)
-{
- struct intel_gpgpu_node *p;
- if(drv->gpgpu_list == NULL)
- return;
-
- PPTHREAD_MUTEX_LOCK(drv);
- while(drv->gpgpu_list) {
- p = drv->gpgpu_list;
- drv->gpgpu_list = p->next;
- intel_gpgpu_delete_finished(p->gpgpu);
- cl_free(p);
- }
- PPTHREAD_MUTEX_UNLOCK(drv);
-}
-
-static void
-intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
-{
- if (gpgpu == NULL)
- return;
-
- intel_driver_t *drv = gpgpu->drv;
- struct intel_gpgpu_node *p, *node;
-
- PPTHREAD_MUTEX_LOCK(drv);
- p = drv->gpgpu_list;
- if(p) {
- node = p->next;
- while(node) {
- if(node->gpgpu->batch && node->gpgpu->batch->buffer &&
- !drm_intel_bo_busy(node->gpgpu->batch->buffer)) {
- p->next = node->next;
- intel_gpgpu_delete_finished(node->gpgpu);
- cl_free(node);
- node = p->next;
- } else {
- p = node;
- node = node->next;
- }
- }
- node = drv->gpgpu_list;
- if(node->gpgpu->batch && node->gpgpu->batch->buffer &&
- !drm_intel_bo_busy(node->gpgpu->batch->buffer)) {
- drv->gpgpu_list = drv->gpgpu_list->next;
- intel_gpgpu_delete_finished(node->gpgpu);
- cl_free(node);
- }
- }
- if (gpgpu == NULL)
- return;
-
- if(gpgpu->batch && gpgpu->batch->buffer &&
- drm_intel_bo_busy(gpgpu->batch->buffer)) {
- TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node));
- node->gpgpu = gpgpu;
- node->next = NULL;
- p = drv->gpgpu_list;
- if(p == NULL)
- drv->gpgpu_list= node;
- else {
- while(p->next)
- p = p->next;
- p->next = node;
- }
- } else
- intel_gpgpu_delete_finished(gpgpu);
-
-error:
- PPTHREAD_MUTEX_UNLOCK(drv);
-}
-
-static intel_gpgpu_t*
-intel_gpgpu_new(intel_driver_t *drv)
-{
- intel_gpgpu_t *state = NULL;
-
- TRY_ALLOC_NO_ERR (state, CALLOC(intel_gpgpu_t));
- state->drv = drv;
- state->batch = intel_batchbuffer_new(state->drv);
- assert(state->batch);
-
-exit:
- return state;
-error:
- intel_gpgpu_delete(state);
- state = NULL;
- goto exit;
-}
-
-static void
-intel_gpgpu_select_pipeline_gen7(intel_gpgpu_t *gpgpu)
-{
- BEGIN_BATCH(gpgpu->batch, 1);
- OUT_BATCH(gpgpu->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_select_pipeline_gen9(intel_gpgpu_t *gpgpu)
-{
- BEGIN_BATCH(gpgpu->batch, 1);
- OUT_BATCH(gpgpu->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MASK | PIPELINE_SELECT_GPGPU);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static uint32_t
-intel_gpgpu_get_cache_ctrl_gen7()
-{
- return cc_llc_l3;
-}
-
-static uint32_t
-intel_gpgpu_get_cache_ctrl_gen75()
-{
- return llccc_ec | l3cc_ec;
-}
-static uint32_t
-intel_gpgpu_get_cache_ctrl_gen8()
-{
- return tcc_llc_ec_l3 | mtllc_wb;
-}
-static uint32_t
-intel_gpgpu_get_cache_ctrl_gen9()
-{
- //Kernel-defined cache control registers 2:
- //L3CC: WB; LeCC: WB; TC: LLC/eLLC;
- int major = 0, minor = 0;
- int mocs_index = 0x2;
-
- struct utsname buf;
- uname(&buf);
- sscanf(buf.release, "%d.%d", &major, &minor);
- //From linux 4.3, kernel redefined the mocs table's value,
- //But before 4.3, still used the hw defautl value.
- if(strcmp(buf.sysname, "Linux") == 0 &&
- major == 4 && minor < 3) { /* linux kernel support skl from 4.x, so check from 4 */
- mocs_index = 0x9;
- }
-
- return (mocs_index << 1);
-}
-
-static void
-intel_gpgpu_set_base_address_gen7(intel_gpgpu_t *gpgpu)
-{
- const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */
- BEGIN_BATCH(gpgpu->batch, 10);
- OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 8);
- /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY); /* General State Base Addr */
- /* 0, State Mem Obj CC */
- /* We use a state base address for the surface heap since IVB clamp the
- * binding table pointer at 11 bits. So, we cannot use pointers directly while
- * using the surface heap
- */
- assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0);
- OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_INSTRUCTION,
- I915_GEM_DOMAIN_INSTRUCTION,
- gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY));
-
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Dynamic State Base Addr */
-
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */
- OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
- /* According to mesa i965 driver code, we must set the dynamic state access upper bound
- * to a valid bound value, otherwise, the border color pointer may be rejected and you
- * may get incorrect border color. This is a known hardware bug. */
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_set_base_address_gen8(intel_gpgpu_t *gpgpu)
-{
- const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */
- BEGIN_BATCH(gpgpu->batch, 16);
- OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 14);
- /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY); /* General State Base Addr */
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 16));
- /* 0, State Mem Obj CC */
- /* We use a state base address for the surface heap since IVB clamp the
- * binding table pointer at 11 bits. So, we cannot use pointers directly while
- * using the surface heap
- */
- assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0);
- OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_SAMPLER,
- I915_GEM_DOMAIN_SAMPLER,
- gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
- OUT_BATCH(gpgpu->batch, 0);
- OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); /* Dynamic State Base Addr */
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
- OUT_BATCH(gpgpu->batch, 0);
- //OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */
- OUT_RELOC(gpgpu->batch, (drm_intel_bo *)gpgpu->ker->bo,
- I915_GEM_DOMAIN_INSTRUCTION,
- I915_GEM_DOMAIN_INSTRUCTION,
- 0 + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
- OUT_BATCH(gpgpu->batch, 0);
-
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- /* According to mesa i965 driver code, we must set the dynamic state access upper bound
- * to a valid bound value, otherwise, the border color pointer may be rejected and you
- * may get incorrect border color. This is a known hardware bug. */
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_set_base_address_gen9(intel_gpgpu_t *gpgpu)
-{
- const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */
- BEGIN_BATCH(gpgpu->batch, 19);
- OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 17);
- /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY); /* General State Base Addr */
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 16));
- /* 0, State Mem Obj CC */
- /* We use a state base address for the surface heap since IVB clamp the
- * binding table pointer at 11 bits. So, we cannot use pointers directly while
- * using the surface heap
- */
- assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0);
- OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_SAMPLER,
- I915_GEM_DOMAIN_SAMPLER,
- gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
- OUT_BATCH(gpgpu->batch, 0);
- OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); /* Dynamic State Base Addr */
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */
- OUT_BATCH(gpgpu->batch, 0);
- //OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */
- OUT_RELOC(gpgpu->batch, (drm_intel_bo *)gpgpu->ker->bo,
- I915_GEM_DOMAIN_INSTRUCTION,
- I915_GEM_DOMAIN_INSTRUCTION,
- 0 + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY));
- OUT_BATCH(gpgpu->batch, 0);
-
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- /* According to mesa i965 driver code, we must set the dynamic state access upper bound
- * to a valid bound value, otherwise, the border color pointer may be rejected and you
- * may get incorrect border color. This is a known hardware bug. */
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
- /* Bindless surface state base address */
- OUT_BATCH(gpgpu->batch, (def_cc << 4) | BASE_ADDRESS_MODIFY);
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0xfffff000);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-uint32_t intel_gpgpu_get_scratch_index_gen7(uint32_t size) {
- return size / 1024 - 1;
-}
-
-uint32_t intel_gpgpu_get_scratch_index_gen75(uint32_t size) {
- //align in backend, if non pow2, must align when alloc scratch bo.
- assert((size & (size - 1)) == 0);
- size = size >> 11;
- uint32_t index = 0;
- while((size >>= 1) > 0)
- index++; //get leading one
-
- return index;
-}
-
-uint32_t intel_gpgpu_get_scratch_index_gen8(uint32_t size) {
- //align in backend, if non pow2, must align when alloc scratch bo.
- assert((size & (size - 1)) == 0);
- size = size >> 10;
- uint32_t index = 0;
- while((size >>= 1) > 0)
- index++; //get leading one
-
- return index;
-}
-
-
-static cl_int
-intel_gpgpu_get_max_curbe_size(uint32_t device_id)
-{
- if (IS_BAYTRAIL_T(device_id) ||
- IS_IVB_GT1(device_id))
- return 992;
- else
- return 2016;
-}
-
-static cl_int
-intel_gpgpu_get_curbe_size(intel_gpgpu_t *gpgpu)
-{
- int curbe_size = gpgpu->curb.size_cs_entry * gpgpu->curb.num_cs_entries;
- int max_curbe_size = intel_gpgpu_get_max_curbe_size(gpgpu->drv->device_id);
-
- if (curbe_size > max_curbe_size) {
- fprintf(stderr, "warning, curbe size exceed limitation.\n");
- return max_curbe_size;
- } else
- return curbe_size;
-}
-
-static void
-intel_gpgpu_load_vfe_state_gen7(intel_gpgpu_t *gpgpu)
-{
- int32_t scratch_index;
- BEGIN_BATCH(gpgpu->batch, 8);
- OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_POINTERS | (8-2));
-
- if(gpgpu->per_thread_scratch > 0) {
- scratch_index = intel_gpgpu_get_scratch_index(gpgpu->per_thread_scratch);
- OUT_RELOC(gpgpu->batch, gpgpu->scratch_b.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- scratch_index);
- }
- else {
- OUT_BATCH(gpgpu->batch, 0);
- }
- /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */
- OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (0 << 8) | 0xc4);
- OUT_BATCH(gpgpu->batch, 0);
- /* curbe_size */
- OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu));
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_load_vfe_state_gen8(intel_gpgpu_t *gpgpu)
-{
- int32_t scratch_index;
- BEGIN_BATCH(gpgpu->batch, 9);
- OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_POINTERS | (9-2));
-
- if(gpgpu->per_thread_scratch > 0) {
- scratch_index = intel_gpgpu_get_scratch_index(gpgpu->per_thread_scratch);
- OUT_RELOC(gpgpu->batch, gpgpu->scratch_b.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- scratch_index);
- }
- else {
- OUT_BATCH(gpgpu->batch, 0);
- }
- OUT_BATCH(gpgpu->batch, 0);
-
- /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */
- OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (2 << 8) | 0xc0); //urb entries can't be 0
- OUT_BATCH(gpgpu->batch, 0);
- /* urb entries size | curbe_size */
- OUT_BATCH(gpgpu->batch, 2<<16 | intel_gpgpu_get_curbe_size(gpgpu));
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0);
-
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_load_curbe_buffer_gen7(intel_gpgpu_t *gpgpu)
-{
- BEGIN_BATCH(gpgpu->batch, 4);
- OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */
- OUT_BATCH(gpgpu->batch, 0); /* mbz */
- OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu) * 32);
- OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gpgpu->aux_offset.curbe_offset);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_load_curbe_buffer_gen8(intel_gpgpu_t *gpgpu)
-{
- BEGIN_BATCH(gpgpu->batch, 4);
- OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */
- OUT_BATCH(gpgpu->batch, 0); /* mbz */
- OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu) * 32);
- OUT_BATCH(gpgpu->batch, gpgpu->aux_offset.curbe_offset);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_load_idrt_gen7(intel_gpgpu_t *gpgpu)
-{
- BEGIN_BATCH(gpgpu->batch, 4);
- OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */
- OUT_BATCH(gpgpu->batch, 0); /* mbz */
- OUT_BATCH(gpgpu->batch, 1 << 5);
- OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gpgpu->aux_offset.idrt_offset);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_load_idrt_gen8(intel_gpgpu_t *gpgpu)
-{
- BEGIN_BATCH(gpgpu->batch, 4);
- OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */
- OUT_BATCH(gpgpu->batch, 0); /* mbz */
- OUT_BATCH(gpgpu->batch, 1 << 5);
- OUT_BATCH(gpgpu->batch, gpgpu->aux_offset.idrt_offset);
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-
-static const uint32_t gpgpu_l3_config_reg1[] = {
- 0x00080040, 0x02040040, 0x00800040, 0x01000038,
- 0x02000030, 0x01000038, 0x00000038, 0x00000040,
- 0x0A140091, 0x09100091, 0x08900091, 0x08900091,
- 0x010000a1
-};
-
-static const uint32_t gpgpu_l3_config_reg2[] = {
- 0x00000000, 0x00000000, 0x00080410, 0x00080410,
- 0x00040410, 0x00040420, 0x00080420, 0x00080020,
- 0x00204080, 0x00244890, 0x00284490, 0x002444A0,
- 0x00040810
-};
-
-/* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer. */
-static void
-intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx)
-{
- BEGIN_BATCH(gpgpu->batch, 5);
- OUT_BATCH(gpgpu->batch, CMD_PIPE_CONTROL | (5-2));
- OUT_BATCH(gpgpu->batch, GEN7_PIPE_CONTROL_WRITE_TIMESTAMP);
- OUT_RELOC(gpgpu->batch, gpgpu->time_stamp_b.bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- GEN7_PIPE_CONTROL_GLOBAL_GTT_WRITE | idx * sizeof(uint64_t));
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, 0);
- ADVANCE_BATCH();
-}
-
-static void
-intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu)
-{
- gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
- intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
- memset(pc, 0, sizeof(*pc));
- pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
- pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
- pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
- pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
- pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
- pc->dw1.render_target_cache_flush_enable = 1;
- pc->dw1.texture_cache_invalidation_enable = 1;
- pc->dw1.cs_stall = 1;
- pc->dw1.dc_flush_enable = 1;
- //pc->dw1.instruction_cache_invalidate_enable = 1;
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu)
-{
- gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
- intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
- memset(pc, 0, sizeof(*pc));
- pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
- pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
- pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
- pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
- pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
- pc->dw1.cs_stall = 1;
- pc->dw1.dc_flush_enable = 1;
-
- pc = (gen6_pipe_control_t*)
- intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
- memset(pc, 0, sizeof(*pc));
- pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
- pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
- pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
- pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
- pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
- pc->dw1.render_target_cache_flush_enable = 1;
- pc->dw1.texture_cache_invalidation_enable = 1;
- pc->dw1.cs_stall = 1;
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_pipe_control_gen8(intel_gpgpu_t *gpgpu)
-{
- gen8_pipe_control_t* pc = (gen8_pipe_control_t*)
- intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen8_pipe_control_t));
- memset(pc, 0, sizeof(*pc));
- pc->dw0.length = SIZEOF32(gen8_pipe_control_t) - 2;
- pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
- pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
- pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
- pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
- pc->dw1.render_target_cache_flush_enable = 1;
- pc->dw1.texture_cache_invalidation_enable = 1;
- pc->dw1.cs_stall = 1;
- pc->dw1.dc_flush_enable = 1;
- //pc->dw1.instruction_cache_invalidate_enable = 1;
- ADVANCE_BATCH(gpgpu->batch);
-}
-
-static void
-intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm)
-{
- BEGIN_BATCH(gpgpu->batch, 9);
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
- OUT_BATCH(gpgpu->batch, 0x00A00000);
-
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
-
- if (use_slm)
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[12]);
- else
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]);
-
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
- if (use_slm)
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[12]);
- else
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]);
- ADVANCE_BATCH(gpgpu->batch);
-
- intel_gpgpu_pipe_control(gpgpu);
-}
-
-static void
-intel_gpgpu_set_L3_baytrail(intel_gpgpu_t *gpgpu, uint32_t use_slm)
-{
- BEGIN_BATCH(gpgpu->batch, 9);
-
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
- OUT_BATCH(gpgpu->batch, 0x00D30000); /* General credit : High credit = 26 : 6 */
-
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
- if (use_slm)
- OUT_BATCH(gpgpu->batch, 0x01020021); /* {SLM=64, URB=96, DC=16, RO=16, Sum=192} */
- else
- OUT_BATCH(gpgpu->batch, 0x02040040); /* {SLM=0, URB=128, DC=32, RO=32, Sum=192} */
-
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
- OUT_BATCH(gpgpu->batch, 0x0); /* {I/S=0, Const=0, Tex=0} */
-
- ADVANCE_BATCH(gpgpu->batch);
-
- intel_gpgpu_pipe_control(gpgpu);
-}
-
-static void
-intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
-{
- /* still set L3 in batch buffer for fulsim. */
- if(gpgpu->drv->atomic_test_result != SELF_TEST_ATOMIC_FAIL)
- {
- BEGIN_BATCH(gpgpu->batch, 15);
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- /* FIXME: KMD always disable the atomic in L3 for some reason.
- I checked the spec, and don't think we need that workaround now.
- Before I send a patch to kernel, let's just enable it here. */
- OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
- OUT_BATCH(gpgpu->batch, 0); /* enable atomic in L3 */
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
- OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16); /* enable atomic in L3 */
- }
- else
- {
- BEGIN_BATCH(gpgpu->batch, 9);
- }
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
- OUT_BATCH(gpgpu->batch, 0x08800000);
-
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET);
-
- if (use_slm)
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[12]);
- else
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]);
-
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET);
- if (use_slm)
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[12]);
- else
- OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]);
- ADVANCE_BATCH(gpgpu->batch);
-
- //if(use_slm)
- // gpgpu->batch->enable_slm = 1;
- intel_gpgpu_pipe_control(gpgpu);
-}
-
-static void
-intel_gpgpu_set_L3_gen8(intel_gpgpu_t *gpgpu, uint32_t use_slm)
-{
- BEGIN_BATCH(gpgpu->batch, 3);
- OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
- OUT_BATCH(gpgpu->batch, GEN8_L3_CNTL_REG_ADDRESS_OFFSET);
- // FIXME, this is a workaround for switch SLM enable and disable random hang
- if(use_slm)
- OUT_BATCH(gpgpu->batch, 0x60000121); /* {SLM=192, URB=128, Rest=384} */
- else
- OUT_BATCH(gpgpu->batch, 0x60000160); /* {SLM=0, URB=384, Rest=384, Sum=768} */
-
- //if(use_slm)
- // gpgpu->batch->enable_slm = 1;
- intel_gpgpu_pipe_control(gpgpu);
-}
-
-static void
-intel_gpgpu_batch_start(intel_gpgpu_t *gpgpu)
-{
- intel_batchbuffer_start_atomic(gpgpu->batch, 256);
- intel_gpgpu_pipe_control(gpgpu);
- assert(intel_gpgpu_set_L3);
- intel_gpgpu_set_L3(gpgpu, gpgpu->ker->use_slm);
- intel_gpgpu_select_pipeline(gpgpu);
- intel_gpgpu_set_base_address(gpgpu);
- intel_gpgpu_load_vfe_state(gpgpu);
- intel_gpgpu_load_curbe_buffer(gpgpu);
- intel_gpgpu_load_idrt(gpgpu);
-
- if (gpgpu->perf_b.bo) {
- BEGIN_BATCH(gpgpu->batch, 3);
- OUT_BATCH(gpgpu->batch,
- (0x28 << 23) | /* MI_REPORT_PERF_COUNT */
- (3 - 2)); /* length-2 */
- OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 0 | /* Offset for the start "counters" */
- 1); /* Use GTT and not PGTT */
- OUT_BATCH(gpgpu->batch, 0);
- ADVANCE_BATCH(gpgpu->batch);
- }
-
- /* Insert PIPE_CONTROL for time stamp of start*/
- if (gpgpu->time_stamp_b.bo)
- intel_gpgpu_write_timestamp(gpgpu, 0);
-}
-
-static void
-intel_gpgpu_post_action_gen7(intel_gpgpu_t *gpgpu, int32_t flush_mode)
-{
- if(flush_mode)
- intel_gpgpu_pipe_control(gpgpu);
-}
-
-static void
-intel_gpgpu_post_action_gen75(intel_gpgpu_t *gpgpu, int32_t flush_mode)
-{
- /* flush force for set L3 */
- intel_gpgpu_pipe_control(gpgpu);
-
- /* Restore L3 control to disable SLM mode,
- otherwise, may affect 3D pipeline */
- intel_gpgpu_set_L3(gpgpu, 0);
-}
-
-static void
-intel_gpgpu_batch_end(intel_gpgpu_t *gpgpu, int32_t flush_mode)
-{
- /* Insert PIPE_CONTROL for time stamp of end*/
- if (gpgpu->time_stamp_b.bo)
- intel_gpgpu_write_timestamp(gpgpu, 1);
-
- /* Insert the performance counter command */
- if (gpgpu->perf_b.bo) {
- BEGIN_BATCH(gpgpu->batch, 3);
- OUT_BATCH(gpgpu->batch,
- (0x28 << 23) | /* MI_REPORT_PERF_COUNT */
- (3 - 2)); /* length-2 */
- OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 512 | /* Offset for the end "counters" */
- 1); /* Use GTT and not PGTT */
- OUT_BATCH(gpgpu->batch, 0);
- ADVANCE_BATCH(gpgpu->batch);
- }
-
- intel_gpgpu_post_action(gpgpu, flush_mode);
- intel_batchbuffer_end_atomic(gpgpu->batch);
-}
-
-static int
-intel_gpgpu_batch_reset(intel_gpgpu_t *gpgpu, size_t sz)
-{
- return intel_batchbuffer_reset(gpgpu->batch, sz);
-}
-
-static int
-intel_gpgpu_flush(intel_gpgpu_t *gpgpu)
-{
- if (!gpgpu->batch || !gpgpu->batch->buffer)
- return 0;
- return intel_batchbuffer_flush(gpgpu->batch);
- /* FIXME:
- Remove old assert here for binded buffer offset 0 which
- tried to guard possible NULL buffer pointer check in kernel, as
- in case like "runtime_null_kernel_arg", but that's wrong to just
- take buffer offset 0 as NULL, and cause failure for normal
- kernels which has no such NULL ptr check but with buffer offset 0
- (which is possible now and will be normal if full PPGTT is on).
-
- Need to fix NULL ptr check otherwise.
- */
-}
-
-static int
-intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
- uint32_t max_threads,
- uint32_t size_cs_entry,
- int profiling)
-{
- drm_intel_bo *bo;
-
- /* Binded buffers */
- gpgpu->binded_n = 0;
- gpgpu->img_bitmap = 0;
- gpgpu->img_index_base = 3;
- gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1);
-
- /* URB */
- gpgpu->curb.num_cs_entries = 64;
- gpgpu->curb.size_cs_entry = size_cs_entry;
- gpgpu->max_threads = max_threads;
-
- if (gpgpu->printf_b.bo)
- dri_bo_unreference(gpgpu->printf_b.bo);
- gpgpu->printf_b.bo = NULL;
-
- if (gpgpu->profiling_b.bo)
- dri_bo_unreference(gpgpu->profiling_b.bo);
- gpgpu->profiling_b.bo = NULL;
-
- /* Set the profile buffer*/
- if(gpgpu->time_stamp_b.bo)
- dri_bo_unreference(gpgpu->time_stamp_b.bo);
- gpgpu->time_stamp_b.bo = NULL;
- if (profiling) {
- bo = dri_bo_alloc(gpgpu->drv->bufmgr, "timestamp query", 4096, 4096);
- gpgpu->time_stamp_b.bo = bo;
- if (!bo)
- fprintf(stderr, "Could not allocate buffer for profiling.\n");
- }
-
- /* stack */
- if (gpgpu->stack_b.bo)
- dri_bo_unreference(gpgpu->stack_b.bo);
- gpgpu->stack_b.bo = NULL;
-
- /* Set the auxiliary buffer*/
- uint32_t size_aux = 0;
- if(gpgpu->aux_buf.bo)
- dri_bo_unreference(gpgpu->aux_buf.bo);
- gpgpu->aux_buf.bo = NULL;
-
- /* begin with surface heap to make sure it's page aligned,
- because state base address use 20bit for the address */
- gpgpu->aux_offset.surface_heap_offset = size_aux;
- size_aux += sizeof(surface_heap_t);
-
- //curbe must be 32 bytes aligned
- size_aux = ALIGN(size_aux, 64);
- gpgpu->aux_offset.curbe_offset = size_aux;
- size_aux += gpgpu->curb.num_cs_entries * gpgpu->curb.size_cs_entry * 32;
-
- //idrt must be 32 bytes aligned
- size_aux = ALIGN(size_aux, 32);
- gpgpu->aux_offset.idrt_offset = size_aux;
- size_aux += MAX_IF_DESC * sizeof(struct gen6_interface_descriptor);
-
- //must be 32 bytes aligned
- //sampler state and vme state share the same buffer,
- size_aux = ALIGN(size_aux, 32);
- gpgpu->aux_offset.sampler_state_offset = size_aux;
- size_aux += MAX(GEN_MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
- GEN_MAX_VME_STATES * sizeof(gen7_vme_state_t));
-
- //sampler border color state must be 32 bytes aligned
- size_aux = ALIGN(size_aux, 32);
- gpgpu->aux_offset.sampler_border_color_state_offset = size_aux;
- size_aux += GEN_MAX_SAMPLERS * sizeof(gen7_sampler_border_color_t);
-
- /* make sure aux buffer is page aligned */
- size_aux = ALIGN(size_aux, 4096);
-
- bo = dri_bo_alloc(gpgpu->drv->bufmgr, "AUX_BUFFER", size_aux, 4096);
-
- if (!bo || dri_bo_map(bo, 1) != 0) {
- fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
- if (bo)
- dri_bo_unreference(bo);
- if (profiling && gpgpu->time_stamp_b.bo)
- dri_bo_unreference(gpgpu->time_stamp_b.bo);
- gpgpu->time_stamp_b.bo = NULL;
- return -1;
- }
- memset(bo->virtual, 0, size_aux);
- gpgpu->aux_buf.bo = bo;
- return 0;
-}
-
-static void
-intel_gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset)
-{
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- heap->binding_table[index] = offsetof(surface_heap_t, surface) +
- index * sizeof(gen7_surface_state_t);
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- obj_bo_offset,
- gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[index] +
- offsetof(gen7_surface_state_t, ss1),
- obj_bo);
-}
-
-static void
-intel_gpgpu_set_buf_reloc_for_vme_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset)
-{
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- heap->binding_table[index] = offsetof(surface_heap_t, surface) +
- index * sizeof(gen7_surface_state_t);
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- obj_bo_offset,
- gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[index] +
- offsetof(gen7_media_surface_state_t, ss0),
- obj_bo);
-}
-
-static dri_bo*
-intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
-{
- if(gpgpu->constant_b.bo)
- dri_bo_unreference(gpgpu->constant_b.bo);
- gpgpu->constant_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "CONSTANT_BUFFER", size, 64);
- if (gpgpu->constant_b.bo == NULL)
- return NULL;
-
- intel_gpgpu_setup_bti(gpgpu, gpgpu->constant_b.bo, 0, size, bti, I965_SURFACEFORMAT_R32G32B32A32_UINT);
- return gpgpu->constant_b.bo;
-}
-
-static void
-intel_gpgpu_setup_bti_gen7(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
- size_t size, unsigned char index, uint32_t format)
-{
- assert(size <= (2ul<<30));
- size_t s = size - 1;
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen7_surface_state_t *ss0 = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
- memset(ss0, 0, sizeof(gen7_surface_state_t));
- ss0->ss0.surface_type = I965_SURFACE_BUFFER;
- ss0->ss0.surface_format = format;
- ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
- // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte.
- if(format == I965_SURFACEFORMAT_RAW)
- assert((ss0->ss2.width & 0x03) == 3);
- ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
- ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
- ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
- heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t);
-
- ss0->ss1.base_addr = buf->offset + internal_offset;
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- internal_offset,
- gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[index] +
- offsetof(gen7_surface_state_t, ss1),
- buf);
-}
-
-static void
-intel_gpgpu_setup_bti_gen75(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
- size_t size, unsigned char index, uint32_t format)
-{
- assert(size <= (2ul<<30));
- size_t s = size - 1;
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen7_surface_state_t *ss0 = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
- memset(ss0, 0, sizeof(gen7_surface_state_t));
- ss0->ss0.surface_type = I965_SURFACE_BUFFER;
- ss0->ss0.surface_format = format;
- if(format != I965_SURFACEFORMAT_RAW) {
- ss0->ss7.shader_r = I965_SURCHAN_SELECT_RED;
- ss0->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
- ss0->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
- ss0->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
- }
- ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
- // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte.
- if(format == I965_SURFACEFORMAT_RAW)
- assert((ss0->ss2.width & 0x03) == 3);
- ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
- ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
- ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
- heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t);
-
- ss0->ss1.base_addr = buf->offset + internal_offset;
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- internal_offset,
- gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[index] +
- offsetof(gen7_surface_state_t, ss1),
- buf);
-}
-
-static void
-intel_gpgpu_setup_bti_gen8(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
- size_t size, unsigned char index, uint32_t format)
-{
- assert(size <= (2ul<<30));
- size_t s = size - 1;
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen8_surface_state_t *ss0 = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
- memset(ss0, 0, sizeof(gen8_surface_state_t));
- ss0->ss0.surface_type = I965_SURFACE_BUFFER;
- ss0->ss0.surface_format = format;
- if(format != I965_SURFACEFORMAT_RAW) {
- ss0->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
- ss0->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
- ss0->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
- ss0->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
- }
- ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
- // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte.
- if(format == I965_SURFACEFORMAT_RAW)
- assert((ss0->ss2.width & 0x03) == 3);
- ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
- ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
- ss0->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
- heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen8_surface_state_t);
- ss0->ss8.surface_base_addr_lo = (buf->offset64 + internal_offset) & 0xffffffff;
- ss0->ss9.surface_base_addr_hi = ((buf->offset64 + internal_offset) >> 32) & 0xffffffff;
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- internal_offset,
- gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[index] +
- offsetof(gen8_surface_state_t, ss8),
- buf);
-}
-
-static void
-intel_gpgpu_setup_bti_gen9(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
- size_t size, unsigned char index, uint32_t format)
-{
- assert(size <= (4ul<<30));
- size_t s = size - 1;
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen8_surface_state_t *ss0 = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
- memset(ss0, 0, sizeof(gen8_surface_state_t));
- ss0->ss0.surface_type = I965_SURFACE_BUFFER;
- ss0->ss0.surface_format = format;
- if(format != I965_SURFACEFORMAT_RAW) {
- ss0->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
- ss0->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
- ss0->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
- ss0->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
- }
- ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
- // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte.
- if(format == I965_SURFACEFORMAT_RAW)
- assert((ss0->ss2.width & 0x03) == 3);
- ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
- ss0->ss3.depth = (s >> 21) & 0x7ff; /* bits 31:21 of sz, from bespec only gen 9 support that*/
- ss0->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
- heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen8_surface_state_t);
- ss0->ss8.surface_base_addr_lo = (buf->offset64 + internal_offset) & 0xffffffff;
- ss0->ss9.surface_base_addr_hi = ((buf->offset64 + internal_offset) >> 32) & 0xffffffff;
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- internal_offset,
- gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[index] +
- offsetof(gen8_surface_state_t, ss8),
- buf);
-}
-
-static int
-intel_is_surface_array(cl_mem_object_type type)
-{
- if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
- type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
- return 1;
-
- return 0;
-}
-
-static int
-intel_get_surface_type(cl_mem_object_type type)
-{
- switch (type) {
- case CL_MEM_OBJECT_IMAGE1D:
- case CL_MEM_OBJECT_IMAGE1D_ARRAY:
- return I965_SURFACE_1D;
-
- case CL_MEM_OBJECT_IMAGE1D_BUFFER:
- case CL_MEM_OBJECT_IMAGE2D:
- case CL_MEM_OBJECT_IMAGE2D_ARRAY:
- return I965_SURFACE_2D;
-
- case CL_MEM_OBJECT_IMAGE3D:
- return I965_SURFACE_3D;
-
- default:
- assert(0);
- }
- return 0;
-}
-
-/* Get fixed surface type. If it is a 1D array image with a large index,
- we need to fixup it to 2D type due to a Gen7/Gen75's sampler issue
- on a integer type surface with clamp address mode and nearest filter mode.
-*/
-static uint32_t get_surface_type(intel_gpgpu_t *gpgpu, int index, cl_mem_object_type type)
-{
- uint32_t surface_type;
- //Now all platforms need it, so disable platform, re-enable it
- //when some platform don't need this workaround
- if (/*((IS_IVYBRIDGE(gpgpu->drv->device_id) ||
- IS_HASWELL(gpgpu->drv->device_id) ||
- IS_BROADWELL(gpgpu->drv->device_id) ||
- IS_CHERRYVIEW(gpgpu->drv->device_id) ||
- IS_SKYLAKE(gpgpu->drv->device_id) ||
- IS_BROXTON(gpgpu->drv->device_id) ||
- IS_KABYLAKE(gpgpu->drv_device_id))) && */
- index >= BTI_WORKAROUND_IMAGE_OFFSET + BTI_RESERVED_NUM &&
- type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
- surface_type = I965_SURFACE_2D;
- else
- surface_type = intel_get_surface_type(type);
- return surface_type;
-}
-
-static void
-intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
- uint32_t index,
- dri_bo* obj_bo,
- uint32_t obj_bo_offset,
- uint32_t format,
- cl_mem_object_type type,
- uint32_t bpp,
- int32_t w,
- int32_t h,
- int32_t depth,
- int32_t pitch,
- int32_t slice_pitch,
- int32_t tiling)
-{
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen7_surface_state_t *ss = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
-
- memset(ss, 0, sizeof(*ss));
- ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
- if (intel_is_surface_array(type)) {
- ss->ss0.surface_array = 1;
- ss->ss0.surface_array_spacing = 1;
- }
- ss->ss0.surface_format = format;
- ss->ss1.base_addr = obj_bo->offset + obj_bo_offset;
- ss->ss2.width = w - 1;
-
- ss->ss2.height = h - 1;
- ss->ss3.depth = depth - 1;
- ss->ss4.not_str_buf.rt_view_extent = depth - 1;
- ss->ss4.not_str_buf.min_array_element = 0;
- ss->ss3.pitch = pitch - 1;
- ss->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
- if (tiling == GPGPU_TILE_X) {
- ss->ss0.tiled_surface = 1;
- ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
- } else if (tiling == GPGPU_TILE_Y) {
- ss->ss0.tiled_surface = 1;
- ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
- }
- ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
- intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
-
- assert(index < GEN_MAX_SURFACES);
-}
-
-static void
-intel_gpgpu_bind_image_for_vme_gen7(intel_gpgpu_t *gpgpu,
- uint32_t index,
- dri_bo* obj_bo,
- uint32_t obj_bo_offset,
- uint32_t format,
- cl_mem_object_type type,
- uint32_t bpp,
- int32_t w,
- int32_t h,
- int32_t depth,
- int32_t pitch,
- int32_t slice_pitch,
- int32_t tiling)
-{
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen7_media_surface_state_t *ss = (gen7_media_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
-
- memset(ss, 0, sizeof(*ss));
- ss->ss0.base_addr = obj_bo->offset + obj_bo_offset;
- ss->ss1.uv_offset_v_direction = 0;
- ss->ss1.pic_struct = 0;
- ss->ss1.width = w - 1;
- ss->ss1.height = h - 1;
- if (tiling == GPGPU_NO_TILE) {
- ss->ss2.tile_mode = 0;
- }
- else if (tiling == GPGPU_TILE_X){
- ss->ss2.tile_mode = 2;
- }
- else if (tiling == GPGPU_TILE_Y){
- ss->ss2.tile_mode = 3;
- }
- ss->ss2.half_pitch_for_chroma = 0;
- ss->ss2.surface_pitch = pitch - 1;
- ss->ss2.surface_object_control_state = cl_gpgpu_get_cache_ctrl();
- ss->ss2.interleave_chroma = 0;
- ss->ss2.surface_format = 12; //Y8_UNORM
- ss->ss3.y_offset_for_u = 0;
- ss->ss3.x_offset_for_u = 0;
- ss->ss4.y_offset_for_v = 0;
- ss->ss4.x_offset_for_v = 0;
-
- intel_gpgpu_set_buf_reloc_for_vme_gen7(gpgpu, index, obj_bo, obj_bo_offset);
-
- assert(index < GEN_MAX_SURFACES);
-}
-
-
-static void
-intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
- uint32_t index,
- dri_bo* obj_bo,
- uint32_t obj_bo_offset,
- uint32_t format,
- cl_mem_object_type type,
- uint32_t bpp,
- int32_t w,
- int32_t h,
- int32_t depth,
- int32_t pitch,
- int32_t slice_pitch,
- int32_t tiling)
-{
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen7_surface_state_t *ss = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)];
- memset(ss, 0, sizeof(*ss));
- ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
- if (intel_is_surface_array(type)) {
- ss->ss0.surface_array = 1;
- ss->ss0.surface_array_spacing = 1;
- }
- ss->ss0.surface_format = format;
- ss->ss1.base_addr = obj_bo->offset + obj_bo_offset;
- ss->ss2.width = w - 1;
- ss->ss2.height = h - 1;
- ss->ss3.depth = depth - 1;
- ss->ss4.not_str_buf.rt_view_extent = depth - 1;
- ss->ss4.not_str_buf.min_array_element = 0;
- ss->ss3.pitch = pitch - 1;
- ss->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
- ss->ss7.shader_r = I965_SURCHAN_SELECT_RED;
- ss->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
- ss->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
- ss->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
- if (tiling == GPGPU_TILE_X) {
- ss->ss0.tiled_surface = 1;
- ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
- } else if (tiling == GPGPU_TILE_Y) {
- ss->ss0.tiled_surface = 1;
- ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
- }
- ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
- intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
-
- assert(index < GEN_MAX_SURFACES);
-}
-
-static void
-intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
- uint32_t index,
- dri_bo* obj_bo,
- uint32_t obj_bo_offset,
- uint32_t format,
- cl_mem_object_type type,
- uint32_t bpp,
- int32_t w,
- int32_t h,
- int32_t depth,
- int32_t pitch,
- int32_t slice_pitch,
- int32_t tiling)
-{
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen8_surface_state_t *ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
- memset(ss, 0, sizeof(*ss));
- ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
- ss->ss0.surface_format = format;
- if (intel_is_surface_array(type)) {
- ss->ss0.surface_array = 1;
- ss->ss1.surface_qpitch = (h + 3)/4;
- }
- ss->ss0.horizontal_alignment = 1;
- ss->ss0.vertical_alignment = 1;
-
- if (tiling == GPGPU_TILE_X) {
- ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR;
- } else if (tiling == GPGPU_TILE_Y) {
- ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR;
- } else
- assert(tiling == GPGPU_NO_TILE);// W mode is not supported now.
-
- ss->ss2.width = w - 1;
- ss->ss2.height = h - 1;
- ss->ss3.depth = depth - 1;
-
- ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
- ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
-
- ss->ss4.render_target_view_ext = depth - 1;
- ss->ss4.min_array_elt = 0;
- ss->ss3.surface_pitch = pitch - 1;
-
- ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
- ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
- ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
- ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
- ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
- ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
-
- heap->binding_table[index] = offsetof(surface_heap_t, surface) +
- index * surface_state_sz;
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- obj_bo_offset,
- gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[index] +
- offsetof(gen8_surface_state_t, ss8),
- obj_bo);
-
- assert(index < GEN_MAX_SURFACES);
-}
-
-static void
-intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu,
- uint32_t index,
- dri_bo* obj_bo,
- uint32_t obj_bo_offset,
- uint32_t format,
- cl_mem_object_type type,
- uint32_t bpp,
- int32_t w,
- int32_t h,
- int32_t depth,
- int32_t pitch,
- int32_t slice_pitch,
- int32_t tiling)
-{
- surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen8_surface_state_t *ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
- memset(ss, 0, sizeof(*ss));
- ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
- ss->ss0.surface_format = format;
- if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_1D) {
- ss->ss0.surface_array = 1;
- ss->ss1.surface_qpitch = (slice_pitch/bpp + 3)/4; //align_h
- }
-
- if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_2D) {
- ss->ss0.surface_array = 1;
- ss->ss1.surface_qpitch = (slice_pitch/pitch + 3)/4;
- }
-
- if(ss->ss0.surface_type == I965_SURFACE_3D)
- ss->ss1.surface_qpitch = (slice_pitch/pitch + 3)/4;
-
- ss->ss0.horizontal_alignment = 1;
- ss->ss0.vertical_alignment = 1;
-
- if (tiling == GPGPU_TILE_X) {
- ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR;
- } else if (tiling == GPGPU_TILE_Y) {
- ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR;
- } else
- assert(tiling == GPGPU_NO_TILE);// W mode is not supported now.
-
- ss->ss2.width = w - 1;
- ss->ss2.height = h - 1;
- ss->ss3.depth = depth - 1;
-
- ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
- ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
-
- ss->ss4.render_target_view_ext = depth - 1;
- ss->ss4.min_array_elt = 0;
- ss->ss3.surface_pitch = pitch - 1;
-
- ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
- ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
- ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
- ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
- ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
- ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
-
- heap->binding_table[index] = offsetof(surface_heap_t, surface) +
- index * surface_state_sz;
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- obj_bo_offset,
- gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[index] +
- offsetof(gen8_surface_state_t, ss8),
- obj_bo);
-
- assert(index < GEN_MAX_SURFACES);
-}
-
-static void
-intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset,
- uint32_t internal_offset, size_t size, uint8_t bti)
-{
- assert(gpgpu->binded_n < max_buf_n);
- if(offset != -1) {
- gpgpu->binded_buf[gpgpu->binded_n] = buf;
- gpgpu->target_buf_offset[gpgpu->binded_n] = internal_offset;
- gpgpu->binded_offset[gpgpu->binded_n] = offset;
- gpgpu->binded_n++;
- }
- intel_gpgpu_setup_bti(gpgpu, buf, internal_offset, size, bti, I965_SURFACEFORMAT_RAW);
-}
-
-static int
-intel_gpgpu_set_scratch(intel_gpgpu_t * gpgpu, uint32_t per_thread_size)
-{
- drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
- drm_intel_bo* old = gpgpu->scratch_b.bo;
- uint32_t total = per_thread_size * gpgpu->max_threads;
- /* Per Bspec, scratch should 2X the desired size when EU index is not continuous */
- if (IS_HASWELL(gpgpu->drv->device_id) || IS_CHERRYVIEW(gpgpu->drv->device_id) ||
- PCI_CHIP_BROXTON_1 == gpgpu->drv->device_id || PCI_CHIP_BROXTON_3 == gpgpu->drv->device_id)
- total *= 2;
-
- gpgpu->per_thread_scratch = per_thread_size;
-
- if(old && old->size < total) {
- drm_intel_bo_unreference(old);
- old = NULL;
- }
-
- if(!old && total) {
- gpgpu->scratch_b.bo = drm_intel_bo_alloc(bufmgr, "SCRATCH_BO", total, 4096);
- if (gpgpu->scratch_b.bo == NULL)
- return -1;
- }
- return 0;
-}
-static void
-intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint8_t bti)
-{
- drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
- gpgpu->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64);
-
- cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)gpgpu->stack_b.bo, offset, 0, size, bti);
-}
-
-static void
-intel_gpgpu_build_idrt_gen7(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
-{
- gen6_interface_descriptor_t *desc;
- drm_intel_bo *ker_bo = NULL;
-
- desc = (gen6_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset);
-
- memset(desc, 0, sizeof(*desc));
- ker_bo = (drm_intel_bo *) kernel->bo;
- desc->desc0.kernel_start_pointer = ker_bo->offset >> 6; /* reloc */
- desc->desc1.single_program_flow = 0;
- desc->desc1.floating_point_mode = 0; /* use IEEE-754 rule */
- desc->desc5.rounding_mode = 0; /* round to nearest even */
-
- assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0);
- desc->desc2.sampler_state_pointer = (gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) >> 5;
- desc->desc3.binding_table_entry_count = 0; /* no prefetch */
- desc->desc3.binding_table_pointer = 0;
- desc->desc4.curbe_read_len = kernel->curbe_sz / 32;
- desc->desc4.curbe_read_offset = 0;
-
- /* Barriers / SLM are automatically handled on Gen7+ */
- if (gpgpu->drv->gen_ver == 7 || gpgpu->drv->gen_ver == 75) {
- size_t slm_sz = kernel->slm_sz;
- desc->desc5.group_threads_num = kernel->use_slm ? kernel->thread_n : 0;
- desc->desc5.barrier_enable = kernel->use_slm;
- if (slm_sz <= 4*KB)
- slm_sz = 4*KB;
- else if (slm_sz <= 8*KB)
- slm_sz = 8*KB;
- else if (slm_sz <= 16*KB)
- slm_sz = 16*KB;
- else if (slm_sz <= 32*KB)
- slm_sz = 32*KB;
- else
- slm_sz = 64*KB;
- slm_sz = slm_sz >> 12;
- desc->desc5.slm_sz = slm_sz;
- }
- else
- desc->desc5.group_threads_num = kernel->barrierID; /* BarrierID on GEN6 */
-
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0,
- gpgpu->aux_offset.idrt_offset + offsetof(gen6_interface_descriptor_t, desc0),
- ker_bo);
-
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_SAMPLER, 0,
- gpgpu->aux_offset.sampler_state_offset,
- gpgpu->aux_offset.idrt_offset + offsetof(gen6_interface_descriptor_t, desc2),
- gpgpu->aux_buf.bo);
-}
-
-static void
-intel_gpgpu_build_idrt_gen8(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
-{
- gen8_interface_descriptor_t *desc;
-
- desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset);
-
- memset(desc, 0, sizeof(*desc));
- desc->desc0.kernel_start_pointer = 0; /* reloc */
- desc->desc2.single_program_flow = 0;
- desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */
- desc->desc6.rounding_mode = 0; /* round to nearest even */
-
- assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0);
- desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5;
- desc->desc4.binding_table_entry_count = 0; /* no prefetch */
- desc->desc4.binding_table_pointer = 0;
- desc->desc5.curbe_read_len = kernel->curbe_sz / 32;
- desc->desc5.curbe_read_offset = 0;
-
- /* Barriers / SLM are automatically handled on Gen7+ */
- size_t slm_sz = kernel->slm_sz;
- /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */
- desc->desc6.group_threads_num = kernel->thread_n;
- desc->desc6.barrier_enable = kernel->use_slm;
- if (slm_sz == 0)
- slm_sz = 0;
- else if (slm_sz <= 4*KB)
- slm_sz = 4*KB;
- else if (slm_sz <= 8*KB)
- slm_sz = 8*KB;
- else if (slm_sz <= 16*KB)
- slm_sz = 16*KB;
- else if (slm_sz <= 32*KB)
- slm_sz = 32*KB;
- else
- slm_sz = 64*KB;
- slm_sz = slm_sz >> 12;
- desc->desc6.slm_sz = slm_sz;
-}
-
-static void
-intel_gpgpu_build_idrt_gen9(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
-{
- gen8_interface_descriptor_t *desc;
-
- desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset);
-
- memset(desc, 0, sizeof(*desc));
- desc->desc0.kernel_start_pointer = 0; /* reloc */
- desc->desc2.single_program_flow = 0;
- desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */
- desc->desc6.rounding_mode = 0; /* round to nearest even */
-
- assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0);
- desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5;
- desc->desc4.binding_table_entry_count = 0; /* no prefetch */
- desc->desc4.binding_table_pointer = 0;
- desc->desc5.curbe_read_len = kernel->curbe_sz / 32;
- desc->desc5.curbe_read_offset = 0;
-
- /* Barriers / SLM are automatically handled on Gen7+ */
- size_t slm_sz = kernel->slm_sz;
- /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */
- desc->desc6.group_threads_num = kernel->thread_n;
- desc->desc6.barrier_enable = kernel->use_slm;
- if (slm_sz == 0)
- slm_sz = 0;
- else if (slm_sz <= 1*KB)
- slm_sz = 1;
- else if (slm_sz <= 2*KB)
- slm_sz = 2;
- else if (slm_sz <= 4*KB)
- slm_sz = 3;
- else if (slm_sz <= 8*KB)
- slm_sz = 4;
- else if (slm_sz <= 16*KB)
- slm_sz = 5;
- else if (slm_sz <= 32*KB)
- slm_sz = 6;
- else
- slm_sz = 7;
- desc->desc6.slm_sz = slm_sz;
-}
-
-static int
-intel_gpgpu_upload_curbes_gen7(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
-{
- unsigned char *curbe = NULL;
- cl_gpgpu_kernel *k = gpgpu->ker;
- uint32_t i, j;
-
- /* Upload the data first */
- if (dri_bo_map(gpgpu->aux_buf.bo, 1) != 0) {
- fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
- return -1;
- }
- assert(gpgpu->aux_buf.bo->virtual);
- curbe = (unsigned char *) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.curbe_offset);
- memcpy(curbe, data, size);
-
- /* Now put all the relocations for our flat address space */
- for (i = 0; i < k->thread_n; ++i)
- for (j = 0; j < gpgpu->binded_n; ++j) {
- *(uint32_t *)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset64 + gpgpu->target_buf_offset[j];
- drm_intel_bo_emit_reloc(gpgpu->aux_buf.bo,
- gpgpu->aux_offset.curbe_offset + gpgpu->binded_offset[j]+i*k->curbe_sz,
- gpgpu->binded_buf[j],
- gpgpu->target_buf_offset[j],
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER);
- }
- dri_bo_unmap(gpgpu->aux_buf.bo);
- return 0;
-}
-
-static int
-intel_gpgpu_upload_curbes_gen8(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
-{
- unsigned char *curbe = NULL;
- cl_gpgpu_kernel *k = gpgpu->ker;
- uint32_t i, j;
-
- /* Upload the data first */
- if (dri_bo_map(gpgpu->aux_buf.bo, 1) != 0) {
- fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
- return -1;
- }
- assert(gpgpu->aux_buf.bo->virtual);
- curbe = (unsigned char *) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.curbe_offset);
- memcpy(curbe, data, size);
-
- /* Now put all the relocations for our flat address space */
- for (i = 0; i < k->thread_n; ++i)
- for (j = 0; j < gpgpu->binded_n; ++j) {
- *(size_t *)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset64 + gpgpu->target_buf_offset[j];
- drm_intel_bo_emit_reloc(gpgpu->aux_buf.bo,
- gpgpu->aux_offset.curbe_offset + gpgpu->binded_offset[j]+i*k->curbe_sz,
- gpgpu->binded_buf[j],
- gpgpu->target_buf_offset[j],
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER);
- }
- dri_bo_unmap(gpgpu->aux_buf.bo);
- return 0;
-}
-
-static void
-intel_gpgpu_upload_samplers(intel_gpgpu_t *gpgpu, const void *data, uint32_t n)
-{
- if (n) {
- const size_t sz = n * sizeof(gen6_sampler_state_t);
- memcpy(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset, data, sz);
- }
-}
-
-int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest)
-{
- switch( cl_address_mode ) {
- case CLK_ADDRESS_NONE:
- case CLK_ADDRESS_REPEAT:
- return GEN_TEXCOORDMODE_WRAP;
- case CLK_ADDRESS_CLAMP:
- return GEN_TEXCOORDMODE_CLAMP_BORDER;
- case CLK_ADDRESS_CLAMP_TO_EDGE:
- return GEN_TEXCOORDMODE_CLAMP;
- case CLK_ADDRESS_MIRRORED_REPEAT:
- return GEN_TEXCOORDMODE_MIRROR;
- default:
- return GEN_TEXCOORDMODE_WRAP;
- }
-}
-
-static void intel_gpgpu_insert_vme_state_gen7(intel_gpgpu_t *gpgpu, cl_accelerator_intel accel, uint32_t index)
-{
- gen7_vme_state_t* vme = (gen7_vme_state_t*)(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset) + index;
- memset(vme, 0, sizeof(*vme));
- gen7_vme_search_path_state_t* sp = vme->sp;
-
- if(accel->desc.me.search_path_type == CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL){
- sp[0].dw0.SPD_0_X = 0;
- sp[0].dw0.SPD_0_Y = 0;
- sp[0].dw0.SPD_1_X = 0;
- sp[0].dw0.SPD_1_Y = 0;
- sp[0].dw0.SPD_2_X = 0;
- sp[0].dw0.SPD_2_Y = 0;
- sp[0].dw0.SPD_3_X = 0;
- sp[0].dw0.SPD_3_Y = 0;
- }
- else if(accel->desc.me.search_path_type == CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL){
- sp[0].dw0.SPD_0_X = 1;
- sp[0].dw0.SPD_0_Y = 0;
- sp[0].dw0.SPD_1_X = 0;
- sp[0].dw0.SPD_1_Y = 1;
- sp[0].dw0.SPD_2_X = -1;
- sp[0].dw0.SPD_2_Y = 0;
- sp[0].dw0.SPD_3_X = 0;
- sp[0].dw0.SPD_3_Y = 0;
- }
- else if(accel->desc.me.search_path_type == CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL){
- sp[0].dw0.SPD_0_X = 1;
- sp[0].dw0.SPD_0_Y = 0;
- sp[0].dw0.SPD_1_X = 1;
- sp[0].dw0.SPD_1_Y = 0;
- sp[0].dw0.SPD_2_X = 1;
- sp[0].dw0.SPD_2_Y = 0;
- sp[0].dw0.SPD_3_X = 1;
- sp[0].dw0.SPD_3_Y = 0;
-
- sp[1].dw0.SPD_0_X = 1;
- sp[1].dw0.SPD_0_Y = 0;
- sp[1].dw0.SPD_1_X = 1;
- sp[1].dw0.SPD_1_Y = 0;
- sp[1].dw0.SPD_2_X = 1;
- sp[1].dw0.SPD_2_Y = 0;
- sp[1].dw0.SPD_3_X = 0;
- sp[1].dw0.SPD_3_Y = 1;
-
- sp[2].dw0.SPD_0_X = -1;
- sp[2].dw0.SPD_0_Y = 0;
- sp[2].dw0.SPD_1_X = -1;
- sp[2].dw0.SPD_1_Y = 0;
- sp[2].dw0.SPD_2_X = -1;
- sp[2].dw0.SPD_2_Y = 0;
- sp[2].dw0.SPD_3_X = -1;
- sp[2].dw0.SPD_3_Y = 0;
-
- sp[3].dw0.SPD_0_X = -1;
- sp[3].dw0.SPD_0_Y = 0;
- sp[3].dw0.SPD_1_X = -1;
- sp[3].dw0.SPD_1_Y = 0;
- sp[3].dw0.SPD_2_X = -1;
- sp[3].dw0.SPD_2_Y = 0;
- sp[3].dw0.SPD_3_X = 0;
- sp[3].dw0.SPD_3_Y = 1;
-
- sp[4].dw0.SPD_0_X = 1;
- sp[4].dw0.SPD_0_Y = 0;
- sp[4].dw0.SPD_1_X = 1;
- sp[4].dw0.SPD_1_Y = 0;
- sp[4].dw0.SPD_2_X = 1;
- sp[4].dw0.SPD_2_Y = 0;
- sp[4].dw0.SPD_3_X = 1;
- sp[4].dw0.SPD_3_Y = 0;
-
- sp[5].dw0.SPD_0_X = 1;
- sp[5].dw0.SPD_0_Y = 0;
- sp[5].dw0.SPD_1_X = 1;
- sp[5].dw0.SPD_1_Y = 0;
- sp[5].dw0.SPD_2_X = 1;
- sp[5].dw0.SPD_2_Y = 0;
- sp[5].dw0.SPD_3_X = 0;
- sp[5].dw0.SPD_3_Y = 1;
-
- sp[6].dw0.SPD_0_X = -1;
- sp[6].dw0.SPD_0_Y = 0;
- sp[6].dw0.SPD_1_X = -1;
- sp[6].dw0.SPD_1_Y = 0;
- sp[6].dw0.SPD_2_X = -1;
- sp[6].dw0.SPD_2_Y = 0;
- sp[6].dw0.SPD_3_X = -1;
- sp[6].dw0.SPD_3_Y = 0;
-
- sp[7].dw0.SPD_0_X = -1;
- sp[7].dw0.SPD_0_Y = 0;
- sp[7].dw0.SPD_1_X = -1;
- sp[7].dw0.SPD_1_Y = 0;
- sp[7].dw0.SPD_2_X = -1;
- sp[7].dw0.SPD_2_Y = 0;
- sp[7].dw0.SPD_3_X = 0;
- sp[7].dw0.SPD_3_Y = 1;
-
- sp[8].dw0.SPD_0_X = 1;
- sp[8].dw0.SPD_0_Y = 0;
- sp[8].dw0.SPD_1_X = 1;
- sp[8].dw0.SPD_1_Y = 0;
- sp[8].dw0.SPD_2_X = 1;
- sp[8].dw0.SPD_2_Y = 0;
- sp[8].dw0.SPD_3_X = 1;
- sp[8].dw0.SPD_3_Y = 0;
-
- sp[9].dw0.SPD_0_X = 1;
- sp[9].dw0.SPD_0_Y = 0;
- sp[9].dw0.SPD_1_X = 1;
- sp[9].dw0.SPD_1_Y = 0;
- sp[9].dw0.SPD_2_X = 1;
- sp[9].dw0.SPD_2_Y = 0;
- sp[9].dw0.SPD_3_X = 0;
- sp[9].dw0.SPD_3_Y = 1;
-
- sp[10].dw0.SPD_0_X = -1;
- sp[10].dw0.SPD_0_Y = 0;
- sp[10].dw0.SPD_1_X = -1;
- sp[10].dw0.SPD_1_Y = 0;
- sp[10].dw0.SPD_2_X = -1;
- sp[10].dw0.SPD_2_Y = 0;
- sp[10].dw0.SPD_3_X = -1;
- sp[10].dw0.SPD_3_Y = 0;
-
- sp[11].dw0.SPD_0_X = -1;
- sp[11].dw0.SPD_0_Y = 0;
- sp[11].dw0.SPD_1_X = -1;
- sp[11].dw0.SPD_1_Y = 0;
- sp[11].dw0.SPD_2_X = -1;
- sp[11].dw0.SPD_2_Y = 0;
- sp[11].dw0.SPD_3_X = 0;
- sp[11].dw0.SPD_3_Y = 0;
- }
-}
-
-static void
-intel_gpgpu_bind_vme_state_gen7(intel_gpgpu_t *gpgpu, cl_accelerator_intel accel)
-{
- intel_gpgpu_insert_vme_state_gen7(gpgpu, accel, 0);
-}
-
-static void
-intel_gpgpu_insert_sampler_gen7(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler)
-{
- int using_nearest = 0;
- uint32_t wrap_mode;
- gen7_sampler_state_t *sampler;
-
- sampler = (gen7_sampler_state_t *)(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset) + index;
- memset(sampler, 0, sizeof(*sampler));
- assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) % 32 == 0);
- sampler->ss2.default_color_pointer = (gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) >> 5;
- if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE)
- sampler->ss3.non_normalized_coord = 1;
- else
- sampler->ss3.non_normalized_coord = 0;
-
- switch (clk_sampler & __CLK_FILTER_MASK) {
- case CLK_FILTER_NEAREST:
- sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST;
- sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
- sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST;
- using_nearest = 1;
- break;
- case CLK_FILTER_LINEAR:
- sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR;
- sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
- sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR;
- break;
- }
-
- wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest);
- sampler->ss3.s_wrap_mode = wrap_mode;
- /* XXX mesa i965 driver code point out that if the surface is a 1D surface, we may need
- * to set t_wrap_mode to GEN_TEXCOORDMODE_WRAP. */
- sampler->ss3.t_wrap_mode = wrap_mode;
- sampler->ss3.r_wrap_mode = wrap_mode;
-
- sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
- sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
-
- sampler->ss0.base_level = 0;
-
- sampler->ss1.max_lod = 0;
- sampler->ss1.min_lod = 0;
-
- if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST)
- sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN |
- GEN_ADDRESS_ROUNDING_ENABLE_V_MIN |
- GEN_ADDRESS_ROUNDING_ENABLE_R_MIN;
- if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST)
- sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG |
- GEN_ADDRESS_ROUNDING_ENABLE_V_MAG |
- GEN_ADDRESS_ROUNDING_ENABLE_R_MAG;
-
- dri_bo_emit_reloc(gpgpu->aux_buf.bo,
- I915_GEM_DOMAIN_SAMPLER, 0,
- gpgpu->aux_offset.sampler_border_color_state_offset,
- gpgpu->aux_offset.sampler_state_offset +
- index * sizeof(gen7_sampler_state_t) +
- offsetof(gen7_sampler_state_t, ss2),
- gpgpu->aux_buf.bo);
-
-}
-
-
-static void
-intel_gpgpu_insert_sampler_gen8(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler)
-{
- int using_nearest = 0;
- uint32_t wrap_mode;
- gen8_sampler_state_t *sampler;
-
- sampler = (gen8_sampler_state_t *)(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset) + index;
- memset(sampler, 0, sizeof(*sampler));
- assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) % 32 == 0);
- if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE)
- sampler->ss3.non_normalized_coord = 1;
- else
- sampler->ss3.non_normalized_coord = 0;
-
- switch (clk_sampler & __CLK_FILTER_MASK) {
- case CLK_FILTER_NEAREST:
- sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST;
- sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
- sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST;
- using_nearest = 1;
- break;
- case CLK_FILTER_LINEAR:
- sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR;
- sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
- sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR;
- break;
- }
-
- wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest);
- sampler->ss3.s_wrap_mode = wrap_mode;
- /* XXX mesa i965 driver code point out that if the surface is a 1D surface, we may need
- * to set t_wrap_mode to GEN_TEXCOORDMODE_WRAP. */
- sampler->ss3.t_wrap_mode = wrap_mode;
- sampler->ss3.r_wrap_mode = wrap_mode;
-
- sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
- sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
-
- sampler->ss0.base_level = 0;
-
- sampler->ss1.max_lod = 0;
- sampler->ss1.min_lod = 0;
-
- if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST)
- sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN |
- GEN_ADDRESS_ROUNDING_ENABLE_V_MIN |
- GEN_ADDRESS_ROUNDING_ENABLE_R_MIN;
- if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST)
- sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG |
- GEN_ADDRESS_ROUNDING_ENABLE_V_MAG |
- GEN_ADDRESS_ROUNDING_ENABLE_R_MAG;
-}
-
-static void
-intel_gpgpu_bind_sampler_gen7(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz)
-{
- int index;
- assert(sampler_sz <= GEN_MAX_SAMPLERS);
- for(index = 0; index < sampler_sz; index++)
- intel_gpgpu_insert_sampler_gen7(gpgpu, index, samplers[index]);
-}
-
-static void
-intel_gpgpu_bind_sampler_gen8(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz)
-{
- int index;
- assert(sampler_sz <= GEN_MAX_SAMPLERS);
- for(index = 0; index < sampler_sz; index++)
- intel_gpgpu_insert_sampler_gen8(gpgpu, index, samplers[index]);
-}
-
-static void
-intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
-{
- gpgpu->ker = kernel;
- if (gpgpu->drv->null_bo)
- intel_gpgpu_setup_bti(gpgpu, gpgpu->drv->null_bo, 0, 64*1024, 0xfe, I965_SURFACEFORMAT_RAW);
-
- intel_gpgpu_build_idrt(gpgpu, kernel);
- dri_bo_unmap(gpgpu->aux_buf.bo);
-}
-
-static void
-intel_gpgpu_set_perf_counters(intel_gpgpu_t *gpgpu, cl_buffer *perf)
-{
- if (gpgpu->perf_b.bo)
- drm_intel_bo_unreference(gpgpu->perf_b.bo);
- drm_intel_bo_reference((drm_intel_bo*) perf);
- gpgpu->perf_b.bo = (drm_intel_bo*) perf;
-}
-
-static void
-intel_gpgpu_walker_gen7(intel_gpgpu_t *gpgpu,
- uint32_t simd_sz,
- uint32_t thread_n,
- const size_t global_wk_off[3],
- const size_t global_dim_off[3],
- const size_t global_wk_sz[3],
- const size_t local_wk_sz[3])
-{
- const uint32_t global_wk_dim[3] = {
- global_wk_sz[0] / local_wk_sz[0],
- global_wk_sz[1] / local_wk_sz[1],
- global_wk_sz[2] / local_wk_sz[2]
- };
- uint32_t right_mask = ~0x0;
- size_t group_sz = local_wk_sz[0] * local_wk_sz[1] * local_wk_sz[2];
-
- assert(simd_sz == 8 || simd_sz == 16);
-
- uint32_t shift = (group_sz & (simd_sz - 1));
- shift = (shift == 0) ? simd_sz : shift;
- right_mask = (1 << shift) - 1;
-
- BEGIN_BATCH(gpgpu->batch, 11);
- OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 9);
- OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
- assert(thread_n <= 64);
- if (simd_sz == 16)
- OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
- else
- OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, global_wk_dim[0]);
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, global_wk_dim[1]);
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, global_wk_dim[2]);
- OUT_BATCH(gpgpu->batch, right_mask);
- OUT_BATCH(gpgpu->batch, ~0x0); /* we always set height as 1, so set bottom mask as all 1*/
- ADVANCE_BATCH(gpgpu->batch);
-
- BEGIN_BATCH(gpgpu->batch, 2);
- OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0);
- OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
- ADVANCE_BATCH(gpgpu->batch);
-
- if (IS_IVYBRIDGE(gpgpu->drv->device_id))
- intel_gpgpu_pipe_control(gpgpu);
-}
-
-static void
-intel_gpgpu_walker_gen8(intel_gpgpu_t *gpgpu,
- uint32_t simd_sz,
- uint32_t thread_n,
- const size_t global_wk_off[3],
- const size_t global_dim_off[3],
- const size_t global_wk_sz[3],
- const size_t local_wk_sz[3])
-{
- const uint32_t global_wk_dim[3] = {
- global_wk_sz[0] / local_wk_sz[0],
- global_wk_sz[1] / local_wk_sz[1],
- global_wk_sz[2] / local_wk_sz[2]
- };
- uint32_t right_mask = ~0x0;
- size_t group_sz = local_wk_sz[0] * local_wk_sz[1] * local_wk_sz[2];
-
- assert(simd_sz == 8 || simd_sz == 16);
-
- uint32_t shift = (group_sz & (simd_sz - 1));
- shift = (shift == 0) ? simd_sz : shift;
- right_mask = (1 << shift) - 1;
-
- BEGIN_BATCH(gpgpu->batch, 15);
- OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 13);
- OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
- OUT_BATCH(gpgpu->batch, 0); /* Indirect Data Length */
- OUT_BATCH(gpgpu->batch, 0); /* Indirect Data Start Address */
- assert(thread_n <= 64);
- if (simd_sz == 16)
- OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
- else
- OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
- OUT_BATCH(gpgpu->batch, global_dim_off[0]);
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, global_wk_dim[0]+global_dim_off[0]);
- OUT_BATCH(gpgpu->batch, global_dim_off[1]);
- OUT_BATCH(gpgpu->batch, 0);
- OUT_BATCH(gpgpu->batch, global_wk_dim[1]+global_dim_off[1]);
- OUT_BATCH(gpgpu->batch, global_dim_off[2]);
- OUT_BATCH(gpgpu->batch, global_wk_dim[2]+global_dim_off[2]);
- OUT_BATCH(gpgpu->batch, right_mask);
- OUT_BATCH(gpgpu->batch, ~0x0); /* we always set height as 1, so set bottom mask as all 1*/
- ADVANCE_BATCH(gpgpu->batch);
-
- BEGIN_BATCH(gpgpu->batch, 2);
- OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0);
- OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
- ADVANCE_BATCH(gpgpu->batch);
-
- intel_gpgpu_pipe_control(gpgpu);
-}
-
-static intel_event_t*
-intel_gpgpu_event_new(intel_gpgpu_t *gpgpu)
-{
- intel_event_t *event = NULL;
- TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t));
-
- event->buffer = gpgpu->batch->buffer;
- if (event->buffer)
- drm_intel_bo_reference(event->buffer);
- event->status = command_queued;
-
- if(gpgpu->time_stamp_b.bo) {
- event->ts_buf = gpgpu->time_stamp_b.bo;
- drm_intel_bo_reference(event->ts_buf);
- }
-
-exit:
- return event;
-error:
- cl_free(event);
- event = NULL;
- goto exit;
-}
-
-/*
- The upper layer already flushed the batch buffer, just update
- internal status to command_submitted.
-*/
-static void
-intel_gpgpu_event_flush(intel_event_t *event)
-{
- assert(event->status == command_queued);
- event->status = command_running;
-}
-
-static int
-intel_gpgpu_event_update_status(intel_event_t *event, int wait)
-{
- if(event->status == command_complete)
- return event->status;
-
- if (event->buffer &&
- event->status == command_running &&
- !drm_intel_bo_busy(event->buffer)) {
- event->status = command_complete;
- drm_intel_bo_unreference(event->buffer);
- event->buffer = NULL;
- return event->status;
- }
-
- if(wait == 0)
- return event->status;
-
- if (event->buffer) {
- drm_intel_bo_wait_rendering(event->buffer);
- event->status = command_complete;
- drm_intel_bo_unreference(event->buffer);
- event->buffer = NULL;
- }
- return event->status;
-}
-
-static void
-intel_gpgpu_event_delete(intel_event_t *event)
-{
- if(event->buffer)
- drm_intel_bo_unreference(event->buffer);
- if(event->ts_buf)
- drm_intel_bo_unreference(event->ts_buf);
- cl_free(event);
-}
-
-/* IVB and HSW's result MUST shift in x86_64 system */
-static uint64_t
-intel_gpgpu_read_ts_reg_gen7(drm_intel_bufmgr *bufmgr)
-{
- uint64_t result = 0;
- drm_intel_reg_read(bufmgr, TIMESTAMP_ADDR, &result);
- /* In x86_64 system, the low 32bits of timestamp count are stored in the high 32 bits of
- result which got from drm_intel_reg_read, and 32-35 bits are lost; but match bspec in
- i386 system. It seems the kernel readq bug. So shift 32 bit in x86_64, and only remain
- 32 bits data in i386.
- */
- struct utsname buf;
- uname(&buf);
- /* In some systems, the user space is 32 bit, but kernel is 64 bit, so can't use the
- * compiler's flag to determine the kernel'a architecture, use uname to get it. */
- /* x86_64 in linux, amd64 in bsd */
- if(strcmp(buf.machine, "x86_64") == 0 || strcmp(buf.machine, "amd64") == 0)
- return result >> 32;
- else
- return result & 0x0ffffffff;
-}
-
-/* baytrail's result should clear high 4 bits */
-static uint64_t
-intel_gpgpu_read_ts_reg_baytrail(drm_intel_bufmgr *bufmgr)
-{
- uint64_t result = 0;
- drm_intel_reg_read(bufmgr, TIMESTAMP_ADDR, &result);
- return result & 0x0ffffffff;
-}
-
-/* We want to get the current time of GPU. */
-static void
-intel_gpgpu_event_get_gpu_cur_timestamp(intel_driver_t* gen_driver, uint64_t* ret_ts)
-{
- uint64_t result = 0;
- drm_intel_bufmgr *bufmgr = gen_driver->bufmgr;
-
- /* Get the ts that match the bspec */
- result = intel_gpgpu_read_ts_reg(bufmgr);
- result *= 80;
-
- *ret_ts = result;
- return;
-}
-
-/* Get the GPU execute time. */
-static void
-intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, int index, uint64_t* ret_ts)
-{
- uint64_t result = 0;
- assert(gpgpu->time_stamp_b.bo);
- assert(index == 0 || index == 1);
- drm_intel_gem_bo_map_gtt(gpgpu->time_stamp_b.bo);
- uint64_t* ptr = gpgpu->time_stamp_b.bo->virtual;
- result = ptr[index];
-
- /* According to BSpec, the timestamp counter should be 36 bits,
- but comparing to the timestamp counter from IO control reading,
- we find the first 4 bits seems to be fake. In order to keep the
- timestamp counter conformable, we just skip the first 4 bits.
- */
- result = (result & 0x0FFFFFFFF) * 80; //convert to nanoseconds
- *ret_ts = result;
-
- drm_intel_gem_bo_unmap_gtt(gpgpu->time_stamp_b.bo);
-}
-
-static int
-intel_gpgpu_set_profiling_buf(intel_gpgpu_t *gpgpu, uint32_t size, uint32_t offset, uint8_t bti)
-{
- drm_intel_bo *bo = NULL;
-
- gpgpu->profiling_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "Profiling buffer", size, 64);
- bo = gpgpu->profiling_b.bo;
- if (!bo || (drm_intel_bo_map(bo, 1) != 0)) {
- fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
- return -1;
- }
- memset(bo->virtual, 0, size);
- drm_intel_bo_unmap(bo);
- cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)bo, offset, 0, size, bti);
- return 0;
-}
-
-static void
-intel_gpgpu_set_profiling_info(intel_gpgpu_t *gpgpu, void* profiling_info)
-{
- gpgpu->profiling_info = profiling_info;
-}
-
-static void*
-intel_gpgpu_get_profiling_info(intel_gpgpu_t *gpgpu)
-{
- return gpgpu->profiling_info;
-}
-
-static int
-intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
-{
- if (gpgpu->printf_b.bo)
- dri_bo_unreference(gpgpu->printf_b.bo);
- gpgpu->printf_b.bo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf buffer", size, 4096);
-
- if (!gpgpu->printf_b.bo || (drm_intel_bo_map(gpgpu->printf_b.bo, 1) != 0)) {
- fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
- return -1;
- }
-
- memset(gpgpu->printf_b.bo->virtual, 0, size);
- *(uint32_t *)(gpgpu->printf_b.bo->virtual) = 4; // first four is for the length.
- drm_intel_bo_unmap(gpgpu->printf_b.bo);
- /* No need to bind, we do not need to emit reloc. */
- intel_gpgpu_setup_bti(gpgpu, gpgpu->printf_b.bo, 0, size, bti, I965_SURFACEFORMAT_RAW);
- return 0;
-}
-
-static void*
-intel_gpgpu_map_profiling_buf(intel_gpgpu_t *gpgpu)
-{
- drm_intel_bo *bo = NULL;
- bo = gpgpu->profiling_b.bo;
- drm_intel_bo_map(bo, 1);
- return bo->virtual;
-}
-
-static void
-intel_gpgpu_unmap_profiling_buf_addr(intel_gpgpu_t *gpgpu)
-{
- drm_intel_bo *bo = NULL;
- bo = gpgpu->profiling_b.bo;
- drm_intel_bo_unmap(bo);
-}
-
-
-static void*
-intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu)
-{
- drm_intel_bo *bo = NULL;
- bo = gpgpu->printf_b.bo;
- drm_intel_bo_map(bo, 1);
- return bo->virtual;
-}
-
-static void
-intel_gpgpu_unmap_printf_buf_addr(intel_gpgpu_t *gpgpu)
-{
- drm_intel_bo *bo = NULL;
- bo = gpgpu->printf_b.bo;
- drm_intel_bo_unmap(bo);
-}
-
-static void
-intel_gpgpu_release_printf_buf(intel_gpgpu_t *gpgpu)
-{
- drm_intel_bo_unreference(gpgpu->printf_b.bo);
- gpgpu->printf_b.bo = NULL;
-}
-
-static void
-intel_gpgpu_set_printf_info(intel_gpgpu_t *gpgpu, void* printf_info)
-{
- gpgpu->printf_info = printf_info;
-}
-
-static void*
-intel_gpgpu_get_printf_info(intel_gpgpu_t *gpgpu)
-{
- return gpgpu->printf_info;
-}
-
-static void
-intel_gpgpu_set_kernel(intel_gpgpu_t *gpgpu, void * kernel)
-{
- gpgpu->kernel = kernel;
-}
-
-static void*
-intel_gpgpu_get_kernel(intel_gpgpu_t *gpgpu)
-{
- return gpgpu->kernel;
-}
-
-LOCAL void
-intel_set_gpgpu_callbacks(int device_id)
-{
- cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
- cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
- cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync;
- cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
- cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
- cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
- cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
- cl_gpgpu_alloc_constant_buffer = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer;
- cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
- cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
- cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
- cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) intel_gpgpu_batch_start;
- cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end;
- cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush;
- cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen7;
- cl_gpgpu_bind_vme_state = (cl_gpgpu_bind_vme_state_cb *) intel_gpgpu_bind_vme_state_gen7;
- cl_gpgpu_set_scratch = (cl_gpgpu_set_scratch_cb *) intel_gpgpu_set_scratch;
- cl_gpgpu_event_new = (cl_gpgpu_event_new_cb *)intel_gpgpu_event_new;
- cl_gpgpu_event_flush = (cl_gpgpu_event_flush_cb *)intel_gpgpu_event_flush;
- cl_gpgpu_event_update_status = (cl_gpgpu_event_update_status_cb *)intel_gpgpu_event_update_status;
- cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete;
- cl_gpgpu_event_get_exec_timestamp = (cl_gpgpu_event_get_exec_timestamp_cb *)intel_gpgpu_event_get_exec_timestamp;
- cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
- cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
- cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
- cl_gpgpu_set_profiling_buffer = (cl_gpgpu_set_profiling_buffer_cb *)intel_gpgpu_set_profiling_buf;
- cl_gpgpu_set_profiling_info = (cl_gpgpu_set_profiling_info_cb *)intel_gpgpu_set_profiling_info;
- cl_gpgpu_get_profiling_info = (cl_gpgpu_get_profiling_info_cb *)intel_gpgpu_get_profiling_info;
- cl_gpgpu_map_profiling_buffer = (cl_gpgpu_map_profiling_buffer_cb *)intel_gpgpu_map_profiling_buf;
- cl_gpgpu_unmap_profiling_buffer = (cl_gpgpu_unmap_profiling_buffer_cb *)intel_gpgpu_unmap_profiling_buf_addr;
- cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf;
- cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf;
- cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr;
- cl_gpgpu_release_printf_buffer = (cl_gpgpu_release_printf_buffer_cb *)intel_gpgpu_release_printf_buf;
- cl_gpgpu_set_printf_info = (cl_gpgpu_set_printf_info_cb *)intel_gpgpu_set_printf_info;
- cl_gpgpu_get_printf_info = (cl_gpgpu_get_printf_info_cb *)intel_gpgpu_get_printf_info;
- cl_gpgpu_set_kernel = (cl_gpgpu_set_kernel_cb *)intel_gpgpu_set_kernel;
- cl_gpgpu_get_kernel = (cl_gpgpu_get_kernel_cb *)intel_gpgpu_get_kernel;
-
- if (IS_BROADWELL(device_id) || IS_CHERRYVIEW(device_id)) {
- cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen8;
- intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
- cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen8;
- intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;
- intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; //BDW need not restore SLM, same as gen7
- intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7;
- if(IS_CHERRYVIEW(device_id))
- intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_baytrail;
- intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen8;
- intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen8;
- intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8;
- cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8;
- intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen8;
- intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
- intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
- cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
- intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen8;
- intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7;
- cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen8;
- return;
- }
- if (IS_GEN9(device_id)) {
- cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9;
- intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
- cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9;
- intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;
- intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; //SKL need not restore SLM, same as gen7
- intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7;
- intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen9;
- intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen9;
- intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8;
- cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8;
- intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen9;
- intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
- intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
- cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
- intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen8;
- intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen9;
- cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen8;
- return;
- }
-
- cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes_gen7;
- intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen7;
- intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen7;
- cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen7;
- intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen7;
- intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen7;
- intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen7;
- intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7;
-
- if (IS_HASWELL(device_id)) {
- cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
- intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen75;
- cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen75;
- intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen75;
- intel_gpgpu_post_action = intel_gpgpu_post_action_gen75;
- intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb
- intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75;
- intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75;
- }
- else if (IS_IVYBRIDGE(device_id)) {
- cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
- cl_gpgpu_bind_image_for_vme = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_for_vme_gen7;
- if (IS_BAYTRAIL_T(device_id)) {
- intel_gpgpu_set_L3 = intel_gpgpu_set_L3_baytrail;
- intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_baytrail;
- } else {
- intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen7;
- intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7;
- }
- cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen7;
- intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7;
- intel_gpgpu_post_action = intel_gpgpu_post_action_gen7;
- intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7;
- intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
- }
-}
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
deleted file mode 100644
index f575f8b..0000000
--- a/src/intel/intel_gpgpu.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- * Alexei Soupikov <alexei.soupikov at intel.com>
- */
-
-#ifndef __INTEL_GPGPU_H__
-#define __INTEL_GPGPU_H__
-
-#include "cl_utils.h"
-#include "cl_driver.h"
-#include "intel/intel_batchbuffer.h"
-#include "intel/intel_driver.h"
-
-#include <stdlib.h>
-#include <stdint.h>
-
-
-/* We can bind only a limited number of buffers */
-enum { max_buf_n = 128 };
-
-enum { max_img_n = 128};
-
-enum {max_sampler_n = 16 };
-
-struct intel_driver;
-struct intel_batchbuffer;
-
-/* Handle GPGPU state */
-struct intel_gpgpu
-{
- void* ker_opaque;
- void* printf_info;
- void* profiling_info;
- struct intel_driver *drv;
- struct intel_batchbuffer *batch;
- cl_gpgpu_kernel *ker;
- drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the call */
- uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */
- uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe buffer */
- uint32_t binded_n; /* number of buffers binded */
- void *kernel; /* cl_kernel with this gpgpu */
-
- unsigned long img_bitmap; /* image usage bitmap. */
- unsigned int img_index_base; /* base index for image surface.*/
-
- unsigned long sampler_bitmap; /* sampler usage bitmap. */
-
- struct { drm_intel_bo *bo; } stack_b;
- struct { drm_intel_bo *bo; } perf_b;
- struct { drm_intel_bo *bo; } scratch_b;
- struct { drm_intel_bo *bo; } constant_b;
- struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */
- struct { drm_intel_bo *bo; } printf_b; /* the printf buf and index buf*/
- struct { drm_intel_bo *bo; } profiling_b; /* the buf for profiling*/
- struct { drm_intel_bo *bo; } aux_buf;
- struct {
- uint32_t surface_heap_offset;
- uint32_t curbe_offset;
- uint32_t idrt_offset;
- uint32_t sampler_state_offset;
- uint32_t sampler_border_color_state_offset;
- } aux_offset;
-
- uint32_t per_thread_scratch;
- struct {
- uint32_t num_cs_entries;
- uint32_t size_cs_entry; /* size of one entry in 512bit elements */
- } curb;
-
- uint32_t max_threads; /* max threads requested by the user */
-};
-
-struct intel_gpgpu_node {
- struct intel_gpgpu *gpgpu;
- struct intel_gpgpu_node *next;
-};
-
-
-/* Set the gpgpu related call backs */
-extern void intel_set_gpgpu_callbacks(int device_id);
-
-#endif /* __INTEL_GPGPU_H__ */
-
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
deleted file mode 100644
index c112a16..0000000
--- a/src/intel/intel_structs.h
+++ /dev/null
@@ -1,832 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- */
-
-/*
- * Copyright 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#ifndef __INTEL_STRUCTS_H__
-#define __INTEL_STRUCTS_H__
-
-#include <stdint.h>
-
-typedef struct gen6_interface_descriptor
-{
- struct {
- uint32_t pad6:6;
- uint32_t kernel_start_pointer:26;
- } desc0;
-
- struct {
- uint32_t pad:7;
- uint32_t software_exception:1;
- uint32_t pad2:3;
- uint32_t maskstack_exception:1;
- uint32_t pad3:1;
- uint32_t illegal_opcode_exception:1;
- uint32_t pad4:2;
- uint32_t floating_point_mode:1;
- uint32_t thread_priority:1;
- uint32_t single_program_flow:1;
- uint32_t pad5:1;
- uint32_t pad6:6;
- uint32_t pad7:6;
- } desc1;
-
- struct {
- uint32_t pad:2;
- uint32_t sampler_count:3;
- uint32_t sampler_state_pointer:27;
- } desc2;
-
- struct {
- uint32_t binding_table_entry_count:5; /* prefetch entries only */
- uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */
- } desc3;
-
- struct {
- uint32_t curbe_read_offset:16; /* in GRFs */
- uint32_t curbe_read_len:16; /* in GRFs */
- } desc4;
-
- struct {
- uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */
- uint32_t barrier_return_byte:8;
- uint32_t slm_sz:5; /* 0..16 - 0K..64K */
- uint32_t barrier_enable:1;
- uint32_t rounding_mode:2;
- uint32_t barrier_return_grf_offset:8;
- } desc5;
-
- uint32_t desc6; /* unused */
- uint32_t desc7; /* unused */
-} gen6_interface_descriptor_t;
-
-typedef struct gen8_interface_descriptor
-{
- struct {
- uint32_t pad6:6;
- uint32_t kernel_start_pointer:26;
- } desc0;
- struct {
- uint32_t kernel_start_pointer_high:16;
- uint32_t pad6:16;
- } desc1;
-
- struct {
- uint32_t pad:7;
- uint32_t software_exception:1;
- uint32_t pad2:3;
- uint32_t maskstack_exception:1;
- uint32_t pad3:1;
- uint32_t illegal_opcode_exception:1;
- uint32_t pad4:2;
- uint32_t floating_point_mode:1;
- uint32_t thread_priority:1;
- uint32_t single_program_flow:1;
- uint32_t denorm_mode:1;
- uint32_t thread_preemption_disable:1;
- uint32_t pad5:11;
- } desc2;
-
- struct {
- uint32_t pad:2;
- uint32_t sampler_count:3;
- uint32_t sampler_state_pointer:27;
- } desc3;
-
- struct {
- uint32_t binding_table_entry_count:5; /* prefetch entries only */
- uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */
- } desc4;
-
- struct {
- uint32_t curbe_read_offset:16; /* in GRFs */
- uint32_t curbe_read_len:16; /* in GRFs */
- } desc5;
-
- struct {
- uint32_t group_threads_num:10; /* 0..64, 0 - no barrier use */
- uint32_t pad:5;
- uint32_t global_barrier_enable:1;
- uint32_t slm_sz:5; /* 0..16 - 0K..64K */
- uint32_t barrier_enable:1;
- uint32_t rounding_mode:2;
- uint32_t barrier_return_grf_offset:8;
- } desc6;
-
- uint32_t desc7; /* unused */
-} gen8_interface_descriptor_t;
-
-typedef struct gen7_surface_state
-{
- struct {
- uint32_t cube_pos_z:1;
- uint32_t cube_neg_z:1;
- uint32_t cube_pos_y:1;
- uint32_t cube_neg_y:1;
- uint32_t cube_pos_x:1;
- uint32_t cube_neg_x:1;
- uint32_t media_boundary_pixel_mode:2;
- uint32_t render_cache_rw_mode:1;
- uint32_t pad1:1;
- uint32_t surface_array_spacing:1;
- uint32_t vertical_line_stride_offset:1;
- uint32_t vertical_line_stride:1;
- uint32_t tile_walk:1;
- uint32_t tiled_surface:1;
- uint32_t horizontal_alignment:1;
- uint32_t vertical_alignment:2;
- uint32_t surface_format:9;
- uint32_t pad0:1;
- uint32_t surface_array:1;
- uint32_t surface_type:3;
- } ss0;
-
- struct {
- uint32_t base_addr;
- } ss1;
-
- struct {
- uint32_t width:14;
- uint32_t pad1:2;
- uint32_t height:14;
- uint32_t pad0:2;
- } ss2;
-
- struct {
- uint32_t pitch:18;
- uint32_t pad0:3;
- uint32_t depth:11;
- } ss3;
-
- union {
- struct {
- uint32_t mulsample_pal_idx:3;
- uint32_t numer_mulsample:3;
- uint32_t mss_fmt:1;
- uint32_t rt_view_extent:11;
- uint32_t min_array_element:11;
- uint32_t rt_rotate:2;
- uint32_t pad0:1;
- } not_str_buf;
- } ss4;
-
- struct {
- uint32_t mip_count:4;
- uint32_t surface_min_load:4;
- uint32_t pad2:6;
- uint32_t coherence_type:1;
- uint32_t stateless_force_write_thru:1;
- uint32_t cache_control:4;
- uint32_t y_offset:4;
- uint32_t pad0:1;
- uint32_t x_offset:7;
- } ss5;
-
- uint32_t ss6; /* unused */
-
- struct {
- uint32_t min_lod:12;
- uint32_t pad0:4;
- uint32_t shader_a:3;
- uint32_t shader_b:3;
- uint32_t shader_g:3;
- uint32_t shader_r:3;
- uint32_t pad1:4;
- } ss7;
-} gen7_surface_state_t;
-
-typedef struct gen8_surface_state
-{
- struct {
- uint32_t cube_pos_z:1;
- uint32_t cube_neg_z:1;
- uint32_t cube_pos_y:1;
- uint32_t cube_neg_y:1;
- uint32_t cube_pos_x:1;
- uint32_t cube_neg_x:1;
- uint32_t media_boundary_pixel_mode:2;
- uint32_t render_cache_rw_mode:1;
- uint32_t sampler_L2_bypass_mode:1;
- uint32_t vertical_line_stride_offset:1;
- uint32_t vertical_line_stride:1;
- uint32_t tile_mode:2;
- uint32_t horizontal_alignment:2;
- uint32_t vertical_alignment:2;
- uint32_t surface_format:9;
- uint32_t pad0:1;
- uint32_t surface_array:1;
- uint32_t surface_type:3;
- } ss0;
-
- struct {
- uint32_t surface_qpitch:15;
- uint32_t pad0:3;
- uint32_t pad1:1;
- uint32_t base_mip_level:5;
- uint32_t mem_obj_ctrl_state:7;
- uint32_t pad2:1;
- } ss1;
-
- struct {
- uint32_t width:14;
- uint32_t pad1:2;
- uint32_t height:14;
- uint32_t pad0:2;
- } ss2;
-
- struct {
- uint32_t surface_pitch:18;
- uint32_t pad1:2;
- uint32_t pad0:1;
- uint32_t depth:11;
- } ss3;
-
- struct {
- union {
- struct {
- uint32_t multisample_pos_palette_idx:3;
- uint32_t multisample_num:3;
- uint32_t multisample_format:1;
- uint32_t render_target_view_ext:11;
- uint32_t min_array_elt:11;
- uint32_t render_target_and_sample_rotation:2;
- uint32_t pad1:1;
- };
-
- uint32_t pad0;
- };
- } ss4;
-
- struct {
- uint32_t mip_count:4;
- uint32_t surface_min_lod:4;
- uint32_t pad5:4;
- uint32_t pad4:2;
- uint32_t conherency_type:1;
- uint32_t pad3:3;
- uint32_t pad2:2;
- uint32_t cube_ewa:1;
- uint32_t y_offset:3;
- uint32_t pad0:1;
- uint32_t x_offset:7;
- } ss5;
-
- struct {
- union {
- union {
- struct {
- uint32_t aux_surface_mode:3;
- uint32_t aux_surface_pitch:9;
- uint32_t pad3:4;
- };
- struct {
- uint32_t uv_plane_y_offset:14;
- uint32_t pad2:2;
- };
- };
-
- struct {
- uint32_t uv_plane_x_offset:14;
- uint32_t pad1:1;
- uint32_t seperate_uv_plane_enable:1;
- };
- struct {
- uint32_t aux_sruface_qpitch:15;
- uint32_t pad0:1;
- };
- };
- } ss6;
-
- struct {
- uint32_t resource_min_lod:12;
- uint32_t pad0:4;
- uint32_t shader_channel_select_alpha:3;
- uint32_t shader_channel_select_blue:3;
- uint32_t shader_channel_select_green:3;
- uint32_t shader_channel_select_red:3;
- uint32_t alpha_clear_color:1;
- uint32_t blue_clear_color:1;
- uint32_t green_clear_color:1;
- uint32_t red_clear_color:1;
- } ss7;
-
- struct {
- uint32_t surface_base_addr_lo;
- } ss8;
-
- struct {
- uint32_t surface_base_addr_hi;
- } ss9;
-
- struct {
- uint32_t pad0:12;
- uint32_t aux_base_addr_lo:20;
- } ss10;
-
- struct {
- uint32_t aux_base_addr_hi:32;
- } ss11;
-
- struct {
- uint32_t pad0;
- } ss12;
-
- /* 13~15 have meaning only when aux surface mode == AUX_HIZ */
- struct {
- uint32_t pad0;
- } ss13;
- struct {
- uint32_t pad0;
- } ss14;
- struct {
- uint32_t pad0;
- } ss15;
-} gen8_surface_state_t;
-
-typedef struct gen7_media_surface_state
-{
- struct {
- uint32_t base_addr;
- } ss0;
-
- struct {
- uint32_t uv_offset_v_direction:2;
- uint32_t pic_struct:2;
- uint32_t width:14;
- uint32_t height:14;
- } ss1;
-
- struct {
- uint32_t tile_mode:2;
- uint32_t half_pitch_for_chroma:1;
- uint32_t surface_pitch:18;
- uint32_t pad1:1;
- uint32_t surface_object_control_state:4;
- uint32_t pad0:1;
- uint32_t interleave_chroma:1;
- uint32_t surface_format:4;
- } ss2;
-
- struct {
- uint32_t y_offset_for_u:14;
- uint32_t pad1:2;
- uint32_t x_offset_for_u:14;
- uint32_t pad0:2;
- } ss3;
-
- struct {
- uint32_t y_offset_for_v:15;
- uint32_t pad1:1;
- uint32_t x_offset_for_v:14;
- uint32_t pad0:2;
- } ss4;
-
- struct {
- uint32_t pad0;
- } ss5;
-
- struct {
- uint32_t pad0;
- } ss6;
-
- struct {
- uint32_t pad0;
- } ss7;
-} gen7_media_surface_state_t;
-
-typedef union gen_surface_state
-{
- gen7_surface_state_t gen7_surface_state;
- gen8_surface_state_t gen8_surface_state;
-} gen_surface_state_t;
-
-static const size_t surface_state_sz = sizeof(gen_surface_state_t);
-
-typedef struct gen6_vfe_state_inline
-{
- struct {
- uint32_t per_thread_scratch_space:4;
- uint32_t pad3:3;
- uint32_t extend_vfe_state_present:1;
- uint32_t pad2:2;
- uint32_t scratch_base:22;
- } vfe0;
-
- struct {
- uint32_t debug_counter_control:2;
- uint32_t gpgpu_mode:1; /* 0 for SNB!!! */
- uint32_t gateway_mmio_access:2;
- uint32_t fast_preempt:1;
- uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */
- uint32_t reset_gateway_timer:1;
- uint32_t urb_entries:8;
- uint32_t max_threads:16;
- } vfe1;
-
- struct {
- uint32_t pad8:8;
- uint32_t debug_object_id:24;
- } vfe2;
-
- struct {
- uint32_t curbe_size:16; /* in GRFs */
- uint32_t urb_size:16; /* in GRFs */
- } vfe3;
-
- struct {
- uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */
- } vfe4;
-
- struct {
- uint32_t scoreboard0_dx:4;
- uint32_t scoreboard0_dy:4;
- uint32_t scoreboard1_dx:4;
- uint32_t scoreboard1_dy:4;
- uint32_t scoreboard2_dx:4;
- uint32_t scoreboard2_dy:4;
- uint32_t scoreboard3_dx:4;
- uint32_t scoreboard3_dy:4;
- } vfe5;
-
- struct {
- uint32_t scoreboard4_dx:4;
- uint32_t scoreboard4_dy:4;
- uint32_t scoreboard5_dx:4;
- uint32_t scoreboard5_dy:4;
- uint32_t scoreboard6_dx:4;
- uint32_t scoreboard6_dy:4;
- uint32_t scoreboard7_dx:4;
- uint32_t scoreboard7_dy:4;
- } vfe6;
-} gen6_vfe_state_inline_t;
-
-typedef struct gen6_pipe_control
-{
- struct {
- uint32_t length : BITFIELD_RANGE(0, 7);
- uint32_t reserved : BITFIELD_RANGE(8, 15);
- uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23);
- uint32_t instruction_opcode : BITFIELD_RANGE(24, 26);
- uint32_t instruction_pipeline : BITFIELD_RANGE(27, 28);
- uint32_t instruction_type : BITFIELD_RANGE(29, 31);
- } dw0;
-
- struct {
- uint32_t depth_cache_flush_enable : BITFIELD_BIT(0);
- uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1);
- uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2);
- uint32_t constant_cache_invalidation_enable : BITFIELD_BIT(3);
- uint32_t vf_cache_invalidation_enable : BITFIELD_BIT(4);
- uint32_t dc_flush_enable : BITFIELD_BIT(5);
- uint32_t protected_memory_app_id : BITFIELD_BIT(6);
- uint32_t pipe_control_flush_enable : BITFIELD_BIT(7);
- uint32_t notify_enable : BITFIELD_BIT(8);
- uint32_t indirect_state_pointers_disable : BITFIELD_BIT(9);
- uint32_t texture_cache_invalidation_enable : BITFIELD_BIT(10);
- uint32_t instruction_cache_invalidate_enable : BITFIELD_BIT(11);
- uint32_t render_target_cache_flush_enable : BITFIELD_BIT(12);
- uint32_t depth_stall_enable : BITFIELD_BIT(13);
- uint32_t post_sync_operation : BITFIELD_RANGE(14, 15);
- uint32_t generic_media_state_clear : BITFIELD_BIT(16);
- uint32_t synchronize_gfdt_surface : BITFIELD_BIT(17);
- uint32_t tlb_invalidate : BITFIELD_BIT(18);
- uint32_t global_snapshot_count_reset : BITFIELD_BIT(19);
- uint32_t cs_stall : BITFIELD_BIT(20);
- uint32_t store_data_index : BITFIELD_BIT(21);
- uint32_t protected_memory_enable : BITFIELD_BIT(22);
- uint32_t reserved : BITFIELD_RANGE(23, 31);
- } dw1;
-
- struct {
- uint32_t reserved : BITFIELD_RANGE(0, 1);
- uint32_t destination_address_type : BITFIELD_BIT(2);
- uint32_t address : BITFIELD_RANGE(3, 31);
- } dw2;
-
- struct {
- uint32_t data;
- } dw3;
-
- struct {
- uint32_t data;
- } dw4;
-} gen6_pipe_control_t;
-
-typedef struct gen8_pipe_control
-{
- struct {
- uint32_t length : BITFIELD_RANGE(0, 7);
- uint32_t reserved : BITFIELD_RANGE(8, 15);
- uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23);
- uint32_t instruction_opcode : BITFIELD_RANGE(24, 26);
- uint32_t instruction_pipeline : BITFIELD_RANGE(27, 28);
- uint32_t instruction_type : BITFIELD_RANGE(29, 31);
- } dw0;
-
- struct {
- uint32_t depth_cache_flush_enable : BITFIELD_BIT(0);
- uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1);
- uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2);
- uint32_t constant_cache_invalidation_enable : BITFIELD_BIT(3);
- uint32_t vf_cache_invalidation_enable : BITFIELD_BIT(4);
- uint32_t dc_flush_enable : BITFIELD_BIT(5);
- uint32_t protected_memory_app_id : BITFIELD_BIT(6);
- uint32_t pipe_control_flush_enable : BITFIELD_BIT(7);
- uint32_t notify_enable : BITFIELD_BIT(8);
- uint32_t indirect_state_pointers_disable : BITFIELD_BIT(9);
- uint32_t texture_cache_invalidation_enable : BITFIELD_BIT(10);
- uint32_t instruction_cache_invalidate_enable : BITFIELD_BIT(11);
- uint32_t render_target_cache_flush_enable : BITFIELD_BIT(12);
- uint32_t depth_stall_enable : BITFIELD_BIT(13);
- uint32_t post_sync_operation : BITFIELD_RANGE(14, 15);
- uint32_t generic_media_state_clear : BITFIELD_BIT(16);
- uint32_t synchronize_gfdt_surface : BITFIELD_BIT(17);
- uint32_t tlb_invalidate : BITFIELD_BIT(18);
- uint32_t global_snapshot_count_reset : BITFIELD_BIT(19);
- uint32_t cs_stall : BITFIELD_BIT(20);
- uint32_t store_data_index : BITFIELD_BIT(21);
- uint32_t protected_memory_enable : BITFIELD_BIT(22);
- uint32_t reserved : BITFIELD_RANGE(23, 31);
- } dw1;
-
- struct {
- uint32_t reserved : BITFIELD_RANGE(0, 1);
- uint32_t destination_address_type : BITFIELD_BIT(2);
- uint32_t address : BITFIELD_RANGE(3, 31);
- } dw2;
-
- struct {
- uint32_t data;
- } dw3;
-
- struct {
- uint32_t data;
- } dw4;
-
- struct {
- uint32_t data;
- } dw5;
-} gen8_pipe_control_t;
-
-#define GEN7_NUM_VME_SEARCH_PATH_STATES 14
-#define GEN7_NUM_VME_RD_LUT_SETS 4
-
-typedef struct gen7_vme_search_path_state
-{
- struct {
- uint32_t SPD_0_X : BITFIELD_RANGE(0, 3); //search path distance
- uint32_t SPD_0_Y : BITFIELD_RANGE(4, 7);
- uint32_t SPD_1_X : BITFIELD_RANGE(8, 11);
- uint32_t SPD_1_Y : BITFIELD_RANGE(12, 15);
- uint32_t SPD_2_X : BITFIELD_RANGE(16, 19);
- uint32_t SPD_2_Y : BITFIELD_RANGE(20, 23);
- uint32_t SPD_3_X : BITFIELD_RANGE(24, 27);
- uint32_t SPD_3_Y : BITFIELD_RANGE(28, 31);
- }dw0;
-}gen7_vme_search_path_state_t;
-
-typedef struct gen7_vme_rd_lut_set
-{
- struct {
- uint32_t LUT_MbMode_0 : BITFIELD_RANGE(0, 7);
- uint32_t LUT_MbMode_1 : BITFIELD_RANGE(8, 15);
- uint32_t LUT_MbMode_2 : BITFIELD_RANGE(16, 23);
- uint32_t LUT_MbMode_3 : BITFIELD_RANGE(24, 31);
- }dw0;
-
- struct {
- uint32_t LUT_MbMode_4 : BITFIELD_RANGE(0, 7);
- uint32_t LUT_MbMode_5 : BITFIELD_RANGE(8, 15);
- uint32_t LUT_MbMode_6 : BITFIELD_RANGE(16, 23);
- uint32_t LUT_MbMode_7 : BITFIELD_RANGE(24, 31);
- }dw1;
-
- struct {
- uint32_t LUT_MV_0 : BITFIELD_RANGE(0, 7);
- uint32_t LUT_MV_1 : BITFIELD_RANGE(8, 15);
- uint32_t LUT_MV_2 : BITFIELD_RANGE(16, 23);
- uint32_t LUT_MV_3 : BITFIELD_RANGE(24, 31);
- }dw2;
-
- struct {
- uint32_t LUT_MV_4 : BITFIELD_RANGE(0, 7);
- uint32_t LUT_MV_5 : BITFIELD_RANGE(8, 15);
- uint32_t LUT_MV_6 : BITFIELD_RANGE(16, 23);
- uint32_t LUT_MV_7 : BITFIELD_RANGE(24, 31);
- }dw3;
-}gen7_vme_rd_lut_set_t;
-
-typedef struct gen7_vme_state
-{
- gen7_vme_search_path_state_t sp[GEN7_NUM_VME_SEARCH_PATH_STATES];
-
- struct {
- uint32_t LUT_MbMode_8_0 : BITFIELD_RANGE(0, 7);
- uint32_t LUT_MbMode_9_0 : BITFIELD_RANGE(8, 15);
- uint32_t LUT_MbMode_8_1 : BITFIELD_RANGE(16, 23);
- uint32_t LUT_MbMode_9_1 : BITFIELD_RANGE(24, 31);
- }dw14;
-
- struct {
- uint32_t LUT_MbMode_8_2 : BITFIELD_RANGE(0, 7);
- uint32_t LUT_MbMode_9_2 : BITFIELD_RANGE(8, 15);
- uint32_t LUT_MbMode_8_3 : BITFIELD_RANGE(16, 23);
- uint32_t LUT_MbMode_9_3 : BITFIELD_RANGE(24, 31);
- }dw15;
-
- gen7_vme_rd_lut_set_t lut[GEN7_NUM_VME_RD_LUT_SETS];
-}gen7_vme_state_t;
-
-typedef struct gen6_sampler_state
-{
- struct {
- uint32_t shadow_function:3;
- uint32_t lod_bias:11;
- uint32_t min_filter:3;
- uint32_t mag_filter:3;
- uint32_t mip_filter:2;
- uint32_t base_level:5;
- uint32_t min_mag_neq:1;
- uint32_t lod_preclamp:1;
- uint32_t default_color_mode:1;
- uint32_t pad0:1;
- uint32_t disable:1;
- } ss0;
-
- struct {
- uint32_t r_wrap_mode:3;
- uint32_t t_wrap_mode:3;
- uint32_t s_wrap_mode:3;
- uint32_t cube_control_mode:1;
- uint32_t pad:2;
- uint32_t max_lod:10;
- uint32_t min_lod:10;
- } ss1;
-
- struct {
- uint32_t pad:5;
- uint32_t default_color_pointer:27;
- } ss2;
-
- struct {
- uint32_t non_normalized_coord:1;
- uint32_t pad:12;
- uint32_t address_round:6;
- uint32_t max_aniso:3;
- uint32_t chroma_key_mode:1;
- uint32_t chroma_key_index:2;
- uint32_t chroma_key_enable:1;
- uint32_t monochrome_filter_width:3;
- uint32_t monochrome_filter_height:3;
- } ss3;
-} gen6_sampler_state_t;
-
-typedef struct gen7_sampler_border_color {
- float r,g,b,a;
-} gen7_sampler_border_color_t;
-
-typedef struct gen7_sampler_state
-{
- struct {
- uint32_t aniso_algorithm:1;
- uint32_t lod_bias:13;
- uint32_t min_filter:3;
- uint32_t mag_filter:3;
- uint32_t mip_filter:2;
- uint32_t base_level:5;
- uint32_t pad1:1;
- uint32_t lod_preclamp:1;
- uint32_t default_color_mode:1;
- uint32_t pad0:1;
- uint32_t disable:1;
- } ss0;
-
- struct {
- uint32_t cube_control_mode:1;
- uint32_t shadow_function:3;
- uint32_t pad:4;
- uint32_t max_lod:12;
- uint32_t min_lod:12;
- } ss1;
-
- struct {
- uint32_t pad:5;
- uint32_t default_color_pointer:27;
- } ss2;
-
- struct {
- uint32_t r_wrap_mode:3;
- uint32_t t_wrap_mode:3;
- uint32_t s_wrap_mode:3;
- uint32_t pad:1;
- uint32_t non_normalized_coord:1;
- uint32_t trilinear_quality:2;
- uint32_t address_round:6;
- uint32_t max_aniso:3;
- uint32_t chroma_key_mode:1;
- uint32_t chroma_key_index:2;
- uint32_t chroma_key_enable:1;
- uint32_t pad0:6;
- } ss3;
-} gen7_sampler_state_t;
-
-STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen7_sampler_state_t));
-
-typedef struct gen8_sampler_state
-{
- struct {
- uint32_t aniso_algorithm:1;
- uint32_t lod_bias:13;
- uint32_t min_filter:3;
- uint32_t mag_filter:3;
- uint32_t mip_filter:2;
- uint32_t base_level:5;
- uint32_t lod_preclamp:2;
- uint32_t default_color_mode:1;
- uint32_t pad0:1;
- uint32_t disable:1;
- } ss0;
-
- struct {
- uint32_t cube_control_mode:1;
- uint32_t shadow_function:3;
- uint32_t chromakey_mode:1;
- uint32_t chromakey_index:2;
- uint32_t chromakey_enable:1;
- uint32_t max_lod:12;
- uint32_t min_lod:12;
- } ss1;
-
- struct {
- uint32_t lod_clamp_mag_mode:1;
- uint32_t flexible_filter_valign:1;
- uint32_t flexible_filter_halign:1;
- uint32_t flexible_filter_coeff_size:1;
- uint32_t flexible_filter_mode:1;
- uint32_t pad1:1;
- uint32_t indirect_state_ptr:18;
- uint32_t pad0:2;
- uint32_t sep_filter_height:2;
- uint32_t sep_filter_width:2;
- uint32_t sep_filter_coeff_table_size:2;
- } ss2;
-
- struct {
- uint32_t r_wrap_mode:3;
- uint32_t t_wrap_mode:3;
- uint32_t s_wrap_mode:3;
- uint32_t pad:1;
- uint32_t non_normalized_coord:1;
- uint32_t trilinear_quality:2;
- uint32_t address_round:6;
- uint32_t max_aniso:3;
- uint32_t pad0:2;
- uint32_t non_sep_filter_footprint_mask:8;
- } ss3;
-} gen8_sampler_state_t;
-
-STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen8_sampler_state_t));
-
-#undef BITFIELD_BIT
-#undef BITFIELD_RANGE
-
-#endif /* __INTEL_STRUCTS_H__ */
-
--
2.7.4
More information about the Beignet
mailing list