[RFC PATCH 2/5] drm, drm/i915: Move the memcpy_from_wc functionality to core drm
Thomas Hellström
thomas.hellstrom at linux.intel.com
Thu May 20 15:09:44 UTC 2021
Memcpy from wc will be used as well by TTM memcpy.
Move it to core drm, and make the interface do the right thing
even on !X86.
Cc: Christian König <christian.koenig at amd.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Dave Airlie <airlied at gmail.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
drivers/gpu/drm/Makefile | 2 +-
drivers/gpu/drm/drm_drv.c | 2 +
.../drm/{i915/i915_memcpy.c => drm_memcpy.c} | 31 +++++++-------
drivers/gpu/drm/i915/Makefile | 1 -
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +-
drivers/gpu/drm/i915/gem/i915_gem_object.c | 5 ++-
drivers/gpu/drm/i915/gt/selftest_reset.c | 7 ++--
drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 11 ++---
drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
drivers/gpu/drm/i915/i915_drv.c | 2 -
drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++--
drivers/gpu/drm/i915/i915_memcpy.h | 34 ---------------
.../drm/i915/selftests/intel_memory_region.c | 7 ++--
include/drm/drm_memcpy.h | 41 +++++++++++++++++++
14 files changed, 83 insertions(+), 76 deletions(-)
rename drivers/gpu/drm/{i915/i915_memcpy.c => drm_memcpy.c} (84%)
delete mode 100644 drivers/gpu/drm/i915/i915_memcpy.h
create mode 100644 include/drm/drm_memcpy.h
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index a91cc7684904..f3ab8586c3d7 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -18,7 +18,7 @@ drm-y := drm_aperture.o drm_auth.o drm_cache.o \
drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
- drm_managed.o drm_vblank_work.o
+ drm_managed.o drm_vblank_work.o drm_memcpy.o \
drm-$(CONFIG_DRM_LEGACY) += drm_agpsupport.o drm_bufs.o drm_context.o drm_dma.o \
drm_legacy_misc.o drm_lock.o drm_memory.o drm_scatter.o \
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3d8d68a98b95..351cc2900cf1 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -40,6 +40,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_managed.h>
+#include <drm/drm_memcpy.h>
#include <drm/drm_mode_object.h>
#include <drm/drm_print.h>
@@ -1041,6 +1042,7 @@ static int __init drm_core_init(void)
drm_connector_ida_init();
idr_init(&drm_minors_idr);
+ drm_memcpy_init_early();
ret = drm_sysfs_init();
if (ret < 0) {
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/drm_memcpy.c
similarity index 84%
rename from drivers/gpu/drm/i915/i915_memcpy.c
rename to drivers/gpu/drm/drm_memcpy.c
index 1b021a4902de..03688425a096 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/drm_memcpy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright © 2016 Intel Corporation
*
@@ -22,16 +23,11 @@
*
*/
+#ifdef CONFIG_X86
#include <linux/kernel.h>
#include <asm/fpu/api.h>
-#include "i915_memcpy.h"
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
-#define CI_BUG_ON(expr) BUG_ON(expr)
-#else
-#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
-#endif
+#include "drm/drm_memcpy.h"
static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
@@ -94,23 +90,23 @@ static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
}
/**
- * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
+ * drm_memcpy_from_wc: perform an accelerated *aligned* read from WC
* @dst: destination pointer
* @src: source pointer
* @len: how many bytes to copy
*
- * i915_memcpy_from_wc copies @len bytes from @src to @dst using
+ * drm_memcpy_from_wc copies @len bytes from @src to @dst using
* non-temporal instructions where available. Note that all arguments
* (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
* of 16.
*
* To test whether accelerated reads from WC are supported, use
- * i915_memcpy_from_wc(NULL, NULL, 0);
+ * drm_memcpy_from_wc(NULL, NULL, 0);
*
* Returns true if the copy was successful, false if the preconditions
* are not met.
*/
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
return false;
@@ -123,24 +119,23 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
return false;
}
+EXPORT_SYMBOL(drm_memcpy_from_wc);
/**
- * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
+ * drm_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
* @dst: destination pointer
* @src: source pointer
* @len: how many bytes to copy
*
- * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from
+ * Like drm_memcpy_from_wc(), the unaligned variant copies @len bytes from
* @src to @dst using * non-temporal instructions where available, but
* accepts that its arguments may not be aligned, but are valid for the
* potential 16-byte read past the end.
*/
-void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+void drm_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
unsigned long addr;
- CI_BUG_ON(!i915_has_memcpy_from_wc());
-
addr = (unsigned long)src;
if (!IS_ALIGNED(addr, 16)) {
unsigned long x = min(ALIGN(addr, 16) - addr, len);
@@ -155,8 +150,9 @@ void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len
if (likely(len))
__memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
}
+EXPORT_SYMBOL(drm_unaligned_memcpy_from_wc);
-void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
+void drm_memcpy_init_early(void)
{
/*
* Some hypervisors (e.g. KVM) don't support VEX-prefix instructions
@@ -166,3 +162,4 @@ void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
!boot_cpu_has(X86_FEATURE_HYPERVISOR))
static_branch_enable(&has_movntdqa);
}
+#endif
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cb8823570996..998606b7f49f 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -61,7 +61,6 @@ i915-y += i915_drv.o \
# core library code
i915-y += \
dma_resv_utils.o \
- i915_memcpy.o \
i915_mm.o \
i915_sw_fence.o \
i915_sw_fence_work.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 297143511f99..77285e421fb8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -10,6 +10,7 @@
#include <linux/uaccess.h>
#include <drm/drm_syncobj.h>
+#include <drm/drm_memcpy.h>
#include "display/intel_frontbuffer.h"
@@ -28,7 +29,6 @@
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
-#include "i915_memcpy.h"
struct eb_vma {
struct i915_vma *vma;
@@ -2503,7 +2503,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
!(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
pw->batch_map = ERR_PTR(-ENODEV);
- if (needs_clflush && i915_has_memcpy_from_wc())
+ if (needs_clflush && drm_has_memcpy_from_wc())
pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
if (IS_ERR(pw->batch_map)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index c8953e3f5c70..f1eb857fa172 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -24,6 +24,8 @@
#include <linux/sched/mm.h>
+#include <drm/drm_memcpy.h>
+
#include "display/intel_frontbuffer.h"
#include "i915_drv.h"
#include "i915_gem_clflush.h"
@@ -31,7 +33,6 @@
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_globals.h"
-#include "i915_memcpy.h"
#include "i915_trace.h"
static struct i915_global_object {
@@ -374,7 +375,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
PAGE_SIZE);
src_ptr = src_map + offset_in_page(offset);
- if (!i915_memcpy_from_wc(dst, (void __force *)src_ptr, size))
+ if (!drm_memcpy_from_wc(dst, (void __force *)src_ptr, size))
memcpy_fromio(dst, src_ptr, size);
io_mapping_unmap(src_map);
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 8784257ec808..92ada67a3835 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -5,9 +5,10 @@
#include <linux/crc32.h>
+#include <drm/drm_memcpy.h>
+
#include "gem/i915_gem_stolen.h"
-#include "i915_memcpy.h"
#include "i915_selftest.h"
#include "intel_gpu_commands.h"
#include "selftests/igt_reset.h"
@@ -99,7 +100,7 @@ __igt_reset_stolen(struct intel_gt *gt,
memset_io(s, STACK_MAGIC, PAGE_SIZE);
in = (void __force *)s;
- if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
+ if (drm_memcpy_from_wc(tmp, in, PAGE_SIZE))
in = tmp;
crc[page] = crc32_le(0, in, PAGE_SIZE);
@@ -135,7 +136,7 @@ __igt_reset_stolen(struct intel_gt *gt,
PAGE_SIZE);
in = (void __force *)s;
- if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
+ if (drm_memcpy_from_wc(tmp, in, PAGE_SIZE))
in = tmp;
x = crc32_le(0, in, PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index c36d5eb5bbb9..f045e42be6ca 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -5,9 +5,10 @@
#include <linux/debugfs.h>
+#include <drm/drm_memcpy.h>
+
#include "gt/intel_gt.h"
#include "i915_drv.h"
-#include "i915_memcpy.h"
#include "intel_guc_log.h"
static void guc_log_capture_logs(struct intel_guc_log *log);
@@ -295,13 +296,13 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
/* Just copy the newly written data */
if (read_offset > write_offset) {
- i915_memcpy_from_wc(dst_data, src_data, write_offset);
+ drm_memcpy_from_wc(dst_data, src_data, write_offset);
bytes_to_copy = buffer_size - read_offset;
} else {
bytes_to_copy = write_offset - read_offset;
}
- i915_memcpy_from_wc(dst_data + read_offset,
- src_data + read_offset, bytes_to_copy);
+ drm_memcpy_from_wc(dst_data + read_offset,
+ src_data + read_offset, bytes_to_copy);
src_data += buffer_size;
dst_data += buffer_size;
@@ -569,7 +570,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
* it should be present on the chipsets supporting GuC based
* submisssions.
*/
- if (!i915_has_memcpy_from_wc()) {
+ if (!drm_has_memcpy_from_wc()) {
ret = -ENXIO;
goto out_unlock;
}
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5b4b2bd46e7c..98653f1a2b1d 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -24,12 +24,12 @@
* Brad Volkin <bradley.d.volkin at intel.com>
*
*/
+#include <drm/drm_memcpy.h>
#include "gt/intel_engine.h"
#include "gt/intel_gpu_commands.h"
#include "i915_drv.h"
-#include "i915_memcpy.h"
/**
* DOC: batch buffer command parser
@@ -1152,7 +1152,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
if (src) {
GEM_BUG_ON(!needs_clflush);
- i915_unaligned_memcpy_from_wc(dst, src + offset, length);
+ drm_unaligned_memcpy_from_wc(dst, src + offset, length);
} else {
struct scatterlist *sg;
void *ptr;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 122dd297b6af..0df9dd62c717 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -72,7 +72,6 @@
#include "i915_drv.h"
#include "i915_ioc32.h"
#include "i915_irq.h"
-#include "i915_memcpy.h"
#include "i915_perf.h"
#include "i915_query.h"
#include "i915_suspend.h"
@@ -325,7 +324,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
mutex_init(&dev_priv->pps_mutex);
mutex_init(&dev_priv->hdcp_comp_mutex);
- i915_memcpy_init_early(dev_priv);
intel_runtime_pm_init_early(&dev_priv->runtime_pm);
ret = i915_workqueues_init(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 99ca242ec13b..ee11920fbea5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -34,6 +34,7 @@
#include <linux/utsname.h>
#include <linux/zlib.h>
+#include <drm/drm_memcpy.h>
#include <drm/drm_print.h>
#include "display/intel_csr.h"
@@ -46,7 +47,6 @@
#include "i915_drv.h"
#include "i915_gpu_error.h"
-#include "i915_memcpy.h"
#include "i915_scatterlist.h"
#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -255,7 +255,7 @@ static bool compress_init(struct i915_vma_compress *c)
}
c->tmp = NULL;
- if (i915_has_memcpy_from_wc())
+ if (drm_has_memcpy_from_wc())
c->tmp = pool_alloc(&c->pool, ALLOW_FAIL);
return true;
@@ -295,7 +295,7 @@ static int compress_page(struct i915_vma_compress *c,
struct z_stream_s *zstream = &c->zstream;
zstream->next_in = src;
- if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
+ if (wc && c->tmp && drm_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
zstream->next_in = c->tmp;
zstream->avail_in = PAGE_SIZE;
@@ -395,7 +395,7 @@ static int compress_page(struct i915_vma_compress *c,
if (!ptr)
return -ENOMEM;
- if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
+ if (!(wc && drm_memcpy_from_wc(ptr, src, PAGE_SIZE)))
memcpy(ptr, src, PAGE_SIZE);
dst->pages[dst->page_count++] = ptr;
cond_resched();
diff --git a/drivers/gpu/drm/i915/i915_memcpy.h b/drivers/gpu/drm/i915/i915_memcpy.h
deleted file mode 100644
index 3df063a3293b..000000000000
--- a/drivers/gpu/drm/i915/i915_memcpy.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __I915_MEMCPY_H__
-#define __I915_MEMCPY_H__
-
-#include <linux/types.h>
-
-struct drm_i915_private;
-
-void i915_memcpy_init_early(struct drm_i915_private *i915);
-
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
-void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
-
-/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
- * as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot
- * perform the operation. To check beforehand, pass in the parameters to
- * to i915_can_memcpy_from_wc() - since we only care about the low 4 bits,
- * you only need to pass in the minor offsets, page-aligned pointers are
- * always valid.
- *
- * For just checking for SSE4.1, in the foreknowledge that the future use
- * will be correctly aligned, just use i915_has_memcpy_from_wc().
- */
-#define i915_can_memcpy_from_wc(dst, src, len) \
- i915_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
-
-#define i915_has_memcpy_from_wc() \
- i915_memcpy_from_wc(NULL, NULL, 0)
-
-#endif /* __I915_MEMCPY_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index c85d516b85cd..6bb399e9be78 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -6,6 +6,8 @@
#include <linux/prime_numbers.h>
#include <linux/sort.h>
+#include <drm/drm_memcpy.h>
+
#include "../i915_selftest.h"
#include "mock_drm.h"
@@ -20,7 +22,6 @@
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
-#include "i915_memcpy.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
@@ -901,7 +902,7 @@ static inline void igt_memcpy(void *dst, const void *src, size_t size)
static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
{
- i915_memcpy_from_wc(dst, src, size);
+ drm_memcpy_from_wc(dst, src, size);
}
static int _perf_memcpy(struct intel_memory_region *src_mr,
@@ -925,7 +926,7 @@ static int _perf_memcpy(struct intel_memory_region *src_mr,
{
"memcpy_from_wc",
igt_memcpy_from_wc,
- !i915_has_memcpy_from_wc(),
+ !drm_has_memcpy_from_wc(),
},
};
struct drm_i915_gem_object *src, *dst;
diff --git a/include/drm/drm_memcpy.h b/include/drm/drm_memcpy.h
new file mode 100644
index 000000000000..dbf52cd43382
--- /dev/null
+++ b/include/drm/drm_memcpy.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __DRM_MEMCPY_H__
+#define __DRM_MEMCPY_H__
+
+#include <linux/types.h>
+
+#ifdef CONFIG_X86
+bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+void drm_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+
+/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
+ * as well as SSE4.1 support. drm_memcpy_from_wc() will report if it cannot
+ * perform the operation. To check beforehand, pass in the parameters to
+ * drm_can_memcpy_from_wc() - since we only care about the low 4 bits,
+ * you only need to pass in the minor offsets, page-aligned pointers are
+ * always valid.
+ *
+ * For just checking for SSE4.1, in the foreknowledge that the future use
+ * will be correctly aligned, just use drm_has_memcpy_from_wc().
+ */
+#define drm_can_memcpy_from_wc(dst, src, len) \
+ drm_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
+
+#define drm_has_memcpy_from_wc() \
+ drm_memcpy_from_wc(NULL, NULL, 0)
+
+void drm_memcpy_init_early(void);
+
+#else
+
+#define drm_memcpy_from_wc(_dst, _src, _len) (false)
+#define drm_can_memcpy_from_wc(_dst, _src, _len) (false)
+#define drm_has_memcpy_from_wc() (false)
+#define drm_unaligned_memcpy_from_wc(_dst, _src, _len) WARN_ON(1)
+#define drm_memcpy_init_early() do {} while (0)
+#endif /* CONFIG_X86 */
+#endif /* __DRM_MEMCPY_H__ */
--
2.31.1
More information about the dri-devel
mailing list