[Intel-gfx] [PATCH 3/6] drm/i915: Use WC copies on !llc platforms for the command parser
Chris Wilson
chris at chris-wilson.co.uk
Thu Oct 1 04:57:12 PDT 2015
Since we already blow away the TLB entries by using kmap/kunmap, we may as
well go the whole hog and see if declaring our destination page as WC
(write-combining) is faster than keeping it as WB (write-back) and using
clflush. It should be!
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_cmd_parser.c | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 0e826bec7942..f4d4c3132932 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -984,9 +984,10 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
const struct drm_i915_cmd_descriptor *desc = &default_desc;
u32 last_cmd_header = 0;
unsigned dst_iter, src_iter;
- int needs_clflush = 0;
struct get_page rewind;
void *src, *dst;
+ int src_needs_clflush = 0;
+ bool dst_needs_clflush;
unsigned in, out;
u32 *buf, partial = 0, length = 1;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
@@ -999,13 +1000,19 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
if (WARN_ON(shadow_batch_obj->pages_pin_count == 0))
return -ENODEV;
- ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush);
+ ret = i915_gem_obj_prepare_shmem_read(batch_obj, &src_needs_clflush);
if (ret) {
DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
return ret;
}
- ret = i915_gem_object_set_to_cpu_domain(shadow_batch_obj, true);
+ dst_needs_clflush =
+ shadow_batch_obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+ !INTEL_INFO(shadow_batch_obj->base.dev)->has_llc;
+ if (dst_needs_clflush)
+ ret = i915_gem_object_set_to_gtt_domain(shadow_batch_obj, true);
+ else
+ ret = i915_gem_object_set_to_cpu_domain(shadow_batch_obj, true);
if (ret) {
DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
goto unpin;
@@ -1035,7 +1042,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
this = PAGE_SIZE - in;
src = kmap_atomic(i915_gem_object_get_page(batch_obj, src_iter));
- if (needs_clflush)
+ if (src_needs_clflush)
drm_clflush_virt_range(src + in, this);
i = this;
@@ -1054,10 +1061,17 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
k = i;
if (k > PAGE_SIZE - out)
k = PAGE_SIZE - out;
- if (k == PAGE_SIZE)
+ if (k == PAGE_SIZE) {
copy_page(dst, src);
- else
+ } else {
+ /* Partial cache lines need clflushing */
+ if (dst_needs_clflush &&
+ (out | k) & (boot_cpu_data.x86_clflush_size - 1))
+ drm_clflush_virt_range(dst + out, k);
memcpy(dst + out, src + j, k);
+ }
+ if (dst_needs_clflush)
+ drm_clflush_virt_range(dst + out, k);
out += k;
j += k;
--
2.6.0
More information about the Intel-gfx mailing list