[PATCH] alignment
Matthew Auld
matthew.auld at intel.com
Thu Oct 31 15:12:33 UTC 2019
---
.../gpu/drm/i915/gem/selftests/huge_pages.c | 177 ++++++++++++++++++
drivers/gpu/drm/i915/i915_pci.c | 3 +-
2 files changed, 179 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 688c49a24f32..ed03e95a1a84 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -4,15 +4,19 @@
* Copyright © 2017 Intel Corporation
*/
+#include <linux/sort.h>
#include <linux/prime_numbers.h>
#include "i915_selftest.h"
+#include "gem/i915_gem_object_blt.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_rps.h"
#include "igt_gem_utils.h"
#include "mock_context.h"
@@ -1852,6 +1856,178 @@ static int igt_shrink_thp(void *arg)
return err;
}
+static void perf_begin(struct intel_gt *gt)
+{
+ intel_gt_pm_get(gt);
+
+ /* Boost gpufreq to max [waitboost] and keep it fixed */
+ atomic_inc(&gt->rps.num_waiters);
+ schedule_work(&gt->rps.work);
+ flush_work(&gt->rps.work);
+}
+
+static void perf_end(struct intel_gt *gt)
+{
+ atomic_dec(&gt->rps.num_waiters);
+ intel_gt_pm_put(gt);
+}
+
+static int wrap_ktime_compare(const void *A, const void *B)
+{
+ const ktime_t *a = A, *b = B;
+
+ return ktime_compare(*a, *b);
+}
+
+static int perf_measure_blt(struct intel_context *ce,
+ struct drm_i915_gem_object *obj,
+ u64 *bw)
+{
+ ktime_t t[5];
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(t); ++i) {
+ ktime_t t0, t1;
+
+ t0 = ktime_get();
+
+ err = i915_gem_object_fill_blt(obj, ce, 0);
+ if (err)
+ return err;
+
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ return err;
+
+ t1 = ktime_get();
+ t[i] = ktime_sub(t1, t0);
+ }
+
+ sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
+ *bw = div64_u64(mul_u32_u32(4 * obj->base.size,
+ 1000 * 1000 * 1000),
+ t[1] + 2 * t[2] + t[3]) >> 20;
+
+ return 0;
+}
+
+static int perf_pathological_alignment(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ struct intel_gt *gt = &i915->gt;
+ struct intel_context *ce = i915->engine[BCS0]->kernel_context;
+ struct drm_i915_gem_object *obj;
+ const unsigned int n_pages = 64;
+ struct scatterlist *sg;
+ struct sg_table *old_st;
+ struct sg_table *st;
+ unsigned long i;
+ int err;
+ u64 bw;
+
+ if (!HAS_PAGE_SIZES(i915, SZ_2M))
+ return 0;
+
+ obj = huge_pages_object(i915, n_pages * SZ_2M, SZ_4K);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out_put;
+
+ GEM_BUG_ON(obj->mm.page_sizes.sg != SZ_4K);
+
+ perf_begin(gt);
+
+ err = perf_measure_blt(ce, obj, &bw);
+ if (err)
+ goto out_unpin;
+
+ pr_info("%s 4K: %zd KiB fill: %lld MiB/s\n",
+ __func__, obj->base.size >> 10, bw);
+
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+
+ obj = huge_pages_object(i915, n_pages * SZ_2M, SZ_2M);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out_put;
+
+ GEM_BUG_ON(obj->mm.page_sizes.sg < SZ_2M);
+
+ /* Eliminate huge-GTT-pages from the equation */
+ obj->mm.page_sizes.sg = SZ_4K;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st) {
+ err = -ENOMEM;
+ goto out_unpin;
+ }
+
+ if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP_KERNEL)) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ sg = st->sgl;
+ st->nents = 0;
+
+ for (i = 0; i < obj->base.size >> PAGE_SHIFT; ++i) {
+ unsigned long idx = (SZ_2M * (i % n_pages)) >> PAGE_SHIFT;
+ dma_addr_t daddr = i915_gem_object_get_dma_address(obj, idx);
+
+ if (!IS_ALIGNED(daddr, SZ_2M)) {
+ pr_info("%s dma-mapper screwed us over, skipping\n",
+ __func__);
+ goto out_sg_free;
+ }
+
+ sg_dma_address(sg) = daddr;
+ sg_dma_len(sg) = PAGE_SIZE;
+
+ sg->length = PAGE_SIZE;
+
+ st->nents++;
+
+ sg = __sg_next(sg);
+ }
+
+ old_st = obj->mm.pages;
+ obj->mm.pages = st;
+
+ err = perf_measure_blt(ce, obj, &bw);
+ i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ if (err)
+ goto out_reset;
+
+ pr_info("%s 2M: %zd KiB fill: %lld MiB/s\n",
+ __func__, obj->base.size >> 10, bw);
+out_reset:
+ obj->mm.pages = old_st;
+out_sg_free:
+ sg_free_table(st);
+out_free:
+ kfree(st);
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+out_put:
+ i915_gem_object_put(obj);
+ perf_end(gt);
+
+ if (err == -ENOMEM)
+ err = 0;
+
+ return err;
+}
+
int i915_gem_huge_page_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
@@ -1911,6 +2087,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_ppgtt_exhaust_huge),
SUBTEST(igt_ppgtt_smoke_huge),
SUBTEST(igt_ppgtt_sanity_check),
+ SUBTEST(perf_pathological_alignment),
};
struct drm_file *file;
struct i915_gem_context *ctx;
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 1bb701d32a5d..87e9e3887da3 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -603,7 +603,8 @@ static const struct intel_device_info intel_cherryview_info = {
#define GEN9_DEFAULT_PAGE_SIZES \
.page_sizes = I915_GTT_PAGE_SIZE_4K | \
- I915_GTT_PAGE_SIZE_64K
+ I915_GTT_PAGE_SIZE_64K | \
+ I915_GTT_PAGE_SIZE_2M
#define GEN9_FEATURES \
GEN8_FEATURES, \
--
2.20.1
More information about the Intel-gfx-trybot
mailing list