[Intel-gfx] [PATCH 11/18] drm/i915: Integration of GuC client
yu.dai at intel.com
yu.dai at intel.com
Thu Mar 26 12:41:18 PDT 2015
From: Alex Dai <yu.dai at intel.com>
Implement the GuC client. A GuC client has its own doorbell
and work queue. It maintains the doorbell cache line, the process
descriptor and the work queue items.
A default guc_client is created to do the in-order legacy execlist
submission.
Issue: VIZ-4884
Signed-off-by: Alex Dai <yu.dai at intel.com>
---
drivers/gpu/drm/i915/Makefile | 3 +-
drivers/gpu/drm/i915/i915_drv.h | 5 +
drivers/gpu/drm/i915/intel_guc.h | 52 ++++
drivers/gpu/drm/i915/intel_guc_client.c | 399 +++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_guc_loader.c | 14 +-
drivers/gpu/drm/i915/intel_guc_scheduler.c | 29 ++-
6 files changed, 492 insertions(+), 10 deletions(-)
create mode 100644 drivers/gpu/drm/i915/intel_guc_client.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 50b2057..0407720 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,7 +42,8 @@ i915-y += i915_cmd_parser.o \
# ancilliary microcontroller support
i915-y += intel_uc_loader.o \
- intel_guc_loader.o
+ intel_guc_loader.o \
+ intel_guc_client.o
# autogenerated null render state
i915-y += intel_renderstate_gen6.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 72cc12a..ba5da95 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1847,6 +1847,11 @@ static inline struct drm_i915_private *dev_to_i915(struct device *dev)
return to_i915(dev_get_drvdata(dev));
}
+/* Return the drm_i915_private that embeds @guc as its 'guc' member. */
+static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+{
+ return container_of(guc, struct drm_i915_private, guc);
+}
+
/* Iterate over initialised rings */
#define for_each_ring(ring__, dev_priv__, i__) \
for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 7eabadf..09508e0 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -27,11 +27,38 @@
#include "intel_guc_api.h"
#include "intel_uc_loader.h"
+#define GUC_DB_SIZE PAGE_SIZE
+#define GUC_WQ_SIZE (PAGE_SIZE * 2)
+
+/*
+ * Per-client GuC state, set up by i915_guc_client_alloc() and torn down by
+ * i915_guc_client_free().
+ */
+struct i915_guc_client {
+ struct drm_i915_gem_object *client_obj; /* doorbell/process descriptor page followed by the work queue pages */
+ u32 priority; /* copied into the context and process descriptors */
+ off_t doorbell_offset; /* byte offset of the doorbell cache line within client_obj */
+ off_t proc_desc_offset; /* byte offset of the process descriptor within client_obj */
+ off_t wq_offset; /* byte offset of the work queue within client_obj */
+ uint16_t doorbell_id; /* index in the doorbell bitmap, or INVALID_DOORBELL_ID */
+ uint32_t ctx_index; /* slot in the context descriptor pool, or INVALID_CTX_ID */
+ uint32_t wq_size; /* work queue size in bytes */
+};
+
+#define I915_MAX_DOORBELLS 256
+#define INVALID_DOORBELL_ID I915_MAX_DOORBELLS
+
+#define INVALID_CTX_ID (MAX_GUC_GPU_CONTEXTS+1)
+
struct intel_guc {
struct intel_uc_fw guc_fw;
/* GuC-specific additions */
+ spinlock_t host2guc_lock;
+
struct drm_i915_gem_object *ctx_pool_obj;
+
+ struct i915_guc_client *execbuf_client;
+
+ struct ida ctx_ids;
+ int db_cacheline;
+ DECLARE_BITMAP(doorbell_bitmap, I915_MAX_DOORBELLS);
};
#define GUC_STATUS 0xc000
@@ -91,6 +118,22 @@ struct intel_guc {
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | \
GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA)
+#define HOST2GUC_INTERRUPT 0xc4c8
+#define HOST2GUC_TRIGGER (1<<0)
+
+#define DRBMISC1 0x1984
+#define DOORBELL_ENABLE (1<<0)
+
+#define GEN8_DRBREGL(x) (0x1000 + (x) * 8)
+#define GEN8_DRB_VALID (1<<0)
+#define GEN8_DRBREGU(x) (0x1000 + (x) * 8 + 4)
+
+#define GEN8_GT_PM_CONFIG 0x138140
+#define GEN8_GT_DOORBELL_ENABLE (1<<0)
+
+#define GEN8_GTCR 0x4274
+#define GEN8_GTCR_INVALIDATE (1<<0)
+
/* intel_guc_loader.c */
extern int intel_guc_load_ucode(struct drm_device *dev, bool wait);
extern void intel_guc_ucode_fini(struct drm_device *dev);
@@ -106,4 +149,13 @@ int guc_scheduler_enable(struct drm_device *dev);
void guc_scheduler_disable(struct drm_device *dev);
bool sanitize_enable_guc_scheduling(struct drm_device *dev);
+/* intel_guc_client.c */
+struct i915_guc_client*
+i915_guc_client_alloc(struct drm_device *dev, u32 priority);
+void i915_guc_client_free(struct drm_device *dev,
+ struct i915_guc_client *client);
+int i915_guc_client_submit(struct i915_guc_client *client,
+ struct intel_context *ctx,
+ struct intel_engine_cs *ring);
+
#endif
diff --git a/drivers/gpu/drm/i915/intel_guc_client.c b/drivers/gpu/drm/i915/intel_guc_client.c
new file mode 100644
index 0000000..f7672ff
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_client.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include "i915_drv.h"
+#include "intel_guc.h"
+
+/*
+ * Read the GuC command/status register (SOFT_SCRATCH_0).
+ *
+ * The raw register value is always stored in *status. Returns true if
+ * GUC2HOST_IS_RESPONSE() classifies that value as a response rather than
+ * a command.
+ */
+static inline bool i915_guc_get_response(struct drm_i915_private *dev_priv,
+ u32 *status)
+{
+ u32 val = I915_READ(SOFT_SCRATCH(0));
+ *status = val;
+ return GUC2HOST_IS_RESPONSE(val);
+}
+
+/*
+ * Send an action to the GuC and wait for its reply.
+ *
+ * With host2guc_lock held, data[0..len-1] is written to
+ * SOFT_SCRATCH(0..len-1), the HOST2GUC interrupt is triggered and
+ * SOFT_SCRATCH(0) is polled until it holds a response or the wait gives up.
+ *
+ * @data: action words; data[0] is reported as the action code on failure
+ * @len: number of words, must be in the range 1..15
+ *
+ * Returns -EINVAL for a bad @len. Otherwise returns the wait_for_atomic()
+ * result when the status is GUC2HOST_STATUS_SUCCESS; for any other status
+ * returns -ETIMEDOUT if the wait timed out, or -EIO if the GuC answered
+ * with a failure code.
+ */
+static int intel_guc_action(struct intel_guc *guc, u32 *data, u32 len)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ u32 status;
+ int i;
+ int ret;
+
+ if (WARN_ON(len < 1 || len > 15))
+ return -EINVAL;
+
+ spin_lock(&dev_priv->guc.host2guc_lock);
+
+ for (i = 0; i < len; i++)
+ I915_WRITE(SOFT_SCRATCH(i), data[i]);
+
+ /* Read back the last register written before raising the interrupt */
+ POSTING_READ(SOFT_SCRATCH(i - 1));
+
+ I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER);
+
+ /* NOTE(review): the timeout argument 10 is presumably in ms - confirm */
+ ret = wait_for_atomic(i915_guc_get_response(dev_priv, &status), 10);
+ if (status != GUC2HOST_STATUS_SUCCESS) {
+ /* Either the GuC did not respond, which is a TIMEOUT,
+ * or it returned a failure code. */
+ if (ret != -ETIMEDOUT)
+ ret = -EIO;
+
+ DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d "
+ "status=0x%08X response=0x%08X\n",
+ data[0], ret, status,
+ I915_READ(SOFT_SCRATCH(15)));
+ }
+
+ spin_unlock(&dev_priv->guc.host2guc_lock);
+
+ return ret;
+}
+
+/*
+ * Return context descriptor index @id to the guc->ctx_ids allocator.
+ * The removal is done with host2guc_lock held.
+ */
+static void guc_release_ctx_desc(struct intel_guc *guc, uint32_t id)
+{
+ spin_lock(&guc->host2guc_lock);
+ ida_simple_remove(&guc->ctx_ids, id);
+ spin_unlock(&guc->host2guc_lock);
+}
+
+/*
+ * Allocate a context descriptor index in [0, MAX_GUC_GPU_CONTEXTS).
+ *
+ * ida_simple_get() serialises access to the IDA internally and, with
+ * GFP_KERNEL, may sleep while allocating memory. It therefore must not be
+ * called with the host2guc_lock spinlock held, and needs no extra locking.
+ *
+ * Returns the new index, or INVALID_CTX_ID if none could be allocated.
+ */
+static uint32_t guc_assign_ctx_desc(struct intel_guc *guc)
+{
+ int id;
+
+ id = ida_simple_get(&guc->ctx_ids, 0,
+ MAX_GUC_GPU_CONTEXTS, GFP_KERNEL);
+
+ return (id < 0) ? INVALID_CTX_ID : (uint32_t)id;
+}
+
+/*
+ * Mark doorbell @id as free again by clearing its bit in
+ * guc->doorbell_bitmap, with host2guc_lock held.
+ */
+static void guc_release_doorbell(struct intel_guc *guc, uint16_t id)
+{
+ spin_lock(&guc->host2guc_lock);
+ bitmap_clear(guc->doorbell_bitmap, id, 1);
+ spin_unlock(&guc->host2guc_lock);
+}
+
+/*
+ * Reserve a free doorbell for a client of the given @priority.
+ *
+ * Clients with priority <= GUC_CTX_PRIORITY_HIGH search the upper half of
+ * the bitmap, [I915_MAX_DOORBELLS / 2, I915_MAX_DOORBELLS); all others
+ * search the lower half, [0, I915_MAX_DOORBELLS / 2). The search and the
+ * bit update happen under host2guc_lock.
+ *
+ * Returns the reserved doorbell id, or INVALID_DOORBELL_ID if the relevant
+ * half is full.
+ */
+static uint16_t guc_assign_doorbell(struct intel_guc *guc, u32 priority)
+{
+ const uint16_t size = I915_MAX_DOORBELLS;
+ uint16_t id;
+
+ spin_lock(&guc->host2guc_lock);
+
+ /* The bitmap is split into two halves - high and normal priority. */
+ if (priority <= GUC_CTX_PRIORITY_HIGH) {
+ id = find_next_zero_bit(guc->doorbell_bitmap, size, size / 2);
+ if (id == size)
+ id = INVALID_DOORBELL_ID;
+ } else {
+ id = find_next_zero_bit(guc->doorbell_bitmap, size / 2, 0);
+ if (id == size / 2)
+ id = INVALID_DOORBELL_ID;
+ }
+
+ if (id != INVALID_DOORBELL_ID)
+ bitmap_set(guc->doorbell_bitmap, id, 1);
+
+ spin_unlock(&guc->host2guc_lock);
+
+ return id;
+}
+
+/*
+ * Pick the cache line, within one page, that the next client's doorbell
+ * will use.
+ *
+ * Returns the byte offset of cache line guc->db_cacheline (using the CPU's
+ * clflush size as the cache line size), then advances guc->db_cacheline
+ * round-robin over the cache lines of a page. Done under host2guc_lock.
+ */
+static off_t guc_select_doorbell_cacheline(struct intel_guc *guc)
+{
+ const int cacheline_size = boot_cpu_data.x86_clflush_size;
+ const int cacheline_per_page = PAGE_SIZE / cacheline_size;
+ off_t offset;
+
+ spin_lock(&guc->host2guc_lock);
+
+ /* Doorbell uses single cache line */
+ offset = cacheline_size * guc->db_cacheline;
+
+ /* Moving to next cache line to reduce contention */
+ guc->db_cacheline = (guc->db_cacheline + 1) % cacheline_per_page;
+
+ spin_unlock(&guc->host2guc_lock);
+
+ return offset;
+}
+
+/*
+ * Build the GuC context descriptor for @client and store it in the
+ * context pool.
+ *
+ * The descriptor is assembled on the stack from the client's ids, priority
+ * and the addresses of its doorbell, process descriptor and work queue
+ * inside client_obj, then copied into slot client->ctx_index of
+ * guc->ctx_pool_obj.
+ */
+static void init_ctx_desc(struct intel_guc *guc,
+ struct i915_guc_client *client)
+{
+ struct guc_context_desc desc;
+ struct sg_table *sg;
+
+ memset(&desc, 0, sizeof(desc));
+
+ desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
+ desc.context_id = client->ctx_index;
+ desc.priority = client->priority;
+ desc.engines_used = (1 << RCS) | (1 << VCS) | (1 << BCS) |
+ (1 << VECS) | (1 << VCS2); /* all engines */
+ desc.db_id = client->doorbell_id;
+
+ /*
+ * The CPU address is only needed at certain points, so kmap_atomic on
+ * demand instead of storing it in the ctx descriptor.
+ * XXX: May make debug easier to have it mapped
+ */
+ desc.db_trigger_cpu = 0;
+ desc.db_trigger_uk = client->doorbell_offset +
+ i915_gem_obj_ggtt_offset(client->client_obj);
+ /* The doorbell lives in the first page, so the first sg entry is used */
+ desc.db_trigger_phy = client->doorbell_offset +
+ sg_dma_address(client->client_obj->pages->sgl);
+
+ desc.process_desc = client->proc_desc_offset +
+ i915_gem_obj_ggtt_offset(client->client_obj);
+
+ desc.wq_addr = client->wq_offset +
+ i915_gem_obj_ggtt_offset(client->client_obj);
+
+ desc.wq_size = client->wq_size;
+
+ /*
+ * XXX: Take LRCs from an existing intel_context if this is not an
+ * IsKMDCreatedContext client
+ */
+ desc.desc_private = (uintptr_t)client;
+
+ /* Pool context is pinned already */
+ sg = guc->ctx_pool_obj->pages;
+ /* The pool is indexed by ctx_index, one descriptor per slot */
+ sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
+ sizeof(desc) * client->ctx_index);
+}
+
+/*
+ * Clear @client's slot in the context pool by overwriting it with an
+ * all-zero descriptor.
+ */
+static void fini_ctx_desc(struct intel_guc *guc,
+ struct i915_guc_client *client)
+{
+ struct guc_context_desc desc;
+ struct sg_table *sg;
+
+ memset(&desc, 0, sizeof(desc));
+
+ sg = guc->ctx_pool_obj->pages;
+ sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
+ sizeof(desc) * client->ctx_index);
+}
+
+/*
+ * Initialise the process descriptor that lives in the first page of
+ * @client's object, at client->proc_desc_offset.
+ *
+ * The page is mapped temporarily with kmap_atomic(); the descriptor is
+ * zeroed and then filled with the client's context index, work queue size,
+ * priority and an active work queue status.
+ */
+static void init_proc_desc(struct intel_guc *guc,
+ struct i915_guc_client *client)
+{
+ struct guc_process_desc *desc;
+ void *base;
+
+ base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
+ desc = base + client->proc_desc_offset;
+
+ memset(desc, 0, sizeof(*desc));
+
+ /*
+ * XXX: pDoorbell and WQVBaseAddress are pointers in process address
+ * space for ring3 clients (set them as in mmap_ioctl) or kernel
+ * space for kernel clients (map on demand instead? May make debug
+ * easier to have it mapped).
+ */
+ desc->wq_base_addr = 0;
+ desc->db_base_addr = 0;
+
+ desc->context_id = client->ctx_index;
+ desc->wq_size_bytes = client->wq_size;
+ desc->wq_status = WQ_STATUS_ACTIVE;
+ desc->priority = client->priority;
+
+ kunmap_atomic(base);
+}
+
+/*
+ * Send HOST2GUC_ACTION_ALLOCATE_DOORBELL for @client's context index.
+ * Returns the result of intel_guc_action().
+ */
+static int host2guc_allocate_doorbell(struct intel_guc *guc,
+ struct i915_guc_client *client)
+{
+ u32 data[2];
+
+ data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL;
+ data[1] = client->ctx_index;
+
+ return intel_guc_action(guc, data, 2);
+}
+
+/*
+ * Send HOST2GUC_ACTION_DEALLOCATE_DOORBELL for @client's context index.
+ * Returns the result of intel_guc_action().
+ */
+static int host2guc_release_doorbell(struct intel_guc *guc,
+ struct i915_guc_client *client)
+{
+ u32 data[2];
+
+ data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL;
+ data[1] = client->ctx_index;
+
+ return intel_guc_action(guc, data, 2);
+}
+
+/*
+ * Initialise @client's doorbell cache line in the first page of its object:
+ * db_status is set to 1 and the cookie is reset to 0.
+ * NOTE(review): db_status 1 presumably means "enabled" - confirm against
+ * the guc_doorbell_info definition.
+ */
+static void init_doorbell(struct intel_guc *guc,
+ struct i915_guc_client *client)
+{
+ struct guc_doorbell_info *doorbell;
+ void *base;
+
+ base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
+ doorbell = base + client->doorbell_offset;
+
+ doorbell->db_status = 1;
+ doorbell->cookie = 0;
+
+ kunmap_atomic(base);
+}
+
+/*
+ * Turn off @client's doorbell.
+ *
+ * Clears db_status in the doorbell cache line, clears GEN8_DRB_VALID in the
+ * doorbell's lower register (warning if the bit reads back as still set),
+ * then zeroes both the upper and lower doorbell registers.
+ */
+static void disable_doorbell(struct intel_guc *guc,
+ struct i915_guc_client *client)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ struct guc_doorbell_info *doorbell;
+ void *base;
+ int drbreg = GEN8_DRBREGL(client->doorbell_id);
+ int value;
+
+ base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
+ doorbell = base + client->doorbell_offset;
+
+ doorbell->db_status = 0;
+
+ kunmap_atomic(base);
+
+ I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID);
+
+ /* Read back to check that the valid bit really got cleared */
+ value = I915_READ(drbreg);
+ WARN_ON((value & GEN8_DRB_VALID) != 0);
+
+ I915_WRITE(GEN8_DRBREGU(client->doorbell_id), 0);
+ I915_WRITE(drbreg, 0);
+
+ /* XXX: wait for any interrupts */
+ /* XXX: wait for workqueue to drain */
+}
+
+/*
+ * Tear down a GuC client and free it.
+ *
+ * Safe to call with a NULL @client and with a partially constructed one:
+ * the doorbell, the backing object and the context descriptor are each
+ * released only if they were actually set up. This is also the error path
+ * of i915_guc_client_alloc().
+ */
+void i915_guc_client_free(struct drm_device *dev,
+ struct i915_guc_client *client)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_guc *guc = &dev_priv->guc;
+
+ if (!client)
+ return;
+
+ if (client->doorbell_id != INVALID_DOORBELL_ID) {
+ disable_doorbell(guc, client);
+ host2guc_release_doorbell(guc, client);
+ guc_release_doorbell(guc, client->doorbell_id);
+ }
+
+ /*
+ * XXX: wait for any outstanding submissions before freeing memory.
+ * Be sure to drop any locks
+ */
+
+ /*
+ * client_obj is still NULL when i915_guc_client_alloc() fails before
+ * the object has been allocated, so only release a real object.
+ */
+ if (client->client_obj)
+ intel_guc_release_gem_obj(client->client_obj);
+
+ if (client->ctx_index != INVALID_CTX_ID) {
+ fini_ctx_desc(guc, client);
+ guc_release_ctx_desc(guc, client->ctx_index);
+ }
+
+ kfree(client);
+}
+
+/*
+ * Create a GuC client with the given @priority.
+ *
+ * Allocates the client structure, a context descriptor index, a backing
+ * object (one doorbell/process descriptor page plus the work queue pages)
+ * and a doorbell id, initialises the context descriptor, process descriptor
+ * and doorbell, and finally asks the GuC to allocate the doorbell.
+ *
+ * Returns the new client, or NULL on any failure; in that case everything
+ * set up so far is undone through i915_guc_client_free().
+ */
+struct i915_guc_client*
+i915_guc_client_alloc(struct drm_device *dev, u32 priority)
+{
+ struct i915_guc_client *client;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_guc *guc = &dev_priv->guc;
+ struct drm_i915_gem_object *obj;
+
+ client = kzalloc(sizeof(*client), GFP_KERNEL);
+ if (!client)
+ return NULL;
+
+ /* Mark the doorbell as unassigned so the error path can tell */
+ client->doorbell_id = INVALID_DOORBELL_ID;
+ client->priority = priority;
+
+ client->ctx_index = guc_assign_ctx_desc(guc);
+ if (client->ctx_index == INVALID_CTX_ID)
+ goto err;
+
+ /*
+ * The first page holds the doorbell and the process descriptor. The
+ * two following pages are the work queue.
+ */
+ obj = intel_guc_allocate_gem_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE);
+ if (!obj)
+ goto err;
+
+ client->client_obj = obj;
+ client->wq_offset = GUC_DB_SIZE;
+ client->wq_size = GUC_WQ_SIZE;
+
+ client->doorbell_offset = guc_select_doorbell_cacheline(guc);
+
+ /*
+ * Since the doorbell only requires a single cacheline, we can save
+ * space by putting the application process descriptor in the same
+ * page. Use the half of the page that doesn't include the doorbell.
+ */
+ if (client->doorbell_offset >= (GUC_DB_SIZE / 2))
+ client->proc_desc_offset = 0;
+ else
+ client->proc_desc_offset = (GUC_DB_SIZE / 2);
+
+ client->doorbell_id = guc_assign_doorbell(guc, client->priority);
+ if (client->doorbell_id == INVALID_DOORBELL_ID)
+ /* XXX: evict a doorbell instead */
+ goto err;
+
+ init_ctx_desc(guc, client);
+ init_proc_desc(guc, client);
+ init_doorbell(guc, client);
+
+ /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
+ /* XXX: Any cache flushes needed? General domain mgmt calls? */
+
+ if (host2guc_allocate_doorbell(guc, client))
+ goto err;
+
+ return client;
+
+err:
+ i915_guc_client_free(dev, client);
+ return NULL;
+}
+
+/*
+ * Submit work for @ctx on @ring through @client.
+ *
+ * Placeholder in this patch: no work is queued and 0 is always returned.
+ */
+int i915_guc_client_submit(struct i915_guc_client *client,
+ struct intel_context *ctx,
+ struct intel_engine_cs *ring)
+{
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index c266be3..58e1e33 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -233,12 +233,19 @@ static int guc_load_ucode(struct drm_device *dev)
/* Set MMIO/WA for GuC init */
+ I915_WRITE(DRBMISC1, DOORBELL_ENABLE);
+
/* Enable MIA caching. GuC clock gating is disabled. */
I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE);
/* WaC6DisallowByGfxPause*/
I915_WRITE(GEN6_GFXPAUSE, 0x30FFF);
+ if (IS_SKYLAKE(dev))
+ I915_WRITE(GEN9_GT_PM_CONFIG, GEN8_GT_DOORBELL_ENABLE);
+ else
+ I915_WRITE(GEN8_GT_PM_CONFIG, GEN8_GT_DOORBELL_ENABLE);
+
if (IS_GEN9(dev)) {
/* DOP Clock Gating Enable for GuC clocks */
I915_WRITE(GEN7_MISCCPCTL, (GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
@@ -255,13 +262,6 @@ static int guc_load_ucode(struct drm_device *dev)
set_guc_init_params(dev_priv);
- /* FIXME: !UPSTREAM - I don't have real keys, so we need to disable the
- * authentication. This can only work if the part is fused in a special
- * configuration. Therefore, even if it leaked externally, it won't be
- * detrimental to security
- */
- I915_WRITE(0xc068, 0x3);
-
ret = ucode_dma_xfer_sync(dev_priv);
/* We can free the object pages now, and we would, except we might as
diff --git a/drivers/gpu/drm/i915/intel_guc_scheduler.c b/drivers/gpu/drm/i915/intel_guc_scheduler.c
index ed744fb..a78b4b3 100644
--- a/drivers/gpu/drm/i915/intel_guc_scheduler.c
+++ b/drivers/gpu/drm/i915/intel_guc_scheduler.c
@@ -36,6 +36,8 @@ void guc_scheduler_fini(struct drm_device *dev)
if (ctx_pool) {
intel_guc_release_gem_obj(ctx_pool);
guc->ctx_pool_obj = NULL;
+
+ ida_destroy(&guc->ctx_ids);
}
}
@@ -60,21 +62,44 @@ int guc_scheduler_init(struct drm_device *dev)
if (!guc->ctx_pool_obj)
return -ENOMEM;
+ spin_lock_init(&dev_priv->guc.host2guc_lock);
+
+ ida_init(&guc->ctx_ids);
+
+ memset(guc->doorbell_bitmap, 0, sizeof(guc->doorbell_bitmap));
+ guc->db_cacheline = 0;
+
return 0;
}
int guc_scheduler_enable(struct drm_device *dev)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_guc *guc = &dev_priv->guc;
+
if (!i915.enable_guc_scheduling)
return 0;
- /* TODO: placeholder for guc scheduler enabling */
+ /* client for execbuf submission */
+ guc->execbuf_client =
+ i915_guc_client_alloc(dev, GUC_CTX_PRIORITY_NORMAL);
+ if (!guc->execbuf_client) {
+ DRM_ERROR("Failed to create execbuf guc_client\n");
+ return -ENOMEM;
+ }
+
return 0;
}
void guc_scheduler_disable(struct drm_device *dev)
{
- /* TODO: placeholder for guc scheduler disabling */
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_guc *guc = &dev_priv->guc;
+
+ if (guc->execbuf_client) {
+ i915_guc_client_free(dev, guc->execbuf_client);
+ guc->execbuf_client = NULL;
+ }
}
bool sanitize_enable_guc_scheduling(struct drm_device *dev)
--
1.9.1
More information about the Intel-gfx
mailing list