[Intel-gfx] [PATCH 2/4] drm/i915/guc: put all guc objects in lmem when available
John Harrison
john.c.harrison at intel.com
Fri Aug 6 18:43:39 UTC 2021
On 8/2/2021 22:11, Matthew Brost wrote:
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>
> The firmware binary has to be loaded from lmem, and the recommendation is
> to put all other GuC objects there as well. Note that we don't fall back
> to system memory if an lmem allocation fails: all of these objects are
> allocated during driver load, and if lmem is already failing at that
> point something is seriously wrong with the system, so there is no point
> in trying to handle the failure.
>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg at intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 26 ++++++++
> drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 ++
> drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 ++-
> drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 11 +++-
> drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 ++++-
> drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 75 +++++++++++++++++++++--
> 6 files changed, 127 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> index eb345305dc52..034226c5d4d0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> @@ -103,6 +103,32 @@ __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
> size, page_size, flags);
> }
>
> +struct drm_i915_gem_object *
> +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
> + const void *data, size_t size)
> +{
> + struct drm_i915_gem_object *obj;
> + void *map;
> +
> + obj = i915_gem_object_create_lmem(i915,
> + round_up(size, PAGE_SIZE),
> + I915_BO_ALLOC_CONTIGUOUS);
> + if (IS_ERR(obj))
> + return obj;
> +
> + map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
> + if (IS_ERR(map)) {
> + i915_gem_object_put(obj);
> + return map;
> + }
> +
> + memcpy(map, data, size);
> +
> + i915_gem_object_unpin_map(obj);
> +
> + return obj;
> +}
> +
> struct drm_i915_gem_object *
> i915_gem_object_create_lmem(struct drm_i915_private *i915,
> resource_size_t size,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
> index 4ee81fc66302..1b88ea13435c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
> @@ -23,6 +23,10 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
>
> bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
>
> +struct drm_i915_gem_object *
> +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
> + const void *data, size_t size);
> +
> struct drm_i915_gem_object *
> __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
> resource_size_t size,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> index 979128e28372..55160d3e401a 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> @@ -3,6 +3,7 @@
> * Copyright © 2014-2019 Intel Corporation
> */
>
> +#include "gem/i915_gem_lmem.h"
> #include "gt/intel_gt.h"
> #include "gt/intel_gt_irq.h"
> #include "gt/intel_gt_pm_irq.h"
> @@ -630,7 +631,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
> u64 flags;
> int ret;
>
> - obj = i915_gem_object_create_shmem(gt->i915, size);
> + if (HAS_LMEM(gt->i915))
> + obj = i915_gem_object_create_lmem(gt->i915, size,
> + I915_BO_ALLOC_CPU_CLEAR |
> + I915_BO_ALLOC_CONTIGUOUS);
> + else
> + obj = i915_gem_object_create_shmem(gt->i915, size);
> +
> if (IS_ERR(obj))
> return ERR_CAST(obj);
>
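Not a blocker, but the same HAS_LMEM() if/else pattern shows up again in
intel_uc_fw_fetch() below (with the _from_data variants). If it spreads any
further, a small helper might read better - something along these lines
(hypothetical name, untested sketch):

	static struct drm_i915_gem_object *
	uc_create_object(struct drm_i915_private *i915, u32 size)
	{
		/* GuC objects go in lmem when available, no shmem fallback */
		if (HAS_LMEM(i915))
			return i915_gem_object_create_lmem(i915, size,
							   I915_BO_ALLOC_CPU_CLEAR |
							   I915_BO_ALLOC_CONTIGUOUS);

		return i915_gem_object_create_shmem(i915, size);
	}
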
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> index 76fe766ad1bc..962be0c12208 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> @@ -41,7 +41,7 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
> }
>
> /* Copy RSA signature from the fw image to HW for verification */
> -static void guc_xfer_rsa(struct intel_uc_fw *guc_fw,
> +static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
> struct intel_uncore *uncore)
> {
> u32 rsa[UOS_RSA_SCRATCH_COUNT];
> @@ -49,10 +49,13 @@ static void guc_xfer_rsa(struct intel_uc_fw *guc_fw,
> int i;
>
> copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa));
> - GEM_BUG_ON(copied < sizeof(rsa));
> + if (copied < sizeof(rsa))
> + return -ENOMEM;
>
> for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
> intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]);
> +
> + return 0;
> }
>
> /*
> @@ -141,7 +144,9 @@ int intel_guc_fw_upload(struct intel_guc *guc)
> * by the DMA engine in one operation, whereas the RSA signature is
> * loaded via MMIO.
> */
> - guc_xfer_rsa(&guc->fw, uncore);
> + ret = guc_xfer_rsa(&guc->fw, uncore);
> + if (ret)
> + goto out;
>
> /*
> * Current uCode expects the code to be loaded at 8k; locations below
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> index fc5387b410a2..ff4b6869b80b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> @@ -87,17 +87,25 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc)
> vma->obj, true));
> if (IS_ERR(vaddr)) {
> i915_vma_unpin_and_release(&vma, 0);
> - return PTR_ERR(vaddr);
> + err = PTR_ERR(vaddr);
> + goto unpin_out;
> }
>
> copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
> - GEM_BUG_ON(copied < huc->fw.rsa_size);
> -
> i915_gem_object_unpin_map(vma->obj);
>
> + if (copied < huc->fw.rsa_size) {
> + err = -ENOMEM;
> + goto unpin_out;
> + }
> +
> huc->rsa_data = vma;
>
> return 0;
> +
> +unpin_out:
> + i915_vma_unpin_and_release(&vma, 0);
> + return err;
> }
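Nit on the error path above: with the new unpin_out label, the explicit
i915_vma_unpin_and_release() inside the IS_ERR(vaddr) branch looks
redundant. If I'm reading i915_vma_unpin_and_release() right, it
fetch_and_zero()s the pointer, so the second call from the label is a
no-op, but dropping the one in the branch would be cleaner (sketch):

	if (IS_ERR(vaddr)) {
		/* unpin_out releases the vma, no need to do it here too */
		err = PTR_ERR(vaddr);
		goto unpin_out;
	}
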
>
> static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index f632dbd32b42..f8cb00ffb506 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -7,6 +7,7 @@
> #include <linux/firmware.h>
> #include <drm/drm_print.h>
>
> +#include "gem/i915_gem_lmem.h"
> #include "intel_uc_fw.h"
> #include "intel_uc_fw_abi.h"
> #include "i915_drv.h"
> @@ -370,7 +371,11 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
> if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
> uc_fw->private_data_size = css->private_data_size;
>
> - obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
> + if (HAS_LMEM(i915))
> + obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
> + else
> + obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
> +
> if (IS_ERR(obj)) {
> err = PTR_ERR(obj);
> goto fail;
> @@ -414,6 +419,7 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
> struct drm_i915_gem_object *obj = uc_fw->obj;
> struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
> struct i915_vma *dummy = &uc_fw->dummy;
> + u32 pte_flags = 0;
>
> dummy->node.start = uc_fw_ggtt_offset(uc_fw);
> dummy->node.size = obj->base.size;
> @@ -424,9 +430,13 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
> GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
>
> /* uc_fw->obj cache domains were not controlled across suspend */
> - drm_clflush_sg(dummy->pages);
> + if (i915_gem_object_has_struct_page(obj))
> + drm_clflush_sg(dummy->pages);
> +
> + if (i915_gem_object_is_lmem(obj))
> + pte_flags |= PTE_LM;
>
> - ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0);
> + ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
> }
>
> static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
> @@ -585,13 +595,68 @@ void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw)
> */
> size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
> {
> - struct sg_table *pages = uc_fw->obj->mm.pages;
> + struct intel_memory_region *mr = uc_fw->obj->mm.region;
> u32 size = min_t(u32, uc_fw->rsa_size, max_len);
> u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size;
> + struct sgt_iter iter;
> + size_t count = 0;
> + int idx;
>
> + /* Called during reset handling, must be atomic [no fs_reclaim] */
> GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw));
>
> - return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset);
> + idx = offset >> PAGE_SHIFT;
> + offset = offset_in_page(offset);
> + if (i915_gem_object_has_struct_page(uc_fw->obj)) {
> + struct page *page;
> +
> + for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
Why can't this just use 'sg_pcopy_to_buffer' as before?
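I.e., for the struct page case, keep the original byte offset (before the
idx/offset_in_page split) and do something like this (untested sketch,
names as already used in this function):

	if (i915_gem_object_has_struct_page(uc_fw->obj)) {
		struct sg_table *pages = uc_fw->obj->mm.pages;

		/* kmap based copy, only valid for struct page backed objects */
		return sg_pcopy_to_buffer(pages->sgl, pages->nents,
					  dst, size, offset);
	}

The sg_miter walk underneath uses atomic kmaps as far as I can tell, so the
atomic-context requirement noted in the comment should still hold. The lmem
path obviously still needs the io_mapping loop.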
John.
> + u32 len = min_t(u32, size, PAGE_SIZE - offset);
> + void *vaddr;
> +
> + if (idx > 0) {
> + idx--;
> + continue;
> + }
> +
> + vaddr = kmap_atomic(page);
> + memcpy(dst, vaddr + offset, len);
> + kunmap_atomic(vaddr);
> +
> + offset = 0;
> + dst += len;
> + size -= len;
> + count += len;
> + if (!size)
> + break;
> + }
> + } else {
> + dma_addr_t addr;
> +
> + for_each_sgt_daddr(addr, iter, uc_fw->obj->mm.pages) {
> + u32 len = min_t(u32, size, PAGE_SIZE - offset);
> + void __iomem *vaddr;
> +
> + if (idx > 0) {
> + idx--;
> + continue;
> + }
> +
> + vaddr = io_mapping_map_atomic_wc(&mr->iomap,
> + addr - mr->region.start);
> + memcpy_fromio(dst, vaddr + offset, len);
> + io_mapping_unmap_atomic(vaddr);
> +
> + offset = 0;
> + dst += len;
> + size -= len;
> + count += len;
> + if (!size)
> + break;
> + }
> + }
> +
> + return count;
> }
>
> /**