[PATCH] drm/amdgpu: Add firmware VRAM reservation from vbios

Wed Sep 27 08:50:36 UTC 2017

Hi Alex,

OK, I'll split it to three.

I need to set the placement when creating bo, otherwise the driver may report a pin failure. According to my test result, the pin failure will happen when there is overlap between original place and pined place. 

For example, I need to pin 256K memory to a specified place.  At the very beginning, after bo was created, the bo offset should be 0, and size is 256K.
If I just need to pin the bo to 0-256K on the VRAM through amdgpu_bo_pin_restricted(), it will be all right. 
But If I need to pin the bo to 64K-320K, the driver will report a pin failure. In this case, there is not only an eviction, but also has overlaps between original address and pinned address(64K-256K).
Further, if I need to pin the bo to 256K-512K, it also can pass. 

I don't know whether it is a bug or just not supported by the TTM, but putting the bo to the specified position when creating can fix this issue.

Thank & Regards,
Horace.

-----Original Message-----
From: Deucher, Alexander 
Sent: Tuesday, September 26, 2017 10:18 PM
To: Chen, Horace <Horace.Chen at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Chen, Horace <Horace.Chen at amd.com>
Subject: RE: [PATCH] drm/amdgpu: Add firmware VRAM reservation from vbios

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
> Of Horace Chen
> Sent: Tuesday, September 26, 2017 6:12 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Chen, Horace
> Subject: [PATCH] drm/amdgpu: Add firmware VRAM reservation from vbios
> 
>  VBIOS has a structure VRAM_UsageByFirmware to tell the driver that
> VBIOS need reserve some memory at the specified place on the VRAM.
>  SR-IOV need to enable this feature to exchange data between PF
> and VF.
>  So add two new method to create and free bo at the exact place.
>  And add a new AMDGPU_GEM_CREATE_VRAM_SPECIFIED flag to tell
> function to set the ttm_placement to the exact place to avoid
> eviction whenpinnning.

First, this patch needs to be split up into 3 patches:
1. update atombios.h
2. Updates to support fixed memory placements
3. add the vbios reserved area

Some additional comments below.

> 
> Signed-off-by: Horace Chen <horace.chen at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h          |  17 +++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c |  18 ++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   |  58 ++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 109
> ++++++++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h   |   8 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c      |   9 +++
>  drivers/gpu/drm/amd/include/atombios.h       |   1 +
>  include/uapi/drm/amdgpu_drm.h                |   2 +
>  8 files changed, 219 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index a5b0b67..ac53dba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1380,6 +1380,21 @@ struct amdgpu_atcs {
>  };
> 
>  /*
> + * Firmware VRAM reservation
> + */
> +#define KB_TO_BYTE(x)                           ((x) << 10)
> +#define BYTE_TO_KB(x)                           ((x) >> 10)
> +
> +struct amdgpu_fw_vram_usage {
> +	uint32_t start_address_in_kb;
> +	uint16_t size_in_kb;
> +	struct amdgpu_bo *reserved_bo;
> +	volatile uint32_t *va;
> +};
> +
> +int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev);
> +
> +/*
>   * CGS
>   */
>  struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device
> *adev);
> @@ -1588,6 +1603,8 @@ struct amdgpu_device {
>  	struct delayed_work     late_init_work;
> 
>  	struct amdgpu_virt	virt;
> +	/* firmware VRAM reservation */
> +	struct amdgpu_fw_vram_usage fw_vram_usage;
> 
>  	/* link all shadow bo */
>  	struct list_head                shadow_list;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
> index ce44358..56bfddf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
> @@ -1807,6 +1807,8 @@ int amdgpu_atombios_allocate_fb_scratch(struct
> amdgpu_device *adev)
>  	uint16_t data_offset;
>  	int usage_bytes = 0;
>  	struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage;
> +	uint32_t start_addr;
> +	uint16_t size;
> 
>  	if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL,
> &data_offset)) {
>  		firmware_usage = (struct
> _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
> @@ -1815,7 +1817,21 @@ int amdgpu_atombios_allocate_fb_scratch(struct
> amdgpu_device *adev)
>  			  le32_to_cpu(firmware_usage-
> >asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
>  			  le16_to_cpu(firmware_usage-
> >asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
> 
> -		usage_bytes = le16_to_cpu(firmware_usage-
> >asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
> +		start_addr = firmware_usage-
> >asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware;
> +		size = firmware_usage-
> >asFirmwareVramReserveInfo[0].usFirmwareUseInKb;
> +
> +		if ((uint32_t)(start_addr &
> ATOM_VRAM_OPERATION_FLAGS_MASK) ==
> +
> 	(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATIO
> N <<
> +			ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
> +			/* Firmware request VRAM reservation for SR-IOV */
> +			adev->fw_vram_usage.start_address_in_kb =
> +			    start_addr &
> (~ATOM_VRAM_OPERATION_FLAGS_MASK);
> +			adev->fw_vram_usage.size_in_kb = size;
> +			/* Use the default scratch size */
> +			usage_bytes = 0;
> +		} else {
> +			usage_bytes = le16_to_cpu(firmware_usage-
> >asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
> +		}
>  	}
>  	ctx->scratch_size_bytes = 0;
>  	if (usage_bytes == 0)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index a86d856..51bee68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -658,6 +658,62 @@ void amdgpu_gart_location(struct amdgpu_device
> *adev, struct amdgpu_mc *mc)
>  }
> 
>  /*
> + * Firmware Reservation function
> + */
> +/**
> + * amdgpu_fw_reserve_vram_fini - free fw reserved vram
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * free fw reserved vram if it is reserved.
> + */
> +void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev)
> +{
> +
> +	if (adev->fw_vram_usage.reserved_bo == NULL)
> +		return;
> +
> +	amdgpu_bo_free_vram_specified(&adev-
> >fw_vram_usage.reserved_bo, NULL,
> +		(void **)&adev->fw_vram_usage.va);
> +	adev->fw_vram_usage.reserved_bo = NULL;
> +}
> +
> +/**
> + * amdgpu_fw_reserve_vram_fini - create bo vram reservation from fw
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * create bo vram reservation from fw.
> + */
> +int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev)
> +{
> +	int r = 0;
> +	u64 gpu_addr;
> +	u64 vram_size = adev->mc.visible_vram_size;
> +
> +	adev->fw_vram_usage.va = NULL;
> +	adev->fw_vram_usage.reserved_bo = NULL;
> +
> +	if (adev->fw_vram_usage.size_in_kb > 0 &&
> +		KB_TO_BYTE((u64)adev->fw_vram_usage.size_in_kb) <=
> vram_size) {
> +
> +		r = amdgpu_bo_create_vram_specified(adev,
> +			    KB_TO_BYTE((u64)adev-
> >fw_vram_usage.start_address_in_kb),
> +			    KB_TO_BYTE((u64)adev-
> >fw_vram_usage.size_in_kb),
> +			    PAGE_SIZE,
> +			    AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
> |
> +			    AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
> +			    &adev->fw_vram_usage.reserved_bo,
> +			    &gpu_addr, (void **)&adev->fw_vram_usage.va);
> +		if (r)
> +			dev_info(adev->dev, "Reserve VRAM for firmware
> failed");
> +	}
> +
> +	return r;
> +}
> +
> +
> +/*
>   * GPU helpers function.
>   */
>  /**
> @@ -2055,6 +2111,7 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  	adev->vm_manager.vm_pte_num_rings = 0;
>  	adev->gart.gart_funcs = NULL;
>  	adev->fence_context =
> dma_fence_context_alloc(AMDGPU_MAX_RINGS);
> +	adev->fw_vram_usage.size_in_kb = 0;
> 
>  	adev->smc_rreg = &amdgpu_invalid_rreg;
>  	adev->smc_wreg = &amdgpu_invalid_wreg;
> @@ -2331,6 +2388,7 @@ void amdgpu_device_fini(struct amdgpu_device
> *adev)
>  	/* evict vram memory */
>  	amdgpu_bo_evict_vram(adev);
>  	amdgpu_ib_pool_fini(adev);
> +	amdgpu_fw_reserve_vram_fini(adev);
>  	amdgpu_fence_driver_fini(adev);
>  	amdgpu_fbdev_fini(adev);
>  	r = amdgpu_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6982bae..8602784 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -87,6 +87,14 @@ void amdgpu_ttm_placement_from_domain(struct
> amdgpu_bo *abo, u32 domain)
> 
>  		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
>  			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
> +
> +		if (flags & AMDGPU_GEM_CREATE_VRAM_SPECIFIED) {
> +			/*  specified vram must be contiguous */
> +			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
> +			places[c].fpfn = abo->start_address >> PAGE_SHIFT;
> +			places[c].lpfn =
> +			    (abo->start_address + abo->gem_base.size) >>
> PAGE_SHIFT;
> +		}

Do we actually need this?  Won't amdgpu_bo_pin_restructed take care of this without the need for the extra flag and code here?

Alex

>  		c++;
>  	}
> 
> @@ -284,6 +292,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo
> **bo, u64 *gpu_addr,
>  }
> 
>  static int amdgpu_bo_do_create(struct amdgpu_device *adev,
> +			       u64 start_addr,
>  			       unsigned long size, int byte_align,
>  			       bool kernel, u32 domain, u64 flags,
>  			       struct sg_table *sg,
> @@ -300,6 +309,7 @@ static int amdgpu_bo_do_create(struct
> amdgpu_device *adev,
> 
>  	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
>  	size = ALIGN(size, PAGE_SIZE);
> +	start_addr = ALIGN(start_addr, PAGE_SIZE);
> 
>  	if (kernel) {
>  		type = ttm_bo_type_kernel;
> @@ -333,6 +343,7 @@ static int amdgpu_bo_do_create(struct
> amdgpu_device *adev,
>  	if (!kernel && bo->allowed_domains ==
> AMDGPU_GEM_DOMAIN_VRAM)
>  		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
> 
> +	bo->start_address = start_addr;
>  	bo->flags = flags;
> 
>  #ifdef CONFIG_X86_32
> @@ -418,6 +429,100 @@ static int amdgpu_bo_do_create(struct
> amdgpu_device *adev,
>  	return r;
>  }
> 
> +/**
> + * amdgpu_bo_create_vram_specified -
> + *    create BO at the specified place on the VRAM.
> + *
> + * @adev: amdgpu device object
> + * @start_addr: start offset of the new BO
> + * @size: size for the new BO
> + * @byte_align: alignment for the new BO
> + * @flags: addition flags for the BO
> + * @bo_ptr: resulting BO
> + * @gpu_addr: GPU addr of the pinned BO
> + * @cpu_addr: optional CPU address mapping
> + *
> + * Allocates and pins a BO at the specified place on the VRAM.
> + *
> + * Returns 0 on success, negative error code otherwise.
> + */
> +int amdgpu_bo_create_vram_specified(struct amdgpu_device *adev,
> +		     u64 start_addr, unsigned long size, int byte_align,
> +		     u64 flags, struct amdgpu_bo **bo_ptr,
> +		     u64 *gpu_addr, void **cpu_addr)
> +{
> +	int r;
> +	/* specified memory must be in contiguous*/
> +	flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
> +	flags |= AMDGPU_GEM_CREATE_VRAM_SPECIFIED;
> +
> +	r = amdgpu_bo_do_create(adev, start_addr, size, byte_align, true,
> +		AMDGPU_GEM_DOMAIN_VRAM, flags, NULL, NULL, 0,
> bo_ptr);
> +	if (r)
> +		return r;
> +
> +	r = amdgpu_bo_reserve(*bo_ptr, false);
> +	if (r)
> +		goto error_reserve;
> +	r = amdgpu_bo_pin_restricted(*bo_ptr,
> +		AMDGPU_GEM_DOMAIN_VRAM, start_addr, (start_addr +
> size),
> +		gpu_addr);
> +	if (r)
> +		goto error_pin;
> +	if (cpu_addr) {
> +		r = amdgpu_bo_kmap(*bo_ptr,
> +					cpu_addr);
> +		if (r)
> +			goto error_kmap;
> +	}
> +
> +	amdgpu_bo_unreserve(*bo_ptr);
> +
> +	return r;
> +error_kmap:
> +	amdgpu_bo_unpin(*bo_ptr);
> +error_pin:
> +	amdgpu_bo_unreserve(*bo_ptr);
> +error_reserve:
> +	amdgpu_bo_unref(bo_ptr);
> +	if (cpu_addr)
> +		*cpu_addr = NULL;
> +	if (gpu_addr)
> +		*gpu_addr = 0;
> +	return r;
> +}
> +
> +/**
> + * amdgpu_bo_free_vram_specified - free BO for specified vram
> + *
> + * @bo: amdgpu BO to free
> + * @gpu_addr : gpu address
> + * @cpu_addr : cpu address if mapped
> + *
> + * unmaps and unpin a BO for specified vram.
> + */
> +void amdgpu_bo_free_vram_specified(struct amdgpu_bo **bo, u64
> *gpu_addr,
> +			   void **cpu_addr)
> +{
> +	if (*bo == NULL)
> +		return;
> +
> +	if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
> +		if (cpu_addr)
> +			amdgpu_bo_kunmap(*bo);
> +		amdgpu_bo_unpin(*bo);
> +		amdgpu_bo_unreserve(*bo);
> +	}
> +	amdgpu_bo_unref(bo);
> +
> +	if (gpu_addr)
> +		*gpu_addr = 0;
> +
> +	if (cpu_addr)
> +		*cpu_addr = NULL;
> +}
> +
> +
>  static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
>  				   unsigned long size, int byte_align,
>  				   struct amdgpu_bo *bo)
> @@ -427,7 +532,7 @@ static int amdgpu_bo_create_shadow(struct
> amdgpu_device *adev,
>  	if (bo->shadow)
>  		return 0;
> 
> -	r = amdgpu_bo_do_create(adev, size, byte_align, true,
> +	r = amdgpu_bo_do_create(adev, 0, size, byte_align, true,
>  				AMDGPU_GEM_DOMAIN_GTT,
>  				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>  				AMDGPU_GEM_CREATE_SHADOW,
> @@ -457,7 +562,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>  	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
>  	int r;
> 
> -	r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain,
> +	r = amdgpu_bo_do_create(adev, 0, size, byte_align, kernel, domain,
>  				parent_flags, sg, resv, init_value, bo_ptr);
>  	if (r)
>  		return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 39b6bf6..88f2c5a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -78,6 +78,8 @@ struct amdgpu_bo {
>  	void				*metadata;
>  	u32				metadata_size;
>  	unsigned			prime_shared_count;
> +	/* specified VRAM place */
> +	u64				start_address;
>  	/* list of all virtual address to which this bo is associated to */
>  	struct list_head		va;
>  	/* Constant after initialization */
> @@ -205,6 +207,12 @@ int amdgpu_bo_create_kernel(struct
> amdgpu_device *adev,
>  			    u64 *gpu_addr, void **cpu_addr);
>  void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
>  			   void **cpu_addr);
> +int amdgpu_bo_create_vram_specified(struct amdgpu_device *adev,
> +		     u64 start_addr, unsigned long size, int byte_align,
> +		     u64 flags, struct amdgpu_bo **bo_ptr,
> +		     u64 *gpu_addr, void **cpu_addr);
> +void amdgpu_bo_free_vram_specified(struct amdgpu_bo **bo, u64
> *gpu_addr,
> +			   void **cpu_addr);
>  int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
>  void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
>  void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 1086f03..a5253cf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1256,6 +1256,15 @@ int amdgpu_ttm_init(struct amdgpu_device
> *adev)
>  	/* Change the size here instead of the init above so only lpfn is
> affected */
>  	amdgpu_ttm_set_active_vram_size(adev, adev-
> >mc.visible_vram_size);
> 
> +	/*
> +	 *The reserved vram for firmware must be pinned to the specified
> +	 *place on the VRAM, so reserve it early.
> +	 */
> +	r = amdgpu_fw_reserve_vram_init(adev);
> +	if (r) {
> +		return r;
> +	}
> +
>  	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size,
> PAGE_SIZE,
>  				    AMDGPU_GEM_DOMAIN_VRAM,
>  				    &adev->stolen_vga_memory,
> diff --git a/drivers/gpu/drm/amd/include/atombios.h
> b/drivers/gpu/drm/amd/include/atombios.h
> index 181a2c3..f696bbb 100644
> --- a/drivers/gpu/drm/amd/include/atombios.h
> +++ b/drivers/gpu/drm/amd/include/atombios.h
> @@ -4292,6 +4292,7 @@ typedef struct _ATOM_DPCD_INFO
>  #define ATOM_VRAM_OPERATION_FLAGS_SHIFT        30
>  #define   ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION   0x1
>  #define   ATOM_VRAM_BLOCK_NEEDS_RESERVATION      0x0
> +#define   ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION 0x2
> 
> 
> /**********************************************************
> *************************/
>  // Structure used in VRAM_UsageByFirmwareTable
> diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h
> index 9f5bd97..8c213f0 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -91,6 +91,8 @@ extern "C" {
>  #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
>  /* Flag that BO is always valid in this VM */
>  #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
> +/* Flag that allocating the BO at the specified place on VRAM*/
> +#define AMDGPU_GEM_CREATE_VRAM_SPECIFIED	(1 << 7)

We don't want to expose this to userspace.

> 
>  struct drm_amdgpu_gem_create_in  {
>  	/** the requested memory size */
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx