[PATCH] drm/amdgpu: Modify alloc_sgt apis to consider size of request
Ramesh Errabolu
Ramesh.Errabolu at amd.com
Sat Feb 13 04:32:18 UTC 2021
The current method that builds the SG table does not allow its users
to request a sub-block of the buffer object. This change modifies the
API signature so that users can specify both the offset and the size
of the requested sub-block.
Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 11 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 9 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 245 +++++++++++++++----
4 files changed, 210 insertions(+), 63 deletions(-)
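
For context (not part of the patch): a minimal sketch of how a caller might
use the reworked interface to export only a sub-block of a VRAM BO. The
wrapper function, its parameter names and the DMA direction are illustrative
assumptions; only the two amdgpu_vram_mgr_* calls reflect the signatures
introduced here.

static int example_map_vram_subrange(struct amdgpu_device *adev,
                                     struct amdgpu_bo *bo,
                                     struct device *peer_dev,
                                     uint64_t offset, uint64_t size)
{
        struct sg_table *sgt;
        int r;

        /* offset/size select the sub-block of the BO to be exported */
        r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, offset, size,
                                      peer_dev, DMA_BIDIRECTIONAL, &sgt);
        if (r)
                return r;

        /* ... hand the DMA addresses in sgt to the peer device ... */

        /* teardown no longer needs the amdgpu device handle */
        amdgpu_vram_mgr_free_sgt(peer_dev, DMA_BIDIRECTIONAL, sgt);
        return 0;
}
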
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 2808d5752de1..b23f44999814 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -326,8 +326,8 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
break;
case TTM_PL_VRAM:
- r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, attach->dev,
- dir, &sgt);
+ r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, 0, 0,
+ attach->dev, dir, &sgt);
if (r)
return ERR_PTR(r);
break;
@@ -356,17 +356,12 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- struct dma_buf *dma_buf = attach->dmabuf;
- struct drm_gem_object *obj = dma_buf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
if (sgt->sgl->page_link) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
} else {
- amdgpu_vram_mgr_free_sgt(adev, attach->dev, dir, sgt);
+ amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ce92768cd146..ad504d0e5b26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -222,8 +222,8 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
* @offset: The offset that drm_mm_node is used for finding.
*
*/
-static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
- uint64_t *offset)
+struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
+ uint64_t *offset)
{
struct drm_mm_node *mm_node = mem->mm_node;
@@ -782,8 +782,8 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_reso
return 0;
}
-static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
- unsigned long page_offset)
+unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+ unsigned long page_offset)
{
uint64_t offset = (page_offset << PAGE_SHIFT);
struct drm_mm_node *mm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 4df4cf2fd4dd..e3e413dbfd72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -116,11 +116,12 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
struct ttm_resource *mem,
+ uint64_t req_offset,
+ uint64_t req_size,
struct device *dev,
enum dma_data_direction dir,
struct sg_table **sgt);
-void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
- struct device *dev,
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
enum dma_data_direction dir,
struct sg_table *sgt);
uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);
@@ -155,6 +156,10 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
+struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
+ uint64_t *offset);
+unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+ unsigned long page_offset);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 21d18efca277..37a57a5ecd85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -28,6 +28,9 @@
#include "amdgpu_atomfirmware.h"
#include "atom.h"
+/* Maximum size in bytes of a single SG node */
+int64_t VRAM_MAX_SG_NODE_SIZE = 0x80000000;
+
static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_vram_mgr, manager);
@@ -565,6 +568,90 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
mem->mm_node = NULL;
}
+/**
+ * amdgpu_vram_mgr_get_size_sgt - Determine the number of scatterlist (SG) nodes
+ * that are needed to encapsulate @req_size amount of memory.
+ *
+ * @mm_node: handle of the first memory node to walk down
+ * @req_size: number of bytes of memory requested
+ * @page_offset: offset of the desired memory in pages
+ * @byte_offset: offset of the desired memory when it is not on a page boundary
+ *
+ * Returns the number of scatterlist nodes needed to build the sg_table
+ *
+ */
+uint32_t amdgpu_vram_mgr_get_size_sgt(struct drm_mm_node *mm_node,
+ uint64_t req_size, uint64_t page_offset, uint32_t byte_offset)
+{
+ int32_t num_sg_nodes = 0;
+
+ /* Walk down the memory node list to determine the number of SG nodes */
+ while (req_size > 0) {
+ uint64_t node_size, node_offset, min_size, tmp_cnt;
+
+ /* Determine available memory for current memory node */
+ node_offset = page_offset << PAGE_SHIFT;
+ node_offset = node_offset + byte_offset;
+ node_size = mm_node->size << PAGE_SHIFT;
+ node_size = node_size - node_offset;
+
+ /* Offsets apply only to the first memory node */
+ byte_offset = 0;
+ page_offset = 0;
+
+ /* Determine number of SG nodes for current memory node */
+ min_size = min(req_size, node_size);
+ tmp_cnt = (min_size + (VRAM_MAX_SG_NODE_SIZE - 1)) /
+ VRAM_MAX_SG_NODE_SIZE;
+ num_sg_nodes = num_sg_nodes + tmp_cnt;
+ req_size = req_size - min_size;
+
+ /* Get handle of next memory node */
+ mm_node++;
+ }
+
+ /* Number of SG nodes in SG Table */
+ return num_sg_nodes;
+}
+
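+/*
+ * amdgpu_vram_mgr_populate_nodes_sg - DMA map a contiguous VRAM range and
+ * fill the corresponding scatterlist entries.
+ *
+ * @size: number of bytes to map across the @iter entries
+ * @iter: number of scatterlist entries the range is split into
+ * @pfn: page frame number in the MMIO aperture where the range starts
+ * @offset: in-page byte offset, recorded in the first entry only
+ * @dev: the device the mapping is created for
+ * @dir: dma direction
+ * @sg_node: first scatterlist entry to populate
+ *
+ * Returns the next scatterlist entry to populate (NULL if the last entry of
+ * the table was just filled), or NULL on a DMA mapping error.
+ */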
+static struct scatterlist *amdgpu_vram_mgr_populate_nodes_sg(uint64_t size,
+ uint32_t iter, uint64_t pfn, uint32_t offset,
+ struct device *dev, enum dma_data_direction dir,
+ struct scatterlist *sg_node)
+{
+ uint64_t node_addr, sg_size;
+ dma_addr_t dma_addr;
+ int32_t idx, ret;
+
+ for (idx = 0; idx < iter; idx++) {
+
+ /* Get bus address from page frame number */
+ node_addr = pfn << PAGE_SHIFT;
+ node_addr = node_addr + (idx * VRAM_MAX_SG_NODE_SIZE);
+
+ /* Determine size of memory scatter node */
+ sg_size = min_t(uint64_t, size, VRAM_MAX_SG_NODE_SIZE);
+ size = size - sg_size;
+
+ dma_addr = dma_map_resource(dev, (phys_addr_t)node_addr,
+ sg_size, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ ret = dma_mapping_error(dev, dma_addr);
+ if (ret)
+ return NULL;
+
+ /* Populate the scatter node and get handle of next node */
+ sg_set_page(sg_node, NULL, sg_size, offset);
+ sg_dma_address(sg_node) = dma_addr;
+ sg_dma_len(sg_node) = sg_size;
+ sg_node = sg_next(sg_node);
+
+ /* Offset applies only to the first node */
+ offset = 0;
+ }
+
+ return sg_node;
+}
+
/**
* amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
*
@@ -572,71 +659,132 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
* @mem: TTM memory object
- * @dev: the other device
+ * @req_offset: offset in bytes into the buffer object where the mapping starts
+ * @req_size: number of bytes to map
+ * @dma_dev: the other device
* @dir: dma direction
- * @sgt: resulting sg table
+ * @ret_sgt: resulting sg table
*
* Allocate and fill a sg table from a VRAM allocation.
*/
int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
struct ttm_resource *mem,
- struct device *dev,
+ uint64_t req_offset,
+ uint64_t req_size,
+ struct device *dma_dev,
enum dma_data_direction dir,
- struct sg_table **sgt)
+ struct sg_table **ret_sgt)
{
- struct drm_mm_node *node;
- struct scatterlist *sg;
- int num_entries = 0;
- unsigned int pages;
- int i, r;
-
- *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
- if (!*sgt)
- return -ENOMEM;
-
- for (pages = mem->num_pages, node = mem->mm_node;
- pages; pages -= node->size, ++node)
- ++num_entries;
-
- r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
- if (r)
- goto error_free;
-
- for_each_sgtable_sg((*sgt), sg, i)
- sg->length = 0;
-
- node = mem->mm_node;
- for_each_sgtable_sg((*sgt), sg, i) {
- phys_addr_t phys = (node->start << PAGE_SHIFT) +
- adev->gmc.aper_base;
- size_t size = node->size << PAGE_SHIFT;
- dma_addr_t addr;
-
- ++node;
- addr = dma_map_resource(dev, phys, size, dir,
- DMA_ATTR_SKIP_CPU_SYNC);
- r = dma_mapping_error(dev, addr);
- if (r)
+ uint64_t node_page_offset, byte_offset, page_offset;
+ uint64_t num_sg_nodes, base_pfn, work_size;
+ struct drm_mm_node *node, *start_node;
+ struct scatterlist *sg_node;
+ struct sg_table *sg_tbl;
+ int32_t idx, ret;
+
+ /*
+ * Determine the first mm_node to use in computing the MMIO address. This
+ * is determined by the offset of the request, which can be at a page
+ * or non-page boundary. Furthermore, this offset may not coincide with
+ * the start of a mm_node, i.e. it may lie inside a mm_node. Thus the
+ * offset of the request should be treated as follows:
+ *
+ * offset = (N * PAGE_SIZE) + OFFSET_IN_PAGE
+ * N can be zero or higher
+ * OFFSET_IN_PAGE ranges from zero to (PAGE_SIZE - 1)
+ * mm_node->start refers to K pages off from the MMIO base address
+ * mm_node->size refers to the number of pages the mm_node encapsulates
+ *
+ * Note: the starting page of a request may be one or more pages away
+ * from the start of the mm_node
+ */
+ uint64_t req_page_idx = req_offset / (_AC(1, UL) << PAGE_SHIFT);
+ uint64_t req_byte_offset = req_page_idx << PAGE_SHIFT;
+ uint32_t offset_in_page = req_offset & ((_AC(1, UL) << PAGE_SHIFT) - 1);
+
+ start_node = amdgpu_find_mm_node(mem, &req_byte_offset);
+ node_page_offset = req_byte_offset >> PAGE_SHIFT;
+
+ /*
+ * Determine the number of scatter gather (SG) nodes that are needed
+ * to export the requested size of memory. Depending upon the request,
+ * either of the following is possible when building the sg_table:
+ * the starting mm_node contributes all of the pages, or
+ * the starting mm_node does not have all of the pages
+ */
+ num_sg_nodes = amdgpu_vram_mgr_get_size_sgt(start_node, req_size,
+ node_page_offset, offset_in_page);
+
+ /* Allocate sg_table to carry list of scatter gather (SG) nodes */
+ sg_tbl = kmalloc(sizeof(*sg_tbl), GFP_KERNEL);
+ if (!sg_tbl) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = sg_alloc_table(sg_tbl, num_sg_nodes, GFP_KERNEL);
+ if (unlikely(ret))
+ goto out;
+ for_each_sgtable_sg(sg_tbl, sg_node, idx)
+ sg_node->length = 0;
+
+ /* Determine base page frame number (PFN) of MMIO space */
+ base_pfn = adev->gmc.aper_base >> PAGE_SHIFT;
+
+ /* Populate the nodes of scatterlist table */
+ work_size = req_size;
+ sg_node = sg_tbl->sgl;
+ node = start_node;
+ byte_offset = offset_in_page;
+ page_offset = node_page_offset;
+ while (work_size > 0) {
+ uint32_t iter;
+ uint64_t elem_pfn, node_size, node_offset, min_size;
+
+ /* Adjust PFN to correspond to request */
+ elem_pfn = base_pfn + node->start + page_offset;
+
+ /* Determine size of available memory upon adjustment */
+ node_size = node->size << PAGE_SHIFT;
+ node_offset = page_offset << PAGE_SHIFT;
+ node_offset = node_offset + byte_offset;
+ node_size = node_size - node_offset;
+
+ /* Distribute memory of mm_node into one or more SG nodes */
+ min_size = min_t(int64_t, work_size, node_size);
+ iter = (min_size + (VRAM_MAX_SG_NODE_SIZE - 1)) /
+ VRAM_MAX_SG_NODE_SIZE;
+ sg_node = amdgpu_vram_mgr_populate_nodes_sg(min_size, iter,
+ elem_pfn, byte_offset, dma_dev, dir, sg_node);
+
+ /* Update size of request left to handle */
+ work_size = work_size - min_size;
+
+ /* Determine if there was an error in populating sg nodes */
+ if ((sg_node == NULL) && (work_size > 0))
goto error_unmap;
- sg_set_page(sg, NULL, size, 0);
- sg_dma_address(sg) = addr;
- sg_dma_len(sg) = size;
+ /* Offsets apply only to the first memory node */
+ page_offset = 0;
+ byte_offset = 0;
+
+ /* Get handle of next memory node */
+ node++;
}
+
+ *ret_sgt = sg_tbl;
return 0;
error_unmap:
- for_each_sgtable_sg((*sgt), sg, i) {
- if (!sg->length)
+ for_each_sgtable_sg(sg_tbl, sg_node, idx) {
+ if (!sg_node->length)
continue;
- dma_unmap_resource(dev, sg->dma_address,
- sg->length, dir,
+ dma_unmap_resource(dma_dev, sg_node->dma_address,
+ sg_node->length, dir,
DMA_ATTR_SKIP_CPU_SYNC);
}
- sg_free_table(*sgt);
+ sg_free_table(sg_tbl);
-error_free:
- kfree(*sgt);
- return r;
+out:
+ kfree(sg_tbl);
+ *ret_sgt = NULL;
+ return ret;
}
/**
@@ -649,8 +797,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
*
* Free a previously allocate sg table.
*/
-void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
- struct device *dev,
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
enum dma_data_direction dir,
struct sg_table *sgt)
{
--
2.29.2