[PATCH 5/8] gpu: host1x: Add IOMMU support
Mikko Perttunen
mperttunen at nvidia.com
Thu Nov 10 18:23:42 UTC 2016
Add support for the Host1x unit to be located behind
an IOMMU. This is required when gather buffers may be
allocated non-contiguously in physical memory, as can
be the case when TegraDRM is also using the IOMMU.
Signed-off-by: Mikko Perttunen <mperttunen at nvidia.com>
---
drivers/gpu/host1x/cdma.c | 65 +++++++++++++++++++++++++++++------
drivers/gpu/host1x/cdma.h | 4 ++-
drivers/gpu/host1x/dev.c | 39 +++++++++++++++++++--
drivers/gpu/host1x/dev.h | 5 +++
drivers/gpu/host1x/hw/cdma_hw.c | 10 +++---
drivers/gpu/host1x/job.c | 76 +++++++++++++++++++++++++++++++++++------
drivers/gpu/host1x/job.h | 1 +
7 files changed, 171 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index c5d82a8..14692446e 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
struct host1x_cdma *cdma = pb_to_cdma(pb);
struct host1x *host1x = cdma_to_host1x(cdma);
- if (pb->phys != 0)
- dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
- pb->phys);
+ if (!pb->phys)
+ return;
+
+ if (host1x->domain) {
+ iommu_unmap(host1x->domain, pb->dma, pb->alloc_size_bytes);
+ free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
+ }
+
+ dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
pb->mapped = NULL;
pb->phys = 0;
@@ -66,28 +72,65 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
{
struct host1x_cdma *cdma = pb_to_cdma(pb);
struct host1x *host1x = cdma_to_host1x(cdma);
+ struct iova *alloc;
+ int err;
pb->mapped = NULL;
pb->phys = 0;
pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;
+ pb->alloc_size_bytes = pb->size_bytes + 4;
/* initialize buffer pointers */
pb->fence = pb->size_bytes - 8;
pb->pos = 0;
- /* allocate and map pushbuffer memory */
- pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
- GFP_KERNEL);
- if (!pb->mapped)
- goto fail;
+ if (host1x->domain) {
+ unsigned long shift;
+
+ pb->alloc_size_bytes = iova_align(&host1x->iova,
+ pb->alloc_size_bytes);
+
+ pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
+ &pb->phys, GFP_KERNEL);
+ if (!pb->mapped)
+ return -ENOMEM;
+
+ shift = iova_shift(&host1x->iova);
+ alloc = alloc_iova(
+ &host1x->iova, pb->alloc_size_bytes >> shift,
+ host1x->domain->geometry.aperture_end >> shift, true);
+ if (!alloc) {
+ err = -ENOMEM;
+ goto iommu_free_mem;
+ }
+
+ err = iommu_map(host1x->domain,
+ iova_dma_addr(&host1x->iova, alloc),
+ pb->phys, pb->alloc_size_bytes,
+ IOMMU_READ);
+ if (err)
+ goto iommu_free_iova;
+
+ pb->dma = iova_dma_addr(&host1x->iova, alloc);
+ } else {
+ pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
+ &pb->phys, GFP_KERNEL);
+ if (!pb->mapped)
+ return -ENOMEM;
+
+ pb->dma = pb->phys;
+ }
host1x_hw_pushbuffer_init(host1x, pb);
return 0;
-fail:
- host1x_pushbuffer_destroy(pb);
- return -ENOMEM;
+iommu_free_iova:
+ __free_iova(&host1x->iova, alloc);
+iommu_free_mem:
+ dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
+
+ return err;
}
/*
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 470087a..4b3645f 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -43,10 +43,12 @@ struct host1x_job;
struct push_buffer {
void *mapped; /* mapped pushbuffer memory */
- dma_addr_t phys; /* physical address of pushbuffer */
+ dma_addr_t dma; /* device address of pushbuffer */
+ phys_addr_t phys; /* physical address of pushbuffer */
u32 fence; /* index we've written */
u32 pos; /* index to write to */
u32 size_bytes;
+ u32 alloc_size_bytes;
};
struct buffer_timeout {
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index a62317a..ca4527e 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -168,16 +168,36 @@ static int host1x_probe(struct platform_device *pdev)
return err;
}
+ if (iommu_present(&platform_bus_type)) {
+ struct iommu_domain_geometry *geometry;
+ unsigned long order;
+
+ host->domain = iommu_domain_alloc(&platform_bus_type);
+ if (!host->domain)
+ return -ENOMEM;
+
+ err = iommu_attach_device(host->domain, &pdev->dev);
+ if (err)
+ goto fail_free_domain;
+
+ geometry = &host->domain->geometry;
+
+ order = __ffs(host->domain->pgsize_bitmap);
+ init_iova_domain(&host->iova, 1UL << order,
+ geometry->aperture_start >> order,
+ geometry->aperture_end >> order);
+ }
+
err = host1x_channel_list_init(host);
if (err) {
dev_err(&pdev->dev, "failed to initialize channel list\n");
- return err;
+ goto fail_detach_device;
}
err = clk_prepare_enable(host->clk);
if (err < 0) {
dev_err(&pdev->dev, "failed to enable clock\n");
- return err;
+ goto fail_detach_device;
}
err = host1x_syncpt_init(host);
@@ -206,6 +226,15 @@ static int host1x_probe(struct platform_device *pdev)
host1x_syncpt_deinit(host);
fail_unprepare_disable:
clk_disable_unprepare(host->clk);
+fail_detach_device:
+ if (host->domain) {
+ put_iova_domain(&host->iova);
+ iommu_detach_device(host->domain, &pdev->dev);
+ }
+fail_free_domain:
+ if (host->domain)
+ iommu_domain_free(host->domain);
+
return err;
}
@@ -218,6 +247,12 @@ static int host1x_remove(struct platform_device *pdev)
host1x_syncpt_deinit(host);
clk_disable_unprepare(host->clk);
+ if (host->domain) {
+ put_iova_domain(&host->iova);
+ iommu_detach_device(host->domain, &pdev->dev);
+ iommu_domain_free(host->domain);
+ }
+
return 0;
}
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 5220510..6189825 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -19,6 +19,8 @@
#include <linux/platform_device.h>
#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
#include "channel.h"
#include "syncpt.h"
@@ -108,6 +110,9 @@ struct host1x {
struct device *dev;
struct clk *clk;
+ struct iommu_domain *domain;
+ struct iova_domain iova;
+
struct mutex intr_mutex;
int intr_syncpt_irq;
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 659c1bb..b8c0348 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -55,7 +55,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
*(p++) = HOST1X_OPCODE_NOP;
*(p++) = HOST1X_OPCODE_NOP;
dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
- &pb->phys, getptr);
+ &pb->dma, getptr);
getptr = (getptr + 8) & (pb->size_bytes - 1);
}
@@ -78,9 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma)
HOST1X_CHANNEL_DMACTRL);
/* set base, put and end pointer */
- host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
+ host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
- host1x_ch_writel(ch, cdma->push_buffer.phys +
+ host1x_ch_writel(ch, cdma->push_buffer.dma +
cdma->push_buffer.size_bytes + 4,
HOST1X_CHANNEL_DMAEND);
@@ -115,8 +115,8 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
HOST1X_CHANNEL_DMACTRL);
/* set base, end pointer (all of memory) */
- host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
- host1x_ch_writel(ch, cdma->push_buffer.phys +
+ host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
+ host1x_ch_writel(ch, cdma->push_buffer.dma +
cdma->push_buffer.size_bytes,
HOST1X_CHANNEL_DMAEND);
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index a91b7c4..222d930 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -174,8 +174,9 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
return 0;
}
-static unsigned int pin_job(struct host1x_job *job)
+static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
+ int err;
unsigned int i;
job->num_unpins = 0;
@@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job)
dma_addr_t phys_addr;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
- if (!reloc->target.bo)
+ if (!reloc->target.bo) {
+ err = -EINVAL;
goto unpin;
+ }
phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
- if (!phys_addr)
+ if (!phys_addr) {
+ err = -EINVAL;
goto unpin;
+ }
job->addr_phys[job->num_unpins] = phys_addr;
job->unpins[job->num_unpins].bo = reloc->target.bo;
@@ -204,25 +209,68 @@ static unsigned int pin_job(struct host1x_job *job)
struct sg_table *sgt;
dma_addr_t phys_addr;
+ size_t gather_size = 0;
+ struct scatterlist *sg;
+ struct iova *alloc;
+ unsigned long shift;
+ int j;
+
g->bo = host1x_bo_get(g->bo);
- if (!g->bo)
+ if (!g->bo) {
+ err = -EINVAL;
goto unpin;
+ }
phys_addr = host1x_bo_pin(g->bo, &sgt);
- if (!phys_addr)
+ if (!phys_addr) {
+ err = -EINVAL;
goto unpin;
+ }
+
+ if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
+ for_each_sg(sgt->sgl, sg, sgt->nents, j) {
+ gather_size += sg->length;
+ }
+ gather_size = iova_align(&host->iova, gather_size);
+
+ shift = iova_shift(&host->iova);
+ alloc = alloc_iova(
+ &host->iova, gather_size >> shift,
+ host->domain->geometry.aperture_end >> shift,
+ true);
+ if (!alloc) {
+ err = -ENOMEM;
+ goto unpin;
+ }
+
+ err = iommu_map_sg(host->domain,
+ iova_dma_addr(&host->iova, alloc),
+ sgt->sgl, sgt->nents, IOMMU_READ);
+ if (err == 0) {
+ __free_iova(&host->iova, alloc);
+ err = -EINVAL;
+ goto unpin;
+ }
+
+ job->addr_phys[job->num_unpins] =
+ iova_dma_addr(&host->iova, alloc);
+ job->unpins[job->num_unpins].size = gather_size;
+ } else {
+ job->addr_phys[job->num_unpins] = phys_addr;
+ }
+
+ job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
- job->addr_phys[job->num_unpins] = phys_addr;
job->unpins[job->num_unpins].bo = g->bo;
job->unpins[job->num_unpins].sgt = sgt;
job->num_unpins++;
}
- return job->num_unpins;
+ return 0;
unpin:
host1x_job_unpin(job);
- return 0;
+ return err;
}
static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
@@ -525,8 +573,8 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
host1x_syncpt_load(host->syncpt + i);
/* pin memory */
- err = pin_job(job);
- if (!err)
+ err = pin_job(host, job);
+ if (err)
goto out;
/* patch gathers */
@@ -569,11 +617,19 @@ EXPORT_SYMBOL(host1x_job_pin);
void host1x_job_unpin(struct host1x_job *job)
{
+ struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
unsigned int i;
for (i = 0; i < job->num_unpins; i++) {
struct host1x_job_unpin_data *unpin = &job->unpins[i];
+ if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
+ iommu_unmap(host->domain, job->addr_phys[i],
+ unpin->size);
+ free_iova(&host->iova,
+ iova_pfn(&host->iova, job->addr_phys[i]));
+ }
+
host1x_bo_unpin(unpin->bo, unpin->sgt);
host1x_bo_put(unpin->bo);
}
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 8b3c15d..878239c4 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -44,6 +44,7 @@ struct host1x_waitchk {
struct host1x_job_unpin_data {
struct host1x_bo *bo;
struct sg_table *sgt;
+ size_t size;
};
/*
--
2.10.2
More information about the dri-devel
mailing list