On Fri 17 Sep 07:59 CDT 2021, Alexandre Bailon wrote:
Some Mediatek SoC provides hardware accelerator for AI / ML. This driver use the DRM driver to manage the shared memory, and use rpmsg to execute jobs on the APU.
Signed-off-by: Alexandre Bailon abailon@baylibre.com
drivers/rpmsg/Kconfig | 10 +++ drivers/rpmsg/Makefile | 1 + drivers/rpmsg/apu_rpmsg.c | 184 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+) create mode 100644 drivers/rpmsg/apu_rpmsg.c
diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig index 0b4407abdf138..fc1668f795004 100644 --- a/drivers/rpmsg/Kconfig +++ b/drivers/rpmsg/Kconfig @@ -73,4 +73,14 @@ config RPMSG_VIRTIO select RPMSG_NS select VIRTIO
+config RPMSG_APU
- tristate "APU RPMSG driver"
- select REMOTEPROC
- select RPMSG_VIRTIO
- select DRM_APU
- help
This provides a RPMSG driver that provides some facilities to
communicate with an accelerated processing unit (APU).
This Uses the APU DRM driver to manage memory and job scheduling.
Similar to how a driver for e.g. an I2C device doesn't live in drivers/i2c, this doesn't belong in drivers/rpmsg. Probably rather directly in the DRM driver.
endmenu diff --git a/drivers/rpmsg/Makefile b/drivers/rpmsg/Makefile index 8d452656f0ee3..8b336b9a817c1 100644 --- a/drivers/rpmsg/Makefile +++ b/drivers/rpmsg/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_RPMSG_QCOM_GLINK_RPM) += qcom_glink_rpm.o obj-$(CONFIG_RPMSG_QCOM_GLINK_SMEM) += qcom_glink_smem.o obj-$(CONFIG_RPMSG_QCOM_SMD) += qcom_smd.o obj-$(CONFIG_RPMSG_VIRTIO) += virtio_rpmsg_bus.o +obj-$(CONFIG_RPMSG_APU) += apu_rpmsg.o diff --git a/drivers/rpmsg/apu_rpmsg.c b/drivers/rpmsg/apu_rpmsg.c new file mode 100644 index 0000000000000..7e504bd176a4d --- /dev/null +++ b/drivers/rpmsg/apu_rpmsg.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright 2020 BayLibre SAS
+#include <asm/cacheflush.h>
+#include <linux/cdev.h> +#include <linux/dma-buf.h> +#include <linux/dma-map-ops.h> +#include <linux/dma-mapping.h> +#include <linux/iommu.h> +#include <linux/iova.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/remoteproc.h> +#include <linux/rpmsg.h> +#include <linux/slab.h> +#include <linux/types.h>
+#include <drm/apu_drm.h>
+#include "rpmsg_internal.h"
+#define APU_RPMSG_SERVICE_MT8183 "rpmsg-mt8183-apu0"
+struct rpmsg_apu {
- struct apu_core *core;
- struct rpmsg_device *rpdev;
+};
+static int apu_rpmsg_callback(struct rpmsg_device *rpdev, void *data, int count,
void *priv, u32 addr)
+{
- struct rpmsg_apu *apu = dev_get_drvdata(&rpdev->dev);
- struct apu_core *apu_core = apu->core;
- return apu_drm_callback(apu_core, data, count);
+}
+static int apu_rpmsg_send(struct apu_core *apu_core, void *data, int len) +{
- struct rpmsg_apu *apu = apu_drm_priv(apu_core);
- struct rpmsg_device *rpdev = apu->rpdev;
- return rpmsg_send(rpdev->ept, data, len);
The rpmsg API is exposed outside drivers/rpmsg, so as I said above, just implement this directly in your driver, no need to lug around a dummy wrapper for things like this.
+}
+static struct apu_drm_ops apu_rpmsg_ops = {
- .send = apu_rpmsg_send,
+};
+static int apu_init_iovad(struct rproc *rproc, struct rpmsg_apu *apu) +{
- struct resource_table *table;
- struct fw_rsc_carveout *rsc;
- int i;
- if (!rproc->table_ptr) {
dev_err(&apu->rpdev->dev,
"No resource_table: has the firmware been loaded ?\n");
return -ENODEV;
- }
- table = rproc->table_ptr;
- for (i = 0; i < table->num; i++) {
int offset = table->offset[i];
struct fw_rsc_hdr *hdr = (void *)table + offset;
if (hdr->type != RSC_CARVEOUT)
continue;
rsc = (void *)hdr + sizeof(*hdr);
if (apu_drm_reserve_iova(apu->core, rsc->da, rsc->len)) {
dev_err(&apu->rpdev->dev,
"failed to reserve iova\n");
return -ENOMEM;
}
- }
- return 0;
+}
+static struct rproc *apu_get_rproc(struct rpmsg_device *rpdev) +{
- /*
* To work, the APU RPMsg driver need to get the rproc device.
* Currently, we only use virtio so we could use that to find the
* remoteproc parent.
*/
- if (!rpdev->dev.parent && rpdev->dev.parent->bus) {
dev_err(&rpdev->dev, "invalid rpmsg device\n");
return ERR_PTR(-EINVAL);
- }
- if (strcmp(rpdev->dev.parent->bus->name, "virtio")) {
dev_err(&rpdev->dev, "unsupported bus\n");
return ERR_PTR(-EINVAL);
- }
- return vdev_to_rproc(dev_to_virtio(rpdev->dev.parent));
+}
+static int apu_rpmsg_probe(struct rpmsg_device *rpdev) +{
- struct rpmsg_apu *apu;
- struct rproc *rproc;
- int ret;
- apu = devm_kzalloc(&rpdev->dev, sizeof(*apu), GFP_KERNEL);
- if (!apu)
return -ENOMEM;
- apu->rpdev = rpdev;
- rproc = apu_get_rproc(rpdev);
I believe that you can replace apu_get_rproc() with:
rproc = rproc_get_by_child(&rpdev->dev);
- if (IS_ERR_OR_NULL(rproc))
return PTR_ERR(rproc);
- /* Make device dma capable by inheriting from parent's capabilities */
- set_dma_ops(&rpdev->dev, get_dma_ops(rproc->dev.parent));
- ret = dma_coerce_mask_and_coherent(&rpdev->dev,
dma_get_mask(rproc->dev.parent));
- if (ret)
goto err_put_device;
- rpdev->dev.iommu_group = rproc->dev.parent->iommu_group;
Would it be better or you if we have a device_node, so that you could specify the iommus property for this compute device?
I'm asking because I've seen cases where multi-purpose remoteproc firmware operate using multiple different iommu streams...
- apu->core = apu_drm_register_core(rproc, &apu_rpmsg_ops, apu);
- if (!apu->core) {
ret = -ENODEV;
goto err_put_device;
- }
- ret = apu_init_iovad(rproc, apu);
- dev_set_drvdata(&rpdev->dev, apu);
- return ret;
+err_put_device:
This label looks misplaced, and sure enough, if apu_init_iovad() fails you're not apu_drm_unregister_core().
But on that note, don't you want to apu_init_iovad() before you apu_drm_register_core()?
- devm_kfree(&rpdev->dev, apu);
The reason for using devm_kzalloc() is that once you return unsuccessfully from probe, or from remove the memory is freed.
So devm_kfree() should go in both cases.
- return ret;
+}
+static void apu_rpmsg_remove(struct rpmsg_device *rpdev) +{
- struct rpmsg_apu *apu = dev_get_drvdata(&rpdev->dev);
- apu_drm_unregister_core(apu);
- devm_kfree(&rpdev->dev, apu);
No need to explicitly free devm resources.
Regards, Bjorn
+}
+static const struct rpmsg_device_id apu_rpmsg_match[] = {
- { APU_RPMSG_SERVICE_MT8183 },
- {}
+};
+static struct rpmsg_driver apu_rpmsg_driver = {
- .probe = apu_rpmsg_probe,
- .remove = apu_rpmsg_remove,
- .callback = apu_rpmsg_callback,
- .id_table = apu_rpmsg_match,
- .drv = {
.name = "apu_rpmsg",
- },
+};
+static int __init apu_rpmsg_init(void) +{
- return register_rpmsg_driver(&apu_rpmsg_driver);
+} +arch_initcall(apu_rpmsg_init);
+static void __exit apu_rpmsg_exit(void) +{
- unregister_rpmsg_driver(&apu_rpmsg_driver);
+} +module_exit(apu_rpmsg_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("APU RPMSG driver");
2.31.1