[PATCH v2 3/3] drm/panfrost: Add initial panfrost driver

Mon Apr 1 19:12:03 UTC 2019

On 01/04/2019 08:47, Rob Herring wrote:
> This adds the initial driver for panfrost which supports Arm Mali
> Midgard and Bifrost family of GPUs. Currently, only the T860 and
> T760 Midgard GPUs have been tested.

FWIW, on an antique T624 (Juno) it seems to work no worse than the kbase 
driver plus panfrost-nondrm, which is to say it gets far enough to prove 
that the userspace definitely doesn't support T624 (kmscube manages to 
show a grey background, but the GPU is constantly falling over with page 
faults trying to dereference address 0 - for obvious reasons I'm not 
going to get any further involved in debugging that).

A couple of discoveries and general observations below.

> v2:
> - Add GPU reset on job hangs (Tomeu)
> - Add RuntimePM and devfreq support (Tomeu)
> - Fix T760 support (Tomeu)
> - Add a TODO file (Rob, Tomeu)
> - Support multiple in fences (Tomeu)
> - Drop support for shared fences (Tomeu)
> - Fill in MMU de-init (Rob)
> - Move register definitions back to single header (Rob)
> - Clean-up hardcoded job submit todos (Rob)
> - Implement feature setup based on features/issues (Rob)
> - Add remaining Midgard DT compatible strings (Rob)
> 
> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> Cc: Maxime Ripard <maxime.ripard at bootlin.com>
> Cc: Sean Paul <sean at poorly.run>
> Cc: David Airlie <airlied at linux.ie>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> Cc: Alyssa Rosenzweig <alyssa at rosenzweig.io>
> Cc: Lyude Paul <lyude at redhat.com>
> Cc: Eric Anholt <eric at anholt.net>
> Signed-off-by: Marty E. Plummer <hanetzer at startmail.com>
> Signed-off-by: Tomeu Vizoso <tomeu.vizoso at collabora.com>
> Signed-off-by: Rob Herring <robh at kernel.org>
> ---
[...]
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
> new file mode 100644
> index 000000000000..227ba5202a6f
> --- /dev/null
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
> @@ -0,0 +1,227 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright 2018 Marty E. Plummer <hanetzer at startmail.com> */
> +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh at kernel.org> */
> +
> +#include <linux/clk.h>
> +#include <linux/platform_device.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/regulator/consumer.h>
> +
> +#include "panfrost_device.h"
> +#include "panfrost_devfreq.h"
> +#include "panfrost_features.h"
> +#include "panfrost_gpu.h"
> +#include "panfrost_job.h"
> +#include "panfrost_mmu.h"
> +
> +static int panfrost_clk_init(struct panfrost_device *pfdev)
> +{
> +	int err;
> +	unsigned long rate;
> +
> +	pfdev->clock = devm_clk_get(pfdev->dev, NULL);
> +	if (IS_ERR(pfdev->clock)) {

The DT binding says clocks are optional, but this doesn't treat them as 
such: a failed devm_clk_get() is logged as an error and returned.

> +		dev_err(pfdev->dev, "get clock failed %ld\n", PTR_ERR(pfdev->clock));
> +		return PTR_ERR(pfdev->clock);
> +	}
> +
> +	rate = clk_get_rate(pfdev->clock);
> +	dev_info(pfdev->dev, "clock rate = %lu\n", rate);
> +
> +	err = clk_prepare_enable(pfdev->clock);
> +	if (err)
> +		return err;
> +
> +	return 0;
> +}
[...]
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> new file mode 100644
> index 000000000000..57a99032bcc6
> --- /dev/null
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
[...]
> +static int panfrost_probe(struct platform_device *pdev)
> +{
> +	struct panfrost_device *pfdev;
> +	struct drm_device *ddev;
> +	int err;
> +
> +	pfdev = devm_kzalloc(&pdev->dev, sizeof(*pfdev), GFP_KERNEL);
> +	if (!pfdev)
> +		return -ENOMEM;
> +
> +	pfdev->pdev = pdev;
> +	pfdev->dev = &pdev->dev;
> +
> +	platform_set_drvdata(pdev, pfdev);
> +
> +	/* Allocate and initialize the DRM device. */
> +	ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev);
> +	if (IS_ERR(ddev))
> +		return PTR_ERR(ddev);
> +
> +	ddev->dev_private = pfdev;
> +	pfdev->ddev = ddev;
> +
> +	spin_lock_init(&pfdev->mm_lock);
> +
> +	/* 4G enough for now. can be 48-bit */
> +	drm_mm_init(&pfdev->mm, SZ_32M >> PAGE_SHIFT, SZ_4G);

You probably want a dma_set_mask_and_coherent() call for your 'real' 
output address size somewhere - the default 32-bit mask works out OK for 
RK3399, but on systems with RAM above 4GB io-pgtable will get very 
unhappy about DMA bounce-buffering.

> +
> +	pm_runtime_use_autosuspend(pfdev->dev);
> +	pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */
> +	pm_runtime_enable(pfdev->dev);
> +
> +	err = panfrost_device_init(pfdev);
> +	if (err) {
> +		dev_err(&pdev->dev, "Fatal error during GPU init\n");
> +		goto err_out0;
> +	}
> +
> +	err = panfrost_devfreq_init(pfdev);
> +	if (err) {
> +		dev_err(&pdev->dev, "Fatal error during devfreq init\n");
> +		goto err_out1;
> +	}
> +
> +	/*
> +	 * Register the DRM device with the core and the connectors with
> +	 * sysfs
> +	 */
> +	err = drm_dev_register(ddev, 0);
> +	if (err < 0)
> +		goto err_out1;
> +
> +	return 0;
> +
> +err_out1:
> +	panfrost_device_fini(pfdev);
> +err_out0:
> +	drm_dev_put(ddev);

Reloading the module after a failed probe complains about an unbalanced 
pm_runtime_enable(), so I guess you need a disable somewhere around here.

> +	return err;
> +}
> +
> +static int panfrost_remove(struct platform_device *pdev)
> +{
> +	struct panfrost_device *pfdev = platform_get_drvdata(pdev);
> +	struct drm_device *ddev = pfdev->ddev;
> +
> +	drm_dev_unregister(ddev);
> +	pm_runtime_get_sync(pfdev->dev);
> +	pm_runtime_put_sync_autosuspend(pfdev->dev);
> +	pm_runtime_disable(pfdev->dev);
> +	panfrost_device_fini(pfdev);
> +	drm_dev_put(ddev);
> +	return 0;
> +}
> +
> +static const struct of_device_id dt_match[] = {
> +	{ .compatible = "arm,mali-t604" },
> +	{ .compatible = "arm,mali-t624" },
> +	{ .compatible = "arm,mali-t628" },
> +	{ .compatible = "arm,mali-t720" },
> +	{ .compatible = "arm,mali-t760" },
> +	{ .compatible = "arm,mali-t820" },
> +	{ .compatible = "arm,mali-t830" },
> +	{ .compatible = "arm,mali-t860" },
> +	{ .compatible = "arm,mali-t880" },

Any chance of resurrecting the generic "arm,mali-midgard" compatible? :P

> +	{}
> +};
> +MODULE_DEVICE_TABLE(of, dt_match);
> +
> +static const struct dev_pm_ops panfrost_pm_ops = {
> +	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
> +	SET_RUNTIME_PM_OPS(panfrost_device_suspend, panfrost_device_resume, NULL)
> +};
> +
> +static struct platform_driver panfrost_driver = {
> +	.probe		= panfrost_probe,
> +	.remove		= panfrost_remove,
> +	.driver		= {
> +		.name	= "panfrost",
> +		.pm	= &panfrost_pm_ops,
> +		.of_match_table = dt_match,
> +	},
> +};
> +module_platform_driver(panfrost_driver);
> +
> +MODULE_AUTHOR("Panfrost Project Developers");
> +MODULE_DESCRIPTION("Panfrost DRM Driver");
> +MODULE_LICENSE("GPL v2");
[...]
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> new file mode 100644
> index 000000000000..867e2ba3a761
> --- /dev/null
> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
[...]
> +static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev)
> +{
> +	u32 quirks = 0;
> +
> +	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8443) ||
> +	    panfrost_has_hw_issue(pfdev, HW_ISSUE_11035))
> +		quirks |= SC_LS_PAUSEBUFFER_DISABLE;
> +
> +	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10327))
> +		quirks |= SC_SDC_DISABLE_OQ_DISCARD;
> +
> +	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10797))
> +		quirks |= SC_ENABLE_TEXGRD_FLAGS;
> +
> +	if (!panfrost_has_hw_issue(pfdev, GPUCORE_1619)) {
> +		if (panfrost_model_cmp(pfdev, 0x750) < 0) /* T60x, T62x, T72x */
> +			quirks |= SC_LS_ATTR_CHECK_DISABLE;
> +		else if (panfrost_model_cmp(pfdev, 0x880) <= 0) /* T76x, T8xx */
> +			quirks |= SC_LS_ALLOW_ATTR_TYPES;
> +	}
> +
> +	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_TLS_HASHING))
> +		quirks |= SC_TLS_HASH_ENABLE;
> +
> +	if (quirks)
> +		gpu_write(pfdev, GPU_SHADER_CONFIG, quirks);
> +
> +
> +	quirks = gpu_read(pfdev, GPU_TILER_CONFIG);
> +
> +	/* Set tiler clock gate override if required */
> +	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_T76X_3953))
> +		quirks |= TC_CLOCK_GATE_OVERRIDE;
> +
> +	gpu_write(pfdev, GPU_TILER_CONFIG, quirks);
> +
> +
> +	quirks = gpu_read(pfdev, GPU_L2_MMU_CONFIG);
> +
> +	/* Limit read & write ID width for AXI */
> +	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
> +		quirks &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS |
> +			    L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES);
> +	else
> +		quirks &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS |
> +			    L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
> +
> +#if 0
> +	if (kbdev->system_coherency == COHERENCY_ACE) {
> +		/* Allow memory configuration disparity to be ignored, we
> +		 * optimize the use of shared memory and thus we expect
> +		 * some disparity in the memory configuration */
> +		quirks |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;

Well that sounds terrifying; I rather wish my brain had preprocessed 
that #if already.

> +	}
> +#endif
> +	gpu_write(pfdev, GPU_L2_MMU_CONFIG, quirks);
> +
> +	quirks = 0;
> +	if ((panfrost_model_eq(pfdev, 0x860) || panfrost_model_eq(pfdev, 0x880)) &&
> +	    pfdev->features.revision >= 0x2000)
> +		quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT;
> +	else if (panfrost_model_eq(pfdev, 0x6000) &&
> +		 pfdev->features.coherency_features == COHERENCY_ACE)
> +		quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) <<
> +			   JM_FORCE_COHERENCY_FEATURES_SHIFT;

Experience says you can never really trust what ID registers claim about 
system integration stuff like coherency, because eventually someone will 
get a tieoff wrong and make it all fall apart. If even the vendor driver 
has a DT override for it you know you're on thin ice ;)

Ultimately, most of your I/O coherency behaviour will be governed by 
what the DMA API thinks (based on "dma-coherent"), so if you end up with 
mismatched expectations at the point coherency_features gets set up then 
you're liable to have a bad time. See the arm-smmu drivers for prior 
examples of handling the equivalent thing.

Robin.

> +
> +	if (quirks)
> +		gpu_write(pfdev, GPU_JM_CONFIG, quirks);
> +}