[PATCH v3 0/3] Add option to mmap GEM buffers cached

List overview All Threads
Download

newer

older

[PATCH] drm: simpledrm: fix a...

[bug report] drm: remove usage of...

Paul Cercueil

14 May 2021 14 May '21

8:11 p.m.

oRework of my previous patchset which added support for GEM buffers backed by non-coherent memory to the ingenic-drm driver.

Having GEM buffers backed by non-coherent memory is interesting in the particular case where it is faster to render to a non-coherent buffer then sync the data cache, than to render to a write-combine buffer, and (by extension) much faster than using a shadow buffer. This is true for instance on some Ingenic SoCs, where even simple blits (e.g. memcpy) are about three times faster using this method.

For the record, a previous patchset was accepted for 5.10 then had to be reverted, as it conflicted with some changes made to the DMA API.

The first two patches add support for cached GEM buffers in the DRM core, the third patch adds support for this functionality in the ingenic-drm driver for the JZ4770 SoC.

Cheers, -Paul

Paul Cercueil (3): drm: Add support for GEM buffers backed by non-coherent memory drm: Add and export function drm_gem_cma_sync_data drm/ingenic: Add option to alloc cached GEM buffers

drivers/gpu/drm/drm_gem_cma_helper.c | 96 ++++++++++++++++++++++- drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 56 ++++++++++++- drivers/gpu/drm/ingenic/ingenic-ipu.c | 18 ++++- include/drm/drm_gem_cma_helper.h | 12 ++- 4 files changed, 171 insertions(+), 11 deletions(-)

-- 2.30.2

Show replies by date

Paul Cercueil

14 May 14 May

8:11 p.m.

New subject: [PATCH v3 1/3] drm: Add support for GEM buffers backed by non-coherent memory

Add a 'map_noncoherent' flag to the drm_gem_cma_object structure, which can be set by the drivers when they create the dumb buffer.

Since this really only applies to software rendering, disable this flag as soon as the CMA objects are exported via PRIME.

v3: New patch. Now uses a simple 'map_noncoherent' flag to control how the objects are mapped, and use the new dma_mmap_pages function.

Signed-off-by: Paul Cercueil paul@crapouillou.net --- drivers/gpu/drm/drm_gem_cma_helper.c | 41 +++++++++++++++++++++++++--- include/drm/drm_gem_cma_helper.h | 7 ++++- 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 7942cf05cd93..81a31bcf7d68 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -115,8 +115,15 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, if (IS_ERR(cma_obj)) return cma_obj;

- cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr, - GFP_KERNEL | __GFP_NOWARN); + if (cma_obj->map_noncoherent) { + cma_obj->vaddr = dma_alloc_noncoherent(drm->dev, size, + &cma_obj->paddr, + DMA_TO_DEVICE, + GFP_KERNEL | __GFP_NOWARN); + } else { + cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr, + GFP_KERNEL | __GFP_NOWARN); + } if (!cma_obj->vaddr) { drm_dbg(drm, "failed to allocate buffer with size %zu\n", size); @@ -499,8 +506,13 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)

cma_obj = to_drm_gem_cma_obj(obj);

- ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr, - cma_obj->paddr, vma->vm_end - vma->vm_start); + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + if (!cma_obj->map_noncoherent) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + ret = dma_mmap_pages(cma_obj->base.dev->dev, + vma, vma->vm_end - vma->vm_start, + virt_to_page(cma_obj->vaddr)); if (ret) drm_gem_vm_close(vma);

@@ -556,3 +568,24 @@ drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *dev, return obj; } EXPORT_SYMBOL(drm_gem_cma_prime_import_sg_table_vmap); + +/** + * drm_gem_cma_prime_mmap - PRIME mmap function for CMA GEM drivers + * @obj: GEM object + * @vma: Virtual address range + * + * Carbon copy of drm_gem_prime_mmap, but the 'map_noncoherent' flag is + * disabled to ensure that the exported buffers have the expected cache + * coherency. + */ +int drm_gem_cma_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) +{ + struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + + /* Use standard cache settings for PRIME-exported GEM buffers */ + cma_obj->map_noncoherent = false; + + return drm_gem_prime_mmap(obj, vma); +} +EXPORT_SYMBOL(drm_gem_cma_prime_mmap); diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index 0a9711caa3e8..b597e00fd5f6 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -16,6 +16,7 @@ struct drm_mode_create_dumb; * more than one entry but they are guaranteed to have contiguous * DMA addresses. * @vaddr: kernel virtual address of the backing memory + * @map_noncoherent: if true, the GEM object is backed by non-coherent memory */ struct drm_gem_cma_object { struct drm_gem_object base; @@ -24,6 +25,8 @@ struct drm_gem_cma_object {

/* For objects with DMA memory allocated by GEM CMA */ void *vaddr; + + bool map_noncoherent; };

#define to_drm_gem_cma_obj(gem_obj) \ @@ -119,7 +122,7 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); .prime_handle_to_fd = drm_gem_prime_handle_to_fd, \ .prime_fd_to_handle = drm_gem_prime_fd_to_handle, \ .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, \ - .gem_prime_mmap = drm_gem_prime_mmap + .gem_prime_mmap = drm_gem_cma_prime_mmap

/** * DRM_GEM_CMA_DRIVER_OPS - CMA GEM driver operations @@ -181,5 +184,7 @@ struct drm_gem_object * drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *drm, struct dma_buf_attachment *attach, struct sg_table *sgt); +int drm_gem_cma_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma);

#endif /* __DRM_GEM_CMA_HELPER_H__ */

-- 2.30.2

Thomas Zimmermann

15 May 15 May

9:03 a.m.

New subject: [PATCH v3 1/3] drm: Add support for GEM buffers backed by non-coherent memory

Am 14.05.21 um 22:11 schrieb Paul Cercueil:

...

Having GEM buffers backed by non-coherent memory is interesting in the particular case where it is faster to render to a non-coherent buffer then sync the data cache, than to render to a write-combine buffer, and (by extension) much faster than using a shadow buffer. This is true for instance on some Ingenic SoCs, where even simple blits (e.g. memcpy) are about three times faster using this method.

Add a 'map_noncoherent' flag to the drm_gem_cma_object structure, which can be set by the drivers when they create the dumb buffer.

Since this really only applies to software rendering, disable this flag as soon as the CMA objects are exported via PRIME.

v3: New patch. Now uses a simple 'map_noncoherent' flag to control how the objects are mapped, and use the new dma_mmap_pages function.

Signed-off-by: Paul Cercueil paul@crapouillou.net

drivers/gpu/drm/drm_gem_cma_helper.c | 41 +++++++++++++++++++++++++--- include/drm/drm_gem_cma_helper.h | 7 ++++- 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 7942cf05cd93..81a31bcf7d68 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -115,8 +115,15 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, if (IS_ERR(cma_obj)) return cma_obj;
cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
		      GFP_KERNEL | __GFP_NOWARN);
if (cma_obj->map_noncoherent) {
cma_obj->vaddr = dma_alloc_noncoherent(drm->dev, size,
				       &cma_obj->paddr,
				       DMA_TO_DEVICE,
				       GFP_KERNEL | __GFP_NOWARN);
} else {
cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
			      GFP_KERNEL | __GFP_NOWARN);
} if (!cma_obj->vaddr) { drm_dbg(drm, "failed to allocate buffer with size %zu\n", size);
@@ -499,8 +506,13 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)

cma_obj = to_drm_gem_cma_obj(obj);
ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
	  cma_obj->paddr, vma->vm_end - vma->vm_start);
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

if (!cma_obj->map_noncoherent)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
ret = dma_mmap_pages(cma_obj->base.dev->dev,
	     vma, vma->vm_end - vma->vm_start,
	     virt_to_page(cma_obj->vaddr));
if (ret) drm_gem_vm_close(vma);
@@ -556,3 +568,24 @@ drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *dev, return obj; } EXPORT_SYMBOL(drm_gem_cma_prime_import_sg_table_vmap);

+/**

drm_gem_cma_prime_mmap - PRIME mmap function for CMA GEM drivers

@obj: GEM object

@vma: Virtual address range

Carbon copy of drm_gem_prime_mmap, but the 'map_noncoherent' flag is

disabled to ensure that the exported buffers have the expected cache

coherency.

*/

+int drm_gem_cma_prime_mmap(struct drm_gem_object *obj,
	   struct vm_area_struct *vma)
+{

struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj);

/* Use standard cache settings for PRIME-exported GEM buffers */

cma_obj->map_noncoherent = false;

return drm_gem_prime_mmap(obj, vma);

+} +EXPORT_SYMBOL(drm_gem_cma_prime_mmap); diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index 0a9711caa3e8..b597e00fd5f6 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -16,6 +16,7 @@ struct drm_mode_create_dumb;
  more than one entry but they are guaranteed to have contiguous
  DMA addresses.
@vaddr: kernel virtual address of the backing memory
@map_noncoherent: if true, the GEM object is backed by non-coherent

memory

...

*/ struct drm_gem_cma_object { struct drm_gem_object base; @@ -24,6 +25,8 @@ struct drm_gem_cma_object {

/* For objects with DMA memory allocated by GEM CMA */ void *vaddr;

bool map_noncoherent; };

#define to_drm_gem_cma_obj(gem_obj) \

@@ -119,7 +122,7 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); .prime_handle_to_fd = drm_gem_prime_handle_to_fd, \ .prime_fd_to_handle = drm_gem_prime_fd_to_handle, \ .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, \

.gem_prime_mmap = drm_gem_prime_mmap

.gem_prime_mmap = drm_gem_cma_prime_mmap

gem_prime_mmap is deprecated and on the way out. Only mmap in drm_gem_object_funcs should be used. I have patches for other drivers that convert everything to drm_gem_prime_mmap. Afterwards the pointer can be removed.

Rather than writing a custom prime mmap function, update drm_gem_cma_prime_import_sg_table() so that it disables non-coherent mappings for imported buffers. For an example, see how SHMEM's internal create function uses the 'private' parameter. [1]

Best regards Thomas

[1] https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/drm_gem_shmem...

...

/**

DRM_GEM_CMA_DRIVER_OPS - CMA GEM driver operations

@@ -181,5 +184,7 @@ struct drm_gem_object * drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *drm, struct dma_buf_attachment *attach, struct sg_table *sgt); +int drm_gem_cma_prime_mmap(struct drm_gem_object *obj,
	   struct vm_area_struct *vma);
#endif /* __DRM_GEM_CMA_HELPER_H__ */

-- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Felix Imendörffer

Paul Cercueil

9:14 a.m.

New subject: [PATCH v3 1/3] drm: Add support for GEM buffers backed by non-coherent memory

Hi Thomas,

Le sam., mai 15 2021 at 11:03:34 +0200, Thomas Zimmermann tzimmermann@suse.de a écrit :

...

Hi

Am 14.05.21 um 22:11 schrieb Paul Cercueil:

...
Having GEM buffers backed by non-coherent memory is interesting in the particular case where it is faster to render to a non-coherent buffer then sync the data cache, than to render to a write-combine buffer, and (by extension) much faster than using a shadow buffer. This is true for instance on some Ingenic SoCs, where even simple blits (e.g. memcpy) are about three times faster using this method.

Add a 'map_noncoherent' flag to the drm_gem_cma_object structure, which can be set by the drivers when they create the dumb buffer.

Since this really only applies to software rendering, disable this flag as soon as the CMA objects are exported via PRIME.

v3: New patch. Now uses a simple 'map_noncoherent' flag to control how the objects are mapped, and use the new dma_mmap_pages function.

Signed-off-by: Paul Cercueil paul@crapouillou.net

drivers/gpu/drm/drm_gem_cma_helper.c | 41 +++++++++++++++++++++++++--- include/drm/drm_gem_cma_helper.h | 7 ++++- 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 7942cf05cd93..81a31bcf7d68 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -115,8 +115,15 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, if (IS_ERR(cma_obj)) return cma_obj; - cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
		      GFP_KERNEL | __GFP_NOWARN);
if (cma_obj->map_noncoherent) {
cma_obj->vaddr = dma_alloc_noncoherent(drm->dev, size,
				       &cma_obj->paddr,
				       DMA_TO_DEVICE,
				       GFP_KERNEL | __GFP_NOWARN);
} else {
cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
			      GFP_KERNEL | __GFP_NOWARN);
} if (!cma_obj->vaddr) { drm_dbg(drm, "failed to allocate buffer with size %zu\n", size);
@@ -499,8 +506,13 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) cma_obj = to_drm_gem_cma_obj(obj); - ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
	  cma_obj->paddr, vma->vm_end - vma->vm_start);
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

if (!cma_obj->map_noncoherent)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
ret = dma_mmap_pages(cma_obj->base.dev->dev,
	     vma, vma->vm_end - vma->vm_start,
	     virt_to_page(cma_obj->vaddr));
if (ret) drm_gem_vm_close(vma); @@ -556,3 +568,24 @@
drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *dev, return obj; } EXPORT_SYMBOL(drm_gem_cma_prime_import_sg_table_vmap);

+/**

drm_gem_cma_prime_mmap - PRIME mmap function for CMA GEM drivers

@obj: GEM object

@vma: Virtual address range

Carbon copy of drm_gem_prime_mmap, but the 'map_noncoherent'

flag is

disabled to ensure that the exported buffers have the expected

cache

coherency.

*/

+int drm_gem_cma_prime_mmap(struct drm_gem_object *obj,
	   struct vm_area_struct *vma)
+{

struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj);

/* Use standard cache settings for PRIME-exported GEM buffers */

cma_obj->map_noncoherent = false;

return drm_gem_prime_mmap(obj, vma);

+} +EXPORT_SYMBOL(drm_gem_cma_prime_mmap); diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index 0a9711caa3e8..b597e00fd5f6 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -16,6 +16,7 @@ struct drm_mode_create_dumb;
  more than one entry but they are guaranteed to have 
contiguous
  DMA addresses.
@vaddr: kernel virtual address of the backing memory
@map_noncoherent: if true, the GEM object is backed by

non-coherent memory */ struct drm_gem_cma_object { struct drm_gem_object base; @@ -24,6 +25,8 @@ struct drm_gem_cma_object { /* For objects with DMA memory allocated by GEM CMA */ void *vaddr;

bool map_noncoherent; }; #define to_drm_gem_cma_obj(gem_obj) \

@@ -119,7 +122,7 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); .prime_handle_to_fd = drm_gem_prime_handle_to_fd, \ .prime_fd_to_handle = drm_gem_prime_fd_to_handle, \ .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, \

.gem_prime_mmap = drm_gem_prime_mmap

.gem_prime_mmap = drm_gem_cma_prime_mmap
gem_prime_mmap is deprecated and on the way out. Only mmap in drm_gem_object_funcs should be used. I have patches for other drivers that convert everything to drm_gem_prime_mmap. Afterwards the pointer can be removed.

Rather than writing a custom prime mmap function, update drm_gem_cma_prime_import_sg_table() so that it disables non-coherent mappings for imported buffers. For an example, see how SHMEM's internal create function uses the 'private' parameter. [1]

Alright, I can do that.

Cheers, -Paul

...

Best regards Thomas

[1] https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/drm_gem_shmem...

...
/**

DRM_GEM_CMA_DRIVER_OPS - CMA GEM driver operations

@@ -181,5 +184,7 @@ struct drm_gem_object * drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *drm, struct dma_buf_attachment *attach, struct sg_table *sgt); +int drm_gem_cma_prime_mmap(struct drm_gem_object *obj,
	   struct vm_area_struct *vma);
#endif /* __DRM_GEM_CMA_HELPER_H__ */
-- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Felix Imendörffer

Paul Cercueil

14 May 14 May

8:11 p.m.

New subject: [PATCH v3 2/3] drm: Add and export function drm_gem_cma_sync_data

This function can be used by drivers that use damage clips and have CMA GEM objects backed by non-coherent memory. Calling this function in a plane's .atomic_update ensures that all the data in the backing memory have been written to RAM.

v3: - Only sync data if using GEM objects backed by non-coherent memory. - Use a drm_device pointer instead of device pointer in prototype

Signed-off-by: Paul Cercueil paul@crapouillou.net --- drivers/gpu/drm/drm_gem_cma_helper.c | 55 ++++++++++++++++++++++++++++ include/drm/drm_gem_cma_helper.h | 5 +++ 2 files changed, 60 insertions(+)

diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 81a31bcf7d68..9dbe766c3177 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -17,9 +17,14 @@ #include <linux/slab.h>

#include <drm/drm.h> +#include <drm/drm_damage_helper.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> +#include <drm/drm_fourcc.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_framebuffer.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_plane.h> #include <drm/drm_vma_manager.h>

/** @@ -589,3 +594,53 @@ int drm_gem_cma_prime_mmap(struct drm_gem_object *obj, return drm_gem_prime_mmap(obj, vma); } EXPORT_SYMBOL(drm_gem_cma_prime_mmap); + +/** + * drm_gem_cma_sync_data - Sync GEM object to non-coherent backing memory + * @drm: DRM device + * @old_state: Old plane state + * @state: New plane state + * + * This function can be used by drivers that use damage clips and have + * CMA GEM objects backed by non-coherent memory. Calling this function + * in a plane's .atomic_update ensures that all the data in the backing + * memory have been written to RAM. + */ +void drm_gem_cma_sync_data(struct drm_device *drm, + struct drm_plane_state *old_state, + struct drm_plane_state *state) +{ + const struct drm_format_info *finfo = state->fb->format; + struct drm_atomic_helper_damage_iter iter; + const struct drm_gem_cma_object *cma_obj; + unsigned int offset, i; + struct drm_rect clip; + dma_addr_t daddr; + + for (i = 0; i < finfo->num_planes; i++) { + cma_obj = drm_fb_cma_get_gem_obj(state->fb, i); + + if (cma_obj->map_noncoherent) + break; + } + + /* No non-coherent buffers - no need to sync anything. */ + if (i == finfo->num_planes) + return; + + drm_atomic_helper_damage_iter_init(&iter, old_state, state); + + drm_atomic_for_each_plane_damage(&iter, &clip) { + for (i = 0; i < finfo->num_planes; i++) { + daddr = drm_fb_cma_get_gem_addr(state->fb, state, i); + + /* Ignore x1/x2 values, invalidate complete lines */ + offset = clip.y1 * state->fb->pitches[i]; + + dma_sync_single_for_device(drm->dev, daddr + offset, + (clip.y2 - clip.y1) * state->fb->pitches[i], + DMA_TO_DEVICE); + } + } +} +EXPORT_SYMBOL_GPL(drm_gem_cma_sync_data); diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index b597e00fd5f6..4ccc8c69e594 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -7,6 +7,7 @@ #include <drm/drm_gem.h>

struct drm_mode_create_dumb; +struct drm_plane_state;

/** * struct drm_gem_cma_object - GEM object backed by CMA memory allocations @@ -187,4 +188,8 @@ drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *drm, int drm_gem_cma_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);

+void drm_gem_cma_sync_data(struct drm_device *drm, + struct drm_plane_state *old_state, + struct drm_plane_state *state); + #endif /* __DRM_GEM_CMA_HELPER_H__ */

-- 2.30.2

Paul Cercueil

8:11 p.m.

New subject: [PATCH v3 3/3] drm/ingenic: Add option to alloc cached GEM buffers

With the module parameter ingenic-drm.cached_gem_buffers, it is possible to specify that we want GEM buffers backed by non-coherent memory.

This dramatically speeds up software rendering on Ingenic SoCs, even for tasks where write-combine memory should in theory be faster (e.g. simple blits).

Enable it on SoCs where it is actually faster than write-combine.

v3: The option is now selected per-SoC instead of being a module parameter.

Signed-off-by: Paul Cercueil paul@crapouillou.net --- drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 56 ++++++++++++++++++++++- drivers/gpu/drm/ingenic/ingenic-ipu.c | 18 ++++++-- 2 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index 09225b770bb8..5f64e8583eec 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -9,6 +9,7 @@ #include <linux/component.h> #include <linux/clk.h> #include <linux/dma-mapping.h> +#include <linux/io.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/of_device.h> @@ -23,6 +24,7 @@ #include <drm/drm_color_mgmt.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> @@ -57,6 +59,7 @@ struct ingenic_dma_hwdescs { struct jz_soc_info { bool needs_dev_clk; bool has_osd; + bool map_noncoherent; unsigned int max_width, max_height; const u32 *formats_f0, *formats_f1; unsigned int num_formats_f0, num_formats_f1; @@ -410,6 +413,8 @@ static int ingenic_drm_plane_atomic_check(struct drm_plane *plane, old_plane_state->fb->format->format != new_plane_state->fb->format->format)) crtc_state->mode_changed = true;

+ drm_atomic_helper_check_plane_damage(state, new_plane_state); + return 0; }

@@ -544,8 +549,8 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { struct ingenic_drm *priv = drm_device_get_priv(plane->dev); - struct drm_plane_state *newstate = drm_atomic_get_new_plane_state(state, - plane); + struct drm_plane_state *newstate = drm_atomic_get_new_plane_state(state, plane); + struct drm_plane_state *oldstate = drm_atomic_get_old_plane_state(state, plane); struct drm_crtc_state *crtc_state; struct ingenic_dma_hwdesc *hwdesc; unsigned int width, height, cpp, offset; @@ -553,6 +558,8 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane, u32 fourcc;

if (newstate && newstate->fb) { + drm_gem_cma_sync_data(&priv->drm, oldstate, newstate); + crtc_state = newstate->crtc->state;

addr = drm_fb_cma_get_gem_addr(newstate->fb, newstate, 0); @@ -742,6 +749,43 @@ static void ingenic_drm_disable_vblank(struct drm_crtc *crtc) regmap_update_bits(priv->map, JZ_REG_LCD_CTRL, JZ_LCD_CTRL_EOF_IRQ, 0); }

+static int ingenic_drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int flags, + unsigned int color, + struct drm_clip_rect *clips, + unsigned int num_clips) +{ + struct ingenic_drm *priv = drm_device_get_priv(fb->dev); + + if (!priv->soc_info->map_noncoherent) + return 0; + + return drm_atomic_helper_dirtyfb(fb, file_priv, flags, + color, clips, num_clips); +} + +static const struct drm_framebuffer_funcs ingenic_drm_gem_fb_funcs = { + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, + .dirty = ingenic_drm_atomic_helper_dirtyfb, +}; + +static struct drm_gem_object * +ingenic_drm_gem_create_object(struct drm_device *drm, size_t size) +{ + struct ingenic_drm *priv = drm_device_get_priv(drm); + struct drm_gem_cma_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return ERR_PTR(-ENOMEM); + + obj->map_noncoherent = priv->soc_info->map_noncoherent; + + return &obj->base; +} + DEFINE_DRM_GEM_CMA_FOPS(ingenic_drm_fops);

static const struct drm_driver ingenic_drm_driver_data = { @@ -754,6 +798,7 @@ static const struct drm_driver ingenic_drm_driver_data = { .patchlevel = 0,

.fops = &ingenic_drm_fops, + .gem_create_object = ingenic_drm_gem_create_object, DRM_GEM_CMA_DRIVER_OPS,

.irq_handler = ingenic_drm_irq_handler, @@ -961,6 +1006,8 @@ static int ingenic_drm_bind(struct device *dev, bool has_components) return ret; }

+ drm_plane_enable_fb_damage_clips(&priv->f1); + drm_crtc_helper_add(&priv->crtc, &ingenic_drm_crtc_helper_funcs);

ret = drm_crtc_init_with_planes(drm, &priv->crtc, primary, @@ -989,6 +1036,8 @@ static int ingenic_drm_bind(struct device *dev, bool has_components) return ret; }

+ drm_plane_enable_fb_damage_clips(&priv->f0); + if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU) && has_components) { ret = component_bind_all(dev, drm); if (ret) { @@ -1245,6 +1294,7 @@ static const u32 jz4770_formats_f0[] = { static const struct jz_soc_info jz4740_soc_info = { .needs_dev_clk = true, .has_osd = false, + .map_noncoherent = false, .max_width = 800, .max_height = 600, .formats_f1 = jz4740_formats, @@ -1255,6 +1305,7 @@ static const struct jz_soc_info jz4740_soc_info = { static const struct jz_soc_info jz4725b_soc_info = { .needs_dev_clk = false, .has_osd = true, + .map_noncoherent = false, .max_width = 800, .max_height = 600, .formats_f1 = jz4725b_formats_f1, @@ -1266,6 +1317,7 @@ static const struct jz_soc_info jz4725b_soc_info = { static const struct jz_soc_info jz4770_soc_info = { .needs_dev_clk = false, .has_osd = true, + .map_noncoherent = true, .max_width = 1280, .max_height = 720, .formats_f1 = jz4770_formats_f1, diff --git a/drivers/gpu/drm/ingenic/ingenic-ipu.c b/drivers/gpu/drm/ingenic/ingenic-ipu.c index 3b1091e7c0cd..a4d1b500c3ad 100644 --- a/drivers/gpu/drm/ingenic/ingenic-ipu.c +++ b/drivers/gpu/drm/ingenic/ingenic-ipu.c @@ -20,10 +20,13 @@

#include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_atomic_helper.h> +#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_plane.h> #include <drm/drm_plane_helper.h> #include <drm/drm_property.h> @@ -285,8 +288,8 @@ static void ingenic_ipu_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { struct ingenic_ipu *ipu = plane_to_ingenic_ipu(plane); - struct drm_plane_state *newstate = drm_atomic_get_new_plane_state(state, - plane); + struct drm_plane_state *newstate = drm_atomic_get_new_plane_state(state, plane); + struct drm_plane_state *oldstate = drm_atomic_get_new_plane_state(state, plane); const struct drm_format_info *finfo; u32 ctrl, stride = 0, coef_index = 0, format = 0; bool needs_modeset, upscaling_w, upscaling_h; @@ -317,6 +320,8 @@ static void ingenic_ipu_plane_atomic_update(struct drm_plane *plane, JZ_IPU_CTRL_CHIP_EN | JZ_IPU_CTRL_LCDC_SEL); }

+ drm_gem_cma_sync_data(ipu->drm, oldstate, newstate); + /* New addresses will be committed in vblank handler... */ ipu->addr_y = drm_fb_cma_get_gem_addr(newstate->fb, newstate, 0); if (finfo->num_planes > 1) @@ -541,7 +546,7 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane,

if (!new_plane_state->crtc || !crtc_state->mode.hdisplay || !crtc_state->mode.vdisplay) - return 0; + goto out_check_damage;

/* Plane must be fully visible */ if (new_plane_state->crtc_x < 0 || new_plane_state->crtc_y < 0 || @@ -558,7 +563,7 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane, return -EINVAL;

if (!osd_changed(new_plane_state, old_plane_state)) - return 0; + goto out_check_damage;

crtc_state->mode_changed = true;

@@ -592,6 +597,9 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane, ipu->denom_w = denom_w; ipu->denom_h = denom_h;

+out_check_damage: + drm_atomic_helper_check_plane_damage(state, new_plane_state); + return 0; }

@@ -773,6 +781,8 @@ static int ingenic_ipu_bind(struct device *dev, struct device *master, void *d) return err; }

+ drm_plane_enable_fb_damage_clips(plane); + /* * Sharpness settings range is [0,32] * 0 : nearest-neighbor

-- 2.30.2

1465

Age (days ago)

1466

Last active (days ago)

dri-devel@lists.freedesktop.org

5 comments

2 participants

tags (0)

participants (2)

Paul Cercueil
Thomas Zimmermann