[Intel-gfx] [PATCH 09/26] drm/i915: Split out gtt specific header file

Daniel Vetter daniel at ffwll.ch
Tue Mar 18 10:15:56 CET 2014


On Mon, Mar 17, 2014 at 10:48:41PM -0700, Ben Widawsky wrote:
> TODO: Do header files need a copyright?

Yup ;-)

I like this though, especially since finer-grained files will make
kerneldoc inclusion (well, grouped into sensible chapters at least) much
simpler.
-Daniel

> 
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     | 162 +-------------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.c |  57 ---------
>  drivers/gpu/drm/i915/i915_gem_gtt.h | 225 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 227 insertions(+), 217 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_gtt.h
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 084e82f..b19442c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -44,6 +44,8 @@
>  #include <linux/kref.h>
>  #include <linux/pm_qos.h>
>  
> +#include "i915_gem_gtt.h"
> +
>  /* General customization:
>   */
>  
> @@ -572,166 +574,6 @@ enum i915_cache_level {
>  	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
>  };
>  
> -typedef uint32_t gen6_gtt_pte_t;
> -
> -/**
> - * A VMA represents a GEM BO that is bound into an address space. Therefore, a
> - * VMA's presence cannot be guaranteed before binding, or after unbinding the
> - * object into/from the address space.
> - *
> - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
> - * will always be <= an objects lifetime. So object refcounting should cover us.
> - */
> -struct i915_vma {
> -	struct drm_mm_node node;
> -	struct drm_i915_gem_object *obj;
> -	struct i915_address_space *vm;
> -
> -	/** This object's place on the active/inactive lists */
> -	struct list_head mm_list;
> -
> -	struct list_head vma_link; /* Link in the object's VMA list */
> -
> -	/** This vma's place in the batchbuffer or on the eviction list */
> -	struct list_head exec_list;
> -
> -	/**
> -	 * Used for performing relocations during execbuffer insertion.
> -	 */
> -	struct hlist_node exec_node;
> -	unsigned long exec_handle;
> -	struct drm_i915_gem_exec_object2 *exec_entry;
> -
> -	/**
> -	 * How many users have pinned this object in GTT space. The following
> -	 * users can each hold at most one reference: pwrite/pread, pin_ioctl
> -	 * (via user_pin_count), execbuffer (objects are not allowed multiple
> -	 * times for the same batchbuffer), and the framebuffer code. When
> -	 * switching/pageflipping, the framebuffer code has at most two buffers
> -	 * pinned per crtc.
> -	 *
> -	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> -	 * bits with absolutely no headroom. So use 4 bits. */
> -	unsigned int pin_count:4;
> -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
> -
> -	/** Unmap an object from an address space. This usually consists of
> -	 * setting the valid PTE entries to a reserved scratch page. */
> -	void (*unbind_vma)(struct i915_vma *vma);
> -	/* Map an object into an address space with the given cache flags. */
> -#define GLOBAL_BIND (1<<0)
> -	void (*bind_vma)(struct i915_vma *vma,
> -			 enum i915_cache_level cache_level,
> -			 u32 flags);
> -};
> -
> -struct i915_address_space {
> -	struct drm_mm mm;
> -	struct drm_device *dev;
> -	struct list_head global_link;
> -	unsigned long start;		/* Start offset always 0 for dri2 */
> -	size_t total;		/* size addr space maps (ex. 2GB for ggtt) */
> -
> -	struct {
> -		dma_addr_t addr;
> -		struct page *page;
> -	} scratch;
> -
> -	/**
> -	 * List of objects currently involved in rendering.
> -	 *
> -	 * Includes buffers having the contents of their GPU caches
> -	 * flushed, not necessarily primitives.  last_rendering_seqno
> -	 * represents when the rendering involved will be completed.
> -	 *
> -	 * A reference is held on the buffer while on this list.
> -	 */
> -	struct list_head active_list;
> -
> -	/**
> -	 * LRU list of objects which are not in the ringbuffer and
> -	 * are ready to unbind, but are still in the GTT.
> -	 *
> -	 * last_rendering_seqno is 0 while an object is in this list.
> -	 *
> -	 * A reference is not held on the buffer while on this list,
> -	 * as merely being GTT-bound shouldn't prevent its being
> -	 * freed, and we'll pull it off the list in the free path.
> -	 */
> -	struct list_head inactive_list;
> -
> -	/* FIXME: Need a more generic return type */
> -	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
> -				     enum i915_cache_level level,
> -				     bool valid); /* Create a valid PTE */
> -	void (*clear_range)(struct i915_address_space *vm,
> -			    uint64_t start,
> -			    uint64_t length,
> -			    bool use_scratch);
> -	void (*insert_entries)(struct i915_address_space *vm,
> -			       struct sg_table *st,
> -			       uint64_t start,
> -			       enum i915_cache_level cache_level);
> -	void (*cleanup)(struct i915_address_space *vm);
> -};
> -
> -/* The Graphics Translation Table is the way in which GEN hardware translates a
> - * Graphics Virtual Address into a Physical Address. In addition to the normal
> - * collateral associated with any va->pa translations GEN hardware also has a
> - * portion of the GTT which can be mapped by the CPU and remain both coherent
> - * and correct (in cases like swizzling). That region is referred to as GMADR in
> - * the spec.
> - */
> -struct i915_gtt {
> -	struct i915_address_space base;
> -	size_t stolen_size;		/* Total size of stolen memory */
> -
> -	unsigned long mappable_end;	/* End offset that we can CPU map */
> -	struct io_mapping *mappable;	/* Mapping to our CPU mappable region */
> -	phys_addr_t mappable_base;	/* PA of our GMADR */
> -
> -	/** "Graphics Stolen Memory" holds the global PTEs */
> -	void __iomem *gsm;
> -
> -	bool do_idle_maps;
> -
> -	int mtrr;
> -
> -	/* global gtt ops */
> -	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
> -			  size_t *stolen, phys_addr_t *mappable_base,
> -			  unsigned long *mappable_end);
> -};
> -#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
> -
> -#define GEN8_LEGACY_PDPS 4
> -struct i915_hw_ppgtt {
> -	struct i915_address_space base;
> -	struct kref ref;
> -	struct drm_mm_node node;
> -	unsigned num_pd_entries;
> -	unsigned num_pd_pages; /* gen8+ */
> -	union {
> -		struct page **pt_pages;
> -		struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
> -	};
> -	struct page *pd_pages;
> -	union {
> -		uint32_t pd_offset;
> -		dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
> -	};
> -	union {
> -		dma_addr_t *pt_dma_addr;
> -		dma_addr_t *gen8_pt_dma_addr[GEN8_LEGACY_PDPS];
> -	};
> -
> -	int (*enable)(struct i915_hw_ppgtt *ppgtt);
> -	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_ring_buffer *ring,
> -			 bool synchronous);
> -	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
> -};
> -
>  struct i915_ctx_hang_stats {
>  	/* This context had batch pending when hang was declared */
>  	unsigned batch_pending;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 5f73284..a239196 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -53,60 +53,6 @@ bool intel_enable_ppgtt(struct drm_device *dev, bool full)
>  		return HAS_ALIASING_PPGTT(dev);
>  }
>  
> -#define GEN6_PPGTT_PD_ENTRIES 512
> -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> -typedef uint64_t gen8_gtt_pte_t;
> -typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> -
> -/* PPGTT stuff */
> -#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
> -#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
> -
> -#define GEN6_PDE_VALID			(1 << 0)
> -/* gen6+ has bit 11-4 for physical addr bit 39-32 */
> -#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
> -
> -#define GEN6_PTE_VALID			(1 << 0)
> -#define GEN6_PTE_UNCACHED		(1 << 1)
> -#define HSW_PTE_UNCACHED		(0)
> -#define GEN6_PTE_CACHE_LLC		(2 << 1)
> -#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
> -#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
> -#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
> -
> -/* Cacheability Control is a 4-bit value. The low three bits are stored in *
> - * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
> - */
> -#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
> -					 (((bits) & 0x8) << (11 - 3)))
> -#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
> -#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
> -#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
> -#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
> -#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
> -#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
> -
> -#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
> -#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
> -
> -/* GEN8 legacy style addressis defined as a 3 level page table:
> - * 31:30 | 29:21 | 20:12 |  11:0
> - * PDPE  |  PDE  |  PTE  | offset
> - * The difference as compared to normal x86 3 level page table is the PDPEs are
> - * programmed via register.
> - */
> -#define GEN8_PDPE_SHIFT			30
> -#define GEN8_PDPE_MASK			0x3
> -#define GEN8_PDE_SHIFT			21
> -#define GEN8_PDE_MASK			0x1ff
> -#define GEN8_PTE_SHIFT			12
> -#define GEN8_PTE_MASK			0x1ff
> -
> -#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> -#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> -#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> -#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> -
>  static void ppgtt_bind_vma(struct i915_vma *vma,
>  			   enum i915_cache_level cache_level,
>  			   u32 flags);
> @@ -185,9 +131,6 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
>  	return pte;
>  }
>  
> -#define BYT_PTE_WRITEABLE		(1 << 1)
> -#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
> -
>  static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
>  				     bool valid)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> new file mode 100644
> index 0000000..c8d5c77
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -0,0 +1,225 @@
> +#ifndef _I915_GEM_GTT_H
> +#define _I915_GEM_GTT_H
> +
> +#define GEN6_PPGTT_PD_ENTRIES 512
> +#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> +typedef uint32_t gen6_gtt_pte_t;
> +typedef uint64_t gen8_gtt_pte_t;
> +typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> +
> +/* PPGTT stuff */
> +#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
> +#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
> +
> +#define GEN6_PDE_VALID			(1 << 0)
> +/* gen6+ has bit 11-4 for physical addr bit 39-32 */
> +#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
> +
> +#define GEN6_PTE_VALID			(1 << 0)
> +#define GEN6_PTE_UNCACHED		(1 << 1)
> +#define HSW_PTE_UNCACHED		(0)
> +#define GEN6_PTE_CACHE_LLC		(2 << 1)
> +#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
> +#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
> +#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
> +
> +#define BYT_PTE_WRITEABLE		(1 << 1)
> +#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
> +
> +/* Cacheability Control is a 4-bit value. The low three bits are stored in
> + * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
> + */
> +#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
> +					 (((bits) & 0x8) << (11 - 3)))
> +#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
> +#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
> +#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
> +#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
> +#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
> +#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
> +
> +#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> +#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> +#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> +#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> +
> +#define GEN8_LEGACY_PDPS		4
> +#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
> +#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
> +
> +/* GEN8 legacy style address is defined as a 3 level page table:
> + * 31:30 | 29:21 | 20:12 |  11:0
> + * PDPE  |  PDE  |  PTE  | offset
> + * The difference as compared to normal x86 3 level page table is the PDPEs are
> + * programmed via register.
> + *
> + * The x86 pagetable code is flexible in its ability to handle varying page
> + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
> + * instead replicate the interesting functionality.
> + */
> +#define GEN8_PDPE_SHIFT			30
> +#define GEN8_PDPE_MASK			0x3
> +#define GEN8_PDE_SHIFT			21
> +#define GEN8_PDE_MASK			0x1ff
> +#define GEN8_PTE_SHIFT			12
> +#define GEN8_PTE_MASK			0x1ff
> +
> +enum i915_cache_level;
> +/**
> + * A VMA represents a GEM BO that is bound into an address space. Therefore, a
> + * VMA's presence cannot be guaranteed before binding, or after unbinding the
> + * object into/from the address space.
> + *
> + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
> + * is always <= an object's lifetime. So object refcounting should cover us.
> + */
> +struct i915_vma {
> +	struct drm_mm_node node;
> +	struct drm_i915_gem_object *obj;
> +	struct i915_address_space *vm;
> +
> +	/** This object's place on the active/inactive lists */
> +	struct list_head mm_list;
> +
> +	struct list_head vma_link; /* Link in the object's VMA list */
> +
> +	/** This vma's place in the batchbuffer or on the eviction list */
> +	struct list_head exec_list;
> +
> +	/**
> +	 * Used for performing relocations during execbuffer insertion.
> +	 */
> +	struct hlist_node exec_node;
> +	unsigned long exec_handle;
> +	struct drm_i915_gem_exec_object2 *exec_entry;
> +
> +	/**
> +	 * How many users have pinned this object in GTT space. The following
> +	 * users can each hold at most one reference: pwrite/pread, pin_ioctl
> +	 * (via user_pin_count), execbuffer (objects are not allowed multiple
> +	 * times for the same batchbuffer), and the framebuffer code. When
> +	 * switching/pageflipping, the framebuffer code has at most two buffers
> +	 * pinned per crtc.
> +	 *
> +	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> +	 * bits with absolutely no headroom. So use 4 bits. */
> +	unsigned int pin_count:4;
> +#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
> +
> +	/** Unmap an object from an address space. This usually consists of
> +	 * setting the valid PTE entries to a reserved scratch page. */
> +	void (*unbind_vma)(struct i915_vma *vma);
> +	/* Map an object into an address space with the given cache flags. */
> +#define GLOBAL_BIND (1<<0)
> +	void (*bind_vma)(struct i915_vma *vma,
> +			 enum i915_cache_level cache_level,
> +			 u32 flags);
> +};
> +
> +struct i915_address_space {
> +	struct drm_mm mm;
> +	struct drm_device *dev;
> +	struct list_head global_link;
> +	unsigned long start;		/* Start offset always 0 for dri2 */
> +	size_t total;		/* size addr space maps (ex. 2GB for ggtt) */
> +
> +	struct {
> +		dma_addr_t addr;
> +		struct page *page;
> +	} scratch;
> +
> +	/**
> +	 * List of objects currently involved in rendering.
> +	 *
> +	 * Includes buffers having the contents of their GPU caches
> +	 * flushed, not necessarily primitives.  last_rendering_seqno
> +	 * represents when the rendering involved will be completed.
> +	 *
> +	 * A reference is held on the buffer while on this list.
> +	 */
> +	struct list_head active_list;
> +
> +	/**
> +	 * LRU list of objects which are not in the ringbuffer and
> +	 * are ready to unbind, but are still in the GTT.
> +	 *
> +	 * last_rendering_seqno is 0 while an object is in this list.
> +	 *
> +	 * A reference is not held on the buffer while on this list,
> +	 * as merely being GTT-bound shouldn't prevent its being
> +	 * freed, and we'll pull it off the list in the free path.
> +	 */
> +	struct list_head inactive_list;
> +
> +	/* FIXME: Need a more generic return type */
> +	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
> +				     enum i915_cache_level level,
> +				     bool valid); /* Create a valid PTE */
> +	void (*clear_range)(struct i915_address_space *vm,
> +			    uint64_t start,
> +			    uint64_t length,
> +			    bool use_scratch);
> +	void (*insert_entries)(struct i915_address_space *vm,
> +			       struct sg_table *st,
> +			       uint64_t start,
> +			       enum i915_cache_level cache_level);
> +	void (*cleanup)(struct i915_address_space *vm);
> +};
> +
> +/* The Graphics Translation Table is the way in which GEN hardware translates a
> + * Graphics Virtual Address into a Physical Address. In addition to the normal
> + * collateral associated with any va->pa translations GEN hardware also has a
> + * portion of the GTT which can be mapped by the CPU and remain both coherent
> + * and correct (in cases like swizzling). That region is referred to as GMADR in
> + * the spec.
> + */
> +struct i915_gtt {
> +	struct i915_address_space base;
> +	size_t stolen_size;		/* Total size of stolen memory */
> +
> +	unsigned long mappable_end;	/* End offset that we can CPU map */
> +	struct io_mapping *mappable;	/* Mapping to our CPU mappable region */
> +	phys_addr_t mappable_base;	/* PA of our GMADR */
> +
> +	/** "Graphics Stolen Memory" holds the global PTEs */
> +	void __iomem *gsm;
> +
> +	bool do_idle_maps;
> +
> +	int mtrr;
> +
> +	/* global gtt ops */
> +	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
> +			  size_t *stolen, phys_addr_t *mappable_base,
> +			  unsigned long *mappable_end);
> +};
> +#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
> +
> +struct i915_hw_ppgtt {
> +	struct i915_address_space base;
> +	struct kref ref;
> +	struct drm_mm_node node;
> +	unsigned num_pd_entries;
> +	unsigned num_pd_pages; /* gen8+ */
> +	union {
> +		struct page **pt_pages;
> +		struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
> +	};
> +	struct page *pd_pages;
> +	union {
> +		uint32_t pd_offset;
> +		dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
> +	};
> +	union {
> +		dma_addr_t *pt_dma_addr;
> +		dma_addr_t *gen8_pt_dma_addr[GEN8_LEGACY_PDPS];
> +	};
> +
> +	int (*enable)(struct i915_hw_ppgtt *ppgtt);
> +	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> +			 struct intel_ring_buffer *ring,
> +			 bool synchronous);
> +	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
> +};
> +
> +#endif
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch



More information about the Intel-gfx mailing list