[Intel-gfx] [PATCH 09/26] drm/i915: Split out gtt specific header file
Daniel Vetter
daniel at ffwll.ch
Tue Mar 18 10:15:56 CET 2014
On Mon, Mar 17, 2014 at 10:48:41PM -0700, Ben Widawsky wrote:
> TODO: Do header files need a copyright?
Yup ;-)
I like this though, especially since finer-grained files will make
kerneldoc inclusion (well, grouped into sensible chapters at least) much
simpler.
-Daniel
>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 162 +-------------------------
> drivers/gpu/drm/i915/i915_gem_gtt.c | 57 ---------
> drivers/gpu/drm/i915/i915_gem_gtt.h | 225 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 227 insertions(+), 217 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_gem_gtt.h
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 084e82f..b19442c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -44,6 +44,8 @@
> #include <linux/kref.h>
> #include <linux/pm_qos.h>
>
> +#include "i915_gem_gtt.h"
> +
> /* General customization:
> */
>
> @@ -572,166 +574,6 @@ enum i915_cache_level {
> I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
> };
>
> -typedef uint32_t gen6_gtt_pte_t;
> -
> -/**
> - * A VMA represents a GEM BO that is bound into an address space. Therefore, a
> - * VMA's presence cannot be guaranteed before binding, or after unbinding the
> - * object into/from the address space.
> - *
> - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
> - * will always be <= an objects lifetime. So object refcounting should cover us.
> - */
> -struct i915_vma {
> - struct drm_mm_node node;
> - struct drm_i915_gem_object *obj;
> - struct i915_address_space *vm;
> -
> - /** This object's place on the active/inactive lists */
> - struct list_head mm_list;
> -
> - struct list_head vma_link; /* Link in the object's VMA list */
> -
> - /** This vma's place in the batchbuffer or on the eviction list */
> - struct list_head exec_list;
> -
> - /**
> - * Used for performing relocations during execbuffer insertion.
> - */
> - struct hlist_node exec_node;
> - unsigned long exec_handle;
> - struct drm_i915_gem_exec_object2 *exec_entry;
> -
> - /**
> - * How many users have pinned this object in GTT space. The following
> - * users can each hold at most one reference: pwrite/pread, pin_ioctl
> - * (via user_pin_count), execbuffer (objects are not allowed multiple
> - * times for the same batchbuffer), and the framebuffer code. When
> - * switching/pageflipping, the framebuffer code has at most two buffers
> - * pinned per crtc.
> - *
> - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> - * bits with absolutely no headroom. So use 4 bits. */
> - unsigned int pin_count:4;
> -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
> -
> - /** Unmap an object from an address space. This usually consists of
> - * setting the valid PTE entries to a reserved scratch page. */
> - void (*unbind_vma)(struct i915_vma *vma);
> - /* Map an object into an address space with the given cache flags. */
> -#define GLOBAL_BIND (1<<0)
> - void (*bind_vma)(struct i915_vma *vma,
> - enum i915_cache_level cache_level,
> - u32 flags);
> -};
> -
> -struct i915_address_space {
> - struct drm_mm mm;
> - struct drm_device *dev;
> - struct list_head global_link;
> - unsigned long start; /* Start offset always 0 for dri2 */
> - size_t total; /* size addr space maps (ex. 2GB for ggtt) */
> -
> - struct {
> - dma_addr_t addr;
> - struct page *page;
> - } scratch;
> -
> - /**
> - * List of objects currently involved in rendering.
> - *
> - * Includes buffers having the contents of their GPU caches
> - * flushed, not necessarily primitives. last_rendering_seqno
> - * represents when the rendering involved will be completed.
> - *
> - * A reference is held on the buffer while on this list.
> - */
> - struct list_head active_list;
> -
> - /**
> - * LRU list of objects which are not in the ringbuffer and
> - * are ready to unbind, but are still in the GTT.
> - *
> - * last_rendering_seqno is 0 while an object is in this list.
> - *
> - * A reference is not held on the buffer while on this list,
> - * as merely being GTT-bound shouldn't prevent its being
> - * freed, and we'll pull it off the list in the free path.
> - */
> - struct list_head inactive_list;
> -
> - /* FIXME: Need a more generic return type */
> - gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
> - enum i915_cache_level level,
> - bool valid); /* Create a valid PTE */
> - void (*clear_range)(struct i915_address_space *vm,
> - uint64_t start,
> - uint64_t length,
> - bool use_scratch);
> - void (*insert_entries)(struct i915_address_space *vm,
> - struct sg_table *st,
> - uint64_t start,
> - enum i915_cache_level cache_level);
> - void (*cleanup)(struct i915_address_space *vm);
> -};
> -
> -/* The Graphics Translation Table is the way in which GEN hardware translates a
> - * Graphics Virtual Address into a Physical Address. In addition to the normal
> - * collateral associated with any va->pa translations GEN hardware also has a
> - * portion of the GTT which can be mapped by the CPU and remain both coherent
> - * and correct (in cases like swizzling). That region is referred to as GMADR in
> - * the spec.
> - */
> -struct i915_gtt {
> - struct i915_address_space base;
> - size_t stolen_size; /* Total size of stolen memory */
> -
> - unsigned long mappable_end; /* End offset that we can CPU map */
> - struct io_mapping *mappable; /* Mapping to our CPU mappable region */
> - phys_addr_t mappable_base; /* PA of our GMADR */
> -
> - /** "Graphics Stolen Memory" holds the global PTEs */
> - void __iomem *gsm;
> -
> - bool do_idle_maps;
> -
> - int mtrr;
> -
> - /* global gtt ops */
> - int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
> - size_t *stolen, phys_addr_t *mappable_base,
> - unsigned long *mappable_end);
> -};
> -#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
> -
> -#define GEN8_LEGACY_PDPS 4
> -struct i915_hw_ppgtt {
> - struct i915_address_space base;
> - struct kref ref;
> - struct drm_mm_node node;
> - unsigned num_pd_entries;
> - unsigned num_pd_pages; /* gen8+ */
> - union {
> - struct page **pt_pages;
> - struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
> - };
> - struct page *pd_pages;
> - union {
> - uint32_t pd_offset;
> - dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
> - };
> - union {
> - dma_addr_t *pt_dma_addr;
> - dma_addr_t *gen8_pt_dma_addr[GEN8_LEGACY_PDPS];
> - };
> -
> - int (*enable)(struct i915_hw_ppgtt *ppgtt);
> - int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> - struct intel_ring_buffer *ring,
> - bool synchronous);
> - void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
> -};
> -
> struct i915_ctx_hang_stats {
> /* This context had batch pending when hang was declared */
> unsigned batch_pending;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 5f73284..a239196 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -53,60 +53,6 @@ bool intel_enable_ppgtt(struct drm_device *dev, bool full)
> return HAS_ALIASING_PPGTT(dev);
> }
>
> -#define GEN6_PPGTT_PD_ENTRIES 512
> -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> -typedef uint64_t gen8_gtt_pte_t;
> -typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> -
> -/* PPGTT stuff */
> -#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
> -#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
> -
> -#define GEN6_PDE_VALID (1 << 0)
> -/* gen6+ has bit 11-4 for physical addr bit 39-32 */
> -#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
> -
> -#define GEN6_PTE_VALID (1 << 0)
> -#define GEN6_PTE_UNCACHED (1 << 1)
> -#define HSW_PTE_UNCACHED (0)
> -#define GEN6_PTE_CACHE_LLC (2 << 1)
> -#define GEN7_PTE_CACHE_L3_LLC (3 << 1)
> -#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
> -#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
> -
> -/* Cacheability Control is a 4-bit value. The low three bits are stored in *
> - * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
> - */
> -#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
> - (((bits) & 0x8) << (11 - 3)))
> -#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2)
> -#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3)
> -#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
> -#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8)
> -#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
> -#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7)
> -
> -#define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
> -#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
> -
> -/* GEN8 legacy style addressis defined as a 3 level page table:
> - * 31:30 | 29:21 | 20:12 | 11:0
> - * PDPE | PDE | PTE | offset
> - * The difference as compared to normal x86 3 level page table is the PDPEs are
> - * programmed via register.
> - */
> -#define GEN8_PDPE_SHIFT 30
> -#define GEN8_PDPE_MASK 0x3
> -#define GEN8_PDE_SHIFT 21
> -#define GEN8_PDE_MASK 0x1ff
> -#define GEN8_PTE_SHIFT 12
> -#define GEN8_PTE_MASK 0x1ff
> -
> -#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD)
> -#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */
> -#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */
> -#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */
> -
> static void ppgtt_bind_vma(struct i915_vma *vma,
> enum i915_cache_level cache_level,
> u32 flags);
> @@ -185,9 +131,6 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
> return pte;
> }
>
> -#define BYT_PTE_WRITEABLE (1 << 1)
> -#define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2)
> -
> static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
> enum i915_cache_level level,
> bool valid)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> new file mode 100644
> index 0000000..c8d5c77
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -0,0 +1,225 @@
> +#ifndef _I915_GEM_GTT_H
> +#define _I915_GEM_GTT_H
> +
> +#define GEN6_PPGTT_PD_ENTRIES 512
> +#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> +typedef uint32_t gen6_gtt_pte_t;
> +typedef uint64_t gen8_gtt_pte_t;
> +typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> +
> +/* PPGTT stuff */
> +#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
> +#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
> +
> +#define GEN6_PDE_VALID (1 << 0)
> +/* gen6+ has bit 11-4 for physical addr bit 39-32 */
> +#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
> +
> +#define GEN6_PTE_VALID (1 << 0)
> +#define GEN6_PTE_UNCACHED (1 << 1)
> +#define HSW_PTE_UNCACHED (0)
> +#define GEN6_PTE_CACHE_LLC (2 << 1)
> +#define GEN7_PTE_CACHE_L3_LLC (3 << 1)
> +#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
> +#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
> +
> +#define BYT_PTE_WRITEABLE (1 << 1)
> +#define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2)
> +
> +/* Cacheability Control is a 4-bit value. The low three bits are stored in
> + * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
> + */
> +#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
> + (((bits) & 0x8) << (11 - 3)))
> +#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2)
> +#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3)
> +#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
> +#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8)
> +#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
> +#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7)
> +
> +#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD)
> +#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */
> +#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */
> +#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */
> +
> +#define GEN8_LEGACY_PDPS 4
> +#define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
> +#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
> +
> +/* GEN8 legacy style address is defined as a 3 level page table:
> + * 31:30 | 29:21 | 20:12 | 11:0
> + * PDPE | PDE | PTE | offset
> + * The difference compared to a normal x86 3 level page table is that the PDPEs
> + * are programmed via register.
> + *
> + * The x86 pagetable code is flexible in its ability to handle varying page
> + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
> + * instead replicate the interesting functionality.
> + */
> +#define GEN8_PDPE_SHIFT 30
> +#define GEN8_PDPE_MASK 0x3
> +#define GEN8_PDE_SHIFT 21
> +#define GEN8_PDE_MASK 0x1ff
> +#define GEN8_PTE_SHIFT 12
> +#define GEN8_PTE_MASK 0x1ff
> +
> +enum i915_cache_level;
> +/**
> + * A VMA represents a GEM BO that is bound into an address space. Therefore, a
> + * VMA's presence cannot be guaranteed before binding, or after unbinding the
> + * object into/from the address space.
> + *
> + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
> + * will always be <= an object's lifetime. So object refcounting should cover us.
> + */
> +struct i915_vma {
> + struct drm_mm_node node;
> + struct drm_i915_gem_object *obj;
> + struct i915_address_space *vm;
> +
> + /** This object's place on the active/inactive lists */
> + struct list_head mm_list;
> +
> + struct list_head vma_link; /* Link in the object's VMA list */
> +
> + /** This vma's place in the batchbuffer or on the eviction list */
> + struct list_head exec_list;
> +
> + /**
> + * Used for performing relocations during execbuffer insertion.
> + */
> + struct hlist_node exec_node;
> + unsigned long exec_handle;
> + struct drm_i915_gem_exec_object2 *exec_entry;
> +
> + /**
> + * How many users have pinned this object in GTT space. The following
> + * users can each hold at most one reference: pwrite/pread, pin_ioctl
> + * (via user_pin_count), execbuffer (objects are not allowed multiple
> + * times for the same batchbuffer), and the framebuffer code. When
> + * switching/pageflipping, the framebuffer code has at most two buffers
> + * pinned per crtc.
> + *
> + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
> + * bits with absolutely no headroom. So use 4 bits. */
> + unsigned int pin_count:4;
> +#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
> +
> + /** Unmap an object from an address space. This usually consists of
> + * setting the valid PTE entries to a reserved scratch page. */
> + void (*unbind_vma)(struct i915_vma *vma);
> + /* Map an object into an address space with the given cache flags. */
> +#define GLOBAL_BIND (1<<0)
> + void (*bind_vma)(struct i915_vma *vma,
> + enum i915_cache_level cache_level,
> + u32 flags);
> +};
> +
> +struct i915_address_space {
> + struct drm_mm mm;
> + struct drm_device *dev;
> + struct list_head global_link;
> + unsigned long start; /* Start offset always 0 for dri2 */
> + size_t total; /* size addr space maps (ex. 2GB for ggtt) */
> +
> + struct {
> + dma_addr_t addr;
> + struct page *page;
> + } scratch;
> +
> + /**
> + * List of objects currently involved in rendering.
> + *
> + * Includes buffers having the contents of their GPU caches
> + * flushed, not necessarily primitives. last_rendering_seqno
> + * represents when the rendering involved will be completed.
> + *
> + * A reference is held on the buffer while on this list.
> + */
> + struct list_head active_list;
> +
> + /**
> + * LRU list of objects which are not in the ringbuffer and
> + * are ready to unbind, but are still in the GTT.
> + *
> + * last_rendering_seqno is 0 while an object is in this list.
> + *
> + * A reference is not held on the buffer while on this list,
> + * as merely being GTT-bound shouldn't prevent its being
> + * freed, and we'll pull it off the list in the free path.
> + */
> + struct list_head inactive_list;
> +
> + /* FIXME: Need a more generic return type */
> + gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
> + enum i915_cache_level level,
> + bool valid); /* Create a valid PTE */
> + void (*clear_range)(struct i915_address_space *vm,
> + uint64_t start,
> + uint64_t length,
> + bool use_scratch);
> + void (*insert_entries)(struct i915_address_space *vm,
> + struct sg_table *st,
> + uint64_t start,
> + enum i915_cache_level cache_level);
> + void (*cleanup)(struct i915_address_space *vm);
> +};
> +
> +/* The Graphics Translation Table is the way in which GEN hardware translates a
> + * Graphics Virtual Address into a Physical Address. In addition to the normal
> + * collateral associated with any va->pa translations GEN hardware also has a
> + * portion of the GTT which can be mapped by the CPU and remain both coherent
> + * and correct (in cases like swizzling). That region is referred to as GMADR in
> + * the spec.
> + */
> +struct i915_gtt {
> + struct i915_address_space base;
> + size_t stolen_size; /* Total size of stolen memory */
> +
> + unsigned long mappable_end; /* End offset that we can CPU map */
> + struct io_mapping *mappable; /* Mapping to our CPU mappable region */
> + phys_addr_t mappable_base; /* PA of our GMADR */
> +
> + /** "Graphics Stolen Memory" holds the global PTEs */
> + void __iomem *gsm;
> +
> + bool do_idle_maps;
> +
> + int mtrr;
> +
> + /* global gtt ops */
> + int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
> + size_t *stolen, phys_addr_t *mappable_base,
> + unsigned long *mappable_end);
> +};
> +#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
> +
> +struct i915_hw_ppgtt {
> + struct i915_address_space base;
> + struct kref ref;
> + struct drm_mm_node node;
> + unsigned num_pd_entries;
> + unsigned num_pd_pages; /* gen8+ */
> + union {
> + struct page **pt_pages;
> + struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
> + };
> + struct page *pd_pages;
> + union {
> + uint32_t pd_offset;
> + dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
> + };
> + union {
> + dma_addr_t *pt_dma_addr;
> + dma_addr_t *gen8_pt_dma_addr[GEN8_LEGACY_PDPS];
> + };
> +
> + int (*enable)(struct i915_hw_ppgtt *ppgtt);
> + int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> + struct intel_ring_buffer *ring,
> + bool synchronous);
> + void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
> +};
> +
> +#endif
> --
> 1.9.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
More information about the Intel-gfx mailing list