[Nouveau] [PATCH 2/2] drm/nvc0: implement VRAM compression
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Sat Jan 21 14:13:27 PST 2012
Safety margins checked on GTX470, not verified on other cards with
a different number of memory partitions.
---
drivers/gpu/drm/nouveau/nouveau_state.c | 35 +++++++-------
drivers/gpu/drm/nouveau/nvc0_fb.c | 81 +++++++++++++++++++++++++++++++
drivers/gpu/drm/nouveau/nvc0_vm.c | 12 ++++-
drivers/gpu/drm/nouveau/nvc0_vram.c | 78 ++++++++++++++++++++---------
4 files changed, 164 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 13e6102..9511009 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -602,18 +602,26 @@ nouveau_card_init(struct drm_device *dev)
goto out_mc;
/* PFB */
- ret = engine->fb.init(dev);
+ ret = engine->vram.init(dev);
if (ret)
goto out_timer;
- ret = engine->vram.init(dev);
+ ret = nouveau_mem_vram_init(dev);
if (ret)
- goto out_fb;
+ goto out_vram;
+
+ ret = nouveau_mem_gart_init(dev);
+ if (ret)
+ goto out_ttmvram;
+
+ ret = engine->fb.init(dev);
+ if (ret)
+ goto out_ttmgart;
/* PGPIO */
ret = nouveau_gpio_create(dev);
if (ret)
- goto out_vram;
+ goto out_fb;
ret = nouveau_gpuobj_init(dev);
if (ret)
@@ -623,14 +631,6 @@ nouveau_card_init(struct drm_device *dev)
if (ret)
goto out_gpuobj;
- ret = nouveau_mem_vram_init(dev);
- if (ret)
- goto out_instmem;
-
- ret = nouveau_mem_gart_init(dev);
- if (ret)
- goto out_ttmvram;
-
nouveau_pm_init(dev);
if (!dev_priv->noaccel) {
@@ -783,19 +783,20 @@ out_engine:
}
}
nouveau_pm_fini(dev);
- nouveau_mem_gart_fini(dev);
-out_ttmvram:
- nouveau_mem_vram_fini(dev);
out_instmem:
engine->instmem.takedown(dev);
out_gpuobj:
nouveau_gpuobj_takedown(dev);
out_gpio:
nouveau_gpio_destroy(dev);
-out_vram:
- engine->vram.takedown(dev);
out_fb:
engine->fb.takedown(dev);
+out_ttmgart:
+ nouveau_mem_gart_fini(dev);
+out_ttmvram:
+ nouveau_mem_vram_fini(dev);
+out_vram:
+ engine->vram.takedown(dev);
out_timer:
engine->timer.takedown(dev);
out_mc:
diff --git a/drivers/gpu/drm/nouveau/nvc0_fb.c b/drivers/gpu/drm/nouveau/nvc0_fb.c
index 5bf5503..b9bb143 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fb.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fb.c
@@ -30,6 +30,8 @@
struct nvc0_fb_priv {
struct page *r100c10_page;
dma_addr_t r100c10;
+ struct nouveau_bo *tag_ram;
+ uint8_t l2_part_nr;
};
static inline void
@@ -56,6 +58,74 @@ nvc0_mfb_isr(struct drm_device *dev)
}
}
+static int
+nvc0_init_tag_ram(struct drm_device *dev)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ struct nvc0_fb_priv *priv = (struct nvc0_fb_priv *)pfb->priv;
+ int ret;
+ u32 align, buffer;
+ u32 tag_size;
+ u32 num_tags;
+ u64 tag_base;
+
+ priv->l2_part_nr = nv_rd32(dev, 0x121c74);
+
+ nv_wr32(dev, 0x17e8d8, priv->l2_part_nr);
+ nv_wr32(dev, 0x100800, priv->l2_part_nr);
+
+ /* need to align to l2_part_count * 0x800 */
+ align = ((priv->l2_part_nr + 1) & ~1) * 0x800; /* even -> page sized */
+
+ buffer = align; /* buffer zone to account for address mangling */
+
+ /* allocate tag space for 1/4 of VRAM */
+ num_tags = (dev_priv->vram_size >> 17) >> 2;
+ num_tags = (num_tags + 63) & ~63;
+
+ ret = drm_mm_init(&pfb->tag_heap, 0, num_tags);
+ if (ret)
+ return ret;
+
+ tag_size = (num_tags / 64) * 0x4000 + align + buffer;
+
+ ret = nouveau_bo_new(dev, tag_size, 1 << 12, TTM_PL_FLAG_VRAM,
+ 0, 0, &priv->tag_ram);
+ if (!ret)
+ ret = nouveau_bo_pin(priv->tag_ram, TTM_PL_FLAG_VRAM);
+ if (ret) {
+ nouveau_bo_ref(NULL, &priv->tag_ram);
+ return ret;
+ }
+
+ tag_base = (priv->tag_ram->bo.mem.start << PAGE_SHIFT) + buffer;
+ tag_base = tag_base + priv->l2_part_nr * 0x800 - 1;
+ tag_base = tag_base / (priv->l2_part_nr * 0x800);
+
+ nv_wr32(dev, 0x17e8d4, tag_base);
+
+ return 0;
+}
+
+void
+nvc0_tag_ram_clear(struct drm_device *dev, u32 first, u32 count)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nvc0_fb_priv *priv;
+ int p, i;
+
+ priv = (struct nvc0_fb_priv *)dev_priv->engine.fb.priv;
+
+ nv_wr32(dev, 0x17e8cc, first);
+ nv_wr32(dev, 0x17e8d0, first + count - 1);
+ nv_wr32(dev, 0x17e8c8, 4);
+
+ for (p = 0; p < priv->l2_part_nr; ++p)
+ for (i = 0; i < 2; ++i)
+ nv_wait(dev, 0x1410c8 + p * 0x2000 + i * 0x400, ~0, 0);
+}
+
static void
nvc0_fb_destroy(struct drm_device *dev)
{
@@ -63,6 +133,14 @@ nvc0_fb_destroy(struct drm_device *dev)
struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
struct nvc0_fb_priv *priv = pfb->priv;
+ if (drm_mm_initialized(&pfb->tag_heap))
+ drm_mm_takedown(&pfb->tag_heap);
+
+ if (priv->tag_ram) {
+ nouveau_bo_unpin(priv->tag_ram);
+ nouveau_bo_ref(NULL, &priv->tag_ram);
+ }
+
nouveau_irq_unregister(dev, 25);
if (priv->r100c10_page) {
@@ -118,6 +196,9 @@ nvc0_fb_init(struct drm_device *dev)
}
priv = dev_priv->engine.fb.priv;
+ ret = nvc0_init_tag_ram(dev);
+ if (ret)
+ return ret;
nv_wr32(dev, 0x100c10, priv->r100c10 >> 8);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nvc0_vm.c b/drivers/gpu/drm/nouveau/nvc0_vm.c
index 8360dc8..0f70f47 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vm.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vm.c
@@ -27,6 +27,8 @@
#include "nouveau_drv.h"
#include "nouveau_vm.h"
+void nvc0_tag_ram_clear(struct drm_device *dev, u32 first, u32 count);
+
void
nvc0_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 index,
struct nouveau_gpuobj *pgt[2])
@@ -61,9 +63,17 @@ void
nvc0_vm_map(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
struct nouveau_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta)
{
- u32 next = 1 << (vma->node->type - 8);
+ u64 next = 1 << (vma->node->type - 8);
phys = nvc0_vm_addr(vma, phys, mem->memtype, 0);
+
+ if (mem->tag) {
+ u32 tag = mem->tag->start + (delta >> 17);
+ phys |= (u64)tag << (32 + 12);
+ next |= (u64)1 << (32 + 12);
+ nvc0_tag_ram_clear(vma->vm->dev, tag, cnt);
+ }
+
pte <<= 3;
while (cnt--) {
nv_wo32(pgt, pte + 4, upper_32_bits(phys));
diff --git a/drivers/gpu/drm/nouveau/nvc0_vram.c b/drivers/gpu/drm/nouveau/nvc0_vram.c
index fd687ee..85632bf 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vram.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vram.c
@@ -26,34 +26,48 @@
#include "nouveau_drv.h"
#include "nouveau_mm.h"
-/* 0 = unsupported
- * 1 = non-compressed
- * 3 = compressed
- */
-static const u8 types[256] = {
- 1, 1, 3, 3, 3, 3, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0,
- 0, 1, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
- 3, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
- 3, 3, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3,
- 3, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 3,
- 3, 0, 3, 3, 3, 3, 3, 0, 0, 3, 0, 3, 0, 3, 3, 0,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 1, 0
+/* Map each storage type to its uncompressed equivalent (uncompressed types map to themselves); -1 = unsupported. */
+static const u8 storage_type_map[256] =
+{
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, -1, 0x01, /* 0x00 */
+ 0x01, 0x01, 0x01, -1, -1, -1, -1, -1,
+ -1, 0x11, -1, -1, -1, -1, -1, 0x11, /* 0x10 */
+ 0x11, 0x11, 0x11, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 0x26, 0x27, /* 0x20 */
+ 0x28, 0x29, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x30 */
+ -1, -1, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27,
+ 0x28, 0x29, -1, -1, -1, -1, 0x46, -1, /* 0x40 */
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 0x46, 0x46, 0x46, 0x46, -1, -1, -1, /* 0x50 */
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x60 */
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x70 */
+ -1, -1, -1, 0x7b, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 0x7b, 0x7b, /* 0x80 */
+ 0x7b, 0x7b, -1, 0x8b, 0x8c, 0x8d, 0x8e, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x90 */
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */
+ 0xa8, 0xa9, 0xaa, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 0xb0 */
+ -1, -1, -1, -1, -1, -1, -1, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xc3, -1, -1, -1, -1, /* 0xc0 */
+ -1, -1, -1, -1, 0xfe, 0xfe, 0xc3, 0xc3,
+ 0xc3, 0xc3, -1, -1, -1, -1, -1, -1, /* 0xd0 */
+ 0xfe, -1, -1, 0xfe, -1, 0xfe, -1, 0xfe,
+ 0xfe, -1, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, -1, /* 0xe0 */
+ -1, 0xfe, -1, 0xfe, -1, 0xfe, 0xfe, -1,
+ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */
+ 0xfe, 0xfe, 0xfe, 0xfe, -1, 0xfe, 0xfe, -1
};
bool
nvc0_vram_flags_valid(struct drm_device *dev, u32 tile_flags)
{
u8 memtype = (tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK) >> 8;
- return likely((types[memtype] == 1));
+ return likely(storage_type_map[memtype] != (u8)-1);
}
int
@@ -65,6 +79,8 @@ nvc0_vram_new(struct drm_device *dev, u64 size, u32 align, u32 ncmin,
struct nouveau_mm_node *r;
struct nouveau_mem *mem;
int ret;
+ u8 memtype = type & 0xff;
+ u8 memtype_noncomp = storage_type_map[memtype];
size >>= 12;
align >>= 12;
@@ -74,12 +90,26 @@ nvc0_vram_new(struct drm_device *dev, u64 size, u32 align, u32 ncmin,
if (!mem)
return -ENOMEM;
+ mutex_lock(&mm->mutex);
+ if (memtype != memtype_noncomp) {
+ if (align == 32) {
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ int n = size >> 5;
+
+ mem->tag = drm_mm_search_free(&pfb->tag_heap, n, 0, 0);
+ if (mem->tag)
+ mem->tag = drm_mm_get_block(mem->tag, n, 0);
+ }
+
+ if (unlikely(!mem->tag))
+ memtype = memtype_noncomp;
+ }
+
INIT_LIST_HEAD(&mem->regions);
mem->dev = dev_priv->dev;
- mem->memtype = (type & 0xff);
+ mem->memtype = memtype;
mem->size = size;
- mutex_lock(&mm->mutex);
do {
ret = nouveau_mm_get(mm, 1, size, ncmin, align, &r);
if (ret) {
--
1.7.3.4
More information about the Nouveau
mailing list