[Intel-gfx] Failure with swiotlb
Zhenyu Wang
zhenyuw at linux.intel.com
Mon Jan 4 10:27:45 CET 2010
On 2009.12.31 12:33:06 +0800, Zhenyu Wang wrote:
> On 2009.12.30 10:26:27 +0000, David Woodhouse wrote:
> > On Wed, 2009-12-30 at 11:02 +0800, Zhenyu Wang wrote:
> > > We have .31->.32 regression as reported in
> > > http://bugs.freedesktop.org/show_bug.cgi?id=25690
> > > http://bugzilla.kernel.org/show_bug.cgi?id=14627
> > >
> > > It's triggered on non VT-d machine (or machine that should have VT-d,
> > > but no way to turn it on in BIOS.) and with large memory, and swiotlb
> > > is used for PCI dma ops. swiotlb uses a bounce buffer to copy between
> > > CPU pages and real pages made for DMA, but we can't make it real coherent
> > > as we don't call pci_dma_sync_single_for_cpu() alike APIs. And in GEM
> > > domain change, we also can't flush pages for bounce buffer. It looks like
> > > our usual non-cache-coherent graphics device can't love swiotlb.
> > >
> > > This patch trys to only handle pci dma mapping in case of real iommu
> > > hardware detected, the only case for that is VT-d. And fallback to origin
> > > method to insert physical page directly in other case. This fixes the
> > > GPU hang on our Q965 with 8G memory in 64-bit OS. Comments?
> >
> > I don't understand. Why is swiotlb doing anything here anyway, when the
> > device has a dma_mask of 36 bits?
> >
> > Shouldn't dma_capable() return 1, causing swiotlb_map_page() to return
> > the original address unmangled?
>
> Good point, I didn't look into swiotlb code, coz my debug showed it returned
> mangled dma address. So looks the real problem is 36 bit dma mask got corrupted
> somehow, which matches first report in fd.o bug 25690.
>
> Looks we should setup dma mask in drm/i915 driver too, as they both operate on
> graphics device. But I can't test that on our 8G mem machine until after new year.
>
Finally caught it! It's within drm_pci_alloc() which will try to setup dma mask
for pci_dev again! That is used for physical address based hardware status page
for 965G (i915_init_phys_hws()), as alloc with pci coherent interface. But trying
to set mask again in an alloc function looks wrong to me, and driver should setup
their own consistent dma mask according to hw.
So following patch trys to remove mask setting in drm_pci_alloc(), which fixed
the origin problem as dma mask now has the right 36bit setting on intel hw. I
can't test if ati bits looks correct, Dave?
As intel hws page does support 36bit physical address, that will be another patch
for setup pci consistent 36bit mask for it. Any comment?
diff --git a/drivers/gpu/drm/ati_pcigart.c b/drivers/gpu/drm/ati_pcigart.c
index 628eae3..a1fce68 100644
--- a/drivers/gpu/drm/ati_pcigart.c
+++ b/drivers/gpu/drm/ati_pcigart.c
@@ -39,8 +39,7 @@ static int drm_ati_alloc_pcigart_table(struct drm_device *dev,
struct drm_ati_pcigart_info *gart_info)
{
gart_info->table_handle = drm_pci_alloc(dev, gart_info->table_size,
- PAGE_SIZE,
- gart_info->table_mask);
+ PAGE_SIZE);
if (gart_info->table_handle == NULL)
return -ENOMEM;
@@ -112,6 +111,13 @@ int drm_ati_pcigart_init(struct drm_device *dev, struct drm_ati_pcigart_info *ga
if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) {
DRM_DEBUG("PCI: no table in VRAM: using normal RAM\n");
+ if (pci_set_dma_mask(dev->pdev, gart_info->table_mask)) {
+ DRM_ERROR("fail to set dma mask to 0x%Lx\n",
+ gart_info->table_mask);
+ ret = 1;
+ goto done;
+ }
+
ret = drm_ati_alloc_pcigart_table(dev, gart_info);
if (ret) {
DRM_ERROR("cannot allocate PCI GART page!\n");
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 3d09e30..8417cc4 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -326,7 +326,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
* As we're limiting the address to 2^32-1 (or less),
* casting it down to 32 bits is no problem, but we
* need to point to a 64bit variable first. */
- dmah = drm_pci_alloc(dev, map->size, map->size, 0xffffffffUL);
+ dmah = drm_pci_alloc(dev, map->size, map->size);
if (!dmah) {
kfree(map);
return -ENOMEM;
@@ -885,7 +885,7 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
while (entry->buf_count < count) {
- dmah = drm_pci_alloc(dev, PAGE_SIZE << page_order, 0x1000, 0xfffffffful);
+ dmah = drm_pci_alloc(dev, PAGE_SIZE << page_order, 0x1000);
if (!dmah) {
/* Set count correctly so we free the proper amount. */
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 577094f..e68ebf9 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -47,8 +47,7 @@
/**
* \brief Allocate a PCI consistent memory block, for DMA.
*/
-drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align,
- dma_addr_t maxaddr)
+drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align)
{
drm_dma_handle_t *dmah;
#if 1
@@ -63,11 +62,6 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali
if (align > size)
return NULL;
- if (pci_set_dma_mask(dev->pdev, maxaddr) != 0) {
- DRM_ERROR("Setting pci dma mask failed\n");
- return NULL;
- }
-
dmah = kmalloc(sizeof(drm_dma_handle_t), GFP_KERNEL);
if (!dmah)
return NULL;
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 701bfea..02607ed 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -123,7 +123,7 @@ static int i915_init_phys_hws(struct drm_device *dev)
drm_i915_private_t *dev_priv = dev->dev_private;
/* Program Hardware Status Page */
dev_priv->status_page_dmah =
- drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE, 0xffffffff);
+ drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE);
if (!dev_priv->status_page_dmah) {
DRM_ERROR("Can not allocate hardware status page\n");
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8c463cf..0d81f88 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4708,7 +4708,7 @@ int i915_gem_init_phys_object(struct drm_device *dev,
phys_obj->id = id;
- phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
+ phys_obj->handle = drm_pci_alloc(dev, size, 0);
if (!phys_obj->handle) {
ret = -ENOMEM;
goto kfree_obj;
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 71dafb6..ffac157 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1408,7 +1408,7 @@ extern int drm_ati_pcigart_cleanup(struct drm_device *dev,
struct drm_ati_pcigart_info * gart_info);
extern drm_dma_handle_t *drm_pci_alloc(struct drm_device *dev, size_t size,
- size_t align, dma_addr_t maxaddr);
+ size_t align);
extern void __drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah);
extern void drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah);
--
Open Source Technology Center, Intel ltd.
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20100104/f9f70706/attachment.sig>
More information about the Intel-gfx
mailing list