[Nouveau] [PATCH 1/2] drm/nouveau: move io_reserve_lru handling into the driver v2

Christian König ckoenig.leichtzumerken at gmail.com
Thu Feb 13 12:58:42 UTC 2020


Hi Ben,

sorry for the delayed response. Haven been rather busy recently.

Am 28.01.20 um 06:49 schrieb Ben Skeggs:
> On Sat, 25 Jan 2020 at 00:30, Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> From: Christian König <ckoenig.leichtzumerken at gmail.com>
>>
>> While working on TTM cleanups I've found that the io_reserve_lru used by
>> Nouveau is actually not working at all.
>>
>> In general we should remove driver specific handling from the memory
>> management, so this patch moves the io_reserve_lru handling into Nouveau
>> instead.
>>
>> The patch should be functional correct, but is only compile tested!
> NACK on this as it currently stands.  It not only causes invalid io
> accesses somehow on module load, but while attempting to track down
> why, I realised there's a more severe issue.  This removes the
> distinction between kmap() and mapping into userspace, the former of
> which should not be placed onto the LRU as an eviction candidate.

Yeah, I already feared that this won't work of hand. It's just a bit 
hard to write stuff without being able to test it.

> We *do* require the LRU, so it's not something that can just be
> dropped completely.  There's a user report where they're getting a
> SIGBUS due to the bug you noticed causing it to not work right now.

Well that you require an LRU is obvious. But since this is completely 
Nouveau specificthat handling doesn't needs to be and actually should 
not be in TTM.

And as Daniel pointed out the current handling in the combination of TTM 
and Nouveau is fundamentally broken.

Userspace can either always force other applications into a SIGBUS or 
the page faulting into a live lock. Both of that is rather bad.


To outline how this could work correctly:

1. You split up your PCIe BAR into pages/segments of fixed size. I'm not 
sure what the NVidia hardware can handle, but I would use something like 
2MB segments for that to match the x86 huge page size.

2. Whenever you get a page fault or a kmap request for a buffer you walk 
your LRU and grab a free segment.

3. kmap requests obviously are not put back on the LRU (or are skipped 
while grabbed). Segments for page fault requests are put on the LRU 
immediately again.


The idea here is that you have at least enough segments (in this example 
256 MB BAR / 2 MB segment size = 128 segments) so that you always have 
more segments than page faults can happen at the same time.

I'm completely fine with keeping the code around for now, but as 
outlined above the whole handling is rather broken.

Regards,
Christian.

>
> Ben.
>
>> v2: don't call ttm_bo_unmap_virtual in nouveau_ttm_io_mem_reserve
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> Acked-by: Daniel Vetter <daniel.vetter at ffwll.ch>
>> ---
>>   drivers/gpu/drm/nouveau/nouveau_bo.c  | 107 ++++++++++++++++++++------
>>   drivers/gpu/drm/nouveau/nouveau_bo.h  |   3 +
>>   drivers/gpu/drm/nouveau/nouveau_drv.h |   2 +
>>   drivers/gpu/drm/nouveau/nouveau_ttm.c |  43 ++++++++++-
>>   4 files changed, 131 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> index 81668104595f..acee054f77ed 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> @@ -137,6 +137,7 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
>>          struct nouveau_bo *nvbo = nouveau_bo(bo);
>>
>>          WARN_ON(nvbo->pin_refcnt > 0);
>> +       nouveau_bo_del_io_reserve_lru(bo);
>>          nv10_bo_put_tile_region(dev, nvbo->tile, NULL);
>>
>>          /*
>> @@ -304,6 +305,7 @@ nouveau_bo_init(struct nouveau_bo *nvbo, u64 size, int align, u32 flags,
>>
>>          nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
>>          nouveau_bo_placement_set(nvbo, flags, 0);
>> +       INIT_LIST_HEAD(&nvbo->io_reserve_lru);
>>
>>          ret = ttm_bo_init(nvbo->bo.bdev, &nvbo->bo, size, type,
>>                            &nvbo->placement, align >> PAGE_SHIFT, false,
>> @@ -574,6 +576,26 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
>>                                          PAGE_SIZE, DMA_FROM_DEVICE);
>>   }
>>
>> +void nouveau_bo_add_io_reserve_lru(struct ttm_buffer_object *bo)
>> +{
>> +       struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
>> +       struct nouveau_bo *nvbo = nouveau_bo(bo);
>> +
>> +       mutex_lock(&drm->ttm.io_reserve_mutex);
>> +       list_move_tail(&nvbo->io_reserve_lru, &drm->ttm.io_reserve_lru);
>> +       mutex_unlock(&drm->ttm.io_reserve_mutex);
>> +}
>> +
>> +void nouveau_bo_del_io_reserve_lru(struct ttm_buffer_object *bo)
>> +{
>> +       struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
>> +       struct nouveau_bo *nvbo = nouveau_bo(bo);
>> +
>> +       mutex_lock(&drm->ttm.io_reserve_mutex);
>> +       list_del_init(&nvbo->io_reserve_lru);
>> +       mutex_unlock(&drm->ttm.io_reserve_mutex);
>> +}
>> +
>>   int
>>   nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
>>                      bool no_wait_gpu)
>> @@ -675,8 +697,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
>>                          }
>>
>>                          man->func = &nouveau_vram_manager;
>> -                       man->io_reserve_fastpath = false;
>> -                       man->use_io_reserve_lru = true;
>>                  } else {
>>                          man->func = &ttm_bo_manager_func;
>>                  }
>> @@ -1305,6 +1325,8 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
>>          if (bo->destroy != nouveau_bo_del_ttm)
>>                  return;
>>
>> +       nouveau_bo_del_io_reserve_lru(bo);
>> +
>>          if (mem && new_reg->mem_type != TTM_PL_SYSTEM &&
>>              mem->mem.page == nvbo->page) {
>>                  list_for_each_entry(vma, &nvbo->vma_list, head) {
>> @@ -1427,6 +1449,30 @@ nouveau_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
>>                                            filp->private_data);
>>   }
>>
>> +static void
>> +nouveau_ttm_io_mem_free_locked(struct nouveau_drm *drm, struct ttm_mem_reg *reg)
>> +{
>> +       struct nouveau_mem *mem = nouveau_mem(reg);
>> +
>> +       if (!reg->bus.base)
>> +               return; /* already freed */
>> +
>> +       if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) {
>> +               switch (reg->mem_type) {
>> +               case TTM_PL_TT:
>> +                       if (mem->kind)
>> +                               nvif_object_unmap_handle(&mem->mem.object);
>> +                       break;
>> +               case TTM_PL_VRAM:
>> +                       nvif_object_unmap_handle(&mem->mem.object);
>> +                       break;
>> +               default:
>> +                       break;
>> +               }
>> +       }
>> +       reg->bus.base = 0;
>> +}
>> +
>>   static int
>>   nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
>>   {
>> @@ -1434,18 +1480,26 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
>>          struct nouveau_drm *drm = nouveau_bdev(bdev);
>>          struct nvkm_device *device = nvxx_device(&drm->client.device);
>>          struct nouveau_mem *mem = nouveau_mem(reg);
>> +       struct nouveau_bo *nvbo;
>> +       int ret;
>> +
>> +       if (reg->bus.base)
>> +               return 0; /* already mapped */
>>
>>          reg->bus.addr = NULL;
>>          reg->bus.offset = 0;
>>          reg->bus.size = reg->num_pages << PAGE_SHIFT;
>> -       reg->bus.base = 0;
>>          reg->bus.is_iomem = false;
>>          if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
>>                  return -EINVAL;
>> +
>> +       mutex_lock(&drm->ttm.io_reserve_mutex);
>> +retry:
>>          switch (reg->mem_type) {
>>          case TTM_PL_SYSTEM:
>>                  /* System memory */
>> -               return 0;
>> +               ret = 0;
>> +               goto out;
>>          case TTM_PL_TT:
>>   #if IS_ENABLED(CONFIG_AGP)
>>                  if (drm->agp.bridge) {
>> @@ -1469,7 +1523,6 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
>>                          } args;
>>                          u64 handle, length;
>>                          u32 argc = 0;
>> -                       int ret;
>>
>>                          switch (mem->mem.object.oclass) {
>>                          case NVIF_CLASS_MEM_NV50:
>> @@ -1493,38 +1546,46 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
>>                          ret = nvif_object_map_handle(&mem->mem.object,
>>                                                       &args, argc,
>>                                                       &handle, &length);
>> -                       if (ret != 1)
>> -                               return ret ? ret : -EINVAL;
>> +                       if (ret != 1) {
>> +                               ret = ret ? ret : -EINVAL;
>> +                               goto out;
>> +                       }
>> +                       ret = 0;
>>
>>                          reg->bus.base = 0;
>>                          reg->bus.offset = handle;
>>                  }
>>                  break;
>>          default:
>> -               return -EINVAL;
>> +               ret = -EINVAL;
>>          }
>> -       return 0;
>> +
>> +out:
>> +       if (ret == -EAGAIN) {
>> +               nvbo = list_first_entry_or_null(&drm->ttm.io_reserve_lru,
>> +                                               typeof(*nvbo),
>> +                                               io_reserve_lru);
>> +               if (nvbo) {
>> +                       list_del_init(&nvbo->io_reserve_lru);
>> +                       drm_vma_node_unmap(&nvbo->bo.base.vma_node,
>> +                                          bdev->dev_mapping);
>> +                       nouveau_ttm_io_mem_free_locked(drm, &nvbo->bo.mem);
>> +                       goto retry;
>> +               }
>> +
>> +       }
>> +       mutex_unlock(&drm->ttm.io_reserve_mutex);
>> +       return ret;
>>   }
>>
>>   static void
>>   nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
>>   {
>>          struct nouveau_drm *drm = nouveau_bdev(bdev);
>> -       struct nouveau_mem *mem = nouveau_mem(reg);
>>
>> -       if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) {
>> -               switch (reg->mem_type) {
>> -               case TTM_PL_TT:
>> -                       if (mem->kind)
>> -                               nvif_object_unmap_handle(&mem->mem.object);
>> -                       break;
>> -               case TTM_PL_VRAM:
>> -                       nvif_object_unmap_handle(&mem->mem.object);
>> -                       break;
>> -               default:
>> -                       break;
>> -               }
>> -       }
>> +       mutex_lock(&drm->ttm.io_reserve_mutex);
>> +       nouveau_ttm_io_mem_free_locked(drm, reg);
>> +       mutex_unlock(&drm->ttm.io_reserve_mutex);
>>   }
>>
>>   static int
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
>> index 38f9d8350963..c47fcdf80ade 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.h
>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
>> @@ -17,6 +17,7 @@ struct nouveau_bo {
>>          bool force_coherent;
>>          struct ttm_bo_kmap_obj kmap;
>>          struct list_head head;
>> +       struct list_head io_reserve_lru;
>>
>>          /* protected by ttm_bo_reserve() */
>>          struct drm_file *reserved_by;
>> @@ -92,6 +93,8 @@ int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
>>                           bool no_wait_gpu);
>>   void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo);
>>   void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo);
>> +void nouveau_bo_add_io_reserve_lru(struct ttm_buffer_object *bo);
>> +void nouveau_bo_del_io_reserve_lru(struct ttm_buffer_object *bo);
>>
>>   /* TODO: submit equivalent to TTM generic API upstream? */
>>   static inline void __iomem *
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
>> index da8c46e09943..cd19c8ce5939 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
>> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
>> @@ -158,6 +158,8 @@ struct nouveau_drm {
>>                  int type_vram;
>>                  int type_host[2];
>>                  int type_ncoh[2];
>> +               struct mutex io_reserve_mutex;
>> +               struct list_head io_reserve_lru;
>>          } ttm;
>>
>>          /* GEM interface support */
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
>> index 77a0c6ad3cef..50518b48e9b4 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
>> @@ -162,13 +162,51 @@ const struct ttm_mem_type_manager_func nv04_gart_manager = {
>>          .debug = nouveau_manager_debug
>>   };
>>
>> +static vm_fault_t nouveau_ttm_fault(struct vm_fault *vmf)
>> +{
>> +       struct vm_area_struct *vma = vmf->vma;
>> +       struct ttm_buffer_object *bo = vma->vm_private_data;
>> +       pgprot_t prot;
>> +       vm_fault_t ret;
>> +
>> +       ret = ttm_bo_vm_reserve(bo, vmf);
>> +       if (ret)
>> +               return ret;
>> +
>> +       nouveau_bo_del_io_reserve_lru(bo);
>> +
>> +       prot = vm_get_page_prot(vma->vm_flags);
>> +       ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
>> +       if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
>> +               return ret;
>> +
>> +       nouveau_bo_add_io_reserve_lru(bo);
>> +
>> +       dma_resv_unlock(bo->base.resv);
>> +
>> +       return ret;
>> +}
>> +
>> +static struct vm_operations_struct nouveau_ttm_vm_ops = {
>> +       .fault = nouveau_ttm_fault,
>> +       .open = ttm_bo_vm_open,
>> +       .close = ttm_bo_vm_close,
>> +       .access = ttm_bo_vm_access
>> +};
>> +
>>   int
>>   nouveau_ttm_mmap(struct file *filp, struct vm_area_struct *vma)
>>   {
>>          struct drm_file *file_priv = filp->private_data;
>>          struct nouveau_drm *drm = nouveau_drm(file_priv->minor->dev);
>> +       int ret;
>>
>> -       return ttm_bo_mmap(filp, vma, &drm->ttm.bdev);
>> +       ret = ttm_bo_mmap(filp, vma, &drm->ttm.bdev);
>> +       if (ret)
>> +               return ret;
>> +
>> +       vma->vm_ops = &nouveau_ttm_vm_ops;
>> +       return 0;
>>   }
>>
>>   static int
>> @@ -273,6 +311,9 @@ nouveau_ttm_init(struct nouveau_drm *drm)
>>                  return ret;
>>          }
>>
>> +       mutex_init(&drm->ttm.io_reserve_mutex);
>> +       INIT_LIST_HEAD(&drm->ttm.io_reserve_lru);
>> +
>>          NV_INFO(drm, "VRAM: %d MiB\n", (u32)(drm->gem.vram_available >> 20));
>>          NV_INFO(drm, "GART: %d MiB\n", (u32)(drm->gem.gart_available >> 20));
>>          return 0;
>> --
>> 2.17.1
>>
>> _______________________________________________
>> Nouveau mailing list
>> Nouveau at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/nouveau



More information about the dri-devel mailing list