[Nouveau] [PATCH 1/2] nouveau: make nouveau importing global buffers completely thread-safe, with tests
Emil Velikov
emil.l.velikov at gmail.com
Wed Feb 25 06:14:36 PST 2015
On 24 February 2015 at 09:01, Maarten Lankhorst
<maarten.lankhorst at ubuntu.com> wrote:
> While I've closed off most races in a previous patch, a small race still existed
> where importing then unreffing cound cause an invalid bo. Add a test for this case.
>
> Racing sequence fixed:
>
> - thread 1 releases bo, refcount drops to zero, blocks on acquiring nvdev->lock.
> - thread 2 increases refcount to 1.
> - thread 2 decreases refcount to zero, blocks on acquiring nvdev->lock.
>
> At this point the 2 threads will clean up the same bo.
>
> How is this fixed? When thread 2 increases refcount to 1 it removes
> the bo from the list, and creates a new bo. The original thread
> will notice refcount was increased to 1 and skip deletion from list
> and closing the handle.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at ubuntu.com>
> ---
> configure.ac | 1 +
> nouveau/nouveau.c | 69 +++++++++++------------
> tests/Makefile.am | 4 ++
> tests/nouveau/.gitignore | 1 +
> tests/nouveau/Makefile.am | 15 +++++
> tests/nouveau/threaded.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++
> xf86atomic.h | 6 +-
> 7 files changed, 198 insertions(+), 38 deletions(-)
> create mode 100644 tests/nouveau/.gitignore
> create mode 100644 tests/nouveau/Makefile.am
> create mode 100644 tests/nouveau/threaded.c
>
> diff --git a/configure.ac b/configure.ac
> index 8afee83..6dc5044 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -441,6 +441,7 @@ AC_CONFIG_FILES([
> tests/vbltest/Makefile
> tests/exynos/Makefile
> tests/tegra/Makefile
> + tests/nouveau/Makefile
> man/Makefile
> libdrm.pc])
> AC_OUTPUT
> diff --git a/nouveau/nouveau.c b/nouveau/nouveau.c
> index c6c153a..1c723b9 100644
> --- a/nouveau/nouveau.c
> +++ b/nouveau/nouveau.c
> @@ -351,29 +351,18 @@ nouveau_bo_del(struct nouveau_bo *bo)
>
> pthread_mutex_lock(&nvdev->lock);
> if (nvbo->name) {
> - if (atomic_read(&nvbo->refcnt)) {
> + if (atomic_read(&nvbo->refcnt) == 0) {
> + DRMLISTDEL(&nvbo->head);
> /*
> - * bo has been revived by a race with
> - * nouveau_bo_prime_handle_ref, or nouveau_bo_name_ref.
> - *
> - * In theory there's still a race possible with
> - * nouveau_bo_wrap, but when using this function
> - * the lifetime of the handle is probably already
> - * handled in another way. If there are races
> - * you're probably using nouveau_bo_wrap wrong.
> + * This bo has to be closed with the lock held because
> + * gem handles are not refcounted. If a shared bo is
> + * closed and re-opened in another thread a race
> + * against DRM_IOCTL_GEM_OPEN or drmPrimeFDToHandle
> + * might cause the bo to be closed accidentally while
> + * re-importing.
> */
> - pthread_mutex_unlock(&nvdev->lock);
> - return;
> + drmIoctl(bo->device->fd, DRM_IOCTL_GEM_CLOSE, &req);
> }
> - DRMLISTDEL(&nvbo->head);
> - /*
> - * This bo has to be closed with the lock held because gem
> - * handles are not refcounted. If a shared bo is closed and
> - * re-opened in another thread a race against
> - * DRM_IOCTL_GEM_OPEN or drmPrimeFDToHandle might cause the
> - * bo to be closed accidentally while re-importing.
> - */
> - drmIoctl(bo->device->fd, DRM_IOCTL_GEM_CLOSE, &req);
> pthread_mutex_unlock(&nvdev->lock);
> } else {
> DRMLISTDEL(&nvbo->head);
> @@ -418,7 +407,7 @@ nouveau_bo_new(struct nouveau_device *dev, uint32_t flags, uint32_t align,
>
> static int
> nouveau_bo_wrap_locked(struct nouveau_device *dev, uint32_t handle,
> - struct nouveau_bo **pbo)
> + struct nouveau_bo **pbo, int name)
> {
> struct nouveau_device_priv *nvdev = nouveau_device(dev);
> struct drm_nouveau_gem_info req = { .handle = handle };
> @@ -427,8 +416,24 @@ nouveau_bo_wrap_locked(struct nouveau_device *dev, uint32_t handle,
>
> DRMLISTFOREACHENTRY(nvbo, &nvdev->bo_list, head) {
> if (nvbo->base.handle == handle) {
> - *pbo = NULL;
> - nouveau_bo_ref(&nvbo->base, pbo);
> + if (atomic_inc_return(&nvbo->refcnt) == 1) {
> + /*
> + * Uh oh, this bo is dead and someone else
> + * will free it, but because refcnt is
> + * now non-zero fortunately they won't
> + * call the ioctl to close the bo.
> + *
> + * Remove this bo from the list so other
> + * calls to nouveau_bo_wrap_locked will
> + * see our replacement nvbo.
> + */
> + DRMLISTDEL(&nvbo->head);
> + if (!name)
> + name = nvbo->name;
> + break;
> + }
> +
> + *pbo = &nvbo->base;
> return 0;
> }
> }
> @@ -443,6 +448,7 @@ nouveau_bo_wrap_locked(struct nouveau_device *dev, uint32_t handle,
> atomic_set(&nvbo->refcnt, 1);
> nvbo->base.device = dev;
> abi16_bo_info(&nvbo->base, &req);
> + nvbo->name = name;
> DRMLISTADD(&nvbo->head, &nvdev->bo_list);
> *pbo = &nvbo->base;
> return 0;
> @@ -458,7 +464,7 @@ nouveau_bo_wrap(struct nouveau_device *dev, uint32_t handle,
> struct nouveau_device_priv *nvdev = nouveau_device(dev);
> int ret;
> pthread_mutex_lock(&nvdev->lock);
> - ret = nouveau_bo_wrap_locked(dev, handle, pbo);
> + ret = nouveau_bo_wrap_locked(dev, handle, pbo, 0);
> pthread_mutex_unlock(&nvdev->lock);
> return ret;
> }
> @@ -468,24 +474,13 @@ nouveau_bo_name_ref(struct nouveau_device *dev, uint32_t name,
> struct nouveau_bo **pbo)
> {
> struct nouveau_device_priv *nvdev = nouveau_device(dev);
> - struct nouveau_bo_priv *nvbo;
> struct drm_gem_open req = { .name = name };
> int ret;
>
> pthread_mutex_lock(&nvdev->lock);
> - DRMLISTFOREACHENTRY(nvbo, &nvdev->bo_list, head) {
> - if (nvbo->name == name) {
> - *pbo = NULL;
> - nouveau_bo_ref(&nvbo->base, pbo);
> - pthread_mutex_unlock(&nvdev->lock);
> - return 0;
> - }
> - }
> -
> ret = drmIoctl(dev->fd, DRM_IOCTL_GEM_OPEN, &req);
> if (ret == 0) {
> - ret = nouveau_bo_wrap_locked(dev, req.handle, pbo);
> - nouveau_bo((*pbo))->name = name;
> + ret = nouveau_bo_wrap_locked(dev, req.handle, pbo, name);
> }
>
> pthread_mutex_unlock(&nvdev->lock);
> @@ -537,7 +532,7 @@ nouveau_bo_prime_handle_ref(struct nouveau_device *dev, int prime_fd,
> pthread_mutex_lock(&nvdev->lock);
> ret = drmPrimeFDToHandle(dev->fd, prime_fd, &handle);
> if (ret == 0) {
> - ret = nouveau_bo_wrap_locked(dev, handle, bo);
> + ret = nouveau_bo_wrap_locked(dev, handle, bo, 0);
> if (!ret) {
> struct nouveau_bo_priv *nvbo = nouveau_bo(*bo);
> if (!nvbo->name) {
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 37b8d3a..38e4094 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -28,6 +28,10 @@ if HAVE_TEGRA
> SUBDIRS += tegra
> endif
>
> +if HAVE_NOUVEAU
> +SUBDIRS += nouveau
> +endif
> +
> if HAVE_LIBUDEV
>
> check_LTLIBRARIES = libdrmtest.la
> diff --git a/tests/nouveau/.gitignore b/tests/nouveau/.gitignore
> new file mode 100644
> index 0000000..837bfb9
> --- /dev/null
> +++ b/tests/nouveau/.gitignore
> @@ -0,0 +1 @@
> +threaded
> diff --git a/tests/nouveau/Makefile.am b/tests/nouveau/Makefile.am
> new file mode 100644
> index 0000000..8e3392e
> --- /dev/null
> +++ b/tests/nouveau/Makefile.am
> @@ -0,0 +1,15 @@
> +AM_CPPFLAGS = \
> + -I$(top_srcdir)/include/drm \
> + -I$(top_srcdir)/nouveau \
> + -I$(top_srcdir)
> +
> +AM_CFLAGS = -Wall -Werror
Please use $(WARN_CFLAGS)
> +AM_LDFLAGS = -pthread
> +
I assume that adding -lpthread to the LDADD below will be better, but
I'm not 100% sure.
> +LDADD = -ldl \
> + ../../nouveau/libdrm_nouveau.la \
> + ../../libdrm.la
> +
Move -ldl at the bottom of the list so that it doesn't bite us in the future.
> +noinst_PROGRAMS = \
> + threaded
If you want you can add this to the installed tests, and/or make check
> +
> diff --git a/tests/nouveau/threaded.c b/tests/nouveau/threaded.c
> new file mode 100644
> index 0000000..490a7fa
> --- /dev/null
> +++ b/tests/nouveau/threaded.c
> @@ -0,0 +1,140 @@
> +/*
> + * Copyright © 2015 Canonical Ltd. (Maarten Lankhorst)
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifdef HAVE_CONFIG_H
> +# include "config.h"
> +#endif
> +
> +#include <sys/ioctl.h>
> +#include <dlfcn.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <pthread.h>
> +
> +#include "xf86drm.h"
> +#include "nouveau.h"
> +
> +static const char default_device[] = "/dev/dri/renderD128";
> +
Reuse the defines in xf86drm.h ?
> +static typeof(ioctl) *old_ioctl;
> +static int failed;
> +
> +static int import_fd;
> +
> +int ioctl(int fd, unsigned long request, ...)
> +{
> + va_list va;
> + int ret;
> + void *arg;
> +
> + va_start(va, request);
> + arg = va_arg(va, void *);
> + ret = old_ioctl(fd, request, arg);
> + va_end(va);
> +
> + if (ret < 0 && request == DRM_IOCTL_GEM_CLOSE && errno == EINVAL)
> + failed = 1;
> +
> + return ret;
> +}
> +
> +static void *
> +openclose(void *dev)
> +{
> + struct nouveau_device *nvdev = dev;
> + struct nouveau_bo *bo = NULL;
> + int i;
> +
> + for (i = 0; i < 100000; ++i) {
> + if (!nouveau_bo_prime_handle_ref(nvdev, import_fd, &bo))
> + nouveau_bo_ref(NULL, &bo);
> + }
> + return NULL;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + drmVersionPtr version;
> + const char *device;
> + int err, fd, fd2;
> + struct nouveau_device *nvdev, *nvdev2;
> + struct nouveau_bo *bo;
> +
> + old_ioctl = dlsym(RTLD_NEXT, "ioctl");
> +
> + pthread_t t1, t2;
> +
> + if (argc < 2)
> + device = default_device;
> + else
> + device = argv[1];
> +
> + fd = open(device, O_RDWR);
> + fd2 = open(device, O_RDWR);
> + if (fd < 0 || fd2 < 0)
> + return 1;
> +
> + version = drmGetVersion(fd);
> + if (version) {
> + printf("Version: %d.%d.%d\n", version->version_major,
> + version->version_minor, version->version_patchlevel);
> + printf(" Name: %s\n", version->name);
> + printf(" Date: %s\n", version->date);
> + printf(" Description: %s\n", version->desc);
> +
> + drmFreeVersion(version);
> + }
> +
> + err = nouveau_device_wrap(fd, 0, &nvdev);
> + if (!err)
> + err = nouveau_device_wrap(fd2, 0, &nvdev2);
> + if (err < 0)
> + return 1;
> +
> + err = nouveau_bo_new(nvdev2, NOUVEAU_BO_GART, 0, 4096, NULL, &bo);
> + if (!err)
> + err = nouveau_bo_set_prime(bo, &import_fd);
> +
> + if (!err) {
> + pthread_create(&t1, NULL, openclose, nvdev);
> + pthread_create(&t2, NULL, openclose, nvdev);
> + }
> +
> + pthread_join(t1, NULL);
> + pthread_join(t2, NULL);
> +
> + close(import_fd);
> + nouveau_bo_ref(NULL, &bo);
> +
> + nouveau_device_del(&nvdev2);
> + nouveau_device_del(&nvdev);
> + close(fd2);
> + close(fd);
> +
> + if (failed)
> + fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed with EINVAL,\n"
> + "race in opening/closing bo is likely.\n");
> +
> + return failed;
> +}
> diff --git a/xf86atomic.h b/xf86atomic.h
> index 8c4b696..66a8d9a 100644
> --- a/xf86atomic.h
> +++ b/xf86atomic.h
> @@ -49,7 +49,8 @@ typedef struct {
> # define atomic_read(x) ((x)->atomic)
> # define atomic_set(x, val) ((x)->atomic = (val))
> # define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
> -# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1)
> +# define atomic_inc_return(x) (__sync_add_and_fetch (&(x)->atomic, 1))
> +# define atomic_dec_and_test(x) (__sync_add_and_fetch (&(x)->atomic, -1) == 0)
The atomic_dec_and_test change seems like unrelated bugfix. Split it
out perhaps ?
Introduction of atomic_inc_return could/should be a separate commit as well.
> # define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v)))
> # define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v)))
> # define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
> @@ -68,6 +69,7 @@ typedef struct {
> # define atomic_read(x) AO_load_full(&(x)->atomic)
> # define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
> # define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
> +# define atomic_inc_return(x) (AO_fetch_and_add1_full(&(x)->atomic) + 1)
> # define atomic_add(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, (v)))
> # define atomic_dec(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, -(v)))
> # define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
> @@ -91,6 +93,7 @@ typedef struct { LIBDRM_ATOMIC_TYPE atomic; } atomic_t;
> # define atomic_read(x) (int) ((x)->atomic)
> # define atomic_set(x, val) ((x)->atomic = (LIBDRM_ATOMIC_TYPE)(val))
> # define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
> +# define atomic_inc_return (atomic_inc_uint_nv(&(x)->atomic))
> # define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 0)
> # define atomic_add(x, v) (atomic_add_int(&(x)->atomic, (v)))
> # define atomic_dec(x, v) (atomic_add_int(&(x)->atomic, -(v)))
> @@ -112,3 +115,4 @@ static inline int atomic_add_unless(atomic_t *v, int add, int unless)
> }
>
> #endif
> +
Extra white-space.
IMHO sending the series for dri-devel might be nice, for at least the
xf86atomic.h changes.
Cheers
Emil
P.S. Bless you dude for going through the lovely experience to fix this.
More information about the Nouveau
mailing list