[RFC] Rebasing the IDR

Daniel Vetter daniel.vetter at ffwll.ch
Thu Nov 30 19:56:43 UTC 2017


Adding dri-devel, I think a pile of those are in drm.

On Thu, Nov 30, 2017 at 6:36 PM, Matthew Wilcox <willy at infradead.org> wrote:
> About 40 of the approximately 180 users of the IDR in the kernel are
> "1-based" instead of "0-based".  That is, they never want to have ID 0
> allocated; they want to see IDs allocated between 1 and N.  Usually, that's
> expressed like this:
>
>         /* Get the user-visible handle using idr. */
>         ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_KERNEL);
>
> The current implementation of this grieves me.  You see, we mark each
> node of the radix tree according to whether it has any free entries
> or not, and entry 0 is always free!  If we've already allocated 10,000
> entries from this IDR, and see this call, we'll walk all the way down
> the left side of the tree looking for entry 1, get to the bottom,
> see that entries 1-63 are allocated, then walk up to the next level,
> see that 64-4095 are allocated, walk up to the next level, see that
> 8192-12287 has a free entry, and then start walking down again.
>
> It'd be somewhere around two to three times quicker to know not to
> look down the left hand side of the tree, see that 1-8191 are used and
> start looking in the range 8192-12287.  I considered a function like
> idr_reserve(idr, N) to reserve the first N entries (we have at least one
> consumer in the tree that is 2-based, not 1-based), but that struck me
> as inelegant.  We also have the cool optimisation that if there's only
> one element in the radix tree at offset 0, then we don't even need
> to allocate a node to store it, we just store it right in the head.
> And that optimisation was never available to the 1-based users.
>
> I've come up with this solution instead, idr_base.  It's free in terms
> of memory consumption on 64-bit as it fits in the gap at the end of the
> struct idr.  Essentially, we just subtract off idr_base when looking
> up the ID, and add it back on when reporting the ID.  We need to do some
> saturating arithmetic in idr_get_next(), because starting a walk at 4
> billion or negative 8 quintillion doesn't work out terribly well.
>
> It does require the user to call idr_init_base() instead of idr_init().
> That should be a relatively small conversion effort.  Opinions?  Feedback?
> Better test cases for the test-suite (ahem)?

Just quick context on why we do this: We need to marshal/demarshal
NULL in a ton of our ioctls, mapping that to 0 is natural.

And yes I very much like this, and totally fine with trading an
idr_init_base when we can nuke the explicit index ranges at every
alloc side instead. So if you give me an idr_alloc function that
doesn't take a range as icing, that would be great.

Cheers, Daniel

>
> diff --git a/include/linux/idr.h b/include/linux/idr.h
> index 91d27a9bcdf4..a657b1f925f8 100644
> --- a/include/linux/idr.h
> +++ b/include/linux/idr.h
> @@ -18,6 +18,7 @@
>
>  struct idr {
>         struct radix_tree_root  idr_rt;
> +       unsigned int            idr_base;
>         unsigned int            idr_next;
>  };
>
> @@ -30,9 +31,10 @@ struct idr {
>  /* Set the IDR flag and the IDR_FREE tag */
>  #define IDR_RT_MARKER          ((__force gfp_t)(3 << __GFP_BITS_SHIFT))
>
> -#define IDR_INIT                                                       \
> -{                                                                      \
> -       .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER)                        \
> +#define IDR_INIT {                                                     \
> +       .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER),                       \
> +       .idr_base = 0,                                                  \
> +       .idr_next = 0,                                                  \
>  }
>  #define DEFINE_IDR(name)       struct idr name = IDR_INIT
>
> @@ -123,12 +125,20 @@ static inline int __must_check idr_alloc_u32(struct idr *idr, void *ptr,
>
>  static inline void *idr_remove(struct idr *idr, unsigned long id)
>  {
> -       return radix_tree_delete_item(&idr->idr_rt, id, NULL);
> +       return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL);
>  }
>
>  static inline void idr_init(struct idr *idr)
>  {
>         INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
> +       idr->idr_base = 0;
> +       idr->idr_next = 0;
> +}
> +
> +static inline void idr_init_base(struct idr *idr, int base)
> +{
> +       INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
> +       idr->idr_base = base;
>         idr->idr_next = 0;
>  }
>
> @@ -163,7 +173,7 @@ static inline void idr_preload_end(void)
>   */
>  static inline void *idr_find(const struct idr *idr, unsigned long id)
>  {
> -       return radix_tree_lookup(&idr->idr_rt, id);
> +       return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base);
>  }
>
>  /**
> diff --git a/lib/idr.c b/lib/idr.c
> index 1aaeb5a8c426..a1d3531bd62f 100644
> --- a/lib/idr.c
> +++ b/lib/idr.c
> @@ -33,21 +33,22 @@ int idr_alloc_ul(struct idr *idr, void *ptr, unsigned long *nextid,
>  {
>         struct radix_tree_iter iter;
>         void __rcu **slot;
> +       int base = idr->idr_base;
>
>         if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
>                 return -EINVAL;
>         if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
>                 idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
>
> -       radix_tree_iter_init(&iter, *nextid);
> -       slot = idr_get_free(&idr->idr_rt, &iter, gfp, max);
> +       radix_tree_iter_init(&iter, *nextid - base);
> +       slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base);
>         if (IS_ERR(slot))
>                 return PTR_ERR(slot);
>
>         radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
>         radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
>
> -       *nextid = iter.index;
> +       *nextid = iter.index + base;
>         return 0;
>  }
>  EXPORT_SYMBOL_GPL(idr_alloc_ul);
> @@ -143,13 +144,14 @@ int idr_for_each(const struct idr *idr,
>  {
>         struct radix_tree_iter iter;
>         void __rcu **slot;
> +       int base = idr->idr_base;
>
>         radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
>                 int ret;
>
>                 if (WARN_ON_ONCE(iter.index > INT_MAX))
>                         break;
> -               ret = fn(iter.index, rcu_dereference_raw(*slot), data);
> +               ret = fn(iter.index + base, rcu_dereference_raw(*slot), data);
>                 if (ret)
>                         return ret;
>         }
> @@ -172,15 +174,19 @@ void *idr_get_next(struct idr *idr, int *nextid)
>  {
>         struct radix_tree_iter iter;
>         void __rcu **slot;
> +       int base = idr->idr_base;
> +       int id = *nextid;
>
> -       slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
> +       id = (id < base) ? 0 : id - base;
> +       slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
>         if (!slot)
>                 return NULL;
> +       id = iter.index + base;
>
> -       if (WARN_ON_ONCE(iter.index > INT_MAX))
> +       if (WARN_ON_ONCE(id > INT_MAX))
>                 return NULL;
>
> -       *nextid = iter.index;
> +       *nextid = id;
>         return rcu_dereference_raw(*slot);
>  }
>  EXPORT_SYMBOL(idr_get_next);
> @@ -199,12 +205,15 @@ void *idr_get_next_ul(struct idr *idr, unsigned long *nextid)
>  {
>         struct radix_tree_iter iter;
>         void __rcu **slot;
> +       unsigned long base = idr->idr_base;
> +       unsigned long id = *nextid;
>
> -       slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
> +       id = (id < base) ? 0 : id - base;
> +       slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
>         if (!slot)
>                 return NULL;
>
> -       *nextid = iter.index;
> +       *nextid = iter.index + base;
>         return rcu_dereference_raw(*slot);
>  }
>  EXPORT_SYMBOL(idr_get_next_ul);
> @@ -231,6 +240,7 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
>
>         if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
>                 return ERR_PTR(-EINVAL);
> +       id -= idr->idr_base;
>
>         entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
>         if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
> diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
> index 36437ade429c..44ef9eba5a7a 100644
> --- a/tools/testing/radix-tree/idr-test.c
> +++ b/tools/testing/radix-tree/idr-test.c
> @@ -153,11 +153,12 @@ void idr_nowait_test(void)
>         idr_destroy(&idr);
>  }
>
> -void idr_get_next_test(void)
> +void idr_get_next_test(int base)
>  {
>         unsigned long i;
>         int nextid;
>         DEFINE_IDR(idr);
> +       idr_init_base(&idr, base);
>
>         int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
>
> @@ -244,7 +245,9 @@ void idr_checks(void)
>         idr_alloc_test();
>         idr_null_test();
>         idr_nowait_test();
> -       idr_get_next_test();
> +       idr_get_next_test(0);
> +       idr_get_next_test(1);
> +       idr_get_next_test(4);
>  }
>
>  /*



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


More information about the dri-devel mailing list