[Intel-gfx] [PATCH] intel: Add more intermediate sizes of cache buckets between powers of 2.
Eric Anholt
eric at anholt.net
Mon Jun 7 19:35:38 CEST 2010
On Mon, 7 Jun 2010 13:54:05 +1000 (EST), "Robert Lowery" <rglowery at exemail.com.au> wrote:
> > We had two cases recently where the rounding to powers of two hurt
> > badly: 4:2:0 YUV HD video frames would round up from 2.2MB to 4MB,
> > Urban Terror was hitting aperture size limitations. Mipmap trees for
> > power of two sizes will land right in the middle between two cache
> > buckets.
> >
> > By giving a few more sizes between powers of two, Urban Terror on my
> > 945 ends up consuming 207MB of GEM objects instead of 272MB.
> > ---
> > intel/intel_bufmgr_gem.c | 64
> > +++++++++++++++++++++++++++++++++++-----------
> > 1 files changed, 49 insertions(+), 15 deletions(-)
> >
> > diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> > index b76fd7e..c3e189e 100644
> > --- a/intel/intel_bufmgr_gem.c
> > +++ b/intel/intel_bufmgr_gem.c
> > @@ -66,6 +66,8 @@
> > fprintf(stderr, __VA_ARGS__); \
> > } while (0)
> >
> > +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
> > +
> > typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
> >
> > struct drm_intel_gem_bo_bucket {
> > @@ -73,10 +75,6 @@ struct drm_intel_gem_bo_bucket {
> > unsigned long size;
> > };
> >
> > -/* Only cache objects up to 64MB. Bigger than that, and the rounding of
> > the
> > - * size makes many operations fail that wouldn't otherwise.
> > - */
> > -#define DRM_INTEL_GEM_BO_BUCKETS 14
> > typedef struct _drm_intel_bufmgr_gem {
> > drm_intel_bufmgr bufmgr;
> >
> > @@ -93,7 +91,8 @@ typedef struct _drm_intel_bufmgr_gem {
> > int exec_count;
> >
> > /** Array of lists of cached gem objects of power-of-two sizes */
> > - struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
> > + struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
> > + int num_buckets;
> >
> > uint64_t gtt_size;
> > int available_fences;
> > @@ -285,7 +284,7 @@ drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem
> > *bufmgr_gem,
> > {
> > int i;
> >
> > - for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > + for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> > struct drm_intel_gem_bo_bucket *bucket =
> > &bufmgr_gem->cache_bucket[i];
> > if (bucket->size >= size) {
> > @@ -822,7 +821,7 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem
> > *bufmgr_gem, time_t time)
> > {
> > int i;
> >
> > - for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > + for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> > struct drm_intel_gem_bo_bucket *bucket =
> > &bufmgr_gem->cache_bucket[i];
> >
> > @@ -1250,7 +1249,7 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr
> > *bufmgr)
> > pthread_mutex_destroy(&bufmgr_gem->lock);
> >
> > /* Free any cached buffer objects we were going to reuse */
> > - for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > + for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> > struct drm_intel_gem_bo_bucket *bucket =
> > &bufmgr_gem->cache_bucket[i];
> > drm_intel_bo_gem *bo_gem;
> > @@ -1960,6 +1959,46 @@ drm_intel_gem_bo_references(drm_intel_bo *bo,
> > drm_intel_bo *target_bo)
> > return 0;
> > }
> >
> > +static void
> > +add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
> > +{
> > + unsigned int i = bufmgr_gem->num_buckets;
> > +
> > + assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
> > +
> > + DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
> > + bufmgr_gem->cache_bucket[i].size = size;
> > + bufmgr_gem->num_buckets++;
> > +}
> > +
> > +static void
> > +init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
> > +{
> > + unsigned long size, cache_max_size = 64 * 1024 * 1024;
> > +
> > + /* Initialize the linked lists for BO reuse cache. */
> > + for (size = 4096; size <= cache_max_size; size *= 2) {
> > + add_bucket(bufmgr_gem, size);
> > +
> > + /* OK, so power of two buckets was too wasteful of
> > + * memory. Give 3 other sizes between each power of
> > + * two, to hopefully cover things accurately enough.
> > + * (The alternative is probably to just go for exact
> > + * matching of sizes, and assume that for things like
> > + * composited window resize the tiled width/height
> > + * alignment and rounding of sizes to pages will get
> > + * us useful cache hit rates anyway)
> > + */
> > + if (size == 8192) {
> > + add_bucket(bufmgr_gem, size + size / 2);
> > + } else if (size < cache_max_size) {
> > + add_bucket(bufmgr_gem, size + size * 1 / 4);
> > + add_bucket(bufmgr_gem, size + size * 2 / 4);
> > + add_bucket(bufmgr_gem, size + size * 3 / 4);
> > + }
> > + }
> > +}
> > +
>
> Are bucket sizes that are not a multiple of 4096 supported and/or worthwhile?
>
> The above code will create buckets size 5120, 6144 and 7168 in the size =
> 4096 case.
>
> Apologies for the noise if I'm off the mark here.
Yeah, krh noted this bug on IRC. I'm currently looking at the alternate
patch, which does not use buckets at all.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 197 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20100607/f256a71b/attachment.sig>
More information about the Intel-gfx mailing list