[Intel-gfx] [PATCH i-g-t 07/10] gem_wsim: Log max and active working set sizes in verbose mode

Chris Wilson chris at chris-wilson.co.uk
Wed Jun 17 17:07:22 UTC 2020


Quoting Tvrtko Ursulin (2020-06-17 17:01:17)
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> It is useful to know how much memory workload is allocating.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>  benchmarks/gem_wsim.c | 100 +++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 95 insertions(+), 5 deletions(-)
> 
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index 025385a144b8..96ee923fb699 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -852,7 +852,8 @@ static uint64_t engine_list_mask(const char *_str)
>         return mask;
>  }
>  
> -static void allocate_working_set(struct workload *wrk, struct working_set *set);
> +static unsigned long
> +allocate_working_set(struct workload *wrk, struct working_set *set);
>  
>  static long __duration(long dur, double scale)
>  {
> @@ -1270,8 +1271,14 @@ add_step:
>          * Allocate shared working sets.
>          */
>         for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
> -               if (w->type == WORKINGSET && w->working_set.shared)
> -                       allocate_working_set(wrk, &w->working_set);
> +               if (w->type == WORKINGSET && w->working_set.shared) {
> +                       unsigned long total =
> +                               allocate_working_set(wrk, &w->working_set);
> +
> +                       if (verbose > 1)
> +                               printf("%u: %lu bytes in shared working set %u\n",
> +                                      wrk->id, total, w->working_set.id);
> +               }
>         }

A grand total might be nice, although that doesn't reflect usage, so it
might be misleading as to what the active RSS is at any time.
  
>         wrk->max_working_set_id = -1;
> @@ -1731,8 +1738,10 @@ get_buffer_size(struct workload *wrk, const struct work_buffer_size *sz)
>                        (sz->max + 1 - sz->min);
>  }
>  
> -static void allocate_working_set(struct workload *wrk, struct working_set *set)
> +static unsigned long
> +allocate_working_set(struct workload *wrk, struct working_set *set)
>  {
> +       unsigned long total = 0;
>         unsigned int i;
>  
>         set->handles = calloc(set->nr, sizeof(*set->handles));
> @@ -1741,7 +1750,82 @@ static void allocate_working_set(struct workload *wrk, struct working_set *set)
>         for (i = 0; i < set->nr; i++) {
>                 set->sizes[i].size = get_buffer_size(wrk, &set->sizes[i]);
>                 set->handles[i] = alloc_bo(fd, set->sizes[i].size);
> +               total += set->sizes[i].size;
> +       }
> +
> +       return total;
> +}
> +
> +static bool
> +find_dep(struct dep_entry *deps, unsigned int nr, struct dep_entry dep)
> +{
> +       unsigned int i;
> +
> +       for (i = 0; i < nr; i++) {
> +               if (deps[i].working_set == dep.working_set &&
> +                   deps[i].target == dep.target)
> +                       return true;
>         }
> +
> +       return false;
> +}
> +
> +static void measure_active_set(struct workload *wrk)
> +{
> +       unsigned long total = 0, batch_sizes = 0;
> +       struct dep_entry *deps = NULL;
> +       unsigned int nr = 0, i, j;
> +       struct w_step *w;
> +
> +       if (verbose < 3)
> +               return;
> +
> +       for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
> +               if (w->type != BATCH)
> +                       continue;
> +
> +               batch_sizes += w->bb_sz;
> +
> +               for (j = 0; j < w->data_deps.nr; j++) {
> +                       struct dep_entry *dep = &w->data_deps.list[j];
> +                       struct dep_entry _dep = *dep;
> +
> +                       if (dep->working_set == -1 && dep->target < 0) {
> +                               int idx = w->idx + dep->target;
> +
> +                               igt_assert(idx >= 0 && idx < w->idx);
> +                               igt_assert(wrk->steps[idx].type == BATCH);
> +
> +                               _dep.target = wrk->steps[idx].obj[0].handle;
> +                       }
> +
> +                       if (!find_dep(deps, nr, _dep)) {
> +                               if (dep->working_set == -1) {
> +                                       total += 4096;
> +                               } else {
> +                                       struct working_set *set;
> +
> +                                       igt_assert(dep->working_set <=
> +                                                  wrk->max_working_set_id);
> +
> +                                       set = wrk->working_sets[dep->working_set];
> +                                       igt_assert(set->nr);
> +                                       igt_assert(dep->target < set->nr);
> +                                       igt_assert(set->sizes[dep->target].size);
> +
> +                                       total += set->sizes[dep->target].size;
> +                               }
> +
> +                               deps = realloc(deps, (nr + 1) * sizeof(*deps));
> +                               deps[nr++] = *dep;
> +                       }
> +               }
> +       }

So a sum of the sizes of all the unique handles used by all the steps.
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
-Chris


More information about the Intel-gfx mailing list