[PATCH] drm/radeon: Adding UVD handle basis fps estimation v2

Alex Deucher alexdeucher at gmail.com
Mon Aug 11 07:52:21 PDT 2014


On Mon, Aug 11, 2014 at 5:08 AM, Christian König
<deathsimple at vodafone.de> wrote:
> Am 07.08.2014 um 21:43 schrieb Alex Deucher:
>
>> On Thu, Aug 7, 2014 at 11:32 AM, Christian König
>> <deathsimple at vodafone.de> wrote:
>>>
>>> Am 07.08.2014 um 16:32 schrieb Alex Deucher:
>>>
>>>> On Thu, Aug 7, 2014 at 7:33 AM, Christian König
>>>> <deathsimple at vodafone.de>
>>>> wrote:
>>>>>
>>>>> From: Marco A Benatto <marco.antonio.780 at gmail.com>
>>>>>
>>>>> Adding a Frames Per Second estimation logic on UVD handles
>>>>> when it has being used. This estimation is per handle basis
>>>>> and will help on DPM profile calculation.
>>>>>
>>>>> v2 (chk): fix timestamp type, move functions around and
>>>>>             cleanup code a bit.
>>>>
>>>> Will this really help much?  I thought the problem was mainly due to
>>>> sclk and mclk for post processing.
>>>
>>>
>>> It should at least handle the UVD side for upclocking when you get a lot
>>> of
>>> streams / fps. And at on my NI the patch seems to do exactly that.
>>>
>>> Switching sclk and mclk for post processing is a different task, and I
>>> actually have no idea what to do with them.
>>
>> At this point we always choose the plain UVD state anyway so this
>> patch would only take effect if we re-enabled the dynamic UVD state
>> selection.
>
>
> Hui? I thought we already re-enabled the dynamic UVD state selection, but
> double checking this I found it disabled again.
>
> What was the problem with that? Looks like I somehow missed the discussion
> around it.

We did, but after doing so a number of people complained about a
regression on IRC because when apps like xmbc enabled post processing,
performance went down.

Alex


>
> Christian.
>
>
>> For the post processing, we probably need a hint we can
>> pass to the driver in the CS ioctl to denote what state we need.
>> Although if we did that, this could would largely be moot.  That said,
>> newer asics support dynamic UVD clocks so we really only need
>> something like that for older asics and I guess VCE.
>>
>> Alex
>>
>>> Christian.
>>>
>>>
>>>> Alex
>>>>
>>>>> Signed-off-by: Marco A Benatto <marco.antonio.780 at gmail.com>
>>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/radeon/radeon.h     | 10 ++++++
>>>>>    drivers/gpu/drm/radeon/radeon_uvd.c | 64
>>>>> +++++++++++++++++++++++++++++++++----
>>>>>    2 files changed, 68 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>>>> b/drivers/gpu/drm/radeon/radeon.h
>>>>> index 9e1732e..e92f6cb 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>>>> @@ -1617,6 +1617,15 @@ int radeon_pm_get_type_index(struct
>>>>> radeon_device
>>>>> *rdev,
>>>>>    #define RADEON_UVD_STACK_SIZE  (1024*1024)
>>>>>    #define RADEON_UVD_HEAP_SIZE   (1024*1024)
>>>>>
>>>>> +#define RADEON_UVD_FPS_EVENTS_MAX 8
>>>>> +#define RADEON_UVD_DEFAULT_FPS 60
>>>>> +
>>>>> +struct radeon_uvd_fps {
>>>>> +       uint64_t        timestamp;
>>>>> +       uint8_t         event_index;
>>>>> +       uint8_t         events[RADEON_UVD_FPS_EVENTS_MAX];
>>>>> +};
>>>>> +
>>>>>    struct radeon_uvd {
>>>>>           struct radeon_bo        *vcpu_bo;
>>>>>           void                    *cpu_addr;
>>>>> @@ -1626,6 +1635,7 @@ struct radeon_uvd {
>>>>>           struct drm_file         *filp[RADEON_MAX_UVD_HANDLES];
>>>>>           unsigned                img_size[RADEON_MAX_UVD_HANDLES];
>>>>>           struct delayed_work     idle_work;
>>>>> +       struct radeon_uvd_fps   fps_info[RADEON_MAX_UVD_HANDLES];
>>>>>    };
>>>>>
>>>>>    int radeon_uvd_init(struct radeon_device *rdev);
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c
>>>>> b/drivers/gpu/drm/radeon/radeon_uvd.c
>>>>> index 6bf55ec..ef5667a 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon_uvd.c
>>>>> +++ b/drivers/gpu/drm/radeon/radeon_uvd.c
>>>>> @@ -237,6 +237,51 @@ void radeon_uvd_force_into_uvd_segment(struct
>>>>> radeon_bo *rbo)
>>>>>           rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
>>>>>    }
>>>>>
>>>>> +static void radeon_uvd_fps_clear_events(struct radeon_device *rdev,
>>>>> int
>>>>> idx)
>>>>> +{
>>>>> +       struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
>>>>> +       unsigned i;
>>>>> +
>>>>> +       fps->timestamp = jiffies_64;
>>>>> +       fps->event_index = 0;
>>>>> +       for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++)
>>>>> +               fps->events[i] = 0;
>>>>> +}
>>>>> +
>>>>> +static void radeon_uvd_fps_note_event(struct radeon_device *rdev, int
>>>>> idx)
>>>>> +{
>>>>> +       struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
>>>>> +       uint64_t timestamp = jiffies_64;
>>>>> +       unsigned rate = 0;
>>>>> +
>>>>> +       uint8_t index = fps->event_index++;
>>>>> +       fps->event_index %= RADEON_UVD_FPS_EVENTS_MAX;
>>>>> +
>>>>> +       rate = div64_u64(HZ, max(timestamp - fps->timestamp, 1ULL));
>>>>> +
>>>>> +       fps->timestamp = timestamp;
>>>>> +       fps->events[index] = min(rate, 120u);
>>>>> +}
>>>>> +
>>>>> +static unsigned radeon_uvd_estimate_fps(struct radeon_device *rdev,
>>>>> int
>>>>> idx)
>>>>> +{
>>>>> +       struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
>>>>> +       unsigned i, valid = 0, count = 0;
>>>>> +
>>>>> +       for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++) {
>>>>> +               /* We should ignore zero values */
>>>>> +               if (fps->events[i] != 0) {
>>>>> +                       count += fps->events[i];
>>>>> +                       valid++;
>>>>> +               }
>>>>> +       }
>>>>> +
>>>>> +       if (valid > 0)
>>>>> +               return count / valid;
>>>>> +       else
>>>>> +               return RADEON_UVD_DEFAULT_FPS;
>>>>> +}
>>>>> +
>>>>>    void radeon_uvd_free_handles(struct radeon_device *rdev, struct
>>>>> drm_file *filp)
>>>>>    {
>>>>>           int i, r;
>>>>> @@ -419,8 +464,10 @@ static int radeon_uvd_cs_msg(struct
>>>>> radeon_cs_parser
>>>>> *p, struct radeon_bo *bo,
>>>>>
>>>>>           /* create or decode, validate the handle */
>>>>>           for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
>>>>> -               if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
>>>>> +               if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
>>>>> +                       radeon_uvd_fps_note_event(p->rdev, i);
>>>>>                           return 0;
>>>>> +               }
>>>>>           }
>>>>>
>>>>>           /* handle not found try to alloc a new one */
>>>>> @@ -428,6 +475,7 @@ static int radeon_uvd_cs_msg(struct
>>>>> radeon_cs_parser
>>>>> *p, struct radeon_bo *bo,
>>>>>                   if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0,
>>>>> handle)) {
>>>>>                           p->rdev->uvd.filp[i] = p->filp;
>>>>>                           p->rdev->uvd.img_size[i] = img_size;
>>>>> +                       radeon_uvd_fps_clear_events(p->rdev, i);
>>>>>                           return 0;
>>>>>                   }
>>>>>           }
>>>>> @@ -763,7 +811,7 @@ int radeon_uvd_get_destroy_msg(struct radeon_device
>>>>> *rdev, int ring,
>>>>>    static void radeon_uvd_count_handles(struct radeon_device *rdev,
>>>>>                                        unsigned *sd, unsigned *hd)
>>>>>    {
>>>>> -       unsigned i;
>>>>> +       unsigned i, fps_rate = 0;
>>>>>
>>>>>           *sd = 0;
>>>>>           *hd = 0;
>>>>> @@ -772,10 +820,13 @@ static void radeon_uvd_count_handles(struct
>>>>> radeon_device *rdev,
>>>>>                   if (!atomic_read(&rdev->uvd.handles[i]))
>>>>>                           continue;
>>>>>
>>>>> -               if (rdev->uvd.img_size[i] >= 720*576)
>>>>> -                       ++(*hd);
>>>>> -               else
>>>>> -                       ++(*sd);
>>>>> +               fps_rate = radeon_uvd_estimate_fps(rdev, i);
>>>>> +
>>>>> +               if (rdev->uvd.img_size[i] >= 720*576) {
>>>>> +                       (*hd) += fps_rate > 30 ? 1 : 2;
>>>>> +               } else {
>>>>> +                       (*sd) += fps_rate > 30 ? 1 : 2;
>>>>> +               }
>>>>>           }
>>>>>    }
>>>>>
>>>>> @@ -805,6 +856,7 @@ void radeon_uvd_note_usage(struct radeon_device
>>>>> *rdev)
>>>>>           set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
>>>>>
>>>>> msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
>>>>>
>>>>> +
>>>>>           if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
>>>>> rdev->pm.dpm_enabled) {
>>>>>                   unsigned hd = 0, sd = 0;
>>>>>                   radeon_uvd_count_handles(rdev, &sd, &hd);
>>>>> --
>>>>> 1.9.1
>>>>>
>


More information about the dri-devel mailing list