[PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers (v2)

Christian König ckoenig.leichtzumerken at gmail.com
Wed Aug 25 11:09:00 UTC 2021


I suggest to give this command here at least a try (remembered the name 
after a moment):

pahole drivers/gpu/drm/amd/amdgpu/amdgpu.o -C amdgpu_debugfs_regs2_iocdata

It has a rather nifty output with padding holes, byte addresses, cache 
lines etc for your structure.

Christian.

Am 25.08.21 um 13:04 schrieb Tom St Denis:
> I tested it by forcing bit patterns into the ioctl data and printing 
> it out in the kernel log.  I'm not siloed into it one way or the 
> other.  I'll just change it to u32.
>
> On Wed, Aug 25, 2021 at 7:03 AM Christian König 
> <ckoenig.leichtzumerken at gmail.com 
> <mailto:ckoenig.leichtzumerken at gmail.com>> wrote:
>
>     Using u8 is ok as well, just make sure that you don't have any
>     hidden padding.
>
>     Nirmoy had a tool to double check for paddings which I once more
>     forgot the name of.
>
>     Christian.
>
>     Am 25.08.21 um 12:40 schrieb Tom St Denis:
>>     The struct works as is but I'll change them to u32.  The offset
>>     is an artefact of the fact this was an IOCTL originally.  I'm
>>     working both ends in parallel trying to make the changes at the
>>     same time because I'm only submitting the kernel patch if I've
>>     tested it in userspace.
>>
>>     I'll send a v4 in a bit this morning....
>>
>>     Tom
>>
>>     On Wed, Aug 25, 2021 at 2:35 AM Christian König
>>     <ckoenig.leichtzumerken at gmail.com
>>     <mailto:ckoenig.leichtzumerken at gmail.com>> wrote:
>>
>>
>>
>>         Am 24.08.21 um 15:36 schrieb Tom St Denis:
>>         > This new debugfs interface uses an IOCTL interface in order
>>         to pass
>>         > along state information like SRBM and GRBM bank switching. 
>>         This
>>         > new interface also allows a full 32-bit MMIO address range
>>         which
>>         > the previous didn't.  With this new design we have room to grow
>>         > the flexibility of the file as need be.
>>         >
>>         > (v2): Move read/write to .read/.write, fix style, add comment
>>         >        for IOCTL data structure
>>         >
>>         > Signed-off-by: Tom St Denis <tom.stdenis at amd.com
>>         <mailto:tom.stdenis at amd.com>>
>>         > ---
>>         >   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 162
>>         ++++++++++++++++++++
>>         >   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++
>>         >   2 files changed, 194 insertions(+)
>>         >
>>         > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>         b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>         > index 277128846dd1..8e8f5743c8f5 100644
>>         > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>         > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>         > @@ -279,6 +279,156 @@ static ssize_t
>>         amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
>>         >       return amdgpu_debugfs_process_reg_op(false, f, (char
>>         __user *)buf, size, pos);
>>         >   }
>>         >
>>         > +static int amdgpu_debugfs_regs2_open(struct inode *inode,
>>         struct file *file)
>>         > +{
>>         > +     struct amdgpu_debugfs_regs2_data *rd;
>>         > +
>>         > +     rd = kzalloc(sizeof *rd, GFP_KERNEL);
>>         > +     if (!rd)
>>         > +             return -ENOMEM;
>>         > +     rd->adev = file_inode(file)->i_private;
>>         > +     file->private_data = rd;
>>         > +
>>         > +     return 0;
>>         > +}
>>         > +
>>         > +static int amdgpu_debugfs_regs2_release(struct inode
>>         *inode, struct file *file)
>>         > +{
>>         > +     kfree(file->private_data);
>>         > +     return 0;
>>         > +}
>>         > +
>>         > +static ssize_t amdgpu_debugfs_regs2_op(struct file *f,
>>         char __user *buf, size_t size, int write_en)
>>         > +{
>>         > +     struct amdgpu_debugfs_regs2_data *rd = f->private_data;
>>         > +     struct amdgpu_device *adev = rd->adev;
>>         > +     ssize_t result = 0;
>>         > +     int r;
>>         > +     uint32_t value;
>>         > +
>>         > +     if (size & 0x3 || rd->state.offset & 0x3)
>>         > +             return -EINVAL;
>>         > +
>>         > +     if (rd->state.id.use_grbm) {
>>         > +             if (rd->state.id.grbm.se
>>         <http://state.id.grbm.se> == 0x3FF)
>>         > +                     rd->state.id.grbm.se
>>         <http://state.id.grbm.se> = 0xFFFFFFFF;
>>         > +             if (rd->state.id.grbm.sh
>>         <http://state.id.grbm.sh> == 0x3FF)
>>         > +                     rd->state.id.grbm.sh
>>         <http://state.id.grbm.sh> = 0xFFFFFFFF;
>>         > +             if (rd->state.id.grbm.instance == 0x3FF)
>>         > +  rd->state.id.grbm.instance = 0xFFFFFFFF;
>>         > +     }
>>         > +
>>         > +     r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>         > +     if (r < 0) {
>>         > +  pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>         > +             return r;
>>         > +     }
>>         > +
>>         > +     r = amdgpu_virt_enable_access_debugfs(adev);
>>         > +     if (r < 0) {
>>         > +  pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>         > +             return r;
>>         > +     }
>>         > +
>>         > +     if (rd->state.id.use_grbm) {
>>         > +             if ((rd->state.id.grbm.sh
>>         <http://state.id.grbm.sh> != 0xFFFFFFFF &&
>>         rd->state.id.grbm.sh <http://state.id.grbm.sh> >=
>>         adev->gfx.config.max_sh_per_se) ||
>>         > +                 (rd->state.id.grbm.se
>>         <http://state.id.grbm.se> != 0xFFFFFFFF &&
>>         rd->state.id.grbm.se <http://state.id.grbm.se> >=
>>         adev->gfx.config.max_shader_engines)) {
>>         > +  pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>         > +  pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>         > +  amdgpu_virt_disable_access_debugfs(adev);
>>         > +                     return -EINVAL;
>>         > +             }
>>         > +  mutex_lock(&adev->grbm_idx_mutex);
>>         > +             amdgpu_gfx_select_se_sh(adev,
>>         rd->state.id.grbm.se <http://state.id.grbm.se>,
>>         > +              rd->state.id.grbm.sh <http://state.id.grbm.sh>,
>>         > +              rd->state.id.grbm.instance);
>>         > +     }
>>         > +
>>         > +     if (rd->state.id.use_srbm) {
>>         > +  mutex_lock(&adev->srbm_mutex);
>>         > +             amdgpu_gfx_select_me_pipe_q(adev,
>>         rd->state.id.srbm.me <http://state.id.srbm.me>,
>>         rd->state.id.srbm.pipe,
>>         > +                      rd->state.id.srbm.queue,
>>         rd->state.id.srbm.vmid);
>>         > +     }
>>         > +
>>         > +     if (rd->state.id.pg_lock)
>>         > +             mutex_lock(&adev->pm.mutex);
>>         > +
>>         > +     while (size) {
>>         > +             if (!write_en) {
>>         > +                     value = RREG32(rd->state.offset >> 2);
>>         > +                     r = put_user(value, (uint32_t *)buf);
>>         > +             } else {
>>         > +                     r = get_user(value, (uint32_t *)buf);
>>         > +                     if (!r)
>>         > +  amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> 2, value);
>>         > +             }
>>         > +             if (r) {
>>         > +                     result = r;
>>         > +                     goto end;
>>         > +             }
>>         > +             rd->state.offset += 4;
>>         > +             size -= 4;
>>         > +             result += 4;
>>         > +             buf += 4;
>>         > +     }
>>         > +end:
>>         > +     if (rd->state.id.use_grbm) {
>>         > +             amdgpu_gfx_select_se_sh(adev, 0xffffffff,
>>         0xffffffff, 0xffffffff);
>>         > +  mutex_unlock(&adev->grbm_idx_mutex);
>>         > +     }
>>         > +
>>         > +     if (rd->state.id.use_srbm) {
>>         > +             amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
>>         > +  mutex_unlock(&adev->srbm_mutex);
>>         > +     }
>>         > +
>>         > +     if (rd->state.id.pg_lock)
>>         > +  mutex_unlock(&adev->pm.mutex);
>>         > +
>>         > +     // in umr (the likely user of this) flags are set per
>>         file operation
>>         > +     // which means they're never "unset" explicitly.  To
>>         avoid breaking
>>         > +     // this convention we unset the flags after each
>>         operation
>>         > +     // flags are for a single call (need to be set for
>>         every read/write)
>>         > +     rd->state.id.use_grbm = 0;
>>         > +     rd->state.id.use_srbm = 0;
>>         > +     rd->state.id.pg_lock  = 0;
>>         > +
>>         > +  pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>         > +  pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>         > +
>>         > +     amdgpu_virt_disable_access_debugfs(adev);
>>         > +     return result;
>>         > +}
>>         > +
>>         > +static long amdgpu_debugfs_regs2_ioctl(struct file *f,
>>         unsigned int cmd, unsigned long data)
>>         > +{
>>         > +     struct amdgpu_debugfs_regs2_data *rd = f->private_data;
>>         > +
>>         > +     switch (cmd) {
>>         > +     case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
>>         > +             if (copy_from_user(&rd->state.id
>>         <http://state.id>, (struct amdgpu_debugfs_regs2_iocdata
>>         *)data, sizeof rd->state.id <http://state.id>))
>>         > +                     return -EINVAL;
>>         > +             break;
>>         > +     default:
>>         > +             return -EINVAL;
>>         > +     }
>>         > +     return 0;
>>         > +}
>>         > +
>>         > +static ssize_t amdgpu_debugfs_regs2_read(struct file *f,
>>         char __user *buf, size_t size, loff_t *pos)
>>         > +{
>>         > +     struct amdgpu_debugfs_regs2_data *rd = f->private_data;
>>         > +     rd->state.offset = *pos;
>>         > +     return amdgpu_debugfs_regs2_op(f, buf, size, 0);
>>         > +}
>>         > +
>>         > +static ssize_t amdgpu_debugfs_regs2_write(struct file *f,
>>         const char __user *buf, size_t size, loff_t *pos)
>>         > +{
>>         > +     struct amdgpu_debugfs_regs2_data *rd = f->private_data;
>>         > +     rd->state.offset = *pos;
>>         > +     return amdgpu_debugfs_regs2_op(f, (char __user *)buf,
>>         size, 1);
>>         > +}
>>         > +
>>         >
>>         >   /**
>>         >    * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
>>         > @@ -1091,6 +1241,16 @@ static ssize_t
>>         amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
>>         >       return result;
>>         >   }
>>         >
>>         > +static const struct file_operations
>>         amdgpu_debugfs_regs2_fops = {
>>         > +     .owner = THIS_MODULE,
>>         > +     .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl,
>>         > +     .read = amdgpu_debugfs_regs2_read,
>>         > +     .write = amdgpu_debugfs_regs2_write,
>>         > +     .open = amdgpu_debugfs_regs2_open,
>>         > +     .release = amdgpu_debugfs_regs2_release,
>>         > +     .llseek = default_llseek
>>         > +};
>>         > +
>>         >   static const struct file_operations
>>         amdgpu_debugfs_regs_fops = {
>>         >       .owner = THIS_MODULE,
>>         >       .read = amdgpu_debugfs_regs_read,
>>         > @@ -1148,6 +1308,7 @@ static const struct file_operations
>>         amdgpu_debugfs_gfxoff_fops = {
>>         >
>>         >   static const struct file_operations *debugfs_regs[] = {
>>         >       &amdgpu_debugfs_regs_fops,
>>         > +     &amdgpu_debugfs_regs2_fops,
>>         >       &amdgpu_debugfs_regs_didt_fops,
>>         >       &amdgpu_debugfs_regs_pcie_fops,
>>         >       &amdgpu_debugfs_regs_smc_fops,
>>         > @@ -1160,6 +1321,7 @@ static const struct file_operations
>>         *debugfs_regs[] = {
>>         >
>>         >   static const char *debugfs_regs_names[] = {
>>         >       "amdgpu_regs",
>>         > +     "amdgpu_regs2",
>>         >       "amdgpu_regs_didt",
>>         >       "amdgpu_regs_pcie",
>>         >       "amdgpu_regs_smc",
>>         > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>>         b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>>         > index 141a8474e24f..ec044df5d428 100644
>>         > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>>         > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>>         > @@ -22,6 +22,8 @@
>>         >    * OTHER DEALINGS IN THE SOFTWARE.
>>         >    *
>>         >    */
>>         > +#include <linux/ioctl.h>
>>         > +#include <uapi/drm/amdgpu_drm.h>
>>         >
>>         >   /*
>>         >    * Debugfs
>>         > @@ -38,3 +40,33 @@ void amdgpu_debugfs_fence_init(struct
>>         amdgpu_device *adev);
>>         >   void amdgpu_debugfs_firmware_init(struct amdgpu_device
>>         *adev);
>>         >   void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
>>         >   int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
>>         > +
>>         > +/*
>>         > + * MMIO debugfs IOCTL structure
>>         > + */
>>         > +struct amdgpu_debugfs_regs2_iocdata {
>>         > +     __u8 use_srbm, use_grbm, pg_lock;
>>
>>         You should consider using u32 here as well or add explicitly
>>         padding.
>>
>>         > +     struct {
>>         > +             __u32 se, sh, instance;
>>         > +     } grbm;
>>         > +     struct {
>>         > +             __u32 me, pipe, queue, vmid;
>>         > +     } srbm;
>>         > +};
>>         > +
>>         > +/*
>>         > + * MMIO debugfs state data (per file* handle)
>>         > + */
>>         > +struct amdgpu_debugfs_regs2_data {
>>         > +     struct amdgpu_device *adev;
>>         > +     struct {
>>         > +             struct amdgpu_debugfs_regs2_iocdata id;
>>         > +             __u32 offset;
>>
>>         What is the offset good for here?
>>
>>         Regards,
>>         Christian.
>>
>>         > +     } state;
>>         > +};
>>         > +
>>         > +enum AMDGPU_DEBUGFS_REGS2_CMDS {
>>         > +     AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
>>         > +};
>>         > +
>>         > +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20,
>>         AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct
>>         amdgpu_debugfs_regs2_iocdata)
>>
>

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20210825/43491f9d/attachment-0001.htm>


More information about the amd-gfx mailing list