[PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue

Oded Gabbay oded.gabbay at gmail.com
Fri May 11 20:07:43 UTC 2018


Applied this patch instead of the original, thanks.

On Wed, Apr 25, 2018 at 12:42 AM, Felix Kuehling <felix.kuehling at amd.com> wrote:
> A minor update to this patch is attached. The rest of the series is
> unchanged and rebased cleanly on 4.17-rc2 on my system.
>
> Regards,
>   Felix
>
>
> On 2018-04-10 05:33 PM, Felix Kuehling wrote:
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c         | 10 +++++++++
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c     | 25 +++++++++++++++++------
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h     |  7 ++++++-
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c |  9 ++++++++
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  9 ++++++++
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  |  9 ++++++++
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h             |  1 +
>>  7 files changed, 63 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> index 36c9269e..5d7cccc 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value)
>>       }
>>  }
>>
>> +void write_kernel_doorbell64(void __iomem *db, u64 value)
>> +{
>> +     if (db) {
>> +             WARN(((unsigned long)db & 7) != 0,
>> +                  "Unaligned 64-bit doorbell");
>> +             writeq(value, (u64 __iomem *)db);
>> +             pr_debug("writing %llu to doorbell address 0x%p\n", value, db);
>> +     }
>> +}
>> +
>>  unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>>                                       struct kfd_process *process,
>>                                       unsigned int doorbell_id)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> index 9f38161..476951d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
>>       kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
>>       kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
>>
>> -     retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
>> +     retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
>>                                       &kq->wptr_mem);
>>
>>       if (retval != 0)
>> @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>>       size_t available_size;
>>       size_t queue_size_dwords;
>>       uint32_t wptr, rptr;
>> +     uint64_t wptr64;
>>       unsigned int *queue_address;
>>
>>       /* When rptr == wptr, the buffer is empty.
>> @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>>        * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
>>        */
>>       rptr = *kq->rptr_kernel;
>> -     wptr = *kq->wptr_kernel;
>> +     wptr = kq->pending_wptr;
>> +     wptr64 = kq->pending_wptr64;
>>       queue_address = (unsigned int *)kq->pq_kernel_addr;
>>       queue_size_dwords = kq->queue->properties.queue_size / 4;
>>
>> @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>>               while (wptr > 0) {
>>                       queue_address[wptr] = kq->nop_packet;
>>                       wptr = (wptr + 1) % queue_size_dwords;
>> +                     wptr64++;
>>               }
>>       }
>>
>>       *buffer_ptr = &queue_address[wptr];
>>       kq->pending_wptr = wptr + packet_size_in_dwords;
>> +     kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
>>
>>       return 0;
>>
>> @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
>>       pr_debug("\n");
>>  #endif
>>
>> -     *kq->wptr_kernel = kq->pending_wptr;
>> -     write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
>> -                             kq->pending_wptr);
>> +     kq->ops_asic_specific.submit_packet(kq);
>>  }
>>
>>  static void rollback_packet(struct kernel_queue *kq)
>>  {
>> -     kq->pending_wptr = *kq->wptr_kernel;
>> +     if (kq->dev->device_info->doorbell_size == 8) {
>> +             kq->pending_wptr64 = *kq->wptr64_kernel;
>> +             kq->pending_wptr = *kq->wptr_kernel %
>> +                     (kq->queue->properties.queue_size / 4);
>> +     } else {
>> +             kq->pending_wptr = *kq->wptr_kernel;
>> +     }
>>  }
>>
>>  struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>> @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>>       case CHIP_HAWAII:
>>               kernel_queue_init_cik(&kq->ops_asic_specific);
>>               break;
>> +
>> +     case CHIP_VEGA10:
>> +     case CHIP_RAVEN:
>> +             kernel_queue_init_v9(&kq->ops_asic_specific);
>> +             break;
>>       default:
>>               WARN(1, "Unexpected ASIC family %u",
>>                    dev->device_info->asic_family);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
>> index 5940531..97aff20 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
>> @@ -72,6 +72,7 @@ struct kernel_queue {
>>       struct kfd_dev          *dev;
>>       struct mqd_manager      *mqd;
>>       struct queue            *queue;
>> +     uint64_t                pending_wptr64;
>>       uint32_t                pending_wptr;
>>       unsigned int            nop_packet;
>>
>> @@ -79,7 +80,10 @@ struct kernel_queue {
>>       uint32_t                *rptr_kernel;
>>       uint64_t                rptr_gpu_addr;
>>       struct kfd_mem_obj      *wptr_mem;
>> -     uint32_t                *wptr_kernel;
>> +     union {
>> +             uint64_t        *wptr64_kernel;
>> +             uint32_t        *wptr_kernel;
>> +     };
>>       uint64_t                wptr_gpu_addr;
>>       struct kfd_mem_obj      *pq;
>>       uint64_t                pq_gpu_addr;
>> @@ -97,5 +101,6 @@ struct kernel_queue {
>>
>>  void kernel_queue_init_cik(struct kernel_queue_ops *ops);
>>  void kernel_queue_init_vi(struct kernel_queue_ops *ops);
>> +void kernel_queue_init_v9(struct kernel_queue_ops *ops);
>>
>>  #endif /* KFD_KERNEL_QUEUE_H_ */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
>> index a90eb44..19e54ac 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
>> @@ -26,11 +26,13 @@
>>  static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
>>                       enum kfd_queue_type type, unsigned int queue_size);
>>  static void uninitialize_cik(struct kernel_queue *kq);
>> +static void submit_packet_cik(struct kernel_queue *kq);
>>
>>  void kernel_queue_init_cik(struct kernel_queue_ops *ops)
>>  {
>>       ops->initialize = initialize_cik;
>>       ops->uninitialize = uninitialize_cik;
>> +     ops->submit_packet = submit_packet_cik;
>>  }
>>
>>  static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
>> @@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
>>  static void uninitialize_cik(struct kernel_queue *kq)
>>  {
>>  }
>> +
>> +static void submit_packet_cik(struct kernel_queue *kq)
>> +{
>> +     *kq->wptr_kernel = kq->pending_wptr;
>> +     write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
>> +                             kq->pending_wptr);
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>> index ece7d59..684a3bf 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>> @@ -29,11 +29,13 @@
>>  static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
>>                       enum kfd_queue_type type, unsigned int queue_size);
>>  static void uninitialize_v9(struct kernel_queue *kq);
>> +static void submit_packet_v9(struct kernel_queue *kq);
>>
>>  void kernel_queue_init_v9(struct kernel_queue_ops *ops)
>>  {
>>       ops->initialize = initialize_v9;
>>       ops->uninitialize = uninitialize_v9;
>> +     ops->submit_packet = submit_packet_v9;
>>  }
>>
>>  static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
>> @@ -58,6 +60,13 @@ static void uninitialize_v9(struct kernel_queue *kq)
>>       kfd_gtt_sa_free(kq->dev, kq->eop_mem);
>>  }
>>
>> +static void submit_packet_v9(struct kernel_queue *kq)
>> +{
>> +     *kq->wptr64_kernel = kq->pending_wptr64;
>> +     write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
>> +                             kq->pending_wptr64);
>> +}
>> +
>>  static int pm_map_process_v9(struct packet_manager *pm,
>>               uint32_t *buffer, struct qcm_process_device *qpd)
>>  {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
>> index f9019ef..bf20c6d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
>> @@ -29,11 +29,13 @@
>>  static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
>>                       enum kfd_queue_type type, unsigned int queue_size);
>>  static void uninitialize_vi(struct kernel_queue *kq);
>> +static void submit_packet_vi(struct kernel_queue *kq);
>>
>>  void kernel_queue_init_vi(struct kernel_queue_ops *ops)
>>  {
>>       ops->initialize = initialize_vi;
>>       ops->uninitialize = uninitialize_vi;
>> +     ops->submit_packet = submit_packet_vi;
>>  }
>>
>>  static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
>> @@ -58,6 +60,13 @@ static void uninitialize_vi(struct kernel_queue *kq)
>>       kfd_gtt_sa_free(kq->dev, kq->eop_mem);
>>  }
>>
>> +static void submit_packet_vi(struct kernel_queue *kq)
>> +{
>> +     *kq->wptr_kernel = kq->pending_wptr;
>> +     write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
>> +                             kq->pending_wptr);
>> +}
>> +
>>  unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
>>  {
>>       union PM4_MES_TYPE_3_HEADER header;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 06b210b..10d5b54 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -769,6 +769,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
>>  void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
>>  u32 read_kernel_doorbell(u32 __iomem *db);
>>  void write_kernel_doorbell(void __iomem *db, u32 value);
>> +void write_kernel_doorbell64(void __iomem *db, u64 value);
>>  unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>>                                       struct kfd_process *process,
>>                                       unsigned int doorbell_id);
>


More information about the amd-gfx mailing list