[PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue
Oded Gabbay
oded.gabbay at gmail.com
Fri May 11 20:07:43 UTC 2018
Applied this patch instead of the original, thanks.
On Wed, Apr 25, 2018 at 12:42 AM, Felix Kuehling <felix.kuehling at amd.com> wrote:
> A minor update to this patch is attached. The rest of the series is
> unchanged and rebased cleanly on 4.17-rc2 on my system.
>
> Regards,
> Felix
>
>
> On 2018-04-10 05:33 PM, Felix Kuehling wrote:
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 +++++++++
>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 +++++++++++++++++------
>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 7 ++++++-
>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 9 ++++++++
>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 9 ++++++++
>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 9 ++++++++
>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
>> 7 files changed, 63 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> index 36c9269e..5d7cccc 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value)
>> }
>> }
>>
>> +void write_kernel_doorbell64(void __iomem *db, u64 value)
>> +{
>> + if (db) {
>> + WARN(((unsigned long)db & 7) != 0,
>> + "Unaligned 64-bit doorbell");
>> + writeq(value, (u64 __iomem *)db);
>> + pr_debug("writing %llu to doorbell address 0x%p\n", value, db);
>> + }
>> +}
>> +
>> unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>> struct kfd_process *process,
>> unsigned int doorbell_id)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> index 9f38161..476951d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
>> kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
>> kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
>>
>> - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
>> + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
>> &kq->wptr_mem);
>>
>> if (retval != 0)
>> @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>> size_t available_size;
>> size_t queue_size_dwords;
>> uint32_t wptr, rptr;
>> + uint64_t wptr64;
>> unsigned int *queue_address;
>>
>> /* When rptr == wptr, the buffer is empty.
>> @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>> * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
>> */
>> rptr = *kq->rptr_kernel;
>> - wptr = *kq->wptr_kernel;
>> + wptr = kq->pending_wptr;
>> + wptr64 = kq->pending_wptr64;
>> queue_address = (unsigned int *)kq->pq_kernel_addr;
>> queue_size_dwords = kq->queue->properties.queue_size / 4;
>>
>> @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>> while (wptr > 0) {
>> queue_address[wptr] = kq->nop_packet;
>> wptr = (wptr + 1) % queue_size_dwords;
>> + wptr64++;
>> }
>> }
>>
>> *buffer_ptr = &queue_address[wptr];
>> kq->pending_wptr = wptr + packet_size_in_dwords;
>> + kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
>>
>> return 0;
>>
>> @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
>> pr_debug("\n");
>> #endif
>>
>> - *kq->wptr_kernel = kq->pending_wptr;
>> - write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
>> - kq->pending_wptr);
>> + kq->ops_asic_specific.submit_packet(kq);
>> }
>>
>> static void rollback_packet(struct kernel_queue *kq)
>> {
>> - kq->pending_wptr = *kq->wptr_kernel;
>> + if (kq->dev->device_info->doorbell_size == 8) {
>> + kq->pending_wptr64 = *kq->wptr64_kernel;
>> + kq->pending_wptr = *kq->wptr_kernel %
>> + (kq->queue->properties.queue_size / 4);
>> + } else {
>> + kq->pending_wptr = *kq->wptr_kernel;
>> + }
>> }
>>
>> struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>> @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>> case CHIP_HAWAII:
>> kernel_queue_init_cik(&kq->ops_asic_specific);
>> break;
>> +
>> + case CHIP_VEGA10:
>> + case CHIP_RAVEN:
>> + kernel_queue_init_v9(&kq->ops_asic_specific);
>> + break;
>> default:
>> WARN(1, "Unexpected ASIC family %u",
>> dev->device_info->asic_family);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
>> index 5940531..97aff20 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
>> @@ -72,6 +72,7 @@ struct kernel_queue {
>> struct kfd_dev *dev;
>> struct mqd_manager *mqd;
>> struct queue *queue;
>> + uint64_t pending_wptr64;
>> uint32_t pending_wptr;
>> unsigned int nop_packet;
>>
>> @@ -79,7 +80,10 @@ struct kernel_queue {
>> uint32_t *rptr_kernel;
>> uint64_t rptr_gpu_addr;
>> struct kfd_mem_obj *wptr_mem;
>> - uint32_t *wptr_kernel;
>> + union {
>> + uint64_t *wptr64_kernel;
>> + uint32_t *wptr_kernel;
>> + };
>> uint64_t wptr_gpu_addr;
>> struct kfd_mem_obj *pq;
>> uint64_t pq_gpu_addr;
>> @@ -97,5 +101,6 @@ struct kernel_queue {
>>
>> void kernel_queue_init_cik(struct kernel_queue_ops *ops);
>> void kernel_queue_init_vi(struct kernel_queue_ops *ops);
>> +void kernel_queue_init_v9(struct kernel_queue_ops *ops);
>>
>> #endif /* KFD_KERNEL_QUEUE_H_ */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
>> index a90eb44..19e54ac 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
>> @@ -26,11 +26,13 @@
>> static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
>> enum kfd_queue_type type, unsigned int queue_size);
>> static void uninitialize_cik(struct kernel_queue *kq);
>> +static void submit_packet_cik(struct kernel_queue *kq);
>>
>> void kernel_queue_init_cik(struct kernel_queue_ops *ops)
>> {
>> ops->initialize = initialize_cik;
>> ops->uninitialize = uninitialize_cik;
>> + ops->submit_packet = submit_packet_cik;
>> }
>>
>> static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
>> @@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
>> static void uninitialize_cik(struct kernel_queue *kq)
>> {
>> }
>> +
>> +static void submit_packet_cik(struct kernel_queue *kq)
>> +{
>> + *kq->wptr_kernel = kq->pending_wptr;
>> + write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
>> + kq->pending_wptr);
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>> index ece7d59..684a3bf 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>> @@ -29,11 +29,13 @@
>> static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
>> enum kfd_queue_type type, unsigned int queue_size);
>> static void uninitialize_v9(struct kernel_queue *kq);
>> +static void submit_packet_v9(struct kernel_queue *kq);
>>
>> void kernel_queue_init_v9(struct kernel_queue_ops *ops)
>> {
>> ops->initialize = initialize_v9;
>> ops->uninitialize = uninitialize_v9;
>> + ops->submit_packet = submit_packet_v9;
>> }
>>
>> static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
>> @@ -58,6 +60,13 @@ static void uninitialize_v9(struct kernel_queue *kq)
>> kfd_gtt_sa_free(kq->dev, kq->eop_mem);
>> }
>>
>> +static void submit_packet_v9(struct kernel_queue *kq)
>> +{
>> + *kq->wptr64_kernel = kq->pending_wptr64;
>> + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
>> + kq->pending_wptr64);
>> +}
>> +
>> static int pm_map_process_v9(struct packet_manager *pm,
>> uint32_t *buffer, struct qcm_process_device *qpd)
>> {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
>> index f9019ef..bf20c6d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
>> @@ -29,11 +29,13 @@
>> static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
>> enum kfd_queue_type type, unsigned int queue_size);
>> static void uninitialize_vi(struct kernel_queue *kq);
>> +static void submit_packet_vi(struct kernel_queue *kq);
>>
>> void kernel_queue_init_vi(struct kernel_queue_ops *ops)
>> {
>> ops->initialize = initialize_vi;
>> ops->uninitialize = uninitialize_vi;
>> + ops->submit_packet = submit_packet_vi;
>> }
>>
>> static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
>> @@ -58,6 +60,13 @@ static void uninitialize_vi(struct kernel_queue *kq)
>> kfd_gtt_sa_free(kq->dev, kq->eop_mem);
>> }
>>
>> +static void submit_packet_vi(struct kernel_queue *kq)
>> +{
>> + *kq->wptr_kernel = kq->pending_wptr;
>> + write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
>> + kq->pending_wptr);
>> +}
>> +
>> unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
>> {
>> union PM4_MES_TYPE_3_HEADER header;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 06b210b..10d5b54 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -769,6 +769,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
>> void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
>> u32 read_kernel_doorbell(u32 __iomem *db);
>> void write_kernel_doorbell(void __iomem *db, u32 value);
>> +void write_kernel_doorbell64(void __iomem *db, u64 value);
>> unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>> struct kfd_process *process,
>> unsigned int doorbell_id);
>
More information about the amd-gfx mailing list