[PATCH] drm/amdgpu: reserve GDS resources statically
Marek Olšák
maraeo at gmail.com
Thu Sep 13 15:27:29 UTC 2018
That's OK. We don't need IBs to get the same VMID.
Marek
On Thu, Sep 13, 2018 at 4:40 AM, Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
> As discussed internally that doesn't work because threads don't necessarily
> get the same VMID assigned.
>
> Christian.
>
> Am 12.09.2018 um 22:33 schrieb Marek Olšák:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> I've chosen to do it like this because it's easy and allows an arbitrary
>> number of processes.
>>
>> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 10 --
>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 -
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 20 ----
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 19 +--
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 24 +---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 -
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 7 --
>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 -
>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 +--
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 ----
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 -
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 -
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 ----------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 --
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +-----
>> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 125 +++++---------------
>> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 123 +++++--------------
>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 124 ++++++-------------
>> include/uapi/drm/amdgpu_drm.h | 15 +--
>> 19 files changed, 109 insertions(+), 519 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> index b80243d3972e..7264a4930b88 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> @@ -71,23 +71,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev,
>> struct drm_file *filp,
>> / sizeof(struct amdgpu_bo_list_entry))
>> return -EINVAL;
>> size = sizeof(struct amdgpu_bo_list);
>> size += num_entries * sizeof(struct amdgpu_bo_list_entry);
>> list = kvmalloc(size, GFP_KERNEL);
>> if (!list)
>> return -ENOMEM;
>> kref_init(&list->refcount);
>> - list->gds_obj = adev->gds.gds_gfx_bo;
>> - list->gws_obj = adev->gds.gws_gfx_bo;
>> - list->oa_obj = adev->gds.oa_gfx_bo;
>> array = amdgpu_bo_list_array_entry(list, 0);
>> memset(array, 0, num_entries * sizeof(struct
>> amdgpu_bo_list_entry));
>> for (i = 0; i < num_entries; ++i) {
>> struct amdgpu_bo_list_entry *entry;
>> struct drm_gem_object *gobj;
>> struct amdgpu_bo *bo;
>> struct mm_struct *usermm;
>> @@ -111,27 +108,20 @@ int amdgpu_bo_list_create(struct amdgpu_device
>> *adev, struct drm_file *filp,
>> } else {
>> entry = &array[last_entry++];
>> }
>> entry->robj = bo;
>> entry->priority = min(info[i].bo_priority,
>> AMDGPU_BO_LIST_MAX_PRIORITY);
>> entry->tv.bo = &entry->robj->tbo;
>> entry->tv.shared = !entry->robj->prime_shared_count;
>> - if (entry->robj->preferred_domains ==
>> AMDGPU_GEM_DOMAIN_GDS)
>> - list->gds_obj = entry->robj;
>> - if (entry->robj->preferred_domains ==
>> AMDGPU_GEM_DOMAIN_GWS)
>> - list->gws_obj = entry->robj;
>> - if (entry->robj->preferred_domains ==
>> AMDGPU_GEM_DOMAIN_OA)
>> - list->oa_obj = entry->robj;
>> -
>> total_size += amdgpu_bo_size(entry->robj);
>> trace_amdgpu_bo_list_set(list, entry->robj);
>> }
>> list->first_userptr = first_userptr;
>> list->num_entries = num_entries;
>> trace_amdgpu_cs_bo_status(list->num_entries, total_size);
>> *result = list;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> index 61b089768e1c..30f12a60aa28 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> @@ -36,23 +36,20 @@ struct amdgpu_bo_list_entry {
>> struct ttm_validate_buffer tv;
>> struct amdgpu_bo_va *bo_va;
>> uint32_t priority;
>> struct page **user_pages;
>> int user_invalidated;
>> };
>> struct amdgpu_bo_list {
>> struct rcu_head rhead;
>> struct kref refcount;
>> - struct amdgpu_bo *gds_obj;
>> - struct amdgpu_bo *gws_obj;
>> - struct amdgpu_bo *oa_obj;
>> unsigned first_userptr;
>> unsigned num_entries;
>> };
>> int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
>> struct amdgpu_bo_list **result);
>> void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>> struct list_head *validated);
>> void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>> int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 1081fd00b059..88b58facf29e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -565,23 +565,20 @@ static int amdgpu_cs_list_validate(struct
>> amdgpu_cs_parser *p,
>> return 0;
>> }
>> static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>> union drm_amdgpu_cs *cs)
>> {
>> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>> struct amdgpu_vm *vm = &fpriv->vm;
>> struct amdgpu_bo_list_entry *e;
>> struct list_head duplicates;
>> - struct amdgpu_bo *gds;
>> - struct amdgpu_bo *gws;
>> - struct amdgpu_bo *oa;
>> unsigned tries = 10;
>> int r;
>> INIT_LIST_HEAD(&p->validated);
>> /* p->bo_list could already be assigned if
>> AMDGPU_CHUNK_ID_BO_HANDLES is present */
>> if (cs->in.bo_list_handle) {
>> if (p->bo_list)
>> return -EINVAL;
>> @@ -705,40 +702,23 @@ static int amdgpu_cs_parser_bos(struct
>> amdgpu_cs_parser *p,
>> r = amdgpu_cs_list_validate(p, &p->validated);
>> if (r) {
>> DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
>> goto error_validate;
>> }
>> amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>> p->bytes_moved_vis);
>> - gds = p->bo_list->gds_obj;
>> - gws = p->bo_list->gws_obj;
>> - oa = p->bo_list->oa_obj;
>> -
>> amdgpu_bo_list_for_each_entry(e, p->bo_list)
>> e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
>> - if (gds) {
>> - p->job->gds_base = amdgpu_bo_gpu_offset(gds);
>> - p->job->gds_size = amdgpu_bo_size(gds);
>> - }
>> - if (gws) {
>> - p->job->gws_base = amdgpu_bo_gpu_offset(gws);
>> - p->job->gws_size = amdgpu_bo_size(gws);
>> - }
>> - if (oa) {
>> - p->job->oa_base = amdgpu_bo_gpu_offset(oa);
>> - p->job->oa_size = amdgpu_bo_size(oa);
>> - }
>> -
>> if (!r && p->uf_entry.robj) {
>> struct amdgpu_bo *uf = p->uf_entry.robj;
>> r = amdgpu_ttm_alloc_gart(&uf->tbo);
>> p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>> }
>> error_validate:
>> if (r)
>> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> index e73728d90388..69ba25c2e921 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> @@ -17,48 +17,33 @@
>> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES
>> OR
>> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> * OTHER DEALINGS IN THE SOFTWARE.
>> *
>> */
>> #ifndef __AMDGPU_GDS_H__
>> #define __AMDGPU_GDS_H__
>> -/* Because TTM request that alloacted buffer should be PAGE_SIZE
>> aligned,
>> - * we should report GDS/GWS/OA size as PAGE_SIZE aligned
>> - * */
>> -#define AMDGPU_GDS_SHIFT 2
>> -#define AMDGPU_GWS_SHIFT PAGE_SHIFT
>> -#define AMDGPU_OA_SHIFT PAGE_SHIFT
>> -
>> struct amdgpu_ring;
>> struct amdgpu_bo;
>> struct amdgpu_gds_asic_info {
>> uint32_t total_size;
>> - uint32_t gfx_partition_size;
>> - uint32_t cs_partition_size;
>> + uint32_t gfx_size_per_vmid;
>> + uint32_t kfd_size_per_vmid;
>> };
>> struct amdgpu_gds {
>> struct amdgpu_gds_asic_info mem;
>> struct amdgpu_gds_asic_info gws;
>> struct amdgpu_gds_asic_info oa;
>> - /* At present, GDS, GWS and OA resources for gfx (graphics)
>> - * is always pre-allocated and available for graphics operation.
>> - * Such resource is shared between all gfx clients.
>> - * TODO: move this operation to user space
>> - * */
>> - struct amdgpu_bo* gds_gfx_bo;
>> - struct amdgpu_bo* gws_gfx_bo;
>> - struct amdgpu_bo* oa_gfx_bo;
>> };
>> struct amdgpu_gds_reg_offset {
>> uint32_t mem_base;
>> uint32_t mem_size;
>> uint32_t gws;
>> uint32_t oa;
>> };
>> #endif /* __AMDGPU_GDS_H__ */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index d30a0838851b..c87ad4b4d0b6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -223,43 +223,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>> void *data,
>> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>> AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>> AMDGPU_GEM_CREATE_VRAM_CLEARED |
>> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
>> AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
>> return -EINVAL;
>> /* reject invalid gem domains */
>> - if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
>> + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
>> + AMDGPU_GEM_DOMAIN_GTT |
>> + AMDGPU_GEM_DOMAIN_VRAM))
>> return -EINVAL;
>> - /* create a gem object to contain this object in */
>> - if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
>> - AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
>> - if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
>> - /* if gds bo is created from user space, it must
>> be
>> - * passed to bo list
>> - */
>> - DRM_ERROR("GDS bo cannot be per-vm-bo\n");
>> - return -EINVAL;
>> - }
>> - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
>> - if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
>> - size = size << AMDGPU_GDS_SHIFT;
>> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
>> - size = size << AMDGPU_GWS_SHIFT;
>> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
>> - size = size << AMDGPU_OA_SHIFT;
>> - else
>> - return -EINVAL;
>> - }
>> size = roundup(size, PAGE_SIZE);
>> if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
>> r = amdgpu_bo_reserve(vm->root.base.bo, false);
>> if (r)
>> return r;
>> resv = vm->root.base.bo->tbo.resv;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>> index 3a072a7a39f0..c2e6a1a11d7f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>> @@ -516,26 +516,20 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device
>> *adev,
>> * Reset saved GDW, GWS and OA to force switch on next flush.
>> */
>> void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
>> unsigned vmid)
>> {
>> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>> struct amdgpu_vmid *id = &id_mgr->ids[vmid];
>> mutex_lock(&id_mgr->lock);
>> id->owner = 0;
>> - id->gds_base = 0;
>> - id->gds_size = 0;
>> - id->gws_base = 0;
>> - id->gws_size = 0;
>> - id->oa_base = 0;
>> - id->oa_size = 0;
>> mutex_unlock(&id_mgr->lock);
>> }
>> /**
>> * amdgpu_vmid_reset_all - reset VMID to zero
>> *
>> * @adev: amdgpu device structure
>> *
>> * Reset VMID to force flush on next use
>> */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>> index 7625419f0fc2..06078e665532 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>> @@ -44,27 +44,20 @@ struct amdgpu_vmid {
>> struct amdgpu_sync active;
>> struct dma_fence *last_flush;
>> uint64_t owner;
>> uint64_t pd_gpu_addr;
>> /* last flushed PD/PT update */
>> struct dma_fence *flushed_updates;
>> uint32_t current_gpu_reset_count;
>> - uint32_t gds_base;
>> - uint32_t gds_size;
>> - uint32_t gws_base;
>> - uint32_t gws_size;
>> - uint32_t oa_base;
>> - uint32_t oa_size;
>> -
>> unsigned pasid;
>> struct dma_fence *pasid_mapping;
>> };
>> struct amdgpu_vmid_mgr {
>> struct mutex lock;
>> unsigned num_ids;
>> struct list_head ids_lru;
>> struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
>> atomic_t reserved_vmid_num;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> index 57cfe78a262b..3db553f6ad01 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> @@ -42,23 +42,20 @@ struct amdgpu_job {
>> struct amdgpu_sync sched_sync;
>> struct amdgpu_ib *ibs;
>> struct dma_fence *fence; /* the hw fence */
>> uint32_t preamble_status;
>> uint32_t num_ibs;
>> void *owner;
>> bool vm_needs_flush;
>> uint64_t vm_pd_addr;
>> unsigned vmid;
>> unsigned pasid;
>> - uint32_t gds_base, gds_size;
>> - uint32_t gws_base, gws_size;
>> - uint32_t oa_base, oa_size;
>> uint32_t vram_lost_counter;
>> /* user fence handling */
>> uint64_t uf_addr;
>> uint64_t uf_sequence;
>> };
>> int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
>> struct amdgpu_job **job, struct amdgpu_vm *vm);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 29ac3873eeb0..209954290954 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -517,27 +517,27 @@ static int amdgpu_info_ioctl(struct drm_device *dev,
>> void *data, struct drm_file
>> case AMDGPU_INFO_VIS_VRAM_USAGE:
>> ui64 =
>> amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
>> return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT :
>> 0;
>> case AMDGPU_INFO_GTT_USAGE:
>> ui64 =
>> amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
>> return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT :
>> 0;
>> case AMDGPU_INFO_GDS_CONFIG: {
>> struct drm_amdgpu_info_gds gds_info;
>> memset(&gds_info, 0, sizeof(gds_info));
>> - gds_info.gds_gfx_partition_size =
>> adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT;
>> - gds_info.compute_partition_size =
>> adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT;
>> - gds_info.gds_total_size = adev->gds.mem.total_size >>
>> AMDGPU_GDS_SHIFT;
>> - gds_info.gws_per_gfx_partition =
>> adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT;
>> - gds_info.gws_per_compute_partition =
>> adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT;
>> - gds_info.oa_per_gfx_partition =
>> adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT;
>> - gds_info.oa_per_compute_partition =
>> adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT;
>> + gds_info.gds_gfx_partition_size =
>> adev->gds.mem.gfx_size_per_vmid;
>> + gds_info.compute_partition_size =
>> adev->gds.mem.kfd_size_per_vmid;
>> + gds_info.gds_total_size = adev->gds.mem.total_size;
>> + gds_info.gws_per_gfx_partition =
>> adev->gds.gws.gfx_size_per_vmid;
>> + gds_info.gws_per_compute_partition =
>> adev->gds.gws.kfd_size_per_vmid;
>> + gds_info.oa_per_gfx_partition =
>> adev->gds.oa.gfx_size_per_vmid;
>> + gds_info.oa_per_compute_partition =
>> adev->gds.oa.kfd_size_per_vmid;
>> return copy_to_user(out, &gds_info,
>> min((size_t)size, sizeof(gds_info))) ?
>> -EFAULT : 0;
>> }
>> case AMDGPU_INFO_VRAM_GTT: {
>> struct drm_amdgpu_info_vram_gtt vram_gtt;
>> vram_gtt.vram_size = adev->gmc.real_vram_size -
>> atomic64_read(&adev->vram_pin_size);
>> vram_gtt.vram_cpu_accessible_size =
>> adev->gmc.visible_vram_size -
>> atomic64_read(&adev->visible_pin_size);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index de990bdcdd6c..76770a8c29a5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -178,41 +178,20 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>> places[c].lpfn = 0;
>> places[c].flags = TTM_PL_FLAG_SYSTEM;
>> if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
>> places[c].flags |= TTM_PL_FLAG_WC |
>> TTM_PL_FLAG_UNCACHED;
>> else
>> places[c].flags |= TTM_PL_FLAG_CACHED;
>> c++;
>> }
>> - if (domain & AMDGPU_GEM_DOMAIN_GDS) {
>> - places[c].fpfn = 0;
>> - places[c].lpfn = 0;
>> - places[c].flags = TTM_PL_FLAG_UNCACHED |
>> AMDGPU_PL_FLAG_GDS;
>> - c++;
>> - }
>> -
>> - if (domain & AMDGPU_GEM_DOMAIN_GWS) {
>> - places[c].fpfn = 0;
>> - places[c].lpfn = 0;
>> - places[c].flags = TTM_PL_FLAG_UNCACHED |
>> AMDGPU_PL_FLAG_GWS;
>> - c++;
>> - }
>> -
>> - if (domain & AMDGPU_GEM_DOMAIN_OA) {
>> - places[c].fpfn = 0;
>> - places[c].lpfn = 0;
>> - places[c].flags = TTM_PL_FLAG_UNCACHED |
>> AMDGPU_PL_FLAG_OA;
>> - c++;
>> - }
>> -
>> if (!c) {
>> places[c].fpfn = 0;
>> places[c].lpfn = 0;
>> places[c].flags = TTM_PL_MASK_CACHING |
>> TTM_PL_FLAG_SYSTEM;
>> c++;
>> }
>> BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS);
>> placement->num_placement = c;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 907fdf46d895..e089964cbcb7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -120,26 +120,20 @@ static inline struct amdgpu_bo
>> *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
>> */
>> static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
>> {
>> switch (mem_type) {
>> case TTM_PL_VRAM:
>> return AMDGPU_GEM_DOMAIN_VRAM;
>> case TTM_PL_TT:
>> return AMDGPU_GEM_DOMAIN_GTT;
>> case TTM_PL_SYSTEM:
>> return AMDGPU_GEM_DOMAIN_CPU;
>> - case AMDGPU_PL_GDS:
>> - return AMDGPU_GEM_DOMAIN_GDS;
>> - case AMDGPU_PL_GWS:
>> - return AMDGPU_GEM_DOMAIN_GWS;
>> - case AMDGPU_PL_OA:
>> - return AMDGPU_GEM_DOMAIN_OA;
>> default:
>> break;
>> }
>> return 0;
>> }
>> /**
>> * amdgpu_bo_reserve - reserve bo
>> * @bo: bo structure
>> * @no_intr: don't return -ERESTARTSYS on pending signal
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index 9cc239968e40..f6ea9604e611 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -130,24 +130,20 @@ struct amdgpu_ring_funcs {
>> /* command emit functions */
>> void (*emit_ib)(struct amdgpu_ring *ring,
>> struct amdgpu_ib *ib,
>> unsigned vmid, bool ctx_switch);
>> void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
>> uint64_t seq, unsigned flags);
>> void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
>> void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
>> uint64_t pd_addr);
>> void (*emit_hdp_flush)(struct amdgpu_ring *ring);
>> - void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
>> - uint32_t gds_base, uint32_t gds_size,
>> - uint32_t gws_base, uint32_t gws_size,
>> - uint32_t oa_base, uint32_t oa_size);
>> /* testing functions */
>> int (*test_ring)(struct amdgpu_ring *ring);
>> int (*test_ib)(struct amdgpu_ring *ring, long timeout);
>> /* insert NOP packets */
>> void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
>> void (*insert_start)(struct amdgpu_ring *ring);
>> void (*insert_end)(struct amdgpu_ring *ring);
>> /* pad the indirect buffer to the necessary number of dw */
>> void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
>> unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
>> @@ -226,21 +222,20 @@ struct amdgpu_ring {
>> #define amdgpu_ring_patch_cs_in_place(r, p, ib)
>> ((r)->funcs->patch_cs_in_place((p), (ib)))
>> #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
>> #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
>> #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
>> #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
>> #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
>> #define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r),
>> (ib), (vmid), (c))
>> #define amdgpu_ring_emit_pipeline_sync(r)
>> (r)->funcs->emit_pipeline_sync((r))
>> #define amdgpu_ring_emit_vm_flush(r, vmid, addr)
>> (r)->funcs->emit_vm_flush((r), (vmid), (addr))
>> #define amdgpu_ring_emit_fence(r, addr, seq, flags)
>> (r)->funcs->emit_fence((r), (addr), (seq), (flags))
>> -#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as)
>> (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
>> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
>> #define amdgpu_ring_emit_switch_buffer(r)
>> (r)->funcs->emit_switch_buffer((r))
>> #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r),
>> (d))
>> #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
>> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d),
>> (v))
>> #define amdgpu_ring_emit_reg_wait(r, d, v, m)
>> (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>> #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m)
>> (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>> #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 8a158ee922f7..2cc62b0e7ea8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -195,30 +195,20 @@ static int amdgpu_init_mem_type(struct ttm_bo_device
>> *bdev, uint32_t type,
>> break;
>> case TTM_PL_VRAM:
>> /* "On-card" video ram */
>> man->func = &amdgpu_vram_mgr_func;
>> man->gpu_offset = adev->gmc.vram_start;
>> man->flags = TTM_MEMTYPE_FLAG_FIXED |
>> TTM_MEMTYPE_FLAG_MAPPABLE;
>> man->available_caching = TTM_PL_FLAG_UNCACHED |
>> TTM_PL_FLAG_WC;
>> man->default_caching = TTM_PL_FLAG_WC;
>> break;
>> - case AMDGPU_PL_GDS:
>> - case AMDGPU_PL_GWS:
>> - case AMDGPU_PL_OA:
>> - /* On-chip GDS memory*/
>> - man->func = &ttm_bo_manager_func;
>> - man->gpu_offset = 0;
>> - man->flags = TTM_MEMTYPE_FLAG_FIXED |
>> TTM_MEMTYPE_FLAG_CMA;
>> - man->available_caching = TTM_PL_FLAG_UNCACHED;
>> - man->default_caching = TTM_PL_FLAG_UNCACHED;
>> - break;
>> default:
>> DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
>> return -EINVAL;
>> }
>> return 0;
>> }
>> /**
>> * amdgpu_evict_flags - Compute placement flags
>> *
>> @@ -1039,25 +1029,20 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt
>> *ttm,
>> if (r) {
>> DRM_ERROR("failed to pin userptr\n");
>> return r;
>> }
>> }
>> if (!ttm->num_pages) {
>> WARN(1, "nothing to bind %lu pages for mreg %p back
>> %p!\n",
>> ttm->num_pages, bo_mem, ttm);
>> }
>> - if (bo_mem->mem_type == AMDGPU_PL_GDS ||
>> - bo_mem->mem_type == AMDGPU_PL_GWS ||
>> - bo_mem->mem_type == AMDGPU_PL_OA)
>> - return -EINVAL;
>> -
>> if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
>> gtt->offset = AMDGPU_BO_INVALID_OFFSET;
>> return 0;
>> }
>> /* compute PTE flags relevant to this BO memory */
>> flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
>> /* bind pages into GART page tables */
>> gtt->offset = ((u64)bo_mem->start << PAGE_SHIFT) -
>> adev->gmc.gart_start;
>> @@ -1818,60 +1803,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>> /* Initialize GTT memory pool */
>> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >>
>> PAGE_SHIFT);
>> if (r) {
>> DRM_ERROR("Failed initializing GTT heap.\n");
>> return r;
>> }
>> DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
>> (unsigned)(gtt_size / (1024 * 1024)));
>> - /* Initialize various on-chip memory pools */
>> - adev->gds.mem.total_size = adev->gds.mem.total_size <<
>> AMDGPU_GDS_SHIFT;
>> - adev->gds.mem.gfx_partition_size =
>> adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
>> - adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size
>> << AMDGPU_GDS_SHIFT;
>> - adev->gds.gws.total_size = adev->gds.gws.total_size <<
>> AMDGPU_GWS_SHIFT;
>> - adev->gds.gws.gfx_partition_size =
>> adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
>> - adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size
>> << AMDGPU_GWS_SHIFT;
>> - adev->gds.oa.total_size = adev->gds.oa.total_size <<
>> AMDGPU_OA_SHIFT;
>> - adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size
>> << AMDGPU_OA_SHIFT;
>> - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size <<
>> AMDGPU_OA_SHIFT;
>> - /* GDS Memory */
>> - if (adev->gds.mem.total_size) {
>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
>> - adev->gds.mem.total_size >>
>> PAGE_SHIFT);
>> - if (r) {
>> - DRM_ERROR("Failed initializing GDS heap.\n");
>> - return r;
>> - }
>> - }
>> -
>> - /* GWS */
>> - if (adev->gds.gws.total_size) {
>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
>> - adev->gds.gws.total_size >>
>> PAGE_SHIFT);
>> - if (r) {
>> - DRM_ERROR("Failed initializing gws heap.\n");
>> - return r;
>> - }
>> - }
>> -
>> - /* OA */
>> - if (adev->gds.oa.total_size) {
>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
>> - adev->gds.oa.total_size >> PAGE_SHIFT);
>> - if (r) {
>> - DRM_ERROR("Failed initializing oa heap.\n");
>> - return r;
>> - }
>> - }
>> -
>> /* Register debugfs entries for amdgpu_ttm */
>> r = amdgpu_ttm_debugfs_init(adev);
>> if (r) {
>> DRM_ERROR("Failed to init debugfs\n");
>> return r;
>> }
>> return 0;
>> }
>> /**
>> @@ -1892,26 +1837,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
>> return;
>> amdgpu_ttm_debugfs_fini(adev);
>> amdgpu_ttm_fw_reserve_vram_fini(adev);
>> if (adev->mman.aper_base_kaddr)
>> iounmap(adev->mman.aper_base_kaddr);
>> adev->mman.aper_base_kaddr = NULL;
>> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
>> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
>> - if (adev->gds.mem.total_size)
>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
>> - if (adev->gds.gws.total_size)
>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
>> - if (adev->gds.oa.total_size)
>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
>> ttm_bo_device_release(&adev->mman.bdev);
>> amdgpu_ttm_global_fini(adev);
>> adev->mman.initialized = false;
>> DRM_INFO("amdgpu: ttm finalized\n");
>> }
>> /**
>> * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer
>> functions
>> *
>> * @adev: amdgpu_device pointer
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index fe8f276e9811..04557a382b19 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -20,28 +20,20 @@
>> * OTHER DEALINGS IN THE SOFTWARE.
>> *
>> */
>> #ifndef __AMDGPU_TTM_H__
>> #define __AMDGPU_TTM_H__
>> #include "amdgpu.h"
>> #include <drm/gpu_scheduler.h>
>> -#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
>> -#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
>> -#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
>> -
>> -#define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0)
>> -#define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1)
>> -#define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2)
>> -
>> #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
>> #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
>> struct amdgpu_mman {
>> struct ttm_bo_global_ref bo_global_ref;
>> struct drm_global_reference mem_global_ref;
>> struct ttm_bo_device bdev;
>> bool mem_global_referenced;
>> bool initialized;
>> void __iomem *aper_base_kaddr;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index be1659fedf94..c66f1c6f0ba8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -803,86 +803,69 @@ void amdgpu_vm_check_compute_bug(struct
>> amdgpu_device *adev)
>> * Returns:
>> * True if sync is needed.
>> */
>> bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>> struct amdgpu_job *job)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> unsigned vmhub = ring->funcs->vmhub;
>> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>> struct amdgpu_vmid *id;
>> - bool gds_switch_needed;
>> bool vm_flush_needed = job->vm_needs_flush ||
>> ring->has_compute_vm_bug;
>> if (job->vmid == 0)
>> return false;
>> id = &id_mgr->ids[job->vmid];
>> - gds_switch_needed = ring->funcs->emit_gds_switch && (
>> - id->gds_base != job->gds_base ||
>> - id->gds_size != job->gds_size ||
>> - id->gws_base != job->gws_base ||
>> - id->gws_size != job->gws_size ||
>> - id->oa_base != job->oa_base ||
>> - id->oa_size != job->oa_size);
>> if (amdgpu_vmid_had_gpu_reset(adev, id))
>> return true;
>> - return vm_flush_needed || gds_switch_needed;
>> + return vm_flush_needed;
>> }
>> /**
>> * amdgpu_vm_flush - hardware flush the vm
>> *
>> * @ring: ring to use for flush
>> * @job: related job
>> * @need_pipe_sync: is pipe sync needed
>> *
>> * Emit a VM flush when it is necessary.
>> *
>> * Returns:
>> * 0 on success, errno otherwise.
>> */
>> int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>> bool need_pipe_sync)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> unsigned vmhub = ring->funcs->vmhub;
>> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>> struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>> - bool gds_switch_needed = ring->funcs->emit_gds_switch && (
>> - id->gds_base != job->gds_base ||
>> - id->gds_size != job->gds_size ||
>> - id->gws_base != job->gws_base ||
>> - id->gws_size != job->gws_size ||
>> - id->oa_base != job->oa_base ||
>> - id->oa_size != job->oa_size);
>> bool vm_flush_needed = job->vm_needs_flush;
>> bool pasid_mapping_needed = id->pasid != job->pasid ||
>> !id->pasid_mapping ||
>> !dma_fence_is_signaled(id->pasid_mapping);
>> struct dma_fence *fence = NULL;
>> unsigned patch_offset = 0;
>> int r;
>> if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>> - gds_switch_needed = true;
>> vm_flush_needed = true;
>> pasid_mapping_needed = true;
>> }
>> - gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>> vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>> job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>> pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>> ring->funcs->emit_wreg;
>> - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
>> + if (!vm_flush_needed && !need_pipe_sync)
>> return 0;
>> if (ring->funcs->init_cond_exec)
>> patch_offset = amdgpu_ring_init_cond_exec(ring);
>> if (need_pipe_sync)
>> amdgpu_ring_emit_pipeline_sync(ring);
>> if (vm_flush_needed) {
>> trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
>> @@ -907,33 +890,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct
>> amdgpu_job *job, bool need_
>> mutex_unlock(&id_mgr->lock);
>> }
>> if (pasid_mapping_needed) {
>> id->pasid = job->pasid;
>> dma_fence_put(id->pasid_mapping);
>> id->pasid_mapping = dma_fence_get(fence);
>> }
>> dma_fence_put(fence);
>> - if (ring->funcs->emit_gds_switch && gds_switch_needed) {
>> - id->gds_base = job->gds_base;
>> - id->gds_size = job->gds_size;
>> - id->gws_base = job->gws_base;
>> - id->gws_size = job->gws_size;
>> - id->oa_base = job->oa_base;
>> - id->oa_size = job->oa_size;
>> - amdgpu_ring_emit_gds_switch(ring, job->vmid,
>> job->gds_base,
>> - job->gds_size, job->gws_base,
>> - job->gws_size, job->oa_base,
>> - job->oa_size);
>> - }
>> -
>> if (ring->funcs->patch_cond_exec)
>> amdgpu_ring_patch_cond_exec(ring, patch_offset);
>> /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC
>> */
>> if (ring->funcs->emit_switch_buffer) {
>> amdgpu_ring_emit_switch_buffer(ring);
>> amdgpu_ring_emit_switch_buffer(ring);
>> }
>> return 0;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> index a15d9c0f233b..f5228e169c3a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> @@ -1890,21 +1890,21 @@ static void gfx_v7_0_config_init(struct
>> amdgpu_device *adev)
>> *
>> * @adev: amdgpu_device pointer
>> *
>> * Configures the 3D engine and tiling configuration
>> * registers so that the 3D engine is usable.
>> */
>> static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
>> {
>> u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
>> u32 tmp;
>> - int i;
>> + int i, vmid;
>> WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
>> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
>> gfx_v7_0_tiling_mode_table_init(adev);
>> gfx_v7_0_setup_rb(adev);
>> @@ -2014,20 +2014,42 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device
>> *adev)
>> tmp = RREG32(mmSPI_ARB_PRIORITY);
>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
>> WREG32(mmSPI_ARB_PRIORITY, tmp);
>> mutex_unlock(&adev->grbm_idx_mutex);
>> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
>> + unsigned gds_size, gws_size, oa_size;
>> +
>> + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids)
>> {
>> + gds_size = adev->gds.mem.gfx_size_per_vmid;
>> + gws_size = adev->gds.gws.gfx_size_per_vmid;
>> + oa_size = adev->gds.oa.gfx_size_per_vmid;
>> + } else {
>> + gds_size = adev->gds.mem.kfd_size_per_vmid;
>> + gws_size = adev->gds.gws.kfd_size_per_vmid;
>> + oa_size = adev->gds.oa.kfd_size_per_vmid;
>> + }
>> +
>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid *
>> gds_size);
>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size);
>> + WREG32(amdgpu_gds_reg_offset[vmid].gws,
>> + (vmid * gws_size) |
>> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
>> + WREG32(amdgpu_gds_reg_offset[vmid].oa,
>> + ((1 << oa_size) - 1) << (vmid * oa_size));
>> + }
>> +
>> udelay(50);
>> }
>> /*
>> * GPU scratch registers helpers function.
>> */
>> /**
>> * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
>> *
>> * @adev: amdgpu_device pointer
>> @@ -4157,68 +4179,20 @@ static uint64_t
>> gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>> uint64_t clock;
>> mutex_lock(&adev->gfx.gpu_clock_mutex);
>> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
>> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
>> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
>> mutex_unlock(&adev->gfx.gpu_clock_mutex);
>> return clock;
>> }
>> -static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>> - uint32_t vmid,
>> - uint32_t gds_base, uint32_t
>> gds_size,
>> - uint32_t gws_base, uint32_t
>> gws_size,
>> - uint32_t oa_base, uint32_t
>> oa_size)
>> -{
>> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
>> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
>> -
>> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
>> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
>> -
>> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
>> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
>> -
>> - /* GDS Base */
>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> - WRITE_DATA_DST_SEL(0)));
>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
>> - amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, gds_base);
>> -
>> - /* GDS Size */
>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> - WRITE_DATA_DST_SEL(0)));
>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
>> - amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, gds_size);
>> -
>> - /* GWS */
>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> - WRITE_DATA_DST_SEL(0)));
>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
>> - amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT |
>> gws_base);
>> -
>> - /* OA */
>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> - WRITE_DATA_DST_SEL(0)));
>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
>> - amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 <<
>> oa_base));
>> -}
>> -
>> static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring,
>> unsigned vmid)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> uint32_t value = 0;
>> value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
>> value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
>> value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
>> value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
>> WREG32(mmSQ_CMD, value);
>> @@ -4584,55 +4558,32 @@ static int gfx_v7_0_sw_init(void *handle)
>> ring_id,
>> i, k, j);
>> if (r)
>> return r;
>> ring_id++;
>> }
>> }
>> }
>> - /* reserve GDS, GWS and OA resource for gfx */
>> - r = amdgpu_bo_create_kernel(adev,
>> adev->gds.mem.gfx_partition_size,
>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
>> - &adev->gds.gds_gfx_bo, NULL, NULL);
>> - if (r)
>> - return r;
>> -
>> - r = amdgpu_bo_create_kernel(adev,
>> adev->gds.gws.gfx_partition_size,
>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
>> - &adev->gds.gws_gfx_bo, NULL, NULL);
>> - if (r)
>> - return r;
>> -
>> - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
>> - &adev->gds.oa_gfx_bo, NULL, NULL);
>> - if (r)
>> - return r;
>> -
>> adev->gfx.ce_ram_size = 0x8000;
>> gfx_v7_0_gpu_early_init(adev);
>> return r;
>> }
>> static int gfx_v7_0_sw_fini(void *handle)
>> {
>> int i;
>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
>> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>> -
>> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>> for (i = 0; i < adev->gfx.num_compute_rings; i++)
>> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>> gfx_v7_0_cp_compute_fini(adev);
>> gfx_v7_0_rlc_fini(adev);
>> gfx_v7_0_mec_fini(adev);
>> amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
>> &adev->gfx.rlc.clear_state_gpu_addr,
>> @@ -5073,64 +5024,60 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs
>> = {
>> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
>> .type = AMDGPU_RING_TYPE_GFX,
>> .align_mask = 0xff,
>> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>> .support_64bit_ptrs = false,
>> .get_rptr = gfx_v7_0_ring_get_rptr,
>> .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
>> .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
>> .emit_frame_size =
>> - 20 + /* gfx_v7_0_ring_emit_gds_switch */
>> 7 + /* gfx_v7_0_ring_emit_hdp_flush */
>> 5 + /* hdp invalidate */
>> 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user
>> fence, vm fence */
>> 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
>> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /*
>> gfx_v7_0_ring_emit_vm_flush */
>> 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
>> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
>> .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
>> .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
>> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
>> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
>> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
>> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
>> .test_ring = gfx_v7_0_ring_test_ring,
>> .test_ib = gfx_v7_0_ring_test_ib,
>> .insert_nop = amdgpu_ring_insert_nop,
>> .pad_ib = amdgpu_ring_generic_pad_ib,
>> .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
>> .emit_wreg = gfx_v7_0_ring_emit_wreg,
>> .soft_recovery = gfx_v7_0_ring_soft_recovery,
>> };
>> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
>> .type = AMDGPU_RING_TYPE_COMPUTE,
>> .align_mask = 0xff,
>> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>> .support_64bit_ptrs = false,
>> .get_rptr = gfx_v7_0_ring_get_rptr,
>> .get_wptr = gfx_v7_0_ring_get_wptr_compute,
>> .set_wptr = gfx_v7_0_ring_set_wptr_compute,
>> .emit_frame_size =
>> - 20 + /* gfx_v7_0_ring_emit_gds_switch */
>> 7 + /* gfx_v7_0_ring_emit_hdp_flush */
>> 5 + /* hdp invalidate */
>> 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
>> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /*
>> gfx_v7_0_ring_emit_vm_flush */
>> 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user
>> fence, vm fence */
>> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
>> .emit_ib = gfx_v7_0_ring_emit_ib_compute,
>> .emit_fence = gfx_v7_0_ring_emit_fence_compute,
>> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
>> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
>> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
>> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
>> .test_ring = gfx_v7_0_ring_test_ring,
>> .test_ib = gfx_v7_0_ring_test_ib,
>> .insert_nop = amdgpu_ring_insert_nop,
>> .pad_ib = amdgpu_ring_generic_pad_ib,
>> .emit_wreg = gfx_v7_0_ring_emit_wreg,
>> };
>> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
>> {
>> @@ -5169,42 +5116,28 @@ static void gfx_v7_0_set_irq_funcs(struct
>> amdgpu_device *adev)
>> adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
>> }
>> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
>> {
>> /* init ASIC GDS info */
>> adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
>> adev->gds.gws.total_size = 64;
>> adev->gds.oa.total_size = 16;
>> - if (adev->gds.mem.total_size == 64 * 1024) {
>> - adev->gds.mem.gfx_partition_size = 4096;
>> - adev->gds.mem.cs_partition_size = 4096;
>> -
>> - adev->gds.gws.gfx_partition_size = 4;
>> - adev->gds.gws.cs_partition_size = 4;
>> -
>> - adev->gds.oa.gfx_partition_size = 4;
>> - adev->gds.oa.cs_partition_size = 1;
>> - } else {
>> - adev->gds.mem.gfx_partition_size = 1024;
>> - adev->gds.mem.cs_partition_size = 1024;
>> -
>> - adev->gds.gws.gfx_partition_size = 16;
>> - adev->gds.gws.cs_partition_size = 16;
>> -
>> - adev->gds.oa.gfx_partition_size = 4;
>> - adev->gds.oa.cs_partition_size = 4;
>> - }
>> + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size /
>> AMDGPU_NUM_VMID;
>> + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size /
>> AMDGPU_NUM_VMID;
>> + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size /
>> AMDGPU_NUM_VMID;
>> + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size /
>> AMDGPU_NUM_VMID;
>> + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /*
>> gfx only */
>> + adev->gds.oa.kfd_size_per_vmid = 0;
>> }
>> -
>> static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
>> {
>> int i, j, k, counter, active_cu_number = 0;
>> u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
>> struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
>> unsigned disable_masks[4 * 2];
>> u32 ao_cu_num;
>> if (adev->flags & AMD_IS_APU)
>> ao_cu_num = 2;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 3882689b2d8f..b11a54bd0668 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -2154,57 +2154,34 @@ static int gfx_v8_0_sw_init(void *handle)
>> kiq = &adev->gfx.kiq;
>> r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>> if (r)
>> return r;
>> /* create MQD for all compute queues as well as KIQ for SRIOV case
>> */
>> r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct
>> vi_mqd_allocation));
>> if (r)
>> return r;
>> - /* reserve GDS, GWS and OA resource for gfx */
>> - r = amdgpu_bo_create_kernel(adev,
>> adev->gds.mem.gfx_partition_size,
>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
>> - &adev->gds.gds_gfx_bo, NULL, NULL);
>> - if (r)
>> - return r;
>> -
>> - r = amdgpu_bo_create_kernel(adev,
>> adev->gds.gws.gfx_partition_size,
>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
>> - &adev->gds.gws_gfx_bo, NULL, NULL);
>> - if (r)
>> - return r;
>> -
>> - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
>> - &adev->gds.oa_gfx_bo, NULL, NULL);
>> - if (r)
>> - return r;
>> -
>> adev->gfx.ce_ram_size = 0x8000;
>> r = gfx_v8_0_gpu_early_init(adev);
>> if (r)
>> return r;
>> return 0;
>> }
>> static int gfx_v8_0_sw_fini(void *handle)
>> {
>> int i;
>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
>> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>> -
>> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>> for (i = 0; i < adev->gfx.num_compute_rings; i++)
>> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>> amdgpu_gfx_compute_mqd_sw_fini(adev);
>> amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>> amdgpu_gfx_kiq_fini(adev);
>> gfx_v8_0_mec_fini(adev);
>> @@ -3850,21 +3827,21 @@ static void gfx_v8_0_config_init(struct
>> amdgpu_device *adev)
>> case CHIP_CARRIZO:
>> case CHIP_STONEY:
>> adev->gfx.config.double_offchip_lds_buf = 0;
>> break;
>> }
>> }
>> static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
>> {
>> u32 tmp, sh_static_mem_cfg;
>> - int i;
>> + int i, vmid;
>> WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
>> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
>> gfx_v8_0_tiling_mode_table_init(adev);
>> gfx_v8_0_setup_rb(adev);
>> gfx_v8_0_get_cu_info(adev);
>> gfx_v8_0_config_init(adev);
>> @@ -3927,20 +3904,41 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device
>> *adev)
>> tmp = RREG32(mmSPI_ARB_PRIORITY);
>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
>> WREG32(mmSPI_ARB_PRIORITY, tmp);
>> mutex_unlock(&adev->grbm_idx_mutex);
>> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
>> + unsigned gds_size, gws_size, oa_size;
>> +
>> + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids)
>> {
>> + gds_size = adev->gds.mem.gfx_size_per_vmid;
>> + gws_size = adev->gds.gws.gfx_size_per_vmid;
>> + oa_size = adev->gds.oa.gfx_size_per_vmid;
>> + } else {
>> + gds_size = adev->gds.mem.kfd_size_per_vmid;
>> + gws_size = adev->gds.gws.kfd_size_per_vmid;
>> + oa_size = adev->gds.oa.kfd_size_per_vmid;
>> + }
>> +
>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid *
>> gds_size);
>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size);
>> + WREG32(amdgpu_gds_reg_offset[vmid].gws,
>> + (vmid * gws_size) |
>> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
>> + WREG32(amdgpu_gds_reg_offset[vmid].oa,
>> + ((1 << oa_size) - 1) << (vmid * oa_size));
>> + }
>> }
>> static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
>> {
>> u32 i, j, k;
>> u32 mask;
>> mutex_lock(&adev->grbm_idx_mutex);
>> for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
>> for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
>> @@ -5383,68 +5381,20 @@ static uint64_t
>> gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>> uint64_t clock;
>> mutex_lock(&adev->gfx.gpu_clock_mutex);
>> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
>> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
>> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
>> mutex_unlock(&adev->gfx.gpu_clock_mutex);
>> return clock;
>> }
>> -static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>> - uint32_t vmid,
>> - uint32_t gds_base, uint32_t
>> gds_size,
>> - uint32_t gws_base, uint32_t
>> gws_size,
>> - uint32_t oa_base, uint32_t
>> oa_size)
>> -{
>> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
>> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
>> -
>> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
>> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
>> -
>> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
>> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
>> -
>> - /* GDS Base */
>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> - WRITE_DATA_DST_SEL(0)));
>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
>> - amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, gds_base);
>> -
>> - /* GDS Size */
>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> - WRITE_DATA_DST_SEL(0)));
>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
>> - amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, gds_size);
>> -
>> - /* GWS */
>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> - WRITE_DATA_DST_SEL(0)));
>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
>> - amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT |
>> gws_base);
>> -
>> - /* OA */
>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> - WRITE_DATA_DST_SEL(0)));
>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
>> - amdgpu_ring_write(ring, 0);
>> - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 <<
>> oa_base));
>> -}
>> -
>> static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd,
>> uint32_t wave, uint32_t address)
>> {
>> WREG32(mmSQ_IND_INDEX,
>> (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
>> (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
>> (address << SQ_IND_INDEX__INDEX__SHIFT) |
>> (SQ_IND_INDEX__FORCE_READ_MASK));
>> return RREG32(mmSQ_IND_DATA);
>> }
>> @@ -7132,21 +7082,20 @@ static const struct amdgpu_ring_funcs
>> gfx_v8_0_ring_funcs_gfx = {
>> 31 + /* DE_META */
>> 3 + /* CNTX_CTRL */
>> 5 + /* HDP_INVL */
>> 8 + 8 + /* FENCE x2 */
>> 2, /* SWITCH_BUFFER */
>> .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
>> .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
>> .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
>> .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
>> .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
>> - .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
>> .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
>> .test_ring = gfx_v8_0_ring_test_ring,
>> .test_ib = gfx_v8_0_ring_test_ib,
>> .insert_nop = amdgpu_ring_insert_nop,
>> .pad_ib = amdgpu_ring_generic_pad_ib,
>> .emit_switch_buffer = gfx_v8_ring_emit_sb,
>> .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
>> .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
>> .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
>> .emit_wreg = gfx_v8_0_ring_emit_wreg,
>> @@ -7155,51 +7104,48 @@ static const struct amdgpu_ring_funcs
>> gfx_v8_0_ring_funcs_gfx = {
>> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
>> .type = AMDGPU_RING_TYPE_COMPUTE,
>> .align_mask = 0xff,
>> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>> .support_64bit_ptrs = false,
>> .get_rptr = gfx_v8_0_ring_get_rptr,
More information about the amd-gfx
mailing list