[PATCH] drm/amdgpu: reserve GDS resources statically
Marek Olšák
maraeo at gmail.com
Thu Sep 13 18:20:28 UTC 2018
GDS is temporary memory. Its purpose depends on the job, but most of
the time, the idea is:
- beginning of IB
- initialize GDS variables
- dispatch compute that works with GDS variables
- when done, copy GDS variables to memory
- repeat ...
- end of IB
GDS is like a pool of global shader GPRs.
GDS is too small for persistent data.
Marek
On Thu, Sep 13, 2018 at 1:26 PM, Christian König
<christian.koenig at amd.com> wrote:
> Are you sure of that? I mean it is rather pointless to have a Global Data
> Share when it can't be used to share anything?
>
> On the other hand I'm not opposed to get rid of all that stuff if we really
> don't need it.
>
> Christian.
>
> Am 13.09.2018 um 17:27 schrieb Marek Olšák:
>>
>> That's OK. We don't need IBs to get the same VMID.
>>
>> Marek
>>
>> On Thu, Sep 13, 2018 at 4:40 AM, Christian König
>> <ckoenig.leichtzumerken at gmail.com> wrote:
>>>
>>> As discussed internally that doesn't work because threads don't necessarily
>>> get the same VMID assigned.
>>>
>>> Christian.
>>>
>>> Am 12.09.2018 um 22:33 schrieb Marek Olšák:
>>>>
>>>> From: Marek Olšák <marek.olsak at amd.com>
>>>>
>>>> I've chosen to do it like this because it's easy and allows an arbitrary
>>>> number of processes.
>>>>
>>>> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 10 --
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 -
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 20 ----
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 19 +--
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 24 +---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 -
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 7 --
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 -
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 +--
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 ----
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 -
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 -
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 ----------
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 --
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +-----
>>>> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 125
>>>> +++++---------------
>>>> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 123 +++++--------------
>>>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 124 ++++++-------------
>>>> include/uapi/drm/amdgpu_drm.h | 15 +--
>>>> 19 files changed, 109 insertions(+), 519 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> index b80243d3972e..7264a4930b88 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> @@ -71,23 +71,20 @@ int amdgpu_bo_list_create(struct amdgpu_device
>>>> *adev,
>>>> struct drm_file *filp,
>>>> / sizeof(struct amdgpu_bo_list_entry))
>>>> return -EINVAL;
>>>> size = sizeof(struct amdgpu_bo_list);
>>>> size += num_entries * sizeof(struct amdgpu_bo_list_entry);
>>>> list = kvmalloc(size, GFP_KERNEL);
>>>> if (!list)
>>>> return -ENOMEM;
>>>> kref_init(&list->refcount);
>>>> - list->gds_obj = adev->gds.gds_gfx_bo;
>>>> - list->gws_obj = adev->gds.gws_gfx_bo;
>>>> - list->oa_obj = adev->gds.oa_gfx_bo;
>>>> array = amdgpu_bo_list_array_entry(list, 0);
>>>> memset(array, 0, num_entries * sizeof(struct
>>>> amdgpu_bo_list_entry));
>>>> for (i = 0; i < num_entries; ++i) {
>>>> struct amdgpu_bo_list_entry *entry;
>>>> struct drm_gem_object *gobj;
>>>> struct amdgpu_bo *bo;
>>>> struct mm_struct *usermm;
>>>> @@ -111,27 +108,20 @@ int amdgpu_bo_list_create(struct amdgpu_device
>>>> *adev, struct drm_file *filp,
>>>> } else {
>>>> entry = &array[last_entry++];
>>>> }
>>>> entry->robj = bo;
>>>> entry->priority = min(info[i].bo_priority,
>>>> AMDGPU_BO_LIST_MAX_PRIORITY);
>>>> entry->tv.bo = &entry->robj->tbo;
>>>> entry->tv.shared = !entry->robj->prime_shared_count;
>>>> - if (entry->robj->preferred_domains ==
>>>> AMDGPU_GEM_DOMAIN_GDS)
>>>> - list->gds_obj = entry->robj;
>>>> - if (entry->robj->preferred_domains ==
>>>> AMDGPU_GEM_DOMAIN_GWS)
>>>> - list->gws_obj = entry->robj;
>>>> - if (entry->robj->preferred_domains ==
>>>> AMDGPU_GEM_DOMAIN_OA)
>>>> - list->oa_obj = entry->robj;
>>>> -
>>>> total_size += amdgpu_bo_size(entry->robj);
>>>> trace_amdgpu_bo_list_set(list, entry->robj);
>>>> }
>>>> list->first_userptr = first_userptr;
>>>> list->num_entries = num_entries;
>>>> trace_amdgpu_cs_bo_status(list->num_entries, total_size);
>>>> *result = list;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>>> index 61b089768e1c..30f12a60aa28 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>>> @@ -36,23 +36,20 @@ struct amdgpu_bo_list_entry {
>>>> struct ttm_validate_buffer tv;
>>>> struct amdgpu_bo_va *bo_va;
>>>> uint32_t priority;
>>>> struct page **user_pages;
>>>> int user_invalidated;
>>>> };
>>>> struct amdgpu_bo_list {
>>>> struct rcu_head rhead;
>>>> struct kref refcount;
>>>> - struct amdgpu_bo *gds_obj;
>>>> - struct amdgpu_bo *gws_obj;
>>>> - struct amdgpu_bo *oa_obj;
>>>> unsigned first_userptr;
>>>> unsigned num_entries;
>>>> };
>>>> int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
>>>> struct amdgpu_bo_list **result);
>>>> void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>>>> struct list_head *validated);
>>>> void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>>>> int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in
>>>> *in,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index 1081fd00b059..88b58facf29e 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -565,23 +565,20 @@ static int amdgpu_cs_list_validate(struct
>>>> amdgpu_cs_parser *p,
>>>> return 0;
>>>> }
>>>> static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>>> union drm_amdgpu_cs *cs)
>>>> {
>>>> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>>> struct amdgpu_vm *vm = &fpriv->vm;
>>>> struct amdgpu_bo_list_entry *e;
>>>> struct list_head duplicates;
>>>> - struct amdgpu_bo *gds;
>>>> - struct amdgpu_bo *gws;
>>>> - struct amdgpu_bo *oa;
>>>> unsigned tries = 10;
>>>> int r;
>>>> INIT_LIST_HEAD(&p->validated);
>>>> /* p->bo_list could already be assigned if
>>>> AMDGPU_CHUNK_ID_BO_HANDLES is present */
>>>> if (cs->in.bo_list_handle) {
>>>> if (p->bo_list)
>>>> return -EINVAL;
>>>> @@ -705,40 +702,23 @@ static int amdgpu_cs_parser_bos(struct
>>>> amdgpu_cs_parser *p,
>>>> r = amdgpu_cs_list_validate(p, &p->validated);
>>>> if (r) {
>>>> DRM_ERROR("amdgpu_cs_list_validate(validated)
>>>> failed.\n");
>>>> goto error_validate;
>>>> }
>>>> amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>>>> p->bytes_moved_vis);
>>>> - gds = p->bo_list->gds_obj;
>>>> - gws = p->bo_list->gws_obj;
>>>> - oa = p->bo_list->oa_obj;
>>>> -
>>>> amdgpu_bo_list_for_each_entry(e, p->bo_list)
>>>> e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
>>>> - if (gds) {
>>>> - p->job->gds_base = amdgpu_bo_gpu_offset(gds);
>>>> - p->job->gds_size = amdgpu_bo_size(gds);
>>>> - }
>>>> - if (gws) {
>>>> - p->job->gws_base = amdgpu_bo_gpu_offset(gws);
>>>> - p->job->gws_size = amdgpu_bo_size(gws);
>>>> - }
>>>> - if (oa) {
>>>> - p->job->oa_base = amdgpu_bo_gpu_offset(oa);
>>>> - p->job->oa_size = amdgpu_bo_size(oa);
>>>> - }
>>>> -
>>>> if (!r && p->uf_entry.robj) {
>>>> struct amdgpu_bo *uf = p->uf_entry.robj;
>>>> r = amdgpu_ttm_alloc_gart(&uf->tbo);
>>>> p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>>> }
>>>> error_validate:
>>>> if (r)
>>>> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>>>> index e73728d90388..69ba25c2e921 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>>>> @@ -17,48 +17,33 @@
>>>> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>>> DAMAGES
>>>> OR
>>>> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>> OTHERWISE,
>>>> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
>>>> OR
>>>> * OTHER DEALINGS IN THE SOFTWARE.
>>>> *
>>>> */
>>>> #ifndef __AMDGPU_GDS_H__
>>>> #define __AMDGPU_GDS_H__
>>>> -/* Because TTM request that alloacted buffer should be PAGE_SIZE
>>>> aligned,
>>>> - * we should report GDS/GWS/OA size as PAGE_SIZE aligned
>>>> - * */
>>>> -#define AMDGPU_GDS_SHIFT 2
>>>> -#define AMDGPU_GWS_SHIFT PAGE_SHIFT
>>>> -#define AMDGPU_OA_SHIFT PAGE_SHIFT
>>>> -
>>>> struct amdgpu_ring;
>>>> struct amdgpu_bo;
>>>> struct amdgpu_gds_asic_info {
>>>> uint32_t total_size;
>>>> - uint32_t gfx_partition_size;
>>>> - uint32_t cs_partition_size;
>>>> + uint32_t gfx_size_per_vmid;
>>>> + uint32_t kfd_size_per_vmid;
>>>> };
>>>> struct amdgpu_gds {
>>>> struct amdgpu_gds_asic_info mem;
>>>> struct amdgpu_gds_asic_info gws;
>>>> struct amdgpu_gds_asic_info oa;
>>>> - /* At present, GDS, GWS and OA resources for gfx (graphics)
>>>> - * is always pre-allocated and available for graphics operation.
>>>> - * Such resource is shared between all gfx clients.
>>>> - * TODO: move this operation to user space
>>>> - * */
>>>> - struct amdgpu_bo* gds_gfx_bo;
>>>> - struct amdgpu_bo* gws_gfx_bo;
>>>> - struct amdgpu_bo* oa_gfx_bo;
>>>> };
>>>> struct amdgpu_gds_reg_offset {
>>>> uint32_t mem_base;
>>>> uint32_t mem_size;
>>>> uint32_t gws;
>>>> uint32_t oa;
>>>> };
>>>> #endif /* __AMDGPU_GDS_H__ */
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> index d30a0838851b..c87ad4b4d0b6 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> @@ -223,43 +223,25 @@ int amdgpu_gem_create_ioctl(struct drm_device
>>>> *dev,
>>>> void *data,
>>>> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>> AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>>> AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>>> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
>>>> AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
>>>> return -EINVAL;
>>>> /* reject invalid gem domains */
>>>> - if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
>>>> + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
>>>> + AMDGPU_GEM_DOMAIN_GTT |
>>>> + AMDGPU_GEM_DOMAIN_VRAM))
>>>> return -EINVAL;
>>>> - /* create a gem object to contain this object in */
>>>> - if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
>>>> - AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
>>>> - if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
>>>> - /* if gds bo is created from user space, it must
>>>> be
>>>> - * passed to bo list
>>>> - */
>>>> - DRM_ERROR("GDS bo cannot be per-vm-bo\n");
>>>> - return -EINVAL;
>>>> - }
>>>> - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
>>>> - if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
>>>> - size = size << AMDGPU_GDS_SHIFT;
>>>> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
>>>> - size = size << AMDGPU_GWS_SHIFT;
>>>> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
>>>> - size = size << AMDGPU_OA_SHIFT;
>>>> - else
>>>> - return -EINVAL;
>>>> - }
>>>> size = roundup(size, PAGE_SIZE);
>>>> if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
>>>> r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>>> if (r)
>>>> return r;
>>>> resv = vm->root.base.bo->tbo.resv;
>>>> }
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>>>> index 3a072a7a39f0..c2e6a1a11d7f 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>>>> @@ -516,26 +516,20 @@ void amdgpu_vmid_free_reserved(struct
>>>> amdgpu_device
>>>> *adev,
>>>> * Reset saved GDW, GWS and OA to force switch on next flush.
>>>> */
>>>> void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
>>>> unsigned vmid)
>>>> {
>>>> struct amdgpu_vmid_mgr *id_mgr =
>>>> &adev->vm_manager.id_mgr[vmhub];
>>>> struct amdgpu_vmid *id = &id_mgr->ids[vmid];
>>>> mutex_lock(&id_mgr->lock);
>>>> id->owner = 0;
>>>> - id->gds_base = 0;
>>>> - id->gds_size = 0;
>>>> - id->gws_base = 0;
>>>> - id->gws_size = 0;
>>>> - id->oa_base = 0;
>>>> - id->oa_size = 0;
>>>> mutex_unlock(&id_mgr->lock);
>>>> }
>>>> /**
>>>> * amdgpu_vmid_reset_all - reset VMID to zero
>>>> *
>>>> * @adev: amdgpu device structure
>>>> *
>>>> * Reset VMID to force flush on next use
>>>> */
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>>> index 7625419f0fc2..06078e665532 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>>> @@ -44,27 +44,20 @@ struct amdgpu_vmid {
>>>> struct amdgpu_sync active;
>>>> struct dma_fence *last_flush;
>>>> uint64_t owner;
>>>> uint64_t pd_gpu_addr;
>>>> /* last flushed PD/PT update */
>>>> struct dma_fence *flushed_updates;
>>>> uint32_t current_gpu_reset_count;
>>>> - uint32_t gds_base;
>>>> - uint32_t gds_size;
>>>> - uint32_t gws_base;
>>>> - uint32_t gws_size;
>>>> - uint32_t oa_base;
>>>> - uint32_t oa_size;
>>>> -
>>>> unsigned pasid;
>>>> struct dma_fence *pasid_mapping;
>>>> };
>>>> struct amdgpu_vmid_mgr {
>>>> struct mutex lock;
>>>> unsigned num_ids;
>>>> struct list_head ids_lru;
>>>> struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
>>>> atomic_t reserved_vmid_num;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>>> index 57cfe78a262b..3db553f6ad01 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>>> @@ -42,23 +42,20 @@ struct amdgpu_job {
>>>> struct amdgpu_sync sched_sync;
>>>> struct amdgpu_ib *ibs;
>>>> struct dma_fence *fence; /* the hw fence */
>>>> uint32_t preamble_status;
>>>> uint32_t num_ibs;
>>>> void *owner;
>>>> bool vm_needs_flush;
>>>> uint64_t vm_pd_addr;
>>>> unsigned vmid;
>>>> unsigned pasid;
>>>> - uint32_t gds_base, gds_size;
>>>> - uint32_t gws_base, gws_size;
>>>> - uint32_t oa_base, oa_size;
>>>> uint32_t vram_lost_counter;
>>>> /* user fence handling */
>>>> uint64_t uf_addr;
>>>> uint64_t uf_sequence;
>>>> };
>>>> int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
>>>> struct amdgpu_job **job, struct amdgpu_vm *vm);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>> index 29ac3873eeb0..209954290954 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>> @@ -517,27 +517,27 @@ static int amdgpu_info_ioctl(struct drm_device
>>>> *dev,
>>>> void *data, struct drm_file
>>>> case AMDGPU_INFO_VIS_VRAM_USAGE:
>>>> ui64 =
>>>> amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
>>>> return copy_to_user(out, &ui64, min(size, 8u)) ?
>>>> -EFAULT :
>>>> 0;
>>>> case AMDGPU_INFO_GTT_USAGE:
>>>> ui64 =
>>>> amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
>>>> return copy_to_user(out, &ui64, min(size, 8u)) ?
>>>> -EFAULT :
>>>> 0;
>>>> case AMDGPU_INFO_GDS_CONFIG: {
>>>> struct drm_amdgpu_info_gds gds_info;
>>>> memset(&gds_info, 0, sizeof(gds_info));
>>>> - gds_info.gds_gfx_partition_size =
>>>> adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT;
>>>> - gds_info.compute_partition_size =
>>>> adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT;
>>>> - gds_info.gds_total_size = adev->gds.mem.total_size >>
>>>> AMDGPU_GDS_SHIFT;
>>>> - gds_info.gws_per_gfx_partition =
>>>> adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT;
>>>> - gds_info.gws_per_compute_partition =
>>>> adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT;
>>>> - gds_info.oa_per_gfx_partition =
>>>> adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT;
>>>> - gds_info.oa_per_compute_partition =
>>>> adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT;
>>>> + gds_info.gds_gfx_partition_size =
>>>> adev->gds.mem.gfx_size_per_vmid;
>>>> + gds_info.compute_partition_size =
>>>> adev->gds.mem.kfd_size_per_vmid;
>>>> + gds_info.gds_total_size = adev->gds.mem.total_size;
>>>> + gds_info.gws_per_gfx_partition =
>>>> adev->gds.gws.gfx_size_per_vmid;
>>>> + gds_info.gws_per_compute_partition =
>>>> adev->gds.gws.kfd_size_per_vmid;
>>>> + gds_info.oa_per_gfx_partition =
>>>> adev->gds.oa.gfx_size_per_vmid;
>>>> + gds_info.oa_per_compute_partition =
>>>> adev->gds.oa.kfd_size_per_vmid;
>>>> return copy_to_user(out, &gds_info,
>>>> min((size_t)size,
>>>> sizeof(gds_info))) ?
>>>> -EFAULT : 0;
>>>> }
>>>> case AMDGPU_INFO_VRAM_GTT: {
>>>> struct drm_amdgpu_info_vram_gtt vram_gtt;
>>>> vram_gtt.vram_size = adev->gmc.real_vram_size -
>>>> atomic64_read(&adev->vram_pin_size);
>>>> vram_gtt.vram_cpu_accessible_size =
>>>> adev->gmc.visible_vram_size -
>>>> atomic64_read(&adev->visible_pin_size);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> index de990bdcdd6c..76770a8c29a5 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> @@ -178,41 +178,20 @@ void amdgpu_bo_placement_from_domain(struct
>>>> amdgpu_bo *abo, u32 domain)
>>>> places[c].lpfn = 0;
>>>> places[c].flags = TTM_PL_FLAG_SYSTEM;
>>>> if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
>>>> places[c].flags |= TTM_PL_FLAG_WC |
>>>> TTM_PL_FLAG_UNCACHED;
>>>> else
>>>> places[c].flags |= TTM_PL_FLAG_CACHED;
>>>> c++;
>>>> }
>>>> - if (domain & AMDGPU_GEM_DOMAIN_GDS) {
>>>> - places[c].fpfn = 0;
>>>> - places[c].lpfn = 0;
>>>> - places[c].flags = TTM_PL_FLAG_UNCACHED |
>>>> AMDGPU_PL_FLAG_GDS;
>>>> - c++;
>>>> - }
>>>> -
>>>> - if (domain & AMDGPU_GEM_DOMAIN_GWS) {
>>>> - places[c].fpfn = 0;
>>>> - places[c].lpfn = 0;
>>>> - places[c].flags = TTM_PL_FLAG_UNCACHED |
>>>> AMDGPU_PL_FLAG_GWS;
>>>> - c++;
>>>> - }
>>>> -
>>>> - if (domain & AMDGPU_GEM_DOMAIN_OA) {
>>>> - places[c].fpfn = 0;
>>>> - places[c].lpfn = 0;
>>>> - places[c].flags = TTM_PL_FLAG_UNCACHED |
>>>> AMDGPU_PL_FLAG_OA;
>>>> - c++;
>>>> - }
>>>> -
>>>> if (!c) {
>>>> places[c].fpfn = 0;
>>>> places[c].lpfn = 0;
>>>> places[c].flags = TTM_PL_MASK_CACHING |
>>>> TTM_PL_FLAG_SYSTEM;
>>>> c++;
>>>> }
>>>> BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS);
>>>> placement->num_placement = c;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> index 907fdf46d895..e089964cbcb7 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> @@ -120,26 +120,20 @@ static inline struct amdgpu_bo
>>>> *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
>>>> */
>>>> static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
>>>> {
>>>> switch (mem_type) {
>>>> case TTM_PL_VRAM:
>>>> return AMDGPU_GEM_DOMAIN_VRAM;
>>>> case TTM_PL_TT:
>>>> return AMDGPU_GEM_DOMAIN_GTT;
>>>> case TTM_PL_SYSTEM:
>>>> return AMDGPU_GEM_DOMAIN_CPU;
>>>> - case AMDGPU_PL_GDS:
>>>> - return AMDGPU_GEM_DOMAIN_GDS;
>>>> - case AMDGPU_PL_GWS:
>>>> - return AMDGPU_GEM_DOMAIN_GWS;
>>>> - case AMDGPU_PL_OA:
>>>> - return AMDGPU_GEM_DOMAIN_OA;
>>>> default:
>>>> break;
>>>> }
>>>> return 0;
>>>> }
>>>> /**
>>>> * amdgpu_bo_reserve - reserve bo
>>>> * @bo: bo structure
>>>> * @no_intr: don't return -ERESTARTSYS on pending signal
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> index 9cc239968e40..f6ea9604e611 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> @@ -130,24 +130,20 @@ struct amdgpu_ring_funcs {
>>>> /* command emit functions */
>>>> void (*emit_ib)(struct amdgpu_ring *ring,
>>>> struct amdgpu_ib *ib,
>>>> unsigned vmid, bool ctx_switch);
>>>> void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
>>>> uint64_t seq, unsigned flags);
>>>> void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
>>>> void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
>>>> uint64_t pd_addr);
>>>> void (*emit_hdp_flush)(struct amdgpu_ring *ring);
>>>> - void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
>>>> - uint32_t gds_base, uint32_t gds_size,
>>>> - uint32_t gws_base, uint32_t gws_size,
>>>> - uint32_t oa_base, uint32_t oa_size);
>>>> /* testing functions */
>>>> int (*test_ring)(struct amdgpu_ring *ring);
>>>> int (*test_ib)(struct amdgpu_ring *ring, long timeout);
>>>> /* insert NOP packets */
>>>> void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
>>>> void (*insert_start)(struct amdgpu_ring *ring);
>>>> void (*insert_end)(struct amdgpu_ring *ring);
>>>> /* pad the indirect buffer to the necessary number of dw */
>>>> void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
>>>> unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
>>>> @@ -226,21 +222,20 @@ struct amdgpu_ring {
>>>> #define amdgpu_ring_patch_cs_in_place(r, p, ib)
>>>> ((r)->funcs->patch_cs_in_place((p), (ib)))
>>>> #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
>>>> #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
>>>> #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
>>>> #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
>>>> #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
>>>> #define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r),
>>>> (ib), (vmid), (c))
>>>> #define amdgpu_ring_emit_pipeline_sync(r)
>>>> (r)->funcs->emit_pipeline_sync((r))
>>>> #define amdgpu_ring_emit_vm_flush(r, vmid, addr)
>>>> (r)->funcs->emit_vm_flush((r), (vmid), (addr))
>>>> #define amdgpu_ring_emit_fence(r, addr, seq, flags)
>>>> (r)->funcs->emit_fence((r), (addr), (seq), (flags))
>>>> -#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as)
>>>> (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab),
>>>> (as))
>>>> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
>>>> #define amdgpu_ring_emit_switch_buffer(r)
>>>> (r)->funcs->emit_switch_buffer((r))
>>>> #define amdgpu_ring_emit_cntxcntl(r, d)
>>>> (r)->funcs->emit_cntxcntl((r),
>>>> (d))
>>>> #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
>>>> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r),
>>>> (d),
>>>> (v))
>>>> #define amdgpu_ring_emit_reg_wait(r, d, v, m)
>>>> (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>>>> #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m)
>>>> (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>>>> #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>>>> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>>> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> index 8a158ee922f7..2cc62b0e7ea8 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> @@ -195,30 +195,20 @@ static int amdgpu_init_mem_type(struct
>>>> ttm_bo_device
>>>> *bdev, uint32_t type,
>>>> break;
>>>> case TTM_PL_VRAM:
>>>> /* "On-card" video ram */
>>>> man->func = &amdgpu_vram_mgr_func;
>>>> man->gpu_offset = adev->gmc.vram_start;
>>>> man->flags = TTM_MEMTYPE_FLAG_FIXED |
>>>> TTM_MEMTYPE_FLAG_MAPPABLE;
>>>> man->available_caching = TTM_PL_FLAG_UNCACHED |
>>>> TTM_PL_FLAG_WC;
>>>> man->default_caching = TTM_PL_FLAG_WC;
>>>> break;
>>>> - case AMDGPU_PL_GDS:
>>>> - case AMDGPU_PL_GWS:
>>>> - case AMDGPU_PL_OA:
>>>> - /* On-chip GDS memory*/
>>>> - man->func = &ttm_bo_manager_func;
>>>> - man->gpu_offset = 0;
>>>> - man->flags = TTM_MEMTYPE_FLAG_FIXED |
>>>> TTM_MEMTYPE_FLAG_CMA;
>>>> - man->available_caching = TTM_PL_FLAG_UNCACHED;
>>>> - man->default_caching = TTM_PL_FLAG_UNCACHED;
>>>> - break;
>>>> default:
>>>> DRM_ERROR("Unsupported memory type %u\n",
>>>> (unsigned)type);
>>>> return -EINVAL;
>>>> }
>>>> return 0;
>>>> }
>>>> /**
>>>> * amdgpu_evict_flags - Compute placement flags
>>>> *
>>>> @@ -1039,25 +1029,20 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt
>>>> *ttm,
>>>> if (r) {
>>>> DRM_ERROR("failed to pin userptr\n");
>>>> return r;
>>>> }
>>>> }
>>>> if (!ttm->num_pages) {
>>>> WARN(1, "nothing to bind %lu pages for mreg %p back
>>>> %p!\n",
>>>> ttm->num_pages, bo_mem, ttm);
>>>> }
>>>> - if (bo_mem->mem_type == AMDGPU_PL_GDS ||
>>>> - bo_mem->mem_type == AMDGPU_PL_GWS ||
>>>> - bo_mem->mem_type == AMDGPU_PL_OA)
>>>> - return -EINVAL;
>>>> -
>>>> if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
>>>> gtt->offset = AMDGPU_BO_INVALID_OFFSET;
>>>> return 0;
>>>> }
>>>> /* compute PTE flags relevant to this BO memory */
>>>> flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
>>>> /* bind pages into GART page tables */
>>>> gtt->offset = ((u64)bo_mem->start << PAGE_SHIFT) -
>>>> adev->gmc.gart_start;
>>>> @@ -1818,60 +1803,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>>> /* Initialize GTT memory pool */
>>>> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >>
>>>> PAGE_SHIFT);
>>>> if (r) {
>>>> DRM_ERROR("Failed initializing GTT heap.\n");
>>>> return r;
>>>> }
>>>> DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
>>>> (unsigned)(gtt_size / (1024 * 1024)));
>>>> - /* Initialize various on-chip memory pools */
>>>> - adev->gds.mem.total_size = adev->gds.mem.total_size <<
>>>> AMDGPU_GDS_SHIFT;
>>>> - adev->gds.mem.gfx_partition_size =
>>>> adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
>>>> - adev->gds.mem.cs_partition_size =
>>>> adev->gds.mem.cs_partition_size
>>>> << AMDGPU_GDS_SHIFT;
>>>> - adev->gds.gws.total_size = adev->gds.gws.total_size <<
>>>> AMDGPU_GWS_SHIFT;
>>>> - adev->gds.gws.gfx_partition_size =
>>>> adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
>>>> - adev->gds.gws.cs_partition_size =
>>>> adev->gds.gws.cs_partition_size
>>>> << AMDGPU_GWS_SHIFT;
>>>> - adev->gds.oa.total_size = adev->gds.oa.total_size <<
>>>> AMDGPU_OA_SHIFT;
>>>> - adev->gds.oa.gfx_partition_size =
>>>> adev->gds.oa.gfx_partition_size
>>>> << AMDGPU_OA_SHIFT;
>>>> - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size
>>>> <<
>>>> AMDGPU_OA_SHIFT;
>>>> - /* GDS Memory */
>>>> - if (adev->gds.mem.total_size) {
>>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
>>>> - adev->gds.mem.total_size >>
>>>> PAGE_SHIFT);
>>>> - if (r) {
>>>> - DRM_ERROR("Failed initializing GDS heap.\n");
>>>> - return r;
>>>> - }
>>>> - }
>>>> -
>>>> - /* GWS */
>>>> - if (adev->gds.gws.total_size) {
>>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
>>>> - adev->gds.gws.total_size >>
>>>> PAGE_SHIFT);
>>>> - if (r) {
>>>> - DRM_ERROR("Failed initializing gws heap.\n");
>>>> - return r;
>>>> - }
>>>> - }
>>>> -
>>>> - /* OA */
>>>> - if (adev->gds.oa.total_size) {
>>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
>>>> - adev->gds.oa.total_size >>
>>>> PAGE_SHIFT);
>>>> - if (r) {
>>>> - DRM_ERROR("Failed initializing oa heap.\n");
>>>> - return r;
>>>> - }
>>>> - }
>>>> -
>>>> /* Register debugfs entries for amdgpu_ttm */
>>>> r = amdgpu_ttm_debugfs_init(adev);
>>>> if (r) {
>>>> DRM_ERROR("Failed to init debugfs\n");
>>>> return r;
>>>> }
>>>> return 0;
>>>> }
>>>> /**
>>>> @@ -1892,26 +1837,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
>>>> return;
>>>> amdgpu_ttm_debugfs_fini(adev);
>>>> amdgpu_ttm_fw_reserve_vram_fini(adev);
>>>> if (adev->mman.aper_base_kaddr)
>>>> iounmap(adev->mman.aper_base_kaddr);
>>>> adev->mman.aper_base_kaddr = NULL;
>>>> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
>>>> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
>>>> - if (adev->gds.mem.total_size)
>>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
>>>> - if (adev->gds.gws.total_size)
>>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
>>>> - if (adev->gds.oa.total_size)
>>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
>>>> ttm_bo_device_release(&adev->mman.bdev);
>>>> amdgpu_ttm_global_fini(adev);
>>>> adev->mman.initialized = false;
>>>> DRM_INFO("amdgpu: ttm finalized\n");
>>>> }
>>>> /**
>>>> * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer
>>>> functions
>>>> *
>>>> * @adev: amdgpu_device pointer
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> index fe8f276e9811..04557a382b19 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> @@ -20,28 +20,20 @@
>>>> * OTHER DEALINGS IN THE SOFTWARE.
>>>> *
>>>> */
>>>> #ifndef __AMDGPU_TTM_H__
>>>> #define __AMDGPU_TTM_H__
>>>> #include "amdgpu.h"
>>>> #include <drm/gpu_scheduler.h>
>>>> -#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
>>>> -#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
>>>> -#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
>>>> -
>>>> -#define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0)
>>>> -#define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1)
>>>> -#define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2)
>>>> -
>>>> #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
>>>> #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
>>>> struct amdgpu_mman {
>>>> struct ttm_bo_global_ref bo_global_ref;
>>>> struct drm_global_reference mem_global_ref;
>>>> struct ttm_bo_device bdev;
>>>> bool mem_global_referenced;
>>>> bool initialized;
>>>> void __iomem *aper_base_kaddr;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index be1659fedf94..c66f1c6f0ba8 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -803,86 +803,69 @@ void amdgpu_vm_check_compute_bug(struct
>>>> amdgpu_device *adev)
>>>> * Returns:
>>>> * True if sync is needed.
>>>> */
>>>> bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>>> struct amdgpu_job *job)
>>>> {
>>>> struct amdgpu_device *adev = ring->adev;
>>>> unsigned vmhub = ring->funcs->vmhub;
>>>> struct amdgpu_vmid_mgr *id_mgr =
>>>> &adev->vm_manager.id_mgr[vmhub];
>>>> struct amdgpu_vmid *id;
>>>> - bool gds_switch_needed;
>>>> bool vm_flush_needed = job->vm_needs_flush ||
>>>> ring->has_compute_vm_bug;
>>>> if (job->vmid == 0)
>>>> return false;
>>>> id = &id_mgr->ids[job->vmid];
>>>> - gds_switch_needed = ring->funcs->emit_gds_switch && (
>>>> - id->gds_base != job->gds_base ||
>>>> - id->gds_size != job->gds_size ||
>>>> - id->gws_base != job->gws_base ||
>>>> - id->gws_size != job->gws_size ||
>>>> - id->oa_base != job->oa_base ||
>>>> - id->oa_size != job->oa_size);
>>>> if (amdgpu_vmid_had_gpu_reset(adev, id))
>>>> return true;
>>>> - return vm_flush_needed || gds_switch_needed;
>>>> + return vm_flush_needed;
>>>> }
>>>> /**
>>>> * amdgpu_vm_flush - hardware flush the vm
>>>> *
>>>> * @ring: ring to use for flush
>>>> * @job: related job
>>>> * @need_pipe_sync: is pipe sync needed
>>>> *
>>>> * Emit a VM flush when it is necessary.
>>>> *
>>>> * Returns:
>>>> * 0 on success, errno otherwise.
>>>> */
>>>> int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>> bool need_pipe_sync)
>>>> {
>>>> struct amdgpu_device *adev = ring->adev;
>>>> unsigned vmhub = ring->funcs->vmhub;
>>>> struct amdgpu_vmid_mgr *id_mgr =
>>>> &adev->vm_manager.id_mgr[vmhub];
>>>> struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>>>> - bool gds_switch_needed = ring->funcs->emit_gds_switch && (
>>>> - id->gds_base != job->gds_base ||
>>>> - id->gds_size != job->gds_size ||
>>>> - id->gws_base != job->gws_base ||
>>>> - id->gws_size != job->gws_size ||
>>>> - id->oa_base != job->oa_base ||
>>>> - id->oa_size != job->oa_size);
>>>> bool vm_flush_needed = job->vm_needs_flush;
>>>> bool pasid_mapping_needed = id->pasid != job->pasid ||
>>>> !id->pasid_mapping ||
>>>> !dma_fence_is_signaled(id->pasid_mapping);
>>>> struct dma_fence *fence = NULL;
>>>> unsigned patch_offset = 0;
>>>> int r;
>>>> if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>>>> - gds_switch_needed = true;
>>>> vm_flush_needed = true;
>>>> pasid_mapping_needed = true;
>>>> }
>>>> - gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>>>> vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>>>> job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>>>> pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping
>>>> &&
>>>> ring->funcs->emit_wreg;
>>>> - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
>>>> + if (!vm_flush_needed && !need_pipe_sync)
>>>> return 0;
>>>> if (ring->funcs->init_cond_exec)
>>>> patch_offset = amdgpu_ring_init_cond_exec(ring);
>>>> if (need_pipe_sync)
>>>> amdgpu_ring_emit_pipeline_sync(ring);
>>>> if (vm_flush_needed) {
>>>> trace_amdgpu_vm_flush(ring, job->vmid,
>>>> job->vm_pd_addr);
>>>> @@ -907,33 +890,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
>>>> struct
>>>> amdgpu_job *job, bool need_
>>>> mutex_unlock(&id_mgr->lock);
>>>> }
>>>> if (pasid_mapping_needed) {
>>>> id->pasid = job->pasid;
>>>> dma_fence_put(id->pasid_mapping);
>>>> id->pasid_mapping = dma_fence_get(fence);
>>>> }
>>>> dma_fence_put(fence);
>>>> - if (ring->funcs->emit_gds_switch && gds_switch_needed) {
>>>> - id->gds_base = job->gds_base;
>>>> - id->gds_size = job->gds_size;
>>>> - id->gws_base = job->gws_base;
>>>> - id->gws_size = job->gws_size;
>>>> - id->oa_base = job->oa_base;
>>>> - id->oa_size = job->oa_size;
>>>> - amdgpu_ring_emit_gds_switch(ring, job->vmid,
>>>> job->gds_base,
>>>> - job->gds_size,
>>>> job->gws_base,
>>>> - job->gws_size, job->oa_base,
>>>> - job->oa_size);
>>>> - }
>>>> -
>>>> if (ring->funcs->patch_cond_exec)
>>>> amdgpu_ring_patch_cond_exec(ring, patch_offset);
>>>> /* the double SWITCH_BUFFER here *cannot* be skipped by
>>>> COND_EXEC
>>>> */
>>>> if (ring->funcs->emit_switch_buffer) {
>>>> amdgpu_ring_emit_switch_buffer(ring);
>>>> amdgpu_ring_emit_switch_buffer(ring);
>>>> }
>>>> return 0;
>>>> }
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>>>> index a15d9c0f233b..f5228e169c3a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>>>> @@ -1890,21 +1890,21 @@ static void gfx_v7_0_config_init(struct
>>>> amdgpu_device *adev)
>>>> *
>>>> * @adev: amdgpu_device pointer
>>>> *
>>>> * Configures the 3D engine and tiling configuration
>>>> * registers so that the 3D engine is usable.
>>>> */
>>>> static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
>>>> {
>>>> u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
>>>> u32 tmp;
>>>> - int i;
>>>> + int i, vmid;
>>>> WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
>>>> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>>>> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>>>> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
>>>> gfx_v7_0_tiling_mode_table_init(adev);
>>>> gfx_v7_0_setup_rb(adev);
>>>> @@ -2014,20 +2014,42 @@ static void gfx_v7_0_gpu_init(struct
>>>> amdgpu_device
>>>> *adev)
>>>> tmp = RREG32(mmSPI_ARB_PRIORITY);
>>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
>>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
>>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
>>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
>>>> WREG32(mmSPI_ARB_PRIORITY, tmp);
>>>> mutex_unlock(&adev->grbm_idx_mutex);
>>>> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
>>>> + unsigned gds_size, gws_size, oa_size;
>>>> +
>>>> + if (vmid <
>>>> adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids)
>>>> {
>>>> + gds_size = adev->gds.mem.gfx_size_per_vmid;
>>>> + gws_size = adev->gds.gws.gfx_size_per_vmid;
>>>> + oa_size = adev->gds.oa.gfx_size_per_vmid;
>>>> + } else {
>>>> + gds_size = adev->gds.mem.kfd_size_per_vmid;
>>>> + gws_size = adev->gds.gws.kfd_size_per_vmid;
>>>> + oa_size = adev->gds.oa.kfd_size_per_vmid;
>>>> + }
>>>> +
>>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid *
>>>> gds_size);
>>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size);
>>>> + WREG32(amdgpu_gds_reg_offset[vmid].gws,
>>>> + (vmid * gws_size) |
>>>> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
>>>> + WREG32(amdgpu_gds_reg_offset[vmid].oa,
>>>> + ((1 << oa_size) - 1) << (vmid * oa_size));
>>>> + }
>>>> +
>>>> udelay(50);
>>>> }
>>>> /*
>>>> * GPU scratch register helper functions.
>>>> */
>>>> /**
>>>> * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
>>>> *
>>>> * @adev: amdgpu_device pointer
>>>> @@ -4157,68 +4179,20 @@ static uint64_t
>>>> gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>>>> uint64_t clock;
>>>> mutex_lock(&adev->gfx.gpu_clock_mutex);
>>>> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
>>>> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
>>>> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
>>>> mutex_unlock(&adev->gfx.gpu_clock_mutex);
>>>> return clock;
>>>> }
>>>> -static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>>>> - uint32_t vmid,
>>>> - uint32_t gds_base, uint32_t
>>>> gds_size,
>>>> - uint32_t gws_base, uint32_t
>>>> gws_size,
>>>> - uint32_t oa_base, uint32_t
>>>> oa_size)
>>>> -{
>>>> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
>>>> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
>>>> -
>>>> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
>>>> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
>>>> -
>>>> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
>>>> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
>>>> -
>>>> - /* GDS Base */
>>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>>> - WRITE_DATA_DST_SEL(0)));
>>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
>>>> - amdgpu_ring_write(ring, 0);
>>>> - amdgpu_ring_write(ring, gds_base);
>>>> -
>>>> - /* GDS Size */
>>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>>> - WRITE_DATA_DST_SEL(0)));
>>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
>>>> - amdgpu_ring_write(ring, 0);
>>>> - amdgpu_ring_write(ring, gds_size);
>>>> -
>>>> - /* GWS */
>>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>>> - WRITE_DATA_DST_SEL(0)));
>>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
>>>> - amdgpu_ring_write(ring, 0);
>>>> - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT |
>>>> gws_base);
>>>> -
>>>> - /* OA */
>>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>>> - WRITE_DATA_DST_SEL(0)));
>>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
>>>> - amdgpu_ring_write(ring, 0);
>>>> - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 <<
>>>> oa_base));
>>>> -}
>>>> -
>>>> static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring,
>>>> unsigned vmid)
>>>> {
>>>> struct amdgpu_device *adev = ring->adev;
>>>> uint32_t value = 0;
>>>> value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
>>>> value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
>>>> value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
>>>> value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
>>>> WREG32(mmSQ_CMD, value);
>>>> @@ -4584,55 +4558,32 @@ static int gfx_v7_0_sw_init(void *handle)
>>>>
>>>> ring_id,
>>>> i, k,
>>>> j);
>>>> if (r)
>>>> return r;
>>>> ring_id++;
>>>> }
>>>> }
>>>> }
>>>> - /* reserve GDS, GWS and OA resource for gfx */
>>>> - r = amdgpu_bo_create_kernel(adev,
>>>> adev->gds.mem.gfx_partition_size,
>>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
>>>> - &adev->gds.gds_gfx_bo, NULL, NULL);
>>>> - if (r)
>>>> - return r;
>>>> -
>>>> - r = amdgpu_bo_create_kernel(adev,
>>>> adev->gds.gws.gfx_partition_size,
>>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
>>>> - &adev->gds.gws_gfx_bo, NULL, NULL);
>>>> - if (r)
>>>> - return r;
>>>> -
>>>> - r = amdgpu_bo_create_kernel(adev,
>>>> adev->gds.oa.gfx_partition_size,
>>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
>>>> - &adev->gds.oa_gfx_bo, NULL, NULL);
>>>> - if (r)
>>>> - return r;
>>>> -
>>>> adev->gfx.ce_ram_size = 0x8000;
>>>> gfx_v7_0_gpu_early_init(adev);
>>>> return r;
>>>> }
>>>> static int gfx_v7_0_sw_fini(void *handle)
>>>> {
>>>> int i;
>>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>>> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>>>> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
>>>> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>>>> -
>>>> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>>>> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>>>> for (i = 0; i < adev->gfx.num_compute_rings; i++)
>>>> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>>>> gfx_v7_0_cp_compute_fini(adev);
>>>> gfx_v7_0_rlc_fini(adev);
>>>> gfx_v7_0_mec_fini(adev);
>>>> amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
>>>> &adev->gfx.rlc.clear_state_gpu_addr,
>>>> @@ -5073,64 +5024,60 @@ static const struct amd_ip_funcs
>>>> gfx_v7_0_ip_funcs
>>>> = {
>>>> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
>>>> .type = AMDGPU_RING_TYPE_GFX,
>>>> .align_mask = 0xff,
>>>> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>> .support_64bit_ptrs = false,
>>>> .get_rptr = gfx_v7_0_ring_get_rptr,
>>>> .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
>>>> .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
>>>> .emit_frame_size =
>>>> - 20 + /* gfx_v7_0_ring_emit_gds_switch */
>>>> 7 + /* gfx_v7_0_ring_emit_hdp_flush */
>>>> 5 + /* hdp invalidate */
>>>> 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for
>>>> user
>>>> fence, vm fence */
>>>> 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
>>>> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /*
>>>> gfx_v7_0_ring_emit_vm_flush */
>>>> 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt
>>>> flush*/
>>>> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
>>>> .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
>>>> .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
>>>> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
>>>> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
>>>> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
>>>> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
>>>> .test_ring = gfx_v7_0_ring_test_ring,
>>>> .test_ib = gfx_v7_0_ring_test_ib,
>>>> .insert_nop = amdgpu_ring_insert_nop,
>>>> .pad_ib = amdgpu_ring_generic_pad_ib,
>>>> .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
>>>> .emit_wreg = gfx_v7_0_ring_emit_wreg,
>>>> .soft_recovery = gfx_v7_0_ring_soft_recovery,
>>>> };
>>>> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute =
>>>> {
>>>> .type = AMDGPU_RING_TYPE_COMPUTE,
>>>> .align_mask = 0xff,
>>>> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>> .support_64bit_ptrs = false,
>>>> .get_rptr = gfx_v7_0_ring_get_rptr,
>>>> .get_wptr = gfx_v7_0_ring_get_wptr_compute,
>>>> .set_wptr = gfx_v7_0_ring_set_wptr_compute,
>>>> .emit_frame_size =
>>>> - 20 + /* gfx_v7_0_ring_emit_gds_switch */
>>>> 7 + /* gfx_v7_0_ring_emit_hdp_flush */
>>>> 5 + /* hdp invalidate */
>>>> 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
>>>> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /*
>>>> gfx_v7_0_ring_emit_vm_flush */
>>>> 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for
>>>> user
>>>> fence, vm fence */
>>>> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
>>>> .emit_ib = gfx_v7_0_ring_emit_ib_compute,
>>>> .emit_fence = gfx_v7_0_ring_emit_fence_compute,
>>>> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
>>>> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
>>>> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
>>>> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
>>>> .test_ring = gfx_v7_0_ring_test_ring,
>>>> .test_ib = gfx_v7_0_ring_test_ib,
>>>> .insert_nop = amdgpu_ring_insert_nop,
>>>> .pad_ib = amdgpu_ring_generic_pad_ib,
>>>> .emit_wreg = gfx_v7_0_ring_emit_wreg,
>>>> };
>>>> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
>>>> {
>>>> @@ -5169,42 +5116,28 @@ static void gfx_v7_0_set_irq_funcs(struct
>>>> amdgpu_device *adev)
>>>> adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
>>>> }
>>>> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
>>>> {
>>>> /* init ASIC GDS info */
>>>> adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
>>>> adev->gds.gws.total_size = 64;
>>>> adev->gds.oa.total_size = 16;
>>>> - if (adev->gds.mem.total_size == 64 * 1024) {
>>>> - adev->gds.mem.gfx_partition_size = 4096;
>>>> - adev->gds.mem.cs_partition_size = 4096;
>>>> -
>>>> - adev->gds.gws.gfx_partition_size = 4;
>>>> - adev->gds.gws.cs_partition_size = 4;
>>>> -
>>>> - adev->gds.oa.gfx_partition_size = 4;
>>>> - adev->gds.oa.cs_partition_size = 1;
>>>> - } else {
>>>> - adev->gds.mem.gfx_partition_size = 1024;
>>>> - adev->gds.mem.cs_partition_size = 1024;
>>>> -
>>>> - adev->gds.gws.gfx_partition_size = 16;
>>>> - adev->gds.gws.cs_partition_size = 16;
>>>> -
>>>> - adev->gds.oa.gfx_partition_size = 4;
>>>> - adev->gds.oa.cs_partition_size = 4;
>>>> - }
>>>> + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size /
>>>> AMDGPU_NUM_VMID;
>>>> + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size /
>>>> AMDGPU_NUM_VMID;
>>>> + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size /
>>>> AMDGPU_NUM_VMID;
>>>> + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size /
>>>> AMDGPU_NUM_VMID;
>>>> + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /*
>>>> gfx only */
>>>> + adev->gds.oa.kfd_size_per_vmid = 0;
>>>> }
>>>> -
>>>> static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
>>>> {
>>>> int i, j, k, counter, active_cu_number = 0;
>>>> u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
>>>> struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
>>>> unsigned disable_masks[4 * 2];
>>>> u32 ao_cu_num;
>>>> if (adev->flags & AMD_IS_APU)
>>>> ao_cu_num = 2;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>>> index 3882689b2d8f..b11a54bd0668 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>>> @@ -2154,57 +2154,34 @@ static int gfx_v8_0_sw_init(void *handle)
>>>> kiq = &adev->gfx.kiq;
>>>> r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>>>> if (r)
>>>> return r;
>>>> /* create MQD for all compute queues as well as KIQ for SRIOV
>>>> case
>>>> */
>>>> r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct
>>>> vi_mqd_allocation));
>>>> if (r)
>>>> return r;
>>>> - /* reserve GDS, GWS and OA resource for gfx */
>>>> - r = amdgpu_bo_create_kernel(adev,
>>>> adev->gds.mem.gfx_partition_size,
>>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
>>>> - &adev->gds.gds_gfx_bo, NULL, NULL);
>>>> - if (r)
>>>> - return r;
>>>> -
>>>> - r = amdgpu_bo_create_kernel(adev,
>>>> adev->gds.gws.gfx_partition_size,
>>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
>>>> - &adev->gds.gws_gfx_bo, NULL, NULL);
>>>> - if (r)
>>>> - return r;
>>>> -
>>>> - r = amdgpu_bo_create_kernel(adev,
>>>> adev->gds.oa.gfx_partition_size,
>>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
>>>> - &adev->gds.oa_gfx_bo, NULL, NULL);
>>>> - if (r)
>>>> - return r;
>>>> -
>>>> adev->gfx.ce_ram_size = 0x8000;
>>>> r = gfx_v8_0_gpu_early_init(adev);
>>>> if (r)
>>>> return r;
>>>> return 0;
>>>> }
>>>> static int gfx_v8_0_sw_fini(void *handle)
>>>> {
>>>> int i;
>>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>>> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>>>> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
>>>> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>>>> -
>>>> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>>>> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>>>> for (i = 0; i < adev->gfx.num_compute_rings; i++)
>>>> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>>>> amdgpu_gfx_compute_mqd_sw_fini(adev);
>>>> amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring,
>>>> &adev->gfx.kiq.irq);
>>>> amdgpu_gfx_kiq_fini(adev);
>>>> gfx_v8_0_mec_fini(adev);
>>>> @@ -3850,21 +3827,21 @@ static void gfx_v8_0_config_init(struct
>>>> amdgpu_device *adev)
>>>> case CHIP_CARRIZO:
>>>> case CHIP_STONEY:
>>>> adev->gfx.config.double_offchip_lds_buf = 0;
>>>> break;
>>>> }
>>>> }
>>>> static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
>>>> {
>>>> u32 tmp, sh_static_mem_cfg;
>>>> - int i;
>>>> + int i, vmid;
>>>> WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
>>>> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>>>> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>>>> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
>>>> gfx_v8_0_tiling_mode_table_init(adev);
>>>> gfx_v8_0_setup_rb(adev);
>>>> gfx_v8_0_get_cu_info(adev);
>>>> gfx_v8_0_config_init(adev);
>>>> @@ -3927,20 +3904,41 @@ static void gfx_v8_0_gpu_init(struct
>>>> amdgpu_device
>>>> *adev)
>>>> tmp = RREG32(mmSPI_ARB_PRIORITY);
>>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
>>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
>>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
>>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
>>>> WREG32(mmSPI_ARB_PRIORITY, tmp);
>>>> mutex_unlock(&adev->grbm_idx_mutex);
>>>> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
>>>> + unsigned gds_size, gws_size, oa_size;
>>>> +
>>>> + if (vmid <
>>>> adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids)
>>>> {
>>>> + gds_size = adev->gds.mem.gfx_size_per_vmid;
>>>> + gws_size = adev->gds.gws.gfx_size_per_vmid;
>>>> + oa_size = adev->gds.oa.gfx_size_per_vmid;
>>>> + } else {
>>>> + gds_size = adev->gds.mem.kfd_size_per_vmid;
>>>> + gws_size = adev->gds.gws.kfd_size_per_vmid;
>>>> + oa_size = adev->gds.oa.kfd_size_per_vmid;
>>>> + }
>>>> +
>>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid *
>>>> gds_size);
>>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size);
>>>> + WREG32(amdgpu_gds_reg_offset[vmid].gws,
>>>> + (vmid * gws_size) |
>>>> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
>>>> + WREG32(amdgpu_gds_reg_offset[vmid].oa,
>>>> + ((1 << oa_size) - 1) << (vmid * oa_size));
>>>> + }
>>>> }
>>>> static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device
>>>> *adev)
>>>> {
>>>> u32 i, j, k;
>>>> u32 mask;
>>>> mutex_lock(&adev->grbm_idx_mutex);
>>>> for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
>>>> for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
>>>> @@ -5383,68 +5381,20 @@ static uint64_t
>>>> gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>>>> uint64_t clock;
>>>> mutex_lock(&adev->gfx.gpu_clock_mutex);
>>>> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
>>>> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
>>>> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
>>>> mutex_unlock(&adev->gfx.gpu_clock_mutex);
>>>> return clock;
>>>> }
>>>> -static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>>>> - uint32_t vmid,
>>>> - uint32_t gds_base, uint32_t
>>>> gds_size,
>>>> - uint32_t gws_base, uint32_t
>>>> gws_size,
>>>> - uint32_t oa_base, uint32_t
>>>> oa_size)
>>>> -{
>>>> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
>>>> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
>>>> -
>>>> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
>>>> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
>>>> -
>>>> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
>>>> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
>>>> -
>>>> - /* GDS Base */
>>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
More information about the amd-gfx
mailing list