[PATCH] drm/amdgpu: reserve GDS resources statically
Christian König
christian.koenig at amd.com
Thu Sep 13 17:26:01 UTC 2018
Are you sure of that? I mean, it is rather pointless to have a Global
Data Share when it can't be used to share anything.
On the other hand, I'm not opposed to getting rid of all that stuff if we
really don't need it.
Christian.
On 13.09.2018 at 17:27, Marek Olšák wrote:
> That's OK. We don't need IBs to get the same VMID.
>
> Marek
>
> On Thu, Sep 13, 2018 at 4:40 AM, Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> As discussed internally, that doesn't work because threads don't
>> necessarily get the same VMID assigned.
>>
>> Christian.
>>
>> On 12.09.2018 at 22:33, Marek Olšák wrote:
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> I've chosen to do it like this because it's easy and allows an arbitrary
>>> number of processes.
>>>
>>> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 10 --
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 -
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 20 ----
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 19 +--
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 24 +---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 -
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 7 --
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 -
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 +--
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 ----
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 -
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 -
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 ----------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 --
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +-----
>>> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 125 +++++---------------
>>> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 123 +++++--------------
>>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 124 ++++++-------------
>>> include/uapi/drm/amdgpu_drm.h | 15 +--
>>> 19 files changed, 109 insertions(+), 519 deletions(-)
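
For illustration, the static carve-up this patch sets up can be reproduced
with a small standalone sketch. The totals are the CIK values from
gfx_v7_0_set_gds_init further down (64 KB GDS, 64 GWS entries, 16 OA slots,
16 VMIDs); treating the first 8 VMIDs as gfx mirrors the total_size / 8 OA
split in the patch. All names below are illustrative, not part of the patch:

#include <stdio.h>

#define NUM_VMID     16          /* AMDGPU_NUM_VMID */
#define NUM_GFX_VMID 8           /* assumption: first 8 VMIDs are gfx */
#define GDS_TOTAL    (64 * 1024) /* bytes */
#define GWS_TOTAL    64          /* entries */
#define OA_TOTAL     16          /* slots, gfx only in this patch */

int main(void)
{
	unsigned gds_per_vmid = GDS_TOTAL / NUM_VMID;    /* 4 KB each */
	unsigned gws_per_vmid = GWS_TOTAL / NUM_VMID;    /* 4 each */
	unsigned oa_per_vmid  = OA_TOTAL / NUM_GFX_VMID; /* 2 bits each */
	unsigned vmid;

	for (vmid = 0; vmid < NUM_VMID; vmid++) {
		/* Fixed, non-overlapping window per VMID. */
		unsigned gds_base = vmid * gds_per_vmid;
		unsigned gws_base = vmid * gws_per_vmid;
		/* OA is a bitmask: oa_per_vmid consecutive bits per gfx
		 * VMID; KFD VMIDs get no OA (kfd_size_per_vmid = 0). */
		unsigned oa_mask = vmid < NUM_GFX_VMID ?
			((1u << oa_per_vmid) - 1) << (vmid * oa_per_vmid) : 0;

		printf("vmid %2u: GDS 0x%05x+0x%04x GWS %2u+%u OA 0x%04x\n",
		       vmid, gds_base, gds_per_vmid, gws_base, gws_per_vmid,
		       oa_mask);
	}
	return 0;
}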
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> index b80243d3972e..7264a4930b88 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> @@ -71,23 +71,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
>>> / sizeof(struct amdgpu_bo_list_entry))
>>> return -EINVAL;
>>> size = sizeof(struct amdgpu_bo_list);
>>> size += num_entries * sizeof(struct amdgpu_bo_list_entry);
>>> list = kvmalloc(size, GFP_KERNEL);
>>> if (!list)
>>> return -ENOMEM;
>>> kref_init(&list->refcount);
>>> - list->gds_obj = adev->gds.gds_gfx_bo;
>>> - list->gws_obj = adev->gds.gws_gfx_bo;
>>> - list->oa_obj = adev->gds.oa_gfx_bo;
>>> array = amdgpu_bo_list_array_entry(list, 0);
>>> memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
>>> for (i = 0; i < num_entries; ++i) {
>>> struct amdgpu_bo_list_entry *entry;
>>> struct drm_gem_object *gobj;
>>> struct amdgpu_bo *bo;
>>> struct mm_struct *usermm;
>>> @@ -111,27 +108,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
>>> } else {
>>> entry = &array[last_entry++];
>>> }
>>> entry->robj = bo;
>>> entry->priority = min(info[i].bo_priority,
>>> AMDGPU_BO_LIST_MAX_PRIORITY);
>>> entry->tv.bo = &entry->robj->tbo;
>>> entry->tv.shared = !entry->robj->prime_shared_count;
>>> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
>>> - list->gds_obj = entry->robj;
>>> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
>>> - list->gws_obj = entry->robj;
>>> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
>>> - list->oa_obj = entry->robj;
>>> -
>>> total_size += amdgpu_bo_size(entry->robj);
>>> trace_amdgpu_bo_list_set(list, entry->robj);
>>> }
>>> list->first_userptr = first_userptr;
>>> list->num_entries = num_entries;
>>> trace_amdgpu_cs_bo_status(list->num_entries, total_size);
>>> *result = list;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>> index 61b089768e1c..30f12a60aa28 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>> @@ -36,23 +36,20 @@ struct amdgpu_bo_list_entry {
>>> struct ttm_validate_buffer tv;
>>> struct amdgpu_bo_va *bo_va;
>>> uint32_t priority;
>>> struct page **user_pages;
>>> int user_invalidated;
>>> };
>>> struct amdgpu_bo_list {
>>> struct rcu_head rhead;
>>> struct kref refcount;
>>> - struct amdgpu_bo *gds_obj;
>>> - struct amdgpu_bo *gws_obj;
>>> - struct amdgpu_bo *oa_obj;
>>> unsigned first_userptr;
>>> unsigned num_entries;
>>> };
>>> int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
>>> struct amdgpu_bo_list **result);
>>> void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>>> struct list_head *validated);
>>> void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>>> int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 1081fd00b059..88b58facf29e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -565,23 +565,20 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>>> return 0;
>>> }
>>> static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>> union drm_amdgpu_cs *cs)
>>> {
>>> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>> struct amdgpu_vm *vm = &fpriv->vm;
>>> struct amdgpu_bo_list_entry *e;
>>> struct list_head duplicates;
>>> - struct amdgpu_bo *gds;
>>> - struct amdgpu_bo *gws;
>>> - struct amdgpu_bo *oa;
>>> unsigned tries = 10;
>>> int r;
>>> INIT_LIST_HEAD(&p->validated);
>>> /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
>>> if (cs->in.bo_list_handle) {
>>> if (p->bo_list)
>>> return -EINVAL;
>>> @@ -705,40 +702,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>> r = amdgpu_cs_list_validate(p, &p->validated);
>>> if (r) {
>>> DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
>>> goto error_validate;
>>> }
>>> amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>>> p->bytes_moved_vis);
>>> - gds = p->bo_list->gds_obj;
>>> - gws = p->bo_list->gws_obj;
>>> - oa = p->bo_list->oa_obj;
>>> -
>>> amdgpu_bo_list_for_each_entry(e, p->bo_list)
>>> e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
>>> - if (gds) {
>>> - p->job->gds_base = amdgpu_bo_gpu_offset(gds);
>>> - p->job->gds_size = amdgpu_bo_size(gds);
>>> - }
>>> - if (gws) {
>>> - p->job->gws_base = amdgpu_bo_gpu_offset(gws);
>>> - p->job->gws_size = amdgpu_bo_size(gws);
>>> - }
>>> - if (oa) {
>>> - p->job->oa_base = amdgpu_bo_gpu_offset(oa);
>>> - p->job->oa_size = amdgpu_bo_size(oa);
>>> - }
>>> -
>>> if (!r && p->uf_entry.robj) {
>>> struct amdgpu_bo *uf = p->uf_entry.robj;
>>> r = amdgpu_ttm_alloc_gart(&uf->tbo);
>>> p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>> }
>>> error_validate:
>>> if (r)
>>> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>>> index e73728d90388..69ba25c2e921 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>>> @@ -17,48 +17,33 @@
>>> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>>> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>>> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>>> * OTHER DEALINGS IN THE SOFTWARE.
>>> *
>>> */
>>> #ifndef __AMDGPU_GDS_H__
>>> #define __AMDGPU_GDS_H__
>>> -/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned,
>>> - * we should report GDS/GWS/OA size as PAGE_SIZE aligned
>>> - * */
>>> -#define AMDGPU_GDS_SHIFT 2
>>> -#define AMDGPU_GWS_SHIFT PAGE_SHIFT
>>> -#define AMDGPU_OA_SHIFT PAGE_SHIFT
>>> -
>>> struct amdgpu_ring;
>>> struct amdgpu_bo;
>>> struct amdgpu_gds_asic_info {
>>> uint32_t total_size;
>>> - uint32_t gfx_partition_size;
>>> - uint32_t cs_partition_size;
>>> + uint32_t gfx_size_per_vmid;
>>> + uint32_t kfd_size_per_vmid;
>>> };
>>> struct amdgpu_gds {
>>> struct amdgpu_gds_asic_info mem;
>>> struct amdgpu_gds_asic_info gws;
>>> struct amdgpu_gds_asic_info oa;
>>> - /* At present, GDS, GWS and OA resources for gfx (graphics)
>>> - * is always pre-allocated and available for graphics operation.
>>> - * Such resource is shared between all gfx clients.
>>> - * TODO: move this operation to user space
>>> - * */
>>> - struct amdgpu_bo* gds_gfx_bo;
>>> - struct amdgpu_bo* gws_gfx_bo;
>>> - struct amdgpu_bo* oa_gfx_bo;
>>> };
>>> struct amdgpu_gds_reg_offset {
>>> uint32_t mem_base;
>>> uint32_t mem_size;
>>> uint32_t gws;
>>> uint32_t oa;
>>> };
>>> #endif /* __AMDGPU_GDS_H__ */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d30a0838851b..c87ad4b4d0b6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -223,43 +223,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>>> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>> AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>> AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
>>> AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
>>> return -EINVAL;
>>> /* reject invalid gem domains */
>>> - if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
>>> + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
>>> + AMDGPU_GEM_DOMAIN_GTT |
>>> + AMDGPU_GEM_DOMAIN_VRAM))
>>> return -EINVAL;
>>> - /* create a gem object to contain this object in */
>>> - if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
>>> - AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
>>> - if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
>>> - /* if gds bo is created from user space, it must be
>>> - * passed to bo list
>>> - */
>>> - DRM_ERROR("GDS bo cannot be per-vm-bo\n");
>>> - return -EINVAL;
>>> - }
>>> - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
>>> - if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
>>> - size = size << AMDGPU_GDS_SHIFT;
>>> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
>>> - size = size << AMDGPU_GWS_SHIFT;
>>> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
>>> - size = size << AMDGPU_OA_SHIFT;
>>> - else
>>> - return -EINVAL;
>>> - }
>>> size = roundup(size, PAGE_SIZE);
>>> if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
>>> r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>> if (r)
>>> return r;
>>> resv = vm->root.base.bo->tbo.resv;
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>>> index 3a072a7a39f0..c2e6a1a11d7f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>>> @@ -516,26 +516,20 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
>>> * Reset saved GDW, GWS and OA to force switch on next flush.
>>> */
>>> void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
>>> unsigned vmid)
>>> {
>>> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>> struct amdgpu_vmid *id = &id_mgr->ids[vmid];
>>> mutex_lock(&id_mgr->lock);
>>> id->owner = 0;
>>> - id->gds_base = 0;
>>> - id->gds_size = 0;
>>> - id->gws_base = 0;
>>> - id->gws_size = 0;
>>> - id->oa_base = 0;
>>> - id->oa_size = 0;
>>> mutex_unlock(&id_mgr->lock);
>>> }
>>> /**
>>> * amdgpu_vmid_reset_all - reset VMID to zero
>>> *
>>> * @adev: amdgpu device structure
>>> *
>>> * Reset VMID to force flush on next use
>>> */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>> index 7625419f0fc2..06078e665532 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>> @@ -44,27 +44,20 @@ struct amdgpu_vmid {
>>> struct amdgpu_sync active;
>>> struct dma_fence *last_flush;
>>> uint64_t owner;
>>> uint64_t pd_gpu_addr;
>>> /* last flushed PD/PT update */
>>> struct dma_fence *flushed_updates;
>>> uint32_t current_gpu_reset_count;
>>> - uint32_t gds_base;
>>> - uint32_t gds_size;
>>> - uint32_t gws_base;
>>> - uint32_t gws_size;
>>> - uint32_t oa_base;
>>> - uint32_t oa_size;
>>> -
>>> unsigned pasid;
>>> struct dma_fence *pasid_mapping;
>>> };
>>> struct amdgpu_vmid_mgr {
>>> struct mutex lock;
>>> unsigned num_ids;
>>> struct list_head ids_lru;
>>> struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
>>> atomic_t reserved_vmid_num;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>> index 57cfe78a262b..3db553f6ad01 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>> @@ -42,23 +42,20 @@ struct amdgpu_job {
>>> struct amdgpu_sync sched_sync;
>>> struct amdgpu_ib *ibs;
>>> struct dma_fence *fence; /* the hw fence */
>>> uint32_t preamble_status;
>>> uint32_t num_ibs;
>>> void *owner;
>>> bool vm_needs_flush;
>>> uint64_t vm_pd_addr;
>>> unsigned vmid;
>>> unsigned pasid;
>>> - uint32_t gds_base, gds_size;
>>> - uint32_t gws_base, gws_size;
>>> - uint32_t oa_base, oa_size;
>>> uint32_t vram_lost_counter;
>>> /* user fence handling */
>>> uint64_t uf_addr;
>>> uint64_t uf_sequence;
>>> };
>>> int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
>>> struct amdgpu_job **job, struct amdgpu_vm *vm);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> index 29ac3873eeb0..209954290954 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> @@ -517,27 +517,27 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
>>> case AMDGPU_INFO_VIS_VRAM_USAGE:
>>> ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
>>> return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
>>> case AMDGPU_INFO_GTT_USAGE:
>>> ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
>>> return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
>>> case AMDGPU_INFO_GDS_CONFIG: {
>>> struct drm_amdgpu_info_gds gds_info;
>>> memset(&gds_info, 0, sizeof(gds_info));
>>> - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT;
>>> - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT;
>>> - gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT;
>>> - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT;
>>> - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT;
>>> - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT;
>>> - gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT;
>>> + gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_size_per_vmid;
>>> + gds_info.compute_partition_size = adev->gds.mem.kfd_size_per_vmid;
>>> + gds_info.gds_total_size = adev->gds.mem.total_size;
>>> + gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_size_per_vmid;
>>> + gds_info.gws_per_compute_partition = adev->gds.gws.kfd_size_per_vmid;
>>> + gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_size_per_vmid;
>>> + gds_info.oa_per_compute_partition = adev->gds.oa.kfd_size_per_vmid;
>>> return copy_to_user(out, &gds_info,
>>> min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
>>> }
>>> case AMDGPU_INFO_VRAM_GTT: {
>>> struct drm_amdgpu_info_vram_gtt vram_gtt;
>>> vram_gtt.vram_size = adev->gmc.real_vram_size -
>>> atomic64_read(&adev->vram_pin_size);
>>> vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size -
>>> atomic64_read(&adev->visible_pin_size);
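
Userspace continues to query these limits through AMDGPU_INFO_GDS_CONFIG; only
the reported numbers change (raw sizes instead of values shifted by
AMDGPU_GDS_SHIFT and friends). A minimal sketch of the query, assuming
libdrm's amdgpu_query_gds_info() wrapper and its amdgpu_gds_resource_info
layout; the render node path is an assumption:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <amdgpu.h> /* libdrm_amdgpu */

int main(void)
{
	amdgpu_device_handle dev;
	struct amdgpu_gds_resource_info gds = {0};
	uint32_t major, minor;
	int fd = open("/dev/dri/renderD128", O_RDWR); /* path is an assumption */

	if (fd < 0 || amdgpu_device_initialize(fd, &major, &minor, &dev))
		return 1;

	/* Wraps DRM_AMDGPU_INFO with query == AMDGPU_INFO_GDS_CONFIG. */
	if (!amdgpu_query_gds_info(dev, &gds))
		printf("GDS total %u, per-VMID gfx %u / compute %u\n",
		       gds.gds_total_size, gds.gds_gfx_partition_size,
		       gds.compute_partition_size);

	amdgpu_device_deinitialize(dev);
	return 0;
}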
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index de990bdcdd6c..76770a8c29a5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -178,41 +178,20 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
>>> places[c].lpfn = 0;
>>> places[c].flags = TTM_PL_FLAG_SYSTEM;
>>> if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
>>> places[c].flags |= TTM_PL_FLAG_WC |
>>> TTM_PL_FLAG_UNCACHED;
>>> else
>>> places[c].flags |= TTM_PL_FLAG_CACHED;
>>> c++;
>>> }
>>> - if (domain & AMDGPU_GEM_DOMAIN_GDS) {
>>> - places[c].fpfn = 0;
>>> - places[c].lpfn = 0;
>>> - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS;
>>> - c++;
>>> - }
>>> -
>>> - if (domain & AMDGPU_GEM_DOMAIN_GWS) {
>>> - places[c].fpfn = 0;
>>> - places[c].lpfn = 0;
>>> - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS;
>>> - c++;
>>> - }
>>> -
>>> - if (domain & AMDGPU_GEM_DOMAIN_OA) {
>>> - places[c].fpfn = 0;
>>> - places[c].lpfn = 0;
>>> - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA;
>>> - c++;
>>> - }
>>> -
>>> if (!c) {
>>> places[c].fpfn = 0;
>>> places[c].lpfn = 0;
>>> places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
>>> c++;
>>> }
>>> BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS);
>>> placement->num_placement = c;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index 907fdf46d895..e089964cbcb7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -120,26 +120,20 @@ static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
>>> */
>>> static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
>>> {
>>> switch (mem_type) {
>>> case TTM_PL_VRAM:
>>> return AMDGPU_GEM_DOMAIN_VRAM;
>>> case TTM_PL_TT:
>>> return AMDGPU_GEM_DOMAIN_GTT;
>>> case TTM_PL_SYSTEM:
>>> return AMDGPU_GEM_DOMAIN_CPU;
>>> - case AMDGPU_PL_GDS:
>>> - return AMDGPU_GEM_DOMAIN_GDS;
>>> - case AMDGPU_PL_GWS:
>>> - return AMDGPU_GEM_DOMAIN_GWS;
>>> - case AMDGPU_PL_OA:
>>> - return AMDGPU_GEM_DOMAIN_OA;
>>> default:
>>> break;
>>> }
>>> return 0;
>>> }
>>> /**
>>> * amdgpu_bo_reserve - reserve bo
>>> * @bo: bo structure
>>> * @no_intr: don't return -ERESTARTSYS on pending signal
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> index 9cc239968e40..f6ea9604e611 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> @@ -130,24 +130,20 @@ struct amdgpu_ring_funcs {
>>> /* command emit functions */
>>> void (*emit_ib)(struct amdgpu_ring *ring,
>>> struct amdgpu_ib *ib,
>>> unsigned vmid, bool ctx_switch);
>>> void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
>>> uint64_t seq, unsigned flags);
>>> void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
>>> void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
>>> uint64_t pd_addr);
>>> void (*emit_hdp_flush)(struct amdgpu_ring *ring);
>>> - void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
>>> - uint32_t gds_base, uint32_t gds_size,
>>> - uint32_t gws_base, uint32_t gws_size,
>>> - uint32_t oa_base, uint32_t oa_size);
>>> /* testing functions */
>>> int (*test_ring)(struct amdgpu_ring *ring);
>>> int (*test_ib)(struct amdgpu_ring *ring, long timeout);
>>> /* insert NOP packets */
>>> void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
>>> void (*insert_start)(struct amdgpu_ring *ring);
>>> void (*insert_end)(struct amdgpu_ring *ring);
>>> /* pad the indirect buffer to the necessary number of dw */
>>> void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
>>> unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
>>> @@ -226,21 +222,20 @@ struct amdgpu_ring {
>>> #define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
>>> #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
>>> #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
>>> #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
>>> #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
>>> #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
>>> #define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
>>> #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
>>> #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
>>> #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
>>> -#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
>>> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
>>> #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
>>> #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
>>> #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
>>> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>>> #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>>> #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>>> #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>>> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 8a158ee922f7..2cc62b0e7ea8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -195,30 +195,20 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
>>> break;
>>> case TTM_PL_VRAM:
>>> /* "On-card" video ram */
>>> man->func = &amdgpu_vram_mgr_func;
>>> man->gpu_offset = adev->gmc.vram_start;
>>> man->flags = TTM_MEMTYPE_FLAG_FIXED |
>>> TTM_MEMTYPE_FLAG_MAPPABLE;
>>> man->available_caching = TTM_PL_FLAG_UNCACHED |
>>> TTM_PL_FLAG_WC;
>>> man->default_caching = TTM_PL_FLAG_WC;
>>> break;
>>> - case AMDGPU_PL_GDS:
>>> - case AMDGPU_PL_GWS:
>>> - case AMDGPU_PL_OA:
>>> - /* On-chip GDS memory*/
>>> - man->func = &ttm_bo_manager_func;
>>> - man->gpu_offset = 0;
>>> - man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
>>> - man->available_caching = TTM_PL_FLAG_UNCACHED;
>>> - man->default_caching = TTM_PL_FLAG_UNCACHED;
>>> - break;
>>> default:
>>> DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
>>> return -EINVAL;
>>> }
>>> return 0;
>>> }
>>> /**
>>> * amdgpu_evict_flags - Compute placement flags
>>> *
>>> @@ -1039,25 +1029,20 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>>> if (r) {
>>> DRM_ERROR("failed to pin userptr\n");
>>> return r;
>>> }
>>> }
>>> if (!ttm->num_pages) {
>>> WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
>>> ttm->num_pages, bo_mem, ttm);
>>> }
>>> - if (bo_mem->mem_type == AMDGPU_PL_GDS ||
>>> - bo_mem->mem_type == AMDGPU_PL_GWS ||
>>> - bo_mem->mem_type == AMDGPU_PL_OA)
>>> - return -EINVAL;
>>> -
>>> if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
>>> gtt->offset = AMDGPU_BO_INVALID_OFFSET;
>>> return 0;
>>> }
>>> /* compute PTE flags relevant to this BO memory */
>>> flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
>>> /* bind pages into GART page tables */
>>> gtt->offset = ((u64)bo_mem->start << PAGE_SHIFT) -
>>> adev->gmc.gart_start;
>>> @@ -1818,60 +1803,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>> /* Initialize GTT memory pool */
>>> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
>>> if (r) {
>>> DRM_ERROR("Failed initializing GTT heap.\n");
>>> return r;
>>> }
>>> DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
>>> (unsigned)(gtt_size / (1024 * 1024)));
>>> - /* Initialize various on-chip memory pools */
>>> - adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
>>> - adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
>>> - adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
>>> - adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
>>> - adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
>>> - adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
>>> - adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
>>> - adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
>>> - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
>>> - /* GDS Memory */
>>> - if (adev->gds.mem.total_size) {
>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
>>> - adev->gds.mem.total_size >> PAGE_SHIFT);
>>> - if (r) {
>>> - DRM_ERROR("Failed initializing GDS heap.\n");
>>> - return r;
>>> - }
>>> - }
>>> -
>>> - /* GWS */
>>> - if (adev->gds.gws.total_size) {
>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
>>> - adev->gds.gws.total_size >> PAGE_SHIFT);
>>> - if (r) {
>>> - DRM_ERROR("Failed initializing gws heap.\n");
>>> - return r;
>>> - }
>>> - }
>>> -
>>> - /* OA */
>>> - if (adev->gds.oa.total_size) {
>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
>>> - adev->gds.oa.total_size >> PAGE_SHIFT);
>>> - if (r) {
>>> - DRM_ERROR("Failed initializing oa heap.\n");
>>> - return r;
>>> - }
>>> - }
>>> -
>>> /* Register debugfs entries for amdgpu_ttm */
>>> r = amdgpu_ttm_debugfs_init(adev);
>>> if (r) {
>>> DRM_ERROR("Failed to init debugfs\n");
>>> return r;
>>> }
>>> return 0;
>>> }
>>> /**
>>> @@ -1892,26 +1837,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
>>> return;
>>> amdgpu_ttm_debugfs_fini(adev);
>>> amdgpu_ttm_fw_reserve_vram_fini(adev);
>>> if (adev->mman.aper_base_kaddr)
>>> iounmap(adev->mman.aper_base_kaddr);
>>> adev->mman.aper_base_kaddr = NULL;
>>> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
>>> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
>>> - if (adev->gds.mem.total_size)
>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
>>> - if (adev->gds.gws.total_size)
>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
>>> - if (adev->gds.oa.total_size)
>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
>>> ttm_bo_device_release(&adev->mman.bdev);
>>> amdgpu_ttm_global_fini(adev);
>>> adev->mman.initialized = false;
>>> DRM_INFO("amdgpu: ttm finalized\n");
>>> }
>>> /**
>>> * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
>>> *
>>> * @adev: amdgpu_device pointer
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> index fe8f276e9811..04557a382b19 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> @@ -20,28 +20,20 @@
>>> * OTHER DEALINGS IN THE SOFTWARE.
>>> *
>>> */
>>> #ifndef __AMDGPU_TTM_H__
>>> #define __AMDGPU_TTM_H__
>>> #include "amdgpu.h"
>>> #include <drm/gpu_scheduler.h>
>>> -#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
>>> -#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
>>> -#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
>>> -
>>> -#define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0)
>>> -#define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1)
>>> -#define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2)
>>> -
>>> #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
>>> #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
>>> struct amdgpu_mman {
>>> struct ttm_bo_global_ref bo_global_ref;
>>> struct drm_global_reference mem_global_ref;
>>> struct ttm_bo_device bdev;
>>> bool mem_global_referenced;
>>> bool initialized;
>>> void __iomem *aper_base_kaddr;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index be1659fedf94..c66f1c6f0ba8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -803,86 +803,69 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
>>> * Returns:
>>> * True if sync is needed.
>>> */
>>> bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>> struct amdgpu_job *job)
>>> {
>>> struct amdgpu_device *adev = ring->adev;
>>> unsigned vmhub = ring->funcs->vmhub;
>>> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>> struct amdgpu_vmid *id;
>>> - bool gds_switch_needed;
>>> bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
>>> if (job->vmid == 0)
>>> return false;
>>> id = &id_mgr->ids[job->vmid];
>>> - gds_switch_needed = ring->funcs->emit_gds_switch && (
>>> - id->gds_base != job->gds_base ||
>>> - id->gds_size != job->gds_size ||
>>> - id->gws_base != job->gws_base ||
>>> - id->gws_size != job->gws_size ||
>>> - id->oa_base != job->oa_base ||
>>> - id->oa_size != job->oa_size);
>>> if (amdgpu_vmid_had_gpu_reset(adev, id))
>>> return true;
>>> - return vm_flush_needed || gds_switch_needed;
>>> + return vm_flush_needed;
>>> }
>>> /**
>>> * amdgpu_vm_flush - hardware flush the vm
>>> *
>>> * @ring: ring to use for flush
>>> * @job: related job
>>> * @need_pipe_sync: is pipe sync needed
>>> *
>>> * Emit a VM flush when it is necessary.
>>> *
>>> * Returns:
>>> * 0 on success, errno otherwise.
>>> */
>>> int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>> bool need_pipe_sync)
>>> {
>>> struct amdgpu_device *adev = ring->adev;
>>> unsigned vmhub = ring->funcs->vmhub;
>>> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>> struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>>> - bool gds_switch_needed = ring->funcs->emit_gds_switch && (
>>> - id->gds_base != job->gds_base ||
>>> - id->gds_size != job->gds_size ||
>>> - id->gws_base != job->gws_base ||
>>> - id->gws_size != job->gws_size ||
>>> - id->oa_base != job->oa_base ||
>>> - id->oa_size != job->oa_size);
>>> bool vm_flush_needed = job->vm_needs_flush;
>>> bool pasid_mapping_needed = id->pasid != job->pasid ||
>>> !id->pasid_mapping ||
>>> !dma_fence_is_signaled(id->pasid_mapping);
>>> struct dma_fence *fence = NULL;
>>> unsigned patch_offset = 0;
>>> int r;
>>> if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>>> - gds_switch_needed = true;
>>> vm_flush_needed = true;
>>> pasid_mapping_needed = true;
>>> }
>>> - gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>>> vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>>> job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>>> pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>>> ring->funcs->emit_wreg;
>>> - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
>>> + if (!vm_flush_needed && !need_pipe_sync)
>>> return 0;
>>> if (ring->funcs->init_cond_exec)
>>> patch_offset = amdgpu_ring_init_cond_exec(ring);
>>> if (need_pipe_sync)
>>> amdgpu_ring_emit_pipeline_sync(ring);
>>> if (vm_flush_needed) {
>>> trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
>>> @@ -907,33 +890,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
>>> mutex_unlock(&id_mgr->lock);
>>> }
>>> if (pasid_mapping_needed) {
>>> id->pasid = job->pasid;
>>> dma_fence_put(id->pasid_mapping);
>>> id->pasid_mapping = dma_fence_get(fence);
>>> }
>>> dma_fence_put(fence);
>>> - if (ring->funcs->emit_gds_switch && gds_switch_needed) {
>>> - id->gds_base = job->gds_base;
>>> - id->gds_size = job->gds_size;
>>> - id->gws_base = job->gws_base;
>>> - id->gws_size = job->gws_size;
>>> - id->oa_base = job->oa_base;
>>> - id->oa_size = job->oa_size;
>>> - amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
>>> - job->gds_size, job->gws_base,
>>> - job->gws_size, job->oa_base,
>>> - job->oa_size);
>>> - }
>>> -
>>> if (ring->funcs->patch_cond_exec)
>>> amdgpu_ring_patch_cond_exec(ring, patch_offset);
>>> /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>> if (ring->funcs->emit_switch_buffer) {
>>> amdgpu_ring_emit_switch_buffer(ring);
>>> amdgpu_ring_emit_switch_buffer(ring);
>>> }
>>> return 0;
>>> }
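
With the per-VMID windows fixed at init, the flush path above no longer
compares a job's GDS/GWS/OA state against what the VMID last had programmed;
only the VM flush itself and the reset case remain. A condensed view of the
resulting decision, as a sketch rather than the literal kernel code:

#include <stdbool.h>

/* Condensed version of amdgpu_vm_need_pipeline_sync() after this patch. */
bool need_vm_flush(bool vm_needs_flush, bool has_compute_vm_bug,
		   bool had_gpu_reset, unsigned vmid)
{
	if (vmid == 0)          /* VMID 0 is never flushed here */
		return false;
	if (had_gpu_reset)      /* always flush after a reset */
		return true;
	return vm_needs_flush || has_compute_vm_bug;
}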
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>>> index a15d9c0f233b..f5228e169c3a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>>> @@ -1890,21 +1890,21 @@ static void gfx_v7_0_config_init(struct amdgpu_device *adev)
>>> *
>>> * @adev: amdgpu_device pointer
>>> *
>>> * Configures the 3D engine and tiling configuration
>>> * registers so that the 3D engine is usable.
>>> */
>>> static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
>>> {
>>> u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
>>> u32 tmp;
>>> - int i;
>>> + int i, vmid;
>>> WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
>>> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>>> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>>> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
>>> gfx_v7_0_tiling_mode_table_init(adev);
>>> gfx_v7_0_setup_rb(adev);
>>> @@ -2014,20 +2014,42 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
>>> tmp = RREG32(mmSPI_ARB_PRIORITY);
>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
>>> WREG32(mmSPI_ARB_PRIORITY, tmp);
>>> mutex_unlock(&adev->grbm_idx_mutex);
>>> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
>>> + unsigned gds_size, gws_size, oa_size;
>>> +
>>> + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) {
>>> + gds_size = adev->gds.mem.gfx_size_per_vmid;
>>> + gws_size = adev->gds.gws.gfx_size_per_vmid;
>>> + oa_size = adev->gds.oa.gfx_size_per_vmid;
>>> + } else {
>>> + gds_size = adev->gds.mem.kfd_size_per_vmid;
>>> + gws_size = adev->gds.gws.kfd_size_per_vmid;
>>> + oa_size = adev->gds.oa.kfd_size_per_vmid;
>>> + }
>>> +
>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * gds_size);
>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size);
>>> + WREG32(amdgpu_gds_reg_offset[vmid].gws,
>>> + (vmid * gws_size) |
>>> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
>>> + WREG32(amdgpu_gds_reg_offset[vmid].oa,
>>> + ((1 << oa_size) - 1) << (vmid * oa_size));
>>> + }
>>> +
>>> udelay(50);
>>> }
>>> /*
>>> * GPU scratch registers helpers function.
>>> */
>>> /**
>>> * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
>>> *
>>> * @adev: amdgpu_device pointer
>>> @@ -4157,68 +4179,20 @@ static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>>> uint64_t clock;
>>> mutex_lock(&adev->gfx.gpu_clock_mutex);
>>> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
>>> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
>>> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
>>> mutex_unlock(&adev->gfx.gpu_clock_mutex);
>>> return clock;
>>> }
>>> -static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>>> - uint32_t vmid,
>>> - uint32_t gds_base, uint32_t gds_size,
>>> - uint32_t gws_base, uint32_t gws_size,
>>> - uint32_t oa_base, uint32_t oa_size)
>>> -{
>>> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
>>> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
>>> -
>>> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
>>> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
>>> -
>>> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
>>> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
>>> -
>>> - /* GDS Base */
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>> - WRITE_DATA_DST_SEL(0)));
>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
>>> - amdgpu_ring_write(ring, 0);
>>> - amdgpu_ring_write(ring, gds_base);
>>> -
>>> - /* GDS Size */
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>> - WRITE_DATA_DST_SEL(0)));
>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
>>> - amdgpu_ring_write(ring, 0);
>>> - amdgpu_ring_write(ring, gds_size);
>>> -
>>> - /* GWS */
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>> - WRITE_DATA_DST_SEL(0)));
>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
>>> - amdgpu_ring_write(ring, 0);
>>> - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
>>> -
>>> - /* OA */
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>> - WRITE_DATA_DST_SEL(0)));
>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
>>> - amdgpu_ring_write(ring, 0);
>>> - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
>>> -}
>>> -
>>> static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring,
>>> unsigned vmid)
>>> {
>>> struct amdgpu_device *adev = ring->adev;
>>> uint32_t value = 0;
>>> value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
>>> value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
>>> value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
>>> value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
>>> WREG32(mmSQ_CMD, value);
>>> @@ -4584,55 +4558,32 @@ static int gfx_v7_0_sw_init(void *handle)
>>> ring_id,
>>> i, k, j);
>>> if (r)
>>> return r;
>>> ring_id++;
>>> }
>>> }
>>> }
>>> - /* reserve GDS, GWS and OA resource for gfx */
>>> - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
>>> - &adev->gds.gds_gfx_bo, NULL, NULL);
>>> - if (r)
>>> - return r;
>>> -
>>> - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
>>> - &adev->gds.gws_gfx_bo, NULL, NULL);
>>> - if (r)
>>> - return r;
>>> -
>>> - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
>>> - &adev->gds.oa_gfx_bo, NULL, NULL);
>>> - if (r)
>>> - return r;
>>> -
>>> adev->gfx.ce_ram_size = 0x8000;
>>> gfx_v7_0_gpu_early_init(adev);
>>> return r;
>>> }
>>> static int gfx_v7_0_sw_fini(void *handle)
>>> {
>>> int i;
>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>>> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
>>> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>>> -
>>> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>>> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>>> for (i = 0; i < adev->gfx.num_compute_rings; i++)
>>> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>>> gfx_v7_0_cp_compute_fini(adev);
>>> gfx_v7_0_rlc_fini(adev);
>>> gfx_v7_0_mec_fini(adev);
>>> amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
>>> &adev->gfx.rlc.clear_state_gpu_addr,
>>> @@ -5073,64 +5024,60 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
>>> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
>>> .type = AMDGPU_RING_TYPE_GFX,
>>> .align_mask = 0xff,
>>> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>> .support_64bit_ptrs = false,
>>> .get_rptr = gfx_v7_0_ring_get_rptr,
>>> .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
>>> .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
>>> .emit_frame_size =
>>> - 20 + /* gfx_v7_0_ring_emit_gds_switch */
>>> 7 + /* gfx_v7_0_ring_emit_hdp_flush */
>>> 5 + /* hdp invalidate */
>>> 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
>>> 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
>>> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
>>> 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
>>> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
>>> .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
>>> .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
>>> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
>>> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
>>> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
>>> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
>>> .test_ring = gfx_v7_0_ring_test_ring,
>>> .test_ib = gfx_v7_0_ring_test_ib,
>>> .insert_nop = amdgpu_ring_insert_nop,
>>> .pad_ib = amdgpu_ring_generic_pad_ib,
>>> .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
>>> .emit_wreg = gfx_v7_0_ring_emit_wreg,
>>> .soft_recovery = gfx_v7_0_ring_soft_recovery,
>>> };
>>> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
>>> .type = AMDGPU_RING_TYPE_COMPUTE,
>>> .align_mask = 0xff,
>>> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>> .support_64bit_ptrs = false,
>>> .get_rptr = gfx_v7_0_ring_get_rptr,
>>> .get_wptr = gfx_v7_0_ring_get_wptr_compute,
>>> .set_wptr = gfx_v7_0_ring_set_wptr_compute,
>>> .emit_frame_size =
>>> - 20 + /* gfx_v7_0_ring_emit_gds_switch */
>>> 7 + /* gfx_v7_0_ring_emit_hdp_flush */
>>> 5 + /* hdp invalidate */
>>> 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
>>> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
>>> 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
>>> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
>>> .emit_ib = gfx_v7_0_ring_emit_ib_compute,
>>> .emit_fence = gfx_v7_0_ring_emit_fence_compute,
>>> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
>>> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
>>> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
>>> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
>>> .test_ring = gfx_v7_0_ring_test_ring,
>>> .test_ib = gfx_v7_0_ring_test_ib,
>>> .insert_nop = amdgpu_ring_insert_nop,
>>> .pad_ib = amdgpu_ring_generic_pad_ib,
>>> .emit_wreg = gfx_v7_0_ring_emit_wreg,
>>> };
>>> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
>>> {
>>> @@ -5169,42 +5116,28 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
>>> adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
>>> }
>>> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
>>> {
>>> /* init asci gds info */
>>> adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
>>> adev->gds.gws.total_size = 64;
>>> adev->gds.oa.total_size = 16;
>>> - if (adev->gds.mem.total_size == 64 * 1024) {
>>> - adev->gds.mem.gfx_partition_size = 4096;
>>> - adev->gds.mem.cs_partition_size = 4096;
>>> -
>>> - adev->gds.gws.gfx_partition_size = 4;
>>> - adev->gds.gws.cs_partition_size = 4;
>>> -
>>> - adev->gds.oa.gfx_partition_size = 4;
>>> - adev->gds.oa.cs_partition_size = 1;
>>> - } else {
>>> - adev->gds.mem.gfx_partition_size = 1024;
>>> - adev->gds.mem.cs_partition_size = 1024;
>>> -
>>> - adev->gds.gws.gfx_partition_size = 16;
>>> - adev->gds.gws.cs_partition_size = 16;
>>> -
>>> - adev->gds.oa.gfx_partition_size = 4;
>>> - adev->gds.oa.cs_partition_size = 4;
>>> - }
>>> + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID;
>>> + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID;
>>> + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID;
>>> + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID;
>>> + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */
>>> + adev->gds.oa.kfd_size_per_vmid = 0;
>>> }
>>> -
>>> static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
>>> {
>>> int i, j, k, counter, active_cu_number = 0;
>>> u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
>>> struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
>>> unsigned disable_masks[4 * 2];
>>> u32 ao_cu_num;
>>> if (adev->flags & AMD_IS_APU)
>>> ao_cu_num = 2;
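
A detail worth checking in the init loop above: with the static layout,
oa_base is always vmid * oa_size, so the OA mask the loop writes,
((1 << oa_size) - 1) << (vmid * oa_size), equals the value the removed
gfx_v7_0_ring_emit_gds_switch used to compute as
(1 << (oa_size + oa_base)) - (1 << oa_base). A standalone check of that
equivalence (illustrative only):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned oa_size = 2; /* CIK gfx: 16 OA slots / 8 gfx VMIDs */
	unsigned vmid;

	for (vmid = 0; vmid < 8; vmid++) {
		unsigned oa_base = vmid * oa_size;
		unsigned old_mask = (1u << (oa_size + oa_base)) - (1u << oa_base);
		unsigned new_mask = ((1u << oa_size) - 1) << (vmid * oa_size);

		assert(old_mask == new_mask);
		printf("vmid %u: OA mask 0x%04x\n", vmid, new_mask);
	}
	return 0;
}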
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> index 3882689b2d8f..b11a54bd0668 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> @@ -2154,57 +2154,34 @@ static int gfx_v8_0_sw_init(void *handle)
>>> kiq = &adev->gfx.kiq;
>>> r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>>> if (r)
>>> return r;
>>> /* create MQD for all compute queues as well as KIQ for SRIOV case */
>>> r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
>>> if (r)
>>> return r;
>>> - /* reserve GDS, GWS and OA resource for gfx */
>>> - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
>>> - &adev->gds.gds_gfx_bo, NULL, NULL);
>>> - if (r)
>>> - return r;
>>> -
>>> - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
>>> - &adev->gds.gws_gfx_bo, NULL, NULL);
>>> - if (r)
>>> - return r;
>>> -
>>> - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
>>> - &adev->gds.oa_gfx_bo, NULL, NULL);
>>> - if (r)
>>> - return r;
>>> -
>>> adev->gfx.ce_ram_size = 0x8000;
>>> r = gfx_v8_0_gpu_early_init(adev);
>>> if (r)
>>> return r;
>>> return 0;
>>> }
>>> static int gfx_v8_0_sw_fini(void *handle)
>>> {
>>> int i;
>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>>> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
>>> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>>> -
>>> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>>> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>>> for (i = 0; i < adev->gfx.num_compute_rings; i++)
>>> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>>> amdgpu_gfx_compute_mqd_sw_fini(adev);
>>> amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>>> amdgpu_gfx_kiq_fini(adev);
>>> gfx_v8_0_mec_fini(adev);
>>> @@ -3850,21 +3827,21 @@ static void gfx_v8_0_config_init(struct amdgpu_device *adev)
>>> case CHIP_CARRIZO:
>>> case CHIP_STONEY:
>>> adev->gfx.config.double_offchip_lds_buf = 0;
>>> break;
>>> }
>>> }
>>> static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
>>> {
>>> u32 tmp, sh_static_mem_cfg;
>>> - int i;
>>> + int i, vmid;
>>> WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
>>> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>>> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
>>> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
>>> gfx_v8_0_tiling_mode_table_init(adev);
>>> gfx_v8_0_setup_rb(adev);
>>> gfx_v8_0_get_cu_info(adev);
>>> gfx_v8_0_config_init(adev);
>>> @@ -3927,20 +3904,41 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
>>> tmp = RREG32(mmSPI_ARB_PRIORITY);
>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
>>> WREG32(mmSPI_ARB_PRIORITY, tmp);
>>> mutex_unlock(&adev->grbm_idx_mutex);
>>> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
>>> + unsigned gds_size, gws_size, oa_size;
>>> +
>>> + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) {
>>> + gds_size = adev->gds.mem.gfx_size_per_vmid;
>>> + gws_size = adev->gds.gws.gfx_size_per_vmid;
>>> + oa_size = adev->gds.oa.gfx_size_per_vmid;
>>> + } else {
>>> + gds_size = adev->gds.mem.kfd_size_per_vmid;
>>> + gws_size = adev->gds.gws.kfd_size_per_vmid;
>>> + oa_size = adev->gds.oa.kfd_size_per_vmid;
>>> + }
>>> +
>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * gds_size);
>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size);
>>> + WREG32(amdgpu_gds_reg_offset[vmid].gws,
>>> + (vmid * gws_size) |
>>> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
>>> + WREG32(amdgpu_gds_reg_offset[vmid].oa,
>>> + ((1 << oa_size) - 1) << (vmid * oa_size));
>>> + }
>>> }
>>> static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
>>> {
>>> u32 i, j, k;
>>> u32 mask;
>>> mutex_lock(&adev->grbm_idx_mutex);
>>> for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
>>> for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
>>> @@ -5383,68 +5381,20 @@ static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>>> uint64_t clock;
>>> mutex_lock(&adev->gfx.gpu_clock_mutex);
>>> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
>>> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
>>> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
>>> mutex_unlock(&adev->gfx.gpu_clock_mutex);
>>> return clock;
>>> }
>>> -static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>>> - uint32_t vmid,
>>> - uint32_t gds_base, uint32_t gds_size,
>>> - uint32_t gws_base, uint32_t gws_size,
>>> - uint32_t oa_base, uint32_t oa_size)
>>> -{
>>> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
>>> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
>>> -
>>> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
>>> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
>>> -
>>> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
>>> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
>>> -
>>> - /* GDS Base */
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>> - WRITE_DATA_DST_SEL(0)));
>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
>>> - amdgpu_ring_write(ring, 0);
>>> - amdgpu_ring_write(ring, gds_base);
>>> -
>>> - /* GDS Size */
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>> - WRITE_DATA_DST_SEL(0)));
>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
>>> - amdgpu_ring_write(ring, 0);
>>> - amdgpu_ring_write(ring, gds_size);
>>> -
>>> - /* GWS */
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>> - WRITE_DATA_DST_SEL(0)));
>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
>>> - amdgpu_ring_write(ring, 0);
>>> - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
>>> -
>>> - /* OA */
>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>>> - WRITE_DATA_DST_SEL(0)));
>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
>>> - amdgpu_ring_write(ring, 0);
>>> - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
>>> -}
>>> -
>>> static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd,
>>> uint32_t wave, uint32_t address)
>>> {
>>> WREG32(mmSQ_IND_INDEX,
>>> (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
>>> (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
>>> (address << SQ_IND_INDEX__INDEX__SHIFT) |
>>> (SQ_IND_INDEX__FORCE_READ_MASK));
>>> return RREG32(mmSQ_IND_DATA);
>>> }
>>> @@ -7132,21 +7082,20 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
>>> 31 + /* DE_META */
>>> 3 + /* CNTX_CTRL */
>>> 5 + /* HDP_INVL */
>>> 8 + 8 + /* FENCE x2 */
>>> 2, /* SWITCH_BUFFER */
>>> .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
>>> .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
>>> .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
>>> .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
>>> .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
>>> - .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
>>> .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
>>> .test_ring = gfx_v8_0_ring_test_ring,
>>> .test_ib = gfx_v8_0_ring_test_ib,
>>> .insert_nop = amdgpu_ring_insert_nop,
>>> .pad_ib = amdgpu_ring_generic_pad_ib,
>>> .emit_switch_buffer = gfx_v8_ring_emit_sb,
>>> .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
>>> .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
>>> .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
>>> .emit_wreg = gfx_v8_0_ring_emit_wreg,
>>> @@ -7155,51 +7104,48 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
>>> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
>>> .type = AMDGPU_RING_TYPE_COMPUTE,
>>> .align_mask = 0xff,
>>> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>> .support_64bit_ptrs = false,
>>> .get_rptr = gfx_v8_0_ring_get_rptr,