[PATCH] drm/amdgpu: reserve GDS resources statically
Christian König
ckoenig.leichtzumerken at gmail.com
Thu Sep 13 08:40:08 UTC 2018
As discussed internally that doesn't work because threads don't
necessarily get the same VMID assigned.
Christian.
Am 12.09.2018 um 22:33 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
>
> I've chosen to do it like this because it's easy and allows an arbitrary
> number of processes.
>
> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 10 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 20 ----
> drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 19 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 24 +---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 7 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 ----
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 ----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +-----
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 125 +++++---------------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 123 +++++--------------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 124 ++++++-------------
> include/uapi/drm/amdgpu_drm.h | 15 +--
> 19 files changed, 109 insertions(+), 519 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index b80243d3972e..7264a4930b88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -71,23 +71,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
> / sizeof(struct amdgpu_bo_list_entry))
> return -EINVAL;
>
> size = sizeof(struct amdgpu_bo_list);
> size += num_entries * sizeof(struct amdgpu_bo_list_entry);
> list = kvmalloc(size, GFP_KERNEL);
> if (!list)
> return -ENOMEM;
>
> kref_init(&list->refcount);
> - list->gds_obj = adev->gds.gds_gfx_bo;
> - list->gws_obj = adev->gds.gws_gfx_bo;
> - list->oa_obj = adev->gds.oa_gfx_bo;
>
> array = amdgpu_bo_list_array_entry(list, 0);
> memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
>
> for (i = 0; i < num_entries; ++i) {
> struct amdgpu_bo_list_entry *entry;
> struct drm_gem_object *gobj;
> struct amdgpu_bo *bo;
> struct mm_struct *usermm;
>
> @@ -111,27 +108,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
> } else {
> entry = &array[last_entry++];
> }
>
> entry->robj = bo;
> entry->priority = min(info[i].bo_priority,
> AMDGPU_BO_LIST_MAX_PRIORITY);
> entry->tv.bo = &entry->robj->tbo;
> entry->tv.shared = !entry->robj->prime_shared_count;
>
> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
> - list->gds_obj = entry->robj;
> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
> - list->gws_obj = entry->robj;
> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
> - list->oa_obj = entry->robj;
> -
> total_size += amdgpu_bo_size(entry->robj);
> trace_amdgpu_bo_list_set(list, entry->robj);
> }
>
> list->first_userptr = first_userptr;
> list->num_entries = num_entries;
>
> trace_amdgpu_cs_bo_status(list->num_entries, total_size);
>
> *result = list;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 61b089768e1c..30f12a60aa28 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -36,23 +36,20 @@ struct amdgpu_bo_list_entry {
> struct ttm_validate_buffer tv;
> struct amdgpu_bo_va *bo_va;
> uint32_t priority;
> struct page **user_pages;
> int user_invalidated;
> };
>
> struct amdgpu_bo_list {
> struct rcu_head rhead;
> struct kref refcount;
> - struct amdgpu_bo *gds_obj;
> - struct amdgpu_bo *gws_obj;
> - struct amdgpu_bo *oa_obj;
> unsigned first_userptr;
> unsigned num_entries;
> };
>
> int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
> struct amdgpu_bo_list **result);
> void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
> struct list_head *validated);
> void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
> int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1081fd00b059..88b58facf29e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -565,23 +565,20 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
> return 0;
> }
>
> static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> union drm_amdgpu_cs *cs)
> {
> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> struct amdgpu_vm *vm = &fpriv->vm;
> struct amdgpu_bo_list_entry *e;
> struct list_head duplicates;
> - struct amdgpu_bo *gds;
> - struct amdgpu_bo *gws;
> - struct amdgpu_bo *oa;
> unsigned tries = 10;
> int r;
>
> INIT_LIST_HEAD(&p->validated);
>
> /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
> if (cs->in.bo_list_handle) {
> if (p->bo_list)
> return -EINVAL;
>
> @@ -705,40 +702,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>
> r = amdgpu_cs_list_validate(p, &p->validated);
> if (r) {
> DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
> goto error_validate;
> }
>
> amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
> p->bytes_moved_vis);
>
> - gds = p->bo_list->gds_obj;
> - gws = p->bo_list->gws_obj;
> - oa = p->bo_list->oa_obj;
> -
> amdgpu_bo_list_for_each_entry(e, p->bo_list)
> e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
>
> - if (gds) {
> - p->job->gds_base = amdgpu_bo_gpu_offset(gds);
> - p->job->gds_size = amdgpu_bo_size(gds);
> - }
> - if (gws) {
> - p->job->gws_base = amdgpu_bo_gpu_offset(gws);
> - p->job->gws_size = amdgpu_bo_size(gws);
> - }
> - if (oa) {
> - p->job->oa_base = amdgpu_bo_gpu_offset(oa);
> - p->job->oa_size = amdgpu_bo_size(oa);
> - }
> -
> if (!r && p->uf_entry.robj) {
> struct amdgpu_bo *uf = p->uf_entry.robj;
>
> r = amdgpu_ttm_alloc_gart(&uf->tbo);
> p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
> }
>
> error_validate:
> if (r)
> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> index e73728d90388..69ba25c2e921 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> @@ -17,48 +17,33 @@
> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> * OTHER DEALINGS IN THE SOFTWARE.
> *
> */
>
> #ifndef __AMDGPU_GDS_H__
> #define __AMDGPU_GDS_H__
>
> -/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned,
> - * we should report GDS/GWS/OA size as PAGE_SIZE aligned
> - * */
> -#define AMDGPU_GDS_SHIFT 2
> -#define AMDGPU_GWS_SHIFT PAGE_SHIFT
> -#define AMDGPU_OA_SHIFT PAGE_SHIFT
> -
> struct amdgpu_ring;
> struct amdgpu_bo;
>
> struct amdgpu_gds_asic_info {
> uint32_t total_size;
> - uint32_t gfx_partition_size;
> - uint32_t cs_partition_size;
> + uint32_t gfx_size_per_vmid;
> + uint32_t kfd_size_per_vmid;
> };
>
> struct amdgpu_gds {
> struct amdgpu_gds_asic_info mem;
> struct amdgpu_gds_asic_info gws;
> struct amdgpu_gds_asic_info oa;
> - /* At present, GDS, GWS and OA resources for gfx (graphics)
> - * is always pre-allocated and available for graphics operation.
> - * Such resource is shared between all gfx clients.
> - * TODO: move this operation to user space
> - * */
> - struct amdgpu_bo* gds_gfx_bo;
> - struct amdgpu_bo* gws_gfx_bo;
> - struct amdgpu_bo* oa_gfx_bo;
> };
>
> struct amdgpu_gds_reg_offset {
> uint32_t mem_base;
> uint32_t mem_size;
> uint32_t gws;
> uint32_t oa;
> };
>
> #endif /* __AMDGPU_GDS_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d30a0838851b..c87ad4b4d0b6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -223,43 +223,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
> AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> AMDGPU_GEM_CREATE_VRAM_CLEARED |
> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
> AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
>
> return -EINVAL;
>
> /* reject invalid gem domains */
> - if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
> + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
> + AMDGPU_GEM_DOMAIN_GTT |
> + AMDGPU_GEM_DOMAIN_VRAM))
> return -EINVAL;
>
> - /* create a gem object to contain this object in */
> - if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
> - AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
> - if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
> - /* if gds bo is created from user space, it must be
> - * passed to bo list
> - */
> - DRM_ERROR("GDS bo cannot be per-vm-bo\n");
> - return -EINVAL;
> - }
> - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
> - if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
> - size = size << AMDGPU_GDS_SHIFT;
> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
> - size = size << AMDGPU_GWS_SHIFT;
> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
> - size = size << AMDGPU_OA_SHIFT;
> - else
> - return -EINVAL;
> - }
> size = roundup(size, PAGE_SIZE);
>
> if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
> r = amdgpu_bo_reserve(vm->root.base.bo, false);
> if (r)
> return r;
>
> resv = vm->root.base.bo->tbo.resv;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 3a072a7a39f0..c2e6a1a11d7f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -516,26 +516,20 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> * Reset saved GDW, GWS and OA to force switch on next flush.
> */
> void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
> unsigned vmid)
> {
> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
> struct amdgpu_vmid *id = &id_mgr->ids[vmid];
>
> mutex_lock(&id_mgr->lock);
> id->owner = 0;
> - id->gds_base = 0;
> - id->gds_size = 0;
> - id->gws_base = 0;
> - id->gws_size = 0;
> - id->oa_base = 0;
> - id->oa_size = 0;
> mutex_unlock(&id_mgr->lock);
> }
>
> /**
> * amdgpu_vmid_reset_all - reset VMID to zero
> *
> * @adev: amdgpu device structure
> *
> * Reset VMID to force flush on next use
> */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> index 7625419f0fc2..06078e665532 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> @@ -44,27 +44,20 @@ struct amdgpu_vmid {
> struct amdgpu_sync active;
> struct dma_fence *last_flush;
> uint64_t owner;
>
> uint64_t pd_gpu_addr;
> /* last flushed PD/PT update */
> struct dma_fence *flushed_updates;
>
> uint32_t current_gpu_reset_count;
>
> - uint32_t gds_base;
> - uint32_t gds_size;
> - uint32_t gws_base;
> - uint32_t gws_size;
> - uint32_t oa_base;
> - uint32_t oa_size;
> -
> unsigned pasid;
> struct dma_fence *pasid_mapping;
> };
>
> struct amdgpu_vmid_mgr {
> struct mutex lock;
> unsigned num_ids;
> struct list_head ids_lru;
> struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
> atomic_t reserved_vmid_num;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> index 57cfe78a262b..3db553f6ad01 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> @@ -42,23 +42,20 @@ struct amdgpu_job {
> struct amdgpu_sync sched_sync;
> struct amdgpu_ib *ibs;
> struct dma_fence *fence; /* the hw fence */
> uint32_t preamble_status;
> uint32_t num_ibs;
> void *owner;
> bool vm_needs_flush;
> uint64_t vm_pd_addr;
> unsigned vmid;
> unsigned pasid;
> - uint32_t gds_base, gds_size;
> - uint32_t gws_base, gws_size;
> - uint32_t oa_base, oa_size;
> uint32_t vram_lost_counter;
>
> /* user fence handling */
> uint64_t uf_addr;
> uint64_t uf_sequence;
>
> };
>
> int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
> struct amdgpu_job **job, struct amdgpu_vm *vm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 29ac3873eeb0..209954290954 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -517,27 +517,27 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
> case AMDGPU_INFO_VIS_VRAM_USAGE:
> ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
> return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
> case AMDGPU_INFO_GTT_USAGE:
> ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
> return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
> case AMDGPU_INFO_GDS_CONFIG: {
> struct drm_amdgpu_info_gds gds_info;
>
> memset(&gds_info, 0, sizeof(gds_info));
> - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT;
> - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT;
> - gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT;
> - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT;
> - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT;
> - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT;
> - gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT;
> + gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_size_per_vmid;
> + gds_info.compute_partition_size = adev->gds.mem.kfd_size_per_vmid;
> + gds_info.gds_total_size = adev->gds.mem.total_size;
> + gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_size_per_vmid;
> + gds_info.gws_per_compute_partition = adev->gds.gws.kfd_size_per_vmid;
> + gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_size_per_vmid;
> + gds_info.oa_per_compute_partition = adev->gds.oa.kfd_size_per_vmid;
> return copy_to_user(out, &gds_info,
> min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
> }
> case AMDGPU_INFO_VRAM_GTT: {
> struct drm_amdgpu_info_vram_gtt vram_gtt;
>
> vram_gtt.vram_size = adev->gmc.real_vram_size -
> atomic64_read(&adev->vram_pin_size);
> vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size -
> atomic64_read(&adev->visible_pin_size);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index de990bdcdd6c..76770a8c29a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -178,41 +178,20 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
> places[c].lpfn = 0;
> places[c].flags = TTM_PL_FLAG_SYSTEM;
> if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
> places[c].flags |= TTM_PL_FLAG_WC |
> TTM_PL_FLAG_UNCACHED;
> else
> places[c].flags |= TTM_PL_FLAG_CACHED;
> c++;
> }
>
> - if (domain & AMDGPU_GEM_DOMAIN_GDS) {
> - places[c].fpfn = 0;
> - places[c].lpfn = 0;
> - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS;
> - c++;
> - }
> -
> - if (domain & AMDGPU_GEM_DOMAIN_GWS) {
> - places[c].fpfn = 0;
> - places[c].lpfn = 0;
> - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS;
> - c++;
> - }
> -
> - if (domain & AMDGPU_GEM_DOMAIN_OA) {
> - places[c].fpfn = 0;
> - places[c].lpfn = 0;
> - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA;
> - c++;
> - }
> -
> if (!c) {
> places[c].fpfn = 0;
> places[c].lpfn = 0;
> places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
> c++;
> }
>
> BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS);
>
> placement->num_placement = c;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 907fdf46d895..e089964cbcb7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -120,26 +120,20 @@ static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
> */
> static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
> {
> switch (mem_type) {
> case TTM_PL_VRAM:
> return AMDGPU_GEM_DOMAIN_VRAM;
> case TTM_PL_TT:
> return AMDGPU_GEM_DOMAIN_GTT;
> case TTM_PL_SYSTEM:
> return AMDGPU_GEM_DOMAIN_CPU;
> - case AMDGPU_PL_GDS:
> - return AMDGPU_GEM_DOMAIN_GDS;
> - case AMDGPU_PL_GWS:
> - return AMDGPU_GEM_DOMAIN_GWS;
> - case AMDGPU_PL_OA:
> - return AMDGPU_GEM_DOMAIN_OA;
> default:
> break;
> }
> return 0;
> }
>
> /**
> * amdgpu_bo_reserve - reserve bo
> * @bo: bo structure
> * @no_intr: don't return -ERESTARTSYS on pending signal
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 9cc239968e40..f6ea9604e611 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -130,24 +130,20 @@ struct amdgpu_ring_funcs {
> /* command emit functions */
> void (*emit_ib)(struct amdgpu_ring *ring,
> struct amdgpu_ib *ib,
> unsigned vmid, bool ctx_switch);
> void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
> uint64_t seq, unsigned flags);
> void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
> void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
> uint64_t pd_addr);
> void (*emit_hdp_flush)(struct amdgpu_ring *ring);
> - void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
> - uint32_t gds_base, uint32_t gds_size,
> - uint32_t gws_base, uint32_t gws_size,
> - uint32_t oa_base, uint32_t oa_size);
> /* testing functions */
> int (*test_ring)(struct amdgpu_ring *ring);
> int (*test_ib)(struct amdgpu_ring *ring, long timeout);
> /* insert NOP packets */
> void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
> void (*insert_start)(struct amdgpu_ring *ring);
> void (*insert_end)(struct amdgpu_ring *ring);
> /* pad the indirect buffer to the necessary number of dw */
> void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
> unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
> @@ -226,21 +222,20 @@ struct amdgpu_ring {
> #define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
> #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
> #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
> #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
> #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
> #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
> #define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
> #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
> #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
> #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
> -#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
> #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
> #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
> #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
> #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
> #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
> #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 8a158ee922f7..2cc62b0e7ea8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -195,30 +195,20 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
> break;
> case TTM_PL_VRAM:
> /* "On-card" video ram */
> man->func = &amdgpu_vram_mgr_func;
> man->gpu_offset = adev->gmc.vram_start;
> man->flags = TTM_MEMTYPE_FLAG_FIXED |
> TTM_MEMTYPE_FLAG_MAPPABLE;
> man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
> man->default_caching = TTM_PL_FLAG_WC;
> break;
> - case AMDGPU_PL_GDS:
> - case AMDGPU_PL_GWS:
> - case AMDGPU_PL_OA:
> - /* On-chip GDS memory*/
> - man->func = &ttm_bo_manager_func;
> - man->gpu_offset = 0;
> - man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
> - man->available_caching = TTM_PL_FLAG_UNCACHED;
> - man->default_caching = TTM_PL_FLAG_UNCACHED;
> - break;
> default:
> DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
> return -EINVAL;
> }
> return 0;
> }
>
> /**
> * amdgpu_evict_flags - Compute placement flags
> *
> @@ -1039,25 +1029,20 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
> if (r) {
> DRM_ERROR("failed to pin userptr\n");
> return r;
> }
> }
> if (!ttm->num_pages) {
> WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
> ttm->num_pages, bo_mem, ttm);
> }
>
> - if (bo_mem->mem_type == AMDGPU_PL_GDS ||
> - bo_mem->mem_type == AMDGPU_PL_GWS ||
> - bo_mem->mem_type == AMDGPU_PL_OA)
> - return -EINVAL;
> -
> if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
> gtt->offset = AMDGPU_BO_INVALID_OFFSET;
> return 0;
> }
>
> /* compute PTE flags relevant to this BO memory */
> flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
>
> /* bind pages into GART page tables */
> gtt->offset = ((u64)bo_mem->start << PAGE_SHIFT) - adev->gmc.gart_start;
> @@ -1818,60 +1803,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>
> /* Initialize GTT memory pool */
> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
> if (r) {
> DRM_ERROR("Failed initializing GTT heap.\n");
> return r;
> }
> DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
> (unsigned)(gtt_size / (1024 * 1024)));
>
> - /* Initialize various on-chip memory pools */
> - adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
> - adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
> - adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
> - adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
> - adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
> - adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
> - adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
> - adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
> - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
> - /* GDS Memory */
> - if (adev->gds.mem.total_size) {
> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
> - adev->gds.mem.total_size >> PAGE_SHIFT);
> - if (r) {
> - DRM_ERROR("Failed initializing GDS heap.\n");
> - return r;
> - }
> - }
> -
> - /* GWS */
> - if (adev->gds.gws.total_size) {
> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
> - adev->gds.gws.total_size >> PAGE_SHIFT);
> - if (r) {
> - DRM_ERROR("Failed initializing gws heap.\n");
> - return r;
> - }
> - }
> -
> - /* OA */
> - if (adev->gds.oa.total_size) {
> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
> - adev->gds.oa.total_size >> PAGE_SHIFT);
> - if (r) {
> - DRM_ERROR("Failed initializing oa heap.\n");
> - return r;
> - }
> - }
> -
> /* Register debugfs entries for amdgpu_ttm */
> r = amdgpu_ttm_debugfs_init(adev);
> if (r) {
> DRM_ERROR("Failed to init debugfs\n");
> return r;
> }
> return 0;
> }
>
> /**
> @@ -1892,26 +1837,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
> return;
>
> amdgpu_ttm_debugfs_fini(adev);
> amdgpu_ttm_fw_reserve_vram_fini(adev);
> if (adev->mman.aper_base_kaddr)
> iounmap(adev->mman.aper_base_kaddr);
> adev->mman.aper_base_kaddr = NULL;
>
> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
> - if (adev->gds.mem.total_size)
> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
> - if (adev->gds.gws.total_size)
> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
> - if (adev->gds.oa.total_size)
> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
> ttm_bo_device_release(&adev->mman.bdev);
> amdgpu_ttm_global_fini(adev);
> adev->mman.initialized = false;
> DRM_INFO("amdgpu: ttm finalized\n");
> }
>
> /**
> * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
> *
> * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index fe8f276e9811..04557a382b19 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -20,28 +20,20 @@
> * OTHER DEALINGS IN THE SOFTWARE.
> *
> */
>
> #ifndef __AMDGPU_TTM_H__
> #define __AMDGPU_TTM_H__
>
> #include "amdgpu.h"
> #include <drm/gpu_scheduler.h>
>
> -#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
> -#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
> -#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
> -
> -#define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0)
> -#define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1)
> -#define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2)
> -
> #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
> #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
>
> struct amdgpu_mman {
> struct ttm_bo_global_ref bo_global_ref;
> struct drm_global_reference mem_global_ref;
> struct ttm_bo_device bdev;
> bool mem_global_referenced;
> bool initialized;
> void __iomem *aper_base_kaddr;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index be1659fedf94..c66f1c6f0ba8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -803,86 +803,69 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
> * Returns:
> * True if sync is needed.
> */
> bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
> struct amdgpu_job *job)
> {
> struct amdgpu_device *adev = ring->adev;
> unsigned vmhub = ring->funcs->vmhub;
> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
> struct amdgpu_vmid *id;
> - bool gds_switch_needed;
> bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
>
> if (job->vmid == 0)
> return false;
> id = &id_mgr->ids[job->vmid];
> - gds_switch_needed = ring->funcs->emit_gds_switch && (
> - id->gds_base != job->gds_base ||
> - id->gds_size != job->gds_size ||
> - id->gws_base != job->gws_base ||
> - id->gws_size != job->gws_size ||
> - id->oa_base != job->oa_base ||
> - id->oa_size != job->oa_size);
>
> if (amdgpu_vmid_had_gpu_reset(adev, id))
> return true;
>
> - return vm_flush_needed || gds_switch_needed;
> + return vm_flush_needed;
> }
>
> /**
> * amdgpu_vm_flush - hardware flush the vm
> *
> * @ring: ring to use for flush
> * @job: related job
> * @need_pipe_sync: is pipe sync needed
> *
> * Emit a VM flush when it is necessary.
> *
> * Returns:
> * 0 on success, errno otherwise.
> */
> int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
> {
> struct amdgpu_device *adev = ring->adev;
> unsigned vmhub = ring->funcs->vmhub;
> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
> struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
> - bool gds_switch_needed = ring->funcs->emit_gds_switch && (
> - id->gds_base != job->gds_base ||
> - id->gds_size != job->gds_size ||
> - id->gws_base != job->gws_base ||
> - id->gws_size != job->gws_size ||
> - id->oa_base != job->oa_base ||
> - id->oa_size != job->oa_size);
> bool vm_flush_needed = job->vm_needs_flush;
> bool pasid_mapping_needed = id->pasid != job->pasid ||
> !id->pasid_mapping ||
> !dma_fence_is_signaled(id->pasid_mapping);
> struct dma_fence *fence = NULL;
> unsigned patch_offset = 0;
> int r;
>
> if (amdgpu_vmid_had_gpu_reset(adev, id)) {
> - gds_switch_needed = true;
> vm_flush_needed = true;
> pasid_mapping_needed = true;
> }
>
> - gds_switch_needed &= !!ring->funcs->emit_gds_switch;
> vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
> job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
> pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
> ring->funcs->emit_wreg;
>
> - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
> + if (!vm_flush_needed && !need_pipe_sync)
> return 0;
>
> if (ring->funcs->init_cond_exec)
> patch_offset = amdgpu_ring_init_cond_exec(ring);
>
> if (need_pipe_sync)
> amdgpu_ring_emit_pipeline_sync(ring);
>
> if (vm_flush_needed) {
> trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
> @@ -907,33 +890,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
> mutex_unlock(&id_mgr->lock);
> }
>
> if (pasid_mapping_needed) {
> id->pasid = job->pasid;
> dma_fence_put(id->pasid_mapping);
> id->pasid_mapping = dma_fence_get(fence);
> }
> dma_fence_put(fence);
>
> - if (ring->funcs->emit_gds_switch && gds_switch_needed) {
> - id->gds_base = job->gds_base;
> - id->gds_size = job->gds_size;
> - id->gws_base = job->gws_base;
> - id->gws_size = job->gws_size;
> - id->oa_base = job->oa_base;
> - id->oa_size = job->oa_size;
> - amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
> - job->gds_size, job->gws_base,
> - job->gws_size, job->oa_base,
> - job->oa_size);
> - }
> -
> if (ring->funcs->patch_cond_exec)
> amdgpu_ring_patch_cond_exec(ring, patch_offset);
>
> /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
> if (ring->funcs->emit_switch_buffer) {
> amdgpu_ring_emit_switch_buffer(ring);
> amdgpu_ring_emit_switch_buffer(ring);
> }
> return 0;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index a15d9c0f233b..f5228e169c3a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -1890,21 +1890,21 @@ static void gfx_v7_0_config_init(struct amdgpu_device *adev)
> *
> * @adev: amdgpu_device pointer
> *
> * Configures the 3D engine and tiling configuration
> * registers so that the 3D engine is usable.
> */
> static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
> {
> u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
> u32 tmp;
> - int i;
> + int i, vmid;
>
> WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
>
> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
>
> gfx_v7_0_tiling_mode_table_init(adev);
>
> gfx_v7_0_setup_rb(adev);
> @@ -2014,20 +2014,42 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
>
> tmp = RREG32(mmSPI_ARB_PRIORITY);
> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
> WREG32(mmSPI_ARB_PRIORITY, tmp);
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
> + unsigned gds_size, gws_size, oa_size;
> +
> + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) {
> + gds_size = adev->gds.mem.gfx_size_per_vmid;
> + gws_size = adev->gds.gws.gfx_size_per_vmid;
> + oa_size = adev->gds.oa.gfx_size_per_vmid;
> + } else {
> + gds_size = adev->gds.mem.kfd_size_per_vmid;
> + gws_size = adev->gds.gws.kfd_size_per_vmid;
> + oa_size = adev->gds.oa.kfd_size_per_vmid;
> + }
> +
> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * gds_size);
> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size);
> + WREG32(amdgpu_gds_reg_offset[vmid].gws,
> + (vmid * gws_size) |
> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
> + WREG32(amdgpu_gds_reg_offset[vmid].oa,
> + ((1 << oa_size) - 1) << (vmid * oa_size));
> + }
> +
> udelay(50);
> }
>
> /*
> * GPU scratch registers helpers function.
> */
> /**
> * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
> *
> * @adev: amdgpu_device pointer
> @@ -4157,68 +4179,20 @@ static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
> uint64_t clock;
>
> mutex_lock(&adev->gfx.gpu_clock_mutex);
> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
> mutex_unlock(&adev->gfx.gpu_clock_mutex);
> return clock;
> }
>
> -static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
> - uint32_t vmid,
> - uint32_t gds_base, uint32_t gds_size,
> - uint32_t gws_base, uint32_t gws_size,
> - uint32_t oa_base, uint32_t oa_size)
> -{
> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
> -
> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
> -
> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
> -
> - /* GDS Base */
> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> - WRITE_DATA_DST_SEL(0)));
> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, gds_base);
> -
> - /* GDS Size */
> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> - WRITE_DATA_DST_SEL(0)));
> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, gds_size);
> -
> - /* GWS */
> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> - WRITE_DATA_DST_SEL(0)));
> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
> -
> - /* OA */
> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> - WRITE_DATA_DST_SEL(0)));
> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
> -}
> -
> static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
> {
> struct amdgpu_device *adev = ring->adev;
> uint32_t value = 0;
>
> value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
> value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
> value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
> value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
> WREG32(mmSQ_CMD, value);
> @@ -4584,55 +4558,32 @@ static int gfx_v7_0_sw_init(void *handle)
> ring_id,
> i, k, j);
> if (r)
> return r;
>
> ring_id++;
> }
> }
> }
>
> - /* reserve GDS, GWS and OA resource for gfx */
> - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
> - &adev->gds.gds_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
> - &adev->gds.gws_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
> - &adev->gds.oa_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> adev->gfx.ce_ram_size = 0x8000;
>
> gfx_v7_0_gpu_early_init(adev);
>
> return r;
> }
>
> static int gfx_v7_0_sw_fini(void *handle)
> {
> int i;
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> -
> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
> for (i = 0; i < adev->gfx.num_compute_rings; i++)
> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>
> gfx_v7_0_cp_compute_fini(adev);
> gfx_v7_0_rlc_fini(adev);
> gfx_v7_0_mec_fini(adev);
> amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
> &adev->gfx.rlc.clear_state_gpu_addr,
> @@ -5073,64 +5024,60 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
>
> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
> .type = AMDGPU_RING_TYPE_GFX,
> .align_mask = 0xff,
> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> .support_64bit_ptrs = false,
> .get_rptr = gfx_v7_0_ring_get_rptr,
> .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
> .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
> .emit_frame_size =
> - 20 + /* gfx_v7_0_ring_emit_gds_switch */
> 7 + /* gfx_v7_0_ring_emit_hdp_flush */
> 5 + /* hdp invalidate */
> 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
> 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
> 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
> .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
> .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
> .test_ring = gfx_v7_0_ring_test_ring,
> .test_ib = gfx_v7_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
> .emit_wreg = gfx_v7_0_ring_emit_wreg,
> .soft_recovery = gfx_v7_0_ring_soft_recovery,
> };
>
> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
> .type = AMDGPU_RING_TYPE_COMPUTE,
> .align_mask = 0xff,
> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> .support_64bit_ptrs = false,
> .get_rptr = gfx_v7_0_ring_get_rptr,
> .get_wptr = gfx_v7_0_ring_get_wptr_compute,
> .set_wptr = gfx_v7_0_ring_set_wptr_compute,
> .emit_frame_size =
> - 20 + /* gfx_v7_0_ring_emit_gds_switch */
> 7 + /* gfx_v7_0_ring_emit_hdp_flush */
> 5 + /* hdp invalidate */
> 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
> 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
> .emit_ib = gfx_v7_0_ring_emit_ib_compute,
> .emit_fence = gfx_v7_0_ring_emit_fence_compute,
> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
> .test_ring = gfx_v7_0_ring_test_ring,
> .test_ib = gfx_v7_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_wreg = gfx_v7_0_ring_emit_wreg,
> };
>
> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
> {
> @@ -5169,42 +5116,28 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
> adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
> }
>
> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
> {
> /* init asci gds info */
> adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
> adev->gds.gws.total_size = 64;
> adev->gds.oa.total_size = 16;
>
> - if (adev->gds.mem.total_size == 64 * 1024) {
> - adev->gds.mem.gfx_partition_size = 4096;
> - adev->gds.mem.cs_partition_size = 4096;
> -
> - adev->gds.gws.gfx_partition_size = 4;
> - adev->gds.gws.cs_partition_size = 4;
> -
> - adev->gds.oa.gfx_partition_size = 4;
> - adev->gds.oa.cs_partition_size = 1;
> - } else {
> - adev->gds.mem.gfx_partition_size = 1024;
> - adev->gds.mem.cs_partition_size = 1024;
> -
> - adev->gds.gws.gfx_partition_size = 16;
> - adev->gds.gws.cs_partition_size = 16;
> -
> - adev->gds.oa.gfx_partition_size = 4;
> - adev->gds.oa.cs_partition_size = 4;
> - }
> + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID;
> + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID;
> + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID;
> + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID;
> + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */
> + adev->gds.oa.kfd_size_per_vmid = 0;
> }
>
> -
> static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
> {
> int i, j, k, counter, active_cu_number = 0;
> u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
> struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
> unsigned disable_masks[4 * 2];
> u32 ao_cu_num;
>
> if (adev->flags & AMD_IS_APU)
> ao_cu_num = 2;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 3882689b2d8f..b11a54bd0668 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -2154,57 +2154,34 @@ static int gfx_v8_0_sw_init(void *handle)
> kiq = &adev->gfx.kiq;
> r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
> if (r)
> return r;
>
> /* create MQD for all compute queues as well as KIQ for SRIOV case */
> r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
> if (r)
> return r;
>
> - /* reserve GDS, GWS and OA resource for gfx */
> - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
> - &adev->gds.gds_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
> - &adev->gds.gws_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
> - &adev->gds.oa_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> adev->gfx.ce_ram_size = 0x8000;
>
> r = gfx_v8_0_gpu_early_init(adev);
> if (r)
> return r;
>
> return 0;
> }
>
> static int gfx_v8_0_sw_fini(void *handle)
> {
> int i;
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> -
> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
> for (i = 0; i < adev->gfx.num_compute_rings; i++)
> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>
> amdgpu_gfx_compute_mqd_sw_fini(adev);
> amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
> amdgpu_gfx_kiq_fini(adev);
>
> gfx_v8_0_mec_fini(adev);
> @@ -3850,21 +3827,21 @@ static void gfx_v8_0_config_init(struct amdgpu_device *adev)
> case CHIP_CARRIZO:
> case CHIP_STONEY:
> adev->gfx.config.double_offchip_lds_buf = 0;
> break;
> }
> }
>
> static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
> {
> u32 tmp, sh_static_mem_cfg;
> - int i;
> + int i, vmid;
>
> WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
>
> gfx_v8_0_tiling_mode_table_init(adev);
> gfx_v8_0_setup_rb(adev);
> gfx_v8_0_get_cu_info(adev);
> gfx_v8_0_config_init(adev);
> @@ -3927,20 +3904,41 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
>
> tmp = RREG32(mmSPI_ARB_PRIORITY);
> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
> WREG32(mmSPI_ARB_PRIORITY, tmp);
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
> + unsigned gds_size, gws_size, oa_size;
> +
> + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) {
> + gds_size = adev->gds.mem.gfx_size_per_vmid;
> + gws_size = adev->gds.gws.gfx_size_per_vmid;
> + oa_size = adev->gds.oa.gfx_size_per_vmid;
> + } else {
> + gds_size = adev->gds.mem.kfd_size_per_vmid;
> + gws_size = adev->gds.gws.kfd_size_per_vmid;
> + oa_size = adev->gds.oa.kfd_size_per_vmid;
> + }
> +
> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * gds_size);
> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size);
> + WREG32(amdgpu_gds_reg_offset[vmid].gws,
> + (vmid * gws_size) |
> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
> + WREG32(amdgpu_gds_reg_offset[vmid].oa,
> + ((1 << oa_size) - 1) << (vmid * oa_size));
> + }
> }
>
> static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
> {
> u32 i, j, k;
> u32 mask;
>
> mutex_lock(&adev->grbm_idx_mutex);
> for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> @@ -5383,68 +5381,20 @@ static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
> uint64_t clock;
>
> mutex_lock(&adev->gfx.gpu_clock_mutex);
> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
> mutex_unlock(&adev->gfx.gpu_clock_mutex);
> return clock;
> }
>
> -static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
> - uint32_t vmid,
> - uint32_t gds_base, uint32_t gds_size,
> - uint32_t gws_base, uint32_t gws_size,
> - uint32_t oa_base, uint32_t oa_size)
> -{
> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
> -
> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
> -
> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
> -
> - /* GDS Base */
> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> - WRITE_DATA_DST_SEL(0)));
> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, gds_base);
> -
> - /* GDS Size */
> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> - WRITE_DATA_DST_SEL(0)));
> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, gds_size);
> -
> - /* GWS */
> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> - WRITE_DATA_DST_SEL(0)));
> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
> -
> - /* OA */
> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> - WRITE_DATA_DST_SEL(0)));
> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
> - amdgpu_ring_write(ring, 0);
> - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
> -}
> -
> static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
> {
> WREG32(mmSQ_IND_INDEX,
> (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
> (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
> (address << SQ_IND_INDEX__INDEX__SHIFT) |
> (SQ_IND_INDEX__FORCE_READ_MASK));
> return RREG32(mmSQ_IND_DATA);
> }
>
> @@ -7132,21 +7082,20 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
> 31 + /* DE_META */
> 3 + /* CNTX_CTRL */
> 5 + /* HDP_INVL */
> 8 + 8 + /* FENCE x2 */
> 2, /* SWITCH_BUFFER */
> .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
> .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
> .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
> .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
> .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
> - .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
> .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
> .test_ring = gfx_v8_0_ring_test_ring,
> .test_ib = gfx_v8_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_switch_buffer = gfx_v8_ring_emit_sb,
> .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
> .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
> .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
> .emit_wreg = gfx_v8_0_ring_emit_wreg,
> @@ -7155,51 +7104,48 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
>
> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> .type = AMDGPU_RING_TYPE_COMPUTE,
> .align_mask = 0xff,
> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> .support_64bit_ptrs = false,
> .get_rptr = gfx_v8_0_ring_get_rptr,
> .get_wptr = gfx_v8_0_ring_get_wptr_compute,
> .set_wptr = gfx_v8_0_ring_set_wptr_compute,
> .emit_frame_size =
> - 20 + /* gfx_v8_0_ring_emit_gds_switch */
> 7 + /* gfx_v8_0_ring_emit_hdp_flush */
> 5 + /* hdp_invalidate */
> 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
> VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
> 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
> .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
> .emit_ib = gfx_v8_0_ring_emit_ib_compute,
> .emit_fence = gfx_v8_0_ring_emit_fence_compute,
> .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
> .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
> - .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
> .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
> .test_ring = gfx_v8_0_ring_test_ring,
> .test_ib = gfx_v8_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .set_priority = gfx_v8_0_ring_set_priority_compute,
> .emit_wreg = gfx_v8_0_ring_emit_wreg,
> };
>
> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
> .type = AMDGPU_RING_TYPE_KIQ,
> .align_mask = 0xff,
> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> .support_64bit_ptrs = false,
> .get_rptr = gfx_v8_0_ring_get_rptr,
> .get_wptr = gfx_v8_0_ring_get_wptr_compute,
> .set_wptr = gfx_v8_0_ring_set_wptr_compute,
> .emit_frame_size =
> - 20 + /* gfx_v8_0_ring_emit_gds_switch */
> 7 + /* gfx_v8_0_ring_emit_hdp_flush */
> 5 + /* hdp_invalidate */
> 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
> 17 + /* gfx_v8_0_ring_emit_vm_flush */
> 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
> .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
> .emit_ib = gfx_v8_0_ring_emit_ib_compute,
> .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
> .test_ring = gfx_v8_0_ring_test_ring,
> .test_ib = gfx_v8_0_ring_test_ib,
> @@ -7278,39 +7224,26 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
> adev->gfx.rlc.funcs = &iceland_rlc_funcs;
> }
>
> static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
> {
> /* init asci gds info */
> adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
> adev->gds.gws.total_size = 64;
> adev->gds.oa.total_size = 16;
>
> - if (adev->gds.mem.total_size == 64 * 1024) {
> - adev->gds.mem.gfx_partition_size = 4096;
> - adev->gds.mem.cs_partition_size = 4096;
> -
> - adev->gds.gws.gfx_partition_size = 4;
> - adev->gds.gws.cs_partition_size = 4;
> -
> - adev->gds.oa.gfx_partition_size = 4;
> - adev->gds.oa.cs_partition_size = 1;
> - } else {
> - adev->gds.mem.gfx_partition_size = 1024;
> - adev->gds.mem.cs_partition_size = 1024;
> -
> - adev->gds.gws.gfx_partition_size = 16;
> - adev->gds.gws.cs_partition_size = 16;
> -
> - adev->gds.oa.gfx_partition_size = 4;
> - adev->gds.oa.cs_partition_size = 4;
> - }
> + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID;
> + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID;
> + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID;
> + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID;
> + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */
> + adev->gds.oa.kfd_size_per_vmid = 0;
> }
>
> static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
> u32 bitmap)
> {
> u32 data;
>
> if (!bitmap)
> return;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 3594704a6f9b..48a7e25514f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1351,31 +1351,32 @@ static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
> NULL);
>
> memset(&adev->gfx.ngg.buf[0], 0,
> sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
>
> adev->gfx.ngg.init = false;
>
> return 0;
> }
>
> +/* TODO: remove */
> static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
> {
> int r;
>
> if (!amdgpu_ngg || adev->gfx.ngg.init == true)
> return 0;
>
> /* GDS reserve memory: 64 bytes alignment */
> adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
> adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
> - adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
> + adev->gds.mem.gfx_size_per_vmid -= adev->gfx.ngg.gds_reserve_size;
> adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
> adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
>
> /* Primitive Buffer */
> r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
> amdgpu_prim_buf_per_se,
> 64 * 1024);
> if (r) {
> dev_err(adev->dev, "Failed to create Primitive Buffer\n");
> goto err;
> @@ -1412,20 +1413,21 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
> }
>
> out:
> adev->gfx.ngg.init = true;
> return 0;
> err:
> gfx_v9_0_ngg_fini(adev);
> return r;
> }
>
> +/* TODO: remove */
> static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
> {
> struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
> int r;
> u32 data, base;
>
> if (!amdgpu_ngg)
> return 0;
>
> /* Program buffer size */
> @@ -1469,23 +1471,22 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
> /* Clear GDS reserved memory */
> r = amdgpu_ring_alloc(ring, 17);
> if (r) {
> DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
> ring->idx, r);
> return r;
> }
>
> gfx_v9_0_write_data_to_reg(ring, 0, false,
> SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
> - (adev->gds.mem.total_size +
> - adev->gfx.ngg.gds_reserve_size) >>
> - AMDGPU_GDS_SHIFT);
> + adev->gds.mem.total_size +
> + adev->gfx.ngg.gds_reserve_size);
>
> amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
> amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
> PACKET3_DMA_DATA_DST_SEL(1) |
> PACKET3_DMA_DATA_SRC_SEL(2)));
> amdgpu_ring_write(ring, 0);
> amdgpu_ring_write(ring, 0);
> amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
> amdgpu_ring_write(ring, 0);
> amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
> @@ -1644,62 +1645,39 @@ static int gfx_v9_0_sw_init(void *handle)
> kiq = &adev->gfx.kiq;
> r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
> if (r)
> return r;
>
> /* create MQD for all compute queues as wel as KIQ for SRIOV case */
> r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
> if (r)
> return r;
>
> - /* reserve GDS, GWS and OA resource for gfx */
> - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
> - &adev->gds.gds_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
> - &adev->gds.gws_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
> - &adev->gds.oa_gfx_bo, NULL, NULL);
> - if (r)
> - return r;
> -
> adev->gfx.ce_ram_size = 0x8000;
>
> r = gfx_v9_0_gpu_early_init(adev);
> if (r)
> return r;
>
> r = gfx_v9_0_ngg_init(adev);
> if (r)
> return r;
>
> return 0;
> }
>
>
> static int gfx_v9_0_sw_fini(void *handle)
> {
> int i;
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> -
> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
> for (i = 0; i < adev->gfx.num_compute_rings; i++)
> amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>
> amdgpu_gfx_compute_mqd_sw_fini(adev);
> amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
> amdgpu_gfx_kiq_fini(adev);
>
> gfx_v9_0_mec_fini(adev);
> @@ -1813,21 +1791,21 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
> WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
> WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
> }
> soc15_grbm_select(adev, 0, 0, 0, 0);
> mutex_unlock(&adev->srbm_mutex);
> }
>
> static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
> {
> u32 tmp;
> - int i;
> + int i, vmid;
>
> WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
>
> gfx_v9_0_tiling_mode_table_init(adev);
>
> gfx_v9_0_setup_rb(adev);
> gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
> adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
>
> /* XXX SH_MEM regs */
> @@ -1869,20 +1847,43 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
> (adev->gfx.config.sc_prim_fifo_size_frontend <<
> PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
> (adev->gfx.config.sc_prim_fifo_size_backend <<
> PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
> (adev->gfx.config.sc_hiz_tile_fifo_size <<
> PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
> (adev->gfx.config.sc_earlyz_tile_fifo_size <<
> PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) {
> + unsigned gds_size, gws_size, oa_size;
> +
> + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) {
> + gds_size = adev->gds.mem.gfx_size_per_vmid;
> + gws_size = adev->gds.gws.gfx_size_per_vmid;
> + oa_size = adev->gds.oa.gfx_size_per_vmid;
> + } else {
> + gds_size = adev->gds.mem.kfd_size_per_vmid;
> + gws_size = adev->gds.gws.kfd_size_per_vmid;
> + oa_size = adev->gds.oa.kfd_size_per_vmid;
> + }
> +
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid,
> + vmid * gds_size);
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid,
> + gds_size);
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid,
> + (vmid * gws_size) |
> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT));
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid,
> + ((1 << oa_size) - 1) << (vmid * oa_size));
> + }
> }
>
> static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
> {
> u32 i, j, k;
> u32 mask;
>
> mutex_lock(&adev->grbm_idx_mutex);
> for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> @@ -3407,58 +3408,20 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
> uint64_t clock;
>
> mutex_lock(&adev->gfx.gpu_clock_mutex);
> WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
> clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
> ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
> mutex_unlock(&adev->gfx.gpu_clock_mutex);
> return clock;
> }
>
> -static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
> - uint32_t vmid,
> - uint32_t gds_base, uint32_t gds_size,
> - uint32_t gws_base, uint32_t gws_size,
> - uint32_t oa_base, uint32_t oa_size)
> -{
> - struct amdgpu_device *adev = ring->adev;
> -
> - gds_base = gds_base >> AMDGPU_GDS_SHIFT;
> - gds_size = gds_size >> AMDGPU_GDS_SHIFT;
> -
> - gws_base = gws_base >> AMDGPU_GWS_SHIFT;
> - gws_size = gws_size >> AMDGPU_GWS_SHIFT;
> -
> - oa_base = oa_base >> AMDGPU_OA_SHIFT;
> - oa_size = oa_size >> AMDGPU_OA_SHIFT;
> -
> - /* GDS Base */
> - gfx_v9_0_write_data_to_reg(ring, 0, false,
> - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
> - gds_base);
> -
> - /* GDS Size */
> - gfx_v9_0_write_data_to_reg(ring, 0, false,
> - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
> - gds_size);
> -
> - /* GWS */
> - gfx_v9_0_write_data_to_reg(ring, 0, false,
> - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
> - gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
> -
> - /* OA */
> - gfx_v9_0_write_data_to_reg(ring, 0, false,
> - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
> - (1 << (oa_size + oa_base)) - (1 << oa_base));
> -}
> -
> static int gfx_v9_0_early_init(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
> gfx_v9_0_set_ring_funcs(adev);
> gfx_v9_0_set_irq_funcs(adev);
> gfx_v9_0_set_gds_init(adev);
> gfx_v9_0_set_rlc_funcs(adev);
> @@ -4695,21 +4658,20 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
> 31 + /* DE_META */
> 3 + /* CNTX_CTRL */
> 5 + /* HDP_INVL */
> 8 + 8 + /* FENCE x2 */
> 2, /* SWITCH_BUFFER */
> .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
> .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
> .emit_fence = gfx_v9_0_ring_emit_fence,
> .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
> .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
> - .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
> .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
> .test_ring = gfx_v9_0_ring_test_ring,
> .test_ib = gfx_v9_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_switch_buffer = gfx_v9_ring_emit_sb,
> .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
> .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
> .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> .emit_tmz = gfx_v9_0_ring_emit_tmz,
> @@ -4722,34 +4684,32 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
> static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
> .type = AMDGPU_RING_TYPE_COMPUTE,
> .align_mask = 0xff,
> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> .support_64bit_ptrs = true,
> .vmhub = AMDGPU_GFXHUB,
> .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> .emit_frame_size =
> - 20 + /* gfx_v9_0_ring_emit_gds_switch */
> 7 + /* gfx_v9_0_ring_emit_hdp_flush */
> 5 + /* hdp invalidate */
> 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> 2 + /* gfx_v9_0_ring_emit_vm_flush */
> 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
> .emit_ib = gfx_v9_0_ring_emit_ib_compute,
> .emit_fence = gfx_v9_0_ring_emit_fence,
> .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
> .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
> - .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
> .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
> .test_ring = gfx_v9_0_ring_test_ring,
> .test_ib = gfx_v9_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .set_priority = gfx_v9_0_ring_set_priority_compute,
> .emit_wreg = gfx_v9_0_ring_emit_wreg,
> .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> };
> @@ -4757,21 +4717,20 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
> static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
> .type = AMDGPU_RING_TYPE_KIQ,
> .align_mask = 0xff,
> .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> .support_64bit_ptrs = true,
> .vmhub = AMDGPU_GFXHUB,
> .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> .emit_frame_size =
> - 20 + /* gfx_v9_0_ring_emit_gds_switch */
> 7 + /* gfx_v9_0_ring_emit_hdp_flush */
> 5 + /* hdp invalidate */
> 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> 2 + /* gfx_v9_0_ring_emit_vm_flush */
> 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
> .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
> .emit_ib = gfx_v9_0_ring_emit_ib_compute,
> .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
> @@ -4847,39 +4806,26 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
> }
> }
>
> static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
> {
> /* init asic gds info */
> adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
> adev->gds.gws.total_size = 64;
> adev->gds.oa.total_size = 16;
>
> - if (adev->gds.mem.total_size == 64 * 1024) {
> - adev->gds.mem.gfx_partition_size = 4096;
> - adev->gds.mem.cs_partition_size = 4096;
> -
> - adev->gds.gws.gfx_partition_size = 4;
> - adev->gds.gws.cs_partition_size = 4;
> -
> - adev->gds.oa.gfx_partition_size = 4;
> - adev->gds.oa.cs_partition_size = 1;
> - } else {
> - adev->gds.mem.gfx_partition_size = 1024;
> - adev->gds.mem.cs_partition_size = 1024;
> -
> - adev->gds.gws.gfx_partition_size = 16;
> - adev->gds.gws.cs_partition_size = 16;
> -
> - adev->gds.oa.gfx_partition_size = 4;
> - adev->gds.oa.cs_partition_size = 4;
> - }
> + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID;
> + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID;
> + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID;
> + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID;
> + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */
> + adev->gds.oa.kfd_size_per_vmid = 0;
> }
>
> static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
> u32 bitmap)
> {
> u32 data;
>
> if (!bitmap)
> return;
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 94444eeba55b..9b9512b14cae 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -81,36 +81,27 @@ extern "C" {
> * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible.
> * Memory in this pool could be swapped out to disk if there is pressure.
> *
> * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the
> * GPU's virtual address space via gart. Gart memory linearizes non-contiguous
> * pages of system memory, allowing the GPU to access system memory in a
> * linearized fashion.
> *
> * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory
> * carved out by the BIOS.
> - *
> - * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data
> - * across shader threads.
> - *
> - * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the
> - * execution of all the waves on a device.
> - *
> - * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines
> - * for appending data.
> */
> #define AMDGPU_GEM_DOMAIN_CPU 0x1
> #define AMDGPU_GEM_DOMAIN_GTT 0x2
> #define AMDGPU_GEM_DOMAIN_VRAM 0x4
> -#define AMDGPU_GEM_DOMAIN_GDS 0x8
> -#define AMDGPU_GEM_DOMAIN_GWS 0x10
> -#define AMDGPU_GEM_DOMAIN_OA 0x20
> +#define AMDGPU_GEM_DOMAIN_GDS 0x8 /* non-functional */
> +#define AMDGPU_GEM_DOMAIN_GWS 0x10 /* non-functional */
> +#define AMDGPU_GEM_DOMAIN_OA 0x20 /* non-functional */
> #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \
> AMDGPU_GEM_DOMAIN_GTT | \
> AMDGPU_GEM_DOMAIN_VRAM | \
> AMDGPU_GEM_DOMAIN_GDS | \
> AMDGPU_GEM_DOMAIN_GWS | \
> AMDGPU_GEM_DOMAIN_OA)
>
> /* Flag that CPU access will be required for the case of VRAM domain */
> #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0)
> /* Flag that CPU access will not work, this VRAM domain is invisible */
More information about the amd-gfx
mailing list