[PATCH 3/6] drm/amdgpu: untie user ring ids from kernel ring ids v4

Thu Apr 13 22:04:33 UTC 2017

Add amdgpu_queue_mgr, a mechanism that allows disjointing usermode's
ring ids from the kernel's ring ids.

The queue manager maintains a per-file descriptor map of user ring ids
to amdgpu_ring pointers. Once a map is created it is permanent (this is
required to maintain FIFO execution guarantees for a context's ring).

Different queue map policies can be configured for each HW IP.
Currently all HW IPs use the identity mapper, i.e. kernel ring id is
equal to the user ring id.

The purpose of this mechanism is to distribute the load across multiple
queues more effectively for HW IPs that support multiple rings.
Userspace clients are unable to check whether a specific resource is in
use by a different client. Therefore, it is up to the kernel driver to
make the optimal choice.

v2: remove amdgpu_queue_mapper_funcs
v3: made amdgpu_queue_mgr per context instead of per-fd
v4: add context_put on error paths

Reviewed-by: Christian König <christian.koenig at amd.com>
Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  27 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 117 +++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 230 ++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      |  45 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h      |   2 +
 7 files changed, 335 insertions(+), 95 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 660786a..dd48eb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -7,41 +7,42 @@ FULL_AMD_PATH=$(src)/..
 ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
 	-I$(FULL_AMD_PATH)/powerplay/inc \
 	-I$(FULL_AMD_PATH)/acp/include
 
 amdgpu-y := amdgpu_drv.o
 
 # add KMS driver
 amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
 	atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
 	amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
 	amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
 	amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
+	amdgpu_queue_mgr.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 	ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
 	amdgpu_amdkfd_gfx_v7.o
 
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
 	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o
 
 # add GMC block
 amdgpu-y += \
 	gmc_v7_0.o \
 	gmc_v8_0.o \
 	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o
 
 # add IH block
 amdgpu-y += \
 	amdgpu_irq.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0a58575..1d9053f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -756,52 +756,76 @@ struct amdgpu_ib {
 	uint32_t			length_dw;
 	uint64_t			gpu_addr;
 	uint32_t			*ptr;
 	uint32_t			flags;
 };
 
 extern const struct amd_sched_backend_ops amdgpu_sched_ops;
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 		     struct amdgpu_job **job, struct amdgpu_vm *vm);
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 			     struct amdgpu_job **job);
 
 void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct dma_fence **f);
 
 /*
+ * Queue manager
+ */
+struct amdgpu_queue_mapper {
+	int 		hw_ip;
+	struct mutex	lock;
+	/* protected by lock */
+	struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
+};
+
+struct amdgpu_queue_mgr {
+	struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
+};
+
+int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
+			 struct amdgpu_queue_mgr *mgr,
+			 int hw_ip, int instance, int ring,
+			 struct amdgpu_ring **out_ring);
+
+/*
  * context related structures
  */
 
 struct amdgpu_ctx_ring {
 	uint64_t		sequence;
 	struct dma_fence	**fences;
 	struct amd_sched_entity	entity;
 };
 
 struct amdgpu_ctx {
 	struct kref		refcount;
 	struct amdgpu_device    *adev;
+	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool preamble_presented;
 };
 
 struct amdgpu_ctx_mgr {
 	struct amdgpu_device	*adev;
 	struct mutex		lock;
 	/* protected by lock */
 	struct idr		ctx_handles;
 };
 
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 			      struct dma_fence *fence);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
@@ -1866,43 +1890,40 @@ static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
 #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
 #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
 #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
 #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
 #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b))
 #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
 #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
 #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 
 /* Common functions */
 int amdgpu_gpu_reset(struct amdgpu_device *adev);
 bool amdgpu_need_backup(struct amdgpu_device *adev);
 void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring);
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
 void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
 void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ec71b93..3845965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -13,112 +13,40 @@
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  *
  * Authors:
  *    Jerome Glisse <glisse at freedesktop.org>
  */
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring)
-{
-	/* Right now all IPs have only one instance - multiple rings. */
-	if (ip_instance != 0) {
-		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
-		return -EINVAL;
-	}
-
-	switch (ip_type) {
-	default:
-		DRM_ERROR("unknown ip type: %d\n", ip_type);
-		return -EINVAL;
-	case AMDGPU_HW_IP_GFX:
-		if (ring < adev->gfx.num_gfx_rings) {
-			*out_ring = &adev->gfx.gfx_ring[ring];
-		} else {
-			DRM_ERROR("only %d gfx rings are supported now\n",
-				  adev->gfx.num_gfx_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		if (ring < adev->gfx.num_compute_rings) {
-			*out_ring = &adev->gfx.compute_ring[ring];
-		} else {
-			DRM_ERROR("only %d compute rings are supported now\n",
-				  adev->gfx.num_compute_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_DMA:
-		if (ring < adev->sdma.num_instances) {
-			*out_ring = &adev->sdma.instance[ring].ring;
-		} else {
-			DRM_ERROR("only %d SDMA rings are supported\n",
-				  adev->sdma.num_instances);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.ring;
-		break;
-	case AMDGPU_HW_IP_VCE:
-		if (ring < adev->vce.num_rings){
-			*out_ring = &adev->vce.ring[ring];
-		} else {
-			DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		if (ring < adev->uvd.num_enc_rings){
-			*out_ring = &adev->uvd.ring_enc[ring];
-		} else {
-			DRM_ERROR("only %d UVD ENC rings are supported\n",
-				adev->uvd.num_enc_rings);
-			return -EINVAL;
-		}
-		break;
-	}
-
-	if (!(*out_ring && (*out_ring)->adev)) {
-		DRM_ERROR("Ring %d is not initialized on IP %d\n",
-			  ring, ip_type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
 	unsigned long size;
 
 	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
 		return -EINVAL;
 
 	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
 
 	size = amdgpu_bo_size(p->uf_entry.robj);
 	if (size != PAGE_SIZE || (data->offset + 8) > size)
 		return -EINVAL;
@@ -899,43 +827,42 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
 		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 			continue;
 
 		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
 			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
 				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
 					ce_preempt++;
 				else
 					de_preempt++;
 			}
 
 			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
 			if (ce_preempt > 1 || de_preempt > 1)
 				return -EINVAL;
 		}
 
-		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
-				       chunk_ib->ip_instance, chunk_ib->ring,
-				       &ring);
+		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
+					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
 		if (r)
 			return r;
 
 		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
 			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
 			if (!parser->ctx->preamble_presented) {
 				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
 				parser->ctx->preamble_presented = true;
 			}
 		}
 
 		if (parser->job->ring && parser->job->ring != ring)
 			return -EINVAL;
 
 		parser->job->ring = ring;
 
 		if (ring->funcs->parse_cs) {
 			struct amdgpu_bo_va_mapping *m;
 			struct amdgpu_bo *aobj = NULL;
 			uint64_t offset;
@@ -1003,50 +930,53 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 
 	for (i = 0; i < p->nchunks; ++i) {
 		struct drm_amdgpu_cs_chunk_dep *deps;
 		struct amdgpu_cs_chunk *chunk;
 		unsigned num_deps;
 
 		chunk = &p->chunks[i];
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
 			continue;
 
 		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
 		num_deps = chunk->length_dw * 4 /
 			sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 		for (j = 0; j < num_deps; ++j) {
 			struct amdgpu_ring *ring;
 			struct amdgpu_ctx *ctx;
 			struct dma_fence *fence;
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
-			if (r)
-				return r;
-
 			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
 			if (ctx == NULL)
 				return -EINVAL;
 
+			r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
+						 deps[j].ip_type,
+						 deps[j].ip_instance,
+						 deps[j].ring, &ring);
+			if (r) {
+				amdgpu_ctx_put(ctx);
+				return r;
+			}
+
 			fence = amdgpu_ctx_get_fence(ctx, ring,
 						     deps[j].handle);
 			if (IS_ERR(fence)) {
 				r = PTR_ERR(fence);
 				amdgpu_ctx_put(ctx);
 				return r;
 
 			} else if (fence) {
 				r = amdgpu_sync_fence(adev, &p->job->sync,
 						      fence);
 				dma_fence_put(fence);
 				amdgpu_ctx_put(ctx);
 				if (r)
 					return r;
 			}
 		}
 	}
 
 	return 0;
 }
@@ -1138,93 +1068,98 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 /**
  * amdgpu_cs_wait_ioctl - wait for a command submission to finish
  *
  * @dev: drm device
  * @data: data from userspace
  * @filp: file private
  *
  * Wait for the command submission identified by handle to finish.
  */
 int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
 
-	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
-			       wait->in.ring, &ring);
-	if (r)
-		return r;
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;
 
+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
+				 wait->in.ip_type, wait->in.ip_instance,
+				 wait->in.ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return r;
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
 		r = dma_fence_wait_timeout(fence, true, timeout);
 		dma_fence_put(fence);
 	} else
 		r = 1;
 
 	amdgpu_ctx_put(ctx);
 	if (r < 0)
 		return r;
 
 	memset(wait, 0, sizeof(*wait));
 	wait->out.status = (r == 0);
 
 	return 0;
 }
 
 /**
  * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
  *
  * @adev: amdgpu device
  * @filp: file private
  * @user: drm_amdgpu_fence copied from user space
  */
 static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 					     struct drm_file *filp,
 					     struct drm_amdgpu_fence *user)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
 
-	r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
-			       user->ring, &ring);
-	if (r)
-		return ERR_PTR(r);
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
+				 user->ip_instance, user->ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return ERR_PTR(r);
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;
 }
 
 /**
  * amdgpu_cs_wait_all_fence - wait on all fences to signal
  *
  * @adev: amdgpu device
  * @filp: file private
  * @wait: wait parameters
  * @fences: array of drm_amdgpu_fence
  */
 static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
 				     struct drm_file *filp,
 				     union drm_amdgpu_wait_fences *wait,
 				     struct drm_amdgpu_fence *fences)
 {
 	uint32_t fence_count = wait->in.fence_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 90d1ac8..1969f27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -41,68 +41,74 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 
 	/* create context entity for each ring */
 	for (i = 0; i < adev->num_rings; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		struct amd_sched_rq *rq;
 
 		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
 					  rq, amdgpu_sched_jobs);
 		if (r)
 			goto failed;
 	}
 
+	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
+	if (r)
+		goto failed;
+
 	return 0;
 
 failed:
 	for (j = 0; j < i; j++)
 		amd_sched_entity_fini(&adev->rings[j]->sched,
 				      &ctx->rings[j].entity);
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 	return r;
 }
 
 static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 {
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		for (j = 0; j < amdgpu_sched_jobs; ++j)
 			dma_fence_put(ctx->rings[i].fences[j]);
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 
 	for (i = 0; i < adev->num_rings; i++)
 		amd_sched_entity_fini(&adev->rings[i]->sched,
 				      &ctx->rings[i].entity);
+
+	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
 			    uint32_t *id)
 {
 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
 	struct amdgpu_ctx *ctx;
 	int r;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
 	mutex_lock(&mgr->lock);
 	r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
 	if (r < 0) {
 		mutex_unlock(&mgr->lock);
 		kfree(ctx);
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
new file mode 100644
index 0000000..3e9ac80
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ring.h"
+
+static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
+				    int hw_ip)
+{
+	if (!mapper)
+		return -EINVAL;
+
+	if (hw_ip > AMDGPU_MAX_IP_NUM)
+		return -EINVAL;
+
+	mapper->hw_ip = hw_ip;
+	mutex_init(&mapper->lock);
+
+	memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
+
+	return 0;
+}
+
+static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
+					  int ring)
+{
+	return mapper->queue_map[ring];
+}
+
+static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
+			     int ring, struct amdgpu_ring *pring)
+{
+	if (WARN_ON(mapper->queue_map[ring])) {
+		DRM_ERROR("Un-expected ring re-map\n");
+		return -EINVAL;
+	}
+
+	mapper->queue_map[ring] = pring;
+
+	return 0;
+}
+
+static int amdgpu_identity_map(struct amdgpu_device *adev,
+			       struct amdgpu_queue_mapper *mapper,
+			       int ring,
+			       struct amdgpu_ring **out_ring)
+{
+	switch (mapper->hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		*out_ring = &adev->gfx.gfx_ring[ring];
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		*out_ring = &adev->gfx.compute_ring[ring];
+		break;
+	case AMDGPU_HW_IP_DMA:
+		*out_ring = &adev->sdma.instance[ring].ring;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		*out_ring = &adev->uvd.ring;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		*out_ring = &adev->vce.ring[ring];
+		break;
+	default:
+		*out_ring = NULL;
+		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
+		return -EINVAL;
+	}
+
+	return amdgpu_update_cached_map(mapper, ring, *out_ring);
+}
+
+/**
+ * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ *
+ * Initialize the the selected @mgr (all asics).
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr)
+{
+	int i, r;
+
+	if (!adev || !mgr)
+		return -EINVAL;
+
+	memset(mgr, 0, sizeof(*mgr));
+
+	for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
+		r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ *
+ * De-initialize the the selected @mgr (all asics).
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr)
+{
+	return 0;
+}
+
+/**
+ * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ * @hw_ip: HW IP enum
+ * @instance: HW instance
+ * @ring: user ring id
+ * @our_ring: pointer to mapped amdgpu_ring
+ *
+ * Map a userspace ring id to an appropriate kernel ring. Different
+ * policies are configurable at a HW IP level.
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
+			 struct amdgpu_queue_mgr *mgr,
+			 int hw_ip, int instance, int ring,
+			 struct amdgpu_ring **out_ring)
+{
+	int r, ip_num_rings;
+	struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
+
+	if (!adev || !mgr || !out_ring)
+		return -EINVAL;
+
+	if (hw_ip >= AMDGPU_MAX_IP_NUM)
+		return -EINVAL;
+
+	if (ring >= AMDGPU_MAX_RINGS)
+		return -EINVAL;
+
+	/* Right now all IPs have only one instance - multiple rings. */
+	if (instance != 0) {
+		DRM_ERROR("invalid ip instance: %d\n", instance);
+		return -EINVAL;
+	}
+
+	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		ip_num_rings = adev->gfx.num_gfx_rings;
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		ip_num_rings = adev->gfx.num_compute_rings;
+		break;
+	case AMDGPU_HW_IP_DMA:
+		ip_num_rings = adev->sdma.num_instances;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		ip_num_rings = 1;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		ip_num_rings = adev->vce.num_rings;
+		break;
+	default:
+		DRM_ERROR("unknown ip type: %d\n", hw_ip);
+		return -EINVAL;
+	}
+
+	if (ring >= ip_num_rings) {
+		DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
+				ring, ip_num_rings, hw_ip);
+		return -EINVAL;
+	}
+
+	mutex_lock(&mapper->lock);
+
+	*out_ring = amdgpu_get_cached_map(mapper, ring);
+	if (*out_ring) {
+		/* cache hit */
+		r = 0;
+		goto out_unlock;
+	}
+
+	switch (mapper->hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+	case AMDGPU_HW_IP_COMPUTE:
+	case AMDGPU_HW_IP_DMA:
+	case AMDGPU_HW_IP_UVD:
+	case AMDGPU_HW_IP_VCE:
+		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
+		break;
+	default:
+		*out_ring = NULL;
+		r = -EINVAL;
+		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
+	}
+
+out_unlock:
+	mutex_unlock(&mapper->lock);
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6a85db0..12fc815 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -35,40 +35,85 @@
 #include "atom.h"
 
 /*
  * Rings
  * Most engines on the GPU are fed via ring buffers.  Ring
  * buffers are areas of GPU accessible memory that the host
  * writes commands into and the GPU reads commands out of.
  * There is a rptr (read pointer) that determines where the
  * GPU is currently reading, and a wptr (write pointer)
  * which determines where the host has written.  When the
  * pointers are equal, the ring is idle.  When the host
  * writes commands to the ring buffer, it increments the
  * wptr.  The GPU then starts fetching commands and executes
  * them until the pointers are equal again.
  */
 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring);
 static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);
 
 /**
+ * amdgpu_ring_is_valid_index - check if a ring idex is valid for a HW IP
+ *
+ * @adev: amdgpu_device pointer
+ * @ip_type: The HW IP to check against
+ * @ring: the ring index
+ *
+ * Check if @ring is a valid index for @ip_type (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_is_valid_index(struct amdgpu_device *adev,
+			       int ip_type, int ring)
+{
+	int ip_num_rings;
+
+	switch (ip_type) {
+	case AMDGPU_HW_IP_GFX:
+		ip_num_rings = adev->gfx.num_gfx_rings;
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		ip_num_rings = adev->gfx.num_compute_rings;
+		break;
+	case AMDGPU_HW_IP_DMA:
+		ip_num_rings = adev->sdma.num_instances;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		ip_num_rings = 1;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		ip_num_rings = adev->vce.num_rings;
+		break;
+	default:
+		DRM_ERROR("unknown ip type: %d\n", ip_type);
+		return -EINVAL;
+	}
+
+	if (ring >= ip_num_rings) {
+		DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
+				ring, ip_num_rings, ip_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
  * amdgpu_ring_alloc - allocate space on the ring buffer
  *
  * @adev: amdgpu_device pointer
  * @ring: amdgpu_ring structure holding ring information
  * @ndw: number of dwords to allocate in the ring buffer
  *
  * Allocate @ndw dwords in the ring buffer (all asics).
  * Returns 0 on success, error on failure.
  */
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
 {
 	/* Align requested size with padding so unlock_commit can
 	 * pad safely */
 	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
 
 	/* Make sure we aren't trying to allocate more space
 	 * than the maximum for one submission
 	 */
 	if (WARN_ON_ONCE(ndw > ring->max_dw))
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 944443c..4de0d83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -168,38 +168,40 @@ struct amdgpu_ring {
 	u32			queue;
 	struct amdgpu_bo	*mqd_obj;
 	uint64_t                mqd_gpu_addr;
 	void                    *mqd_ptr;
 	uint64_t                eop_gpu_addr;
 	u32			doorbell_index;
 	bool			use_doorbell;
 	unsigned		wptr_offs;
 	unsigned		fence_offs;
 	uint64_t		current_ctx;
 	char			name[16];
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
 };
 
+int amdgpu_ring_is_valid_index(struct amdgpu_device *adev,
+			       int hw_ip, int ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 	int i = 0;
 	while (i <= ring->buf_mask)
 		ring->ring[i++] = ring->funcs->nop;
 
 }
 
 #endif
-- 
2.9.3