[PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL
Christian König
deathsimple at vodafone.de
Mon Aug 29 08:10:01 UTC 2016
Am 29.08.2016 um 04:55 schrieb Monk Liu:
> use the CONTEXT_CONTROL packet to dynamically skip
> the preamble IB and other load_xxx commands in sequence.
>
> Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
Again, please rebase on top of amd-staging-4.6. Apart from that I need
to take a closer look later today.
BTW: Where is patch #3?
Christian.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 +++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++++-------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 32 ++++++++++++++++++++++++++++++++
> 4 files changed, 62 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9132719..a9dfeb5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -339,6 +339,7 @@ struct amdgpu_ring_funcs {
> void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
> void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> void (*emit_switch_buffer) (struct amdgpu_ring *ring);
> + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
> };
>
> /*
> @@ -1050,6 +1051,7 @@ struct amdgpu_ctx {
> spinlock_t ring_lock;
> struct fence **fences;
> struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
> + bool preamble_presented;
> };
>
> struct amdgpu_ctx_mgr {
> @@ -1320,8 +1322,13 @@ struct amdgpu_cs_parser {
>
> /* user fence */
> struct amdgpu_bo_list_entry uf_entry;
> + bool preamble_present; /* True means this command submit involves a preamble IB */
> };
>
> +#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
> +#define PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means this is the first time a preamble IB is presented in its context */
> +#define HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occurred */
> +
> struct amdgpu_job {
> struct amd_sched_job base;
> struct amdgpu_device *adev;
> @@ -1330,6 +1337,7 @@ struct amdgpu_job {
> struct amdgpu_sync sync;
> struct amdgpu_ib *ibs;
> struct fence *fence; /* the hw fence */
> + uint32_t preamble_status;
> uint32_t num_ibs;
> void *owner;
> uint64_t fence_ctx; /* the fence_context this job uses */
> @@ -2374,6 +2382,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
> #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
> #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
> #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
> +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 78d3831..f2d739a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -711,6 +711,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> if (r)
> return r;
>
> + if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
> + parser->preamble_present = true;
> +
> if (parser->job->ring && parser->job->ring != ring)
> return -EINVAL;
>
> @@ -849,6 +852,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> return r;
> }
>
> + if (p->preamble_present) {
> + job->preamble_status |= PREAMBLE_IB_PRESENT;
> + if (!p->ctx->preamble_presented)
> + job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
> + }
> +
> job->owner = p->filp;
> job->fence_ctx = entity->fence_context;
> p->fence = fence_get(&job->base.s_fence->finished);
> @@ -859,6 +868,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> trace_amdgpu_cs_ioctl(job);
> amd_sched_entity_push_job(&job->base);
>
> + if (p->preamble_present)
> + p->ctx->preamble_presented = true;
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index df379c7..7c501ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> {
> struct amdgpu_device *adev = ring->adev;
> struct amdgpu_ib *ib = &ibs[0];
> - bool skip_preamble, need_ctx_switch;
> + bool need_ctx_switch;
> unsigned patch_offset = ~0;
> struct amdgpu_vm *vm;
> uint64_t fence_ctx;
> + uint32_t status = 0;
>
> unsigned i;
> int r = 0;
> @@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> /* always set cond_exec_polling to CONTINUE */
> *ring->cond_exe_cpu_addr = 1;
>
> - skip_preamble = ring->current_ctx == fence_ctx;
> need_ctx_switch = ring->current_ctx != fence_ctx;
> + if (job && ring->funcs->emit_cntxcntl) {
> + if (need_ctx_switch)
> + status |= HAVE_CTX_SWITCH;
> + status |= job->preamble_status;
> + amdgpu_ring_emit_cntxcntl(ring, status);
> + }
> +
> for (i = 0; i < num_ibs; ++i) {
> ib = &ibs[i];
> -
> - /* drop preamble IBs if we don't have a context switch */
> - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
> - continue;
> -
> amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
> need_ctx_switch);
> need_ctx_switch = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index ce1e616..8f6d860 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6164,6 +6164,36 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
> amdgpu_ring_write(ring, 0);
> }
>
> +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> +{
> + uint32_t dw2 = 0;
> +
> + dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
> + if (flags & HAVE_CTX_SWITCH) {
> + /* set load_global_config & load_global_uconfig */
> + dw2 |= 0x8001;
> + /* set load_cs_sh_regs */
> + dw2 |= 0x01000000;
> + /* set load_per_context_state & load_gfx_sh_regs for GFX */
> + if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
> + dw2 |= 0x10002;
> +
> + /* set load_ce_ram if preamble presented */
> + if (PREAMBLE_IB_PRESENT & flags)
> + dw2 |= 0x10000000;
> + } else {
> + /* still set load_ce_ram if this is the first time a preamble is
> + * presented, even though no context switch happened.
> + */
> + if (PREAMBLE_IB_PRESENT_FIRST & flags)
> + dw2 |= 0x10000000;
> + }
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> + amdgpu_ring_write(ring, dw2);
> + amdgpu_ring_write(ring, 0);
> +}
> +
> static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
> enum amdgpu_interrupt_state state)
> {
> @@ -6472,6 +6502,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_switch_buffer = gfx_v8_ring_emit_sb,
> + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
> };
>
> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> @@ -6490,6 +6521,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> .test_ib = gfx_v8_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
> };
>
> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
More information about the amd-gfx
mailing list