[PATCH 4/5] drm/amd/sriov porting sriov cap to vcn3.0

Leo Liu leo.liu at amd.com
Tue Jul 14 15:06:56 UTC 2020


This patch is:

Reviewed-by: Leo Liu <leo.liu at amd.com>

On 2020-07-13 10:47 p.m., Jack Zhang wrote:
> 1. In early_init, for SRIOV, hardcode
>    harvest_config=0 and enc_num=1
>
> 2. sw_init/fini:
>    allocate and free the mm_table for SRIOV;
>    doorbell setting for SRIOV
>
> 3. hw_init/fini:
>    Under SRIOV, add start_sriov to configure MMSCH.
>    Skip ring_test to avoid MMIO in the VF, but initialize wptr for the VCN rings.
>
> 4. Implementation of vcn_v3_0_start_sriov
>
> V2: Clean up some unnecessary function declarations.
>
> Signed-off-by: Jack Zhang <Jack.Zhang1 at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 350 +++++++++++++++++++++++---
>   1 file changed, 318 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> index 90fe95f345e3..0a0ca10bf55b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> @@ -28,6 +28,7 @@
>   #include "soc15.h"
>   #include "soc15d.h"
>   #include "vcn_v2_0.h"
> +#include "mmsch_v3_0.h"
>   
>   #include "vcn/vcn_3_0_0_offset.h"
>   #include "vcn/vcn_3_0_0_sh_mask.h"
> @@ -48,6 +49,17 @@
>   
>   #define VCN_INSTANCES_SIENNA_CICHLID	 				2
>   
> +static int amdgpu_ih_clientid_vcns[] = {
> +	SOC15_IH_CLIENTID_VCN,
> +	SOC15_IH_CLIENTID_VCN1
> +};
> +
> +static int amdgpu_ucode_id_vcns[] = {
> +	AMDGPU_UCODE_ID_VCN,
> +	AMDGPU_UCODE_ID_VCN1
> +};
> +
> +static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
>   static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
>   static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
>   static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
> @@ -56,10 +68,8 @@ static int vcn_v3_0_set_powergating_state(void *handle,
>   static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
>   			int inst_idx, struct dpg_pause_state *new_state);
>   
> -static int amdgpu_ih_clientid_vcns[] = {
> -	SOC15_IH_CLIENTID_VCN,
> -	SOC15_IH_CLIENTID_VCN1
> -};
> +static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
> +static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
>   
>   /**
>    * vcn_v3_0_early_init - set function pointers
> @@ -71,25 +81,33 @@ static int amdgpu_ih_clientid_vcns[] = {
>   static int vcn_v3_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -	if (adev->asic_type == CHIP_SIENNA_CICHLID) {
> -		u32 harvest;
> -		int i;
>   
> +	if (amdgpu_sriov_vf(adev)) {
>   		adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
> -		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> -			harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
> -			if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
> -				adev->vcn.harvest_config |= 1 << i;
> -		}
> +		adev->vcn.harvest_config = 0;
> +		adev->vcn.num_enc_rings = 1;
>   
> -		if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
> -			 AMDGPU_VCN_HARVEST_VCN1))
> -			/* both instances are harvested, disable the block */
> -			return -ENOENT;
> -	} else
> -		adev->vcn.num_vcn_inst = 1;
> +	} else {
> +		if (adev->asic_type == CHIP_SIENNA_CICHLID) {
> +			u32 harvest;
> +			int i;
> +
> +			adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
> +			for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> +				harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
> +				if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
> +					adev->vcn.harvest_config |= 1 << i;
> +			}
>   
> -	adev->vcn.num_enc_rings = 2;
> +			if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
> +						AMDGPU_VCN_HARVEST_VCN1))
> +				/* both instances are harvested, disable the block */
> +				return -ENOENT;
> +		} else
> +			adev->vcn.num_vcn_inst = 1;
> +
> +		adev->vcn.num_enc_rings = 2;
> +	}
>   
>   	vcn_v3_0_set_dec_ring_funcs(adev);
>   	vcn_v3_0_set_enc_ring_funcs(adev);
> @@ -109,6 +127,7 @@ static int vcn_v3_0_sw_init(void *handle)
>   {
>   	struct amdgpu_ring *ring;
>   	int i, j, r;
> +	int vcn_doorbell_index = 0;
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
>   	r = amdgpu_vcn_sw_init(adev);
> @@ -136,6 +155,12 @@ static int vcn_v3_0_sw_init(void *handle)
>   	if (r)
>   		return r;
>   
> +	if (amdgpu_sriov_vf(adev)) {
> +		vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
> +		/* get DWORD offset */
> +		vcn_doorbell_index = vcn_doorbell_index << 1;
> +	}
> +
>   	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
>   		if (adev->vcn.harvest_config & (1 << i))
>   			continue;
> @@ -166,7 +191,13 @@ static int vcn_v3_0_sw_init(void *handle)
>   
>   		ring = &adev->vcn.inst[i].ring_dec;
>   		ring->use_doorbell = true;
> -		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
> +		if (amdgpu_sriov_vf(adev)) {
> +			ring->doorbell_index = vcn_doorbell_index;
> +			/* NOTE: increment so next VCN engine use next DOORBELL DWORD */
> +			vcn_doorbell_index++;
> +		} else {
> +			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
> +		}
>   		if (i != 0)
>   			ring->no_scheduler = true;
>   		sprintf(ring->name, "vcn_dec_%d", i);
> @@ -184,7 +215,13 @@ static int vcn_v3_0_sw_init(void *handle)
>   
>   			ring = &adev->vcn.inst[i].ring_enc[j];
>   			ring->use_doorbell = true;
> -			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
> +			if (amdgpu_sriov_vf(adev)) {
> +				ring->doorbell_index = vcn_doorbell_index;
> +				/* NOTE: increment so next VCN engine use next DOORBELL DWORD */
> +				vcn_doorbell_index++;
> +			} else {
> +				ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
> +			}
>   			if (i != 1)
>   				ring->no_scheduler = true;
>   			sprintf(ring->name, "vcn_enc_%d.%d", i, j);
> @@ -195,6 +232,11 @@ static int vcn_v3_0_sw_init(void *handle)
>   		}
>   	}
>   
> +	if (amdgpu_sriov_vf(adev)) {
> +		r = amdgpu_virt_alloc_mm_table(adev);
> +		if (r)
> +			return r;
> +	}
>   	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
>   		adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
>   
> @@ -213,6 +255,9 @@ static int vcn_v3_0_sw_fini(void *handle)
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   	int r;
>   
> +	if (amdgpu_sriov_vf(adev))
> +		amdgpu_virt_free_mm_table(adev);
> +
>   	r = amdgpu_vcn_suspend(adev);
>   	if (r)
>   		return r;
> @@ -235,24 +280,50 @@ static int vcn_v3_0_hw_init(void *handle)
>   	struct amdgpu_ring *ring;
>   	int i, j, r;
>   
> -	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> -		if (adev->vcn.harvest_config & (1 << i))
> -			continue;
> +	if (amdgpu_sriov_vf(adev)) {
> +		r = vcn_v3_0_start_sriov(adev);
> +		if (r)
> +			goto done;
>   
> -		ring = &adev->vcn.inst[i].ring_dec;
> +		/* initialize VCN dec and enc ring buffers */
> +		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +			if (adev->vcn.harvest_config & (1 << i))
> +				continue;
> +
> +			ring = &adev->vcn.inst[i].ring_dec;
> +			ring->wptr = 0;
> +			ring->wptr_old = 0;
> +			vcn_v3_0_dec_ring_set_wptr(ring);
> +			ring->sched.ready = true;
> +
> +			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
> +				ring = &adev->vcn.inst[i].ring_enc[j];
> +				ring->wptr = 0;
> +				ring->wptr_old = 0;
> +				vcn_v3_0_enc_ring_set_wptr(ring);
> +				ring->sched.ready = true;
> +			}
> +		}
> +	} else {
> +		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +			if (adev->vcn.harvest_config & (1 << i))
> +				continue;
>   
> -		adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
> -						     ring->doorbell_index, i);
> +			ring = &adev->vcn.inst[i].ring_dec;
>   
> -		r = amdgpu_ring_test_helper(ring);
> -		if (r)
> -			goto done;
> +			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
> +						     ring->doorbell_index, i);
>   
> -		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
> -			ring = &adev->vcn.inst[i].ring_enc[j];
>   			r = amdgpu_ring_test_helper(ring);
>   			if (r)
>   				goto done;
> +
> +			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
> +				ring = &adev->vcn.inst[i].ring_enc[j];
> +				r = amdgpu_ring_test_helper(ring);
> +				if (r)
> +					goto done;
> +			}
>   		}
>   	}
>   
> @@ -1137,6 +1208,221 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
> +{
> +	int i, j;
> +	struct amdgpu_ring *ring;
> +	uint64_t cache_addr;
> +	uint64_t rb_addr;
> +	uint64_t ctx_addr;
> +	uint32_t param, resp, expected;
> +	uint32_t offset, cache_size;
> +	uint32_t tmp, timeout;
> +	uint32_t id;
> +
> +	struct amdgpu_mm_table *table = &adev->virt.mm_table;
> +	uint32_t *table_loc;
> +	uint32_t table_size;
> +	uint32_t size, size_dw;
> +
> +	struct mmsch_v3_0_cmd_direct_write
> +		direct_wt = { {0} };
> +	struct mmsch_v3_0_cmd_direct_read_modify_write
> +		direct_rd_mod_wt = { {0} };
> +	struct mmsch_v3_0_cmd_direct_polling
> +		direct_poll = { {0} };
> +	struct mmsch_v3_0_cmd_end end = { {0} };
> +	struct mmsch_v3_0_init_header header;
> +
> +	direct_wt.cmd_header.command_type =
> +		MMSCH_COMMAND__DIRECT_REG_WRITE;
> +	direct_rd_mod_wt.cmd_header.command_type =
> +		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
> +	direct_poll.cmd_header.command_type =
> +		MMSCH_COMMAND__DIRECT_REG_POLLING;
> +	end.cmd_header.command_type =
> +		MMSCH_COMMAND__END;
> +
> +	header.version = MMSCH_VERSION;
> +	header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
> +	for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
> +		header.inst[i].init_status = 0;
> +		header.inst[i].table_offset = 0;
> +		header.inst[i].table_size = 0;
> +	}
> +
> +	table_loc = (uint32_t *)table->cpu_addr;
> +	table_loc += header.total_size;
> +	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> +		if (adev->vcn.harvest_config & (1 << i))
> +			continue;
> +
> +		table_size = 0;
> +
> +		MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_STATUS),
> +			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
> +
> +		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
> +
> +		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
> +			id = amdgpu_ucode_id_vcns[i];
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> +				adev->firmware.ucode[id].tmr_mc_addr_lo);
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> +				adev->firmware.ucode[id].tmr_mc_addr_hi);
> +			offset = 0;
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_VCPU_CACHE_OFFSET0),
> +				0);
> +		} else {
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> +				lower_32_bits(adev->vcn.inst[i].gpu_addr));
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> +				upper_32_bits(adev->vcn.inst[i].gpu_addr));
> +			offset = cache_size;
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_VCPU_CACHE_OFFSET0),
> +				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
> +		}
> +
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_VCPU_CACHE_SIZE0),
> +			cache_size);
> +
> +		cache_addr = adev->vcn.inst[i].gpu_addr + offset;
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
> +			lower_32_bits(cache_addr));
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
> +			upper_32_bits(cache_addr));
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_VCPU_CACHE_OFFSET1),
> +			0);
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_VCPU_CACHE_SIZE1),
> +			AMDGPU_VCN_STACK_SIZE);
> +
> +		cache_addr = adev->vcn.inst[i].gpu_addr + offset +
> +			AMDGPU_VCN_STACK_SIZE;
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
> +			lower_32_bits(cache_addr));
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
> +			upper_32_bits(cache_addr));
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_VCPU_CACHE_OFFSET2),
> +			0);
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_VCPU_CACHE_SIZE2),
> +			AMDGPU_VCN_CONTEXT_SIZE);
> +
> +		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
> +			ring = &adev->vcn.inst[i].ring_enc[j];
> +			ring->wptr = 0;
> +			rb_addr = ring->gpu_addr;
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_RB_BASE_LO),
> +				lower_32_bits(rb_addr));
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_RB_BASE_HI),
> +				upper_32_bits(rb_addr));
> +			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +				mmUVD_RB_SIZE),
> +				ring->ring_size / 4);
> +		}
> +
> +		ring = &adev->vcn.inst[i].ring_dec;
> +		ring->wptr = 0;
> +		rb_addr = ring->gpu_addr;
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
> +			lower_32_bits(rb_addr));
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
> +			upper_32_bits(rb_addr));
> +		/* force RBC into idle state */
> +		tmp = order_base_2(ring->ring_size);
> +		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
> +		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
> +		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
> +		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
> +		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
> +		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
> +			mmUVD_RBC_RB_CNTL),
> +			tmp);
> +
> +		/* add end packet */
> +		MMSCH_V3_0_INSERT_END();
> +
> +		/* refine header */
> +		header.inst[i].init_status = 1;
> +		header.inst[i].table_offset = header.total_size;
> +		header.inst[i].table_size = table_size;
> +		header.total_size += table_size;
> +	}
> +
> +	/* Update init table header in memory */
> +	size = sizeof(struct mmsch_v3_0_init_header);
> +	table_loc = (uint32_t *)table->cpu_addr;
> +	memcpy((void *)table_loc, &header, size);
> +
> +	/* message MMSCH (in VCN[0]) to initialize this client
> +	 * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
> +	 * of memory descriptor location
> +	 */
> +	ctx_addr = table->gpu_addr;
> +	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
> +	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
> +
> +	/* 2, update vmid of descriptor */
> +	tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
> +	tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
> +	/* use domain0 for MM scheduler */
> +	tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
> +	WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp);
> +
> +	/* 3, notify mmsch about the size of this descriptor */
> +	size = header.total_size;
> +	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
> +
> +	/* 4, set resp to zero */
> +	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
> +
> +	/* 5, kick off the initialization and wait until
> +	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
> +	 */
> +	param = 0x10000001;
> +	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param);
> +	tmp = 0;
> +	timeout = 1000;
> +	resp = 0;
> +	expected = param + 1;
> +	while (resp != expected) {
> +		resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
> +		if (resp == expected)
> +			break;
> +
> +		udelay(10);
> +		tmp = tmp + 10;
> +		if (tmp >= timeout) {
> +			DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
> +				" waiting for mmMMSCH_VF_MAILBOX_RESP "\
> +				"(expected=0x%08x, readback=0x%08x)\n",
> +				tmp, expected, resp);
> +			return -EBUSY;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
>   {
>   	uint32_t tmp;

