[PATCH] drm/i915/gvt: support inconsecutive partial PTE update

Mon Sep 17 05:06:27 UTC 2018

On 09/17/2018 10:58 AM, Zhenyu Wang wrote:
> On 2018.09.14 19:08:25 +0800, hang.yuan at linux.intel.com wrote:
>> From: Hang Yuan <hang.yuan at linux.intel.com>
>>
>> Previously we assumed two 4-byte writes to the same PTE coming in sequence.
>> But recently we observed inconsecutive partial write happening as well. So
>> this patch enhances the previous solution. It now uses a list to save more
>> partial writes. If one partial write can be combined with another one in
>> the list to construct a full PTE, update its shadow entry. Otherwise, save
>> the partial write in the list.
>>
>> This patch also update ggtt_mmio_write more to remove unnecessary virtual
>> PTE read and add old shadow PTE invalidation on PTE update case.
>>
>> v2: invalidate old entry and flush ggtt (Zhenyu)
>>
>> Signed-off-by: Hang Yuan <hang.yuan at linux.intel.com>
>> Cc: Yan Zhao <yan.y.zhao at intel.com>
>> Cc: Xiaolin Zhang <xiaolin.zhang at intel.com>
>> Cc: Zhenyu Wang <zhenyu.z.wang at intel.com>
>> ---
>>   drivers/gpu/drm/i915/gvt/gtt.c | 119 +++++++++++++++++++++--------------------
>>   drivers/gpu/drm/i915/gvt/gtt.h |   9 +++-
>>   2 files changed, 69 insertions(+), 59 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
>> index 2402395..cf11f8f 100644
>> --- a/drivers/gpu/drm/i915/gvt/gtt.c
>> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
>> @@ -1905,7 +1905,6 @@ static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
>>   		vgpu_free_mm(mm);
>>   		return ERR_PTR(-ENOMEM);
>>   	}
>> -	mm->ggtt_mm.last_partial_off = -1UL;
>>   
>>   	return mm;
>>   }
>> @@ -1930,7 +1929,6 @@ void _intel_vgpu_mm_release(struct kref *mm_ref)
>>   		invalidate_ppgtt_mm(mm);
>>   	} else {
>>   		vfree(mm->ggtt_mm.virtual_ggtt);
>> -		mm->ggtt_mm.last_partial_off = -1UL;
>>   	}
>>   
>>   	vgpu_free_mm(mm);
>> @@ -2168,6 +2166,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
>>   	struct intel_gvt_gtt_entry e, m;
>>   	dma_addr_t dma_addr;
>>   	int ret;
>> +	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
>> +	bool partial_update = false;
>>   
>>   	if (bytes != 4 && bytes != 8)
>>   		return -EINVAL;
>> @@ -2178,68 +2178,61 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
>>   	if (!vgpu_gmadr_is_valid(vgpu, gma))
>>   		return 0;
>>   
>> -	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
>> -
>> +	e.type = GTT_TYPE_GGTT_PTE;
>>   	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
>>   			bytes);
>>   
>>   	/* If ggtt entry size is 8 bytes, and it's split into two 4 bytes
>> -	 * write, we assume the two 4 bytes writes are consecutive.
>> -	 * Otherwise, we abort and report error
>> +	 * write, save the first 4 bytes in a list and update virtual
>> +	 * PTE. Only update shadow PTE when the second 4 bytes comes.
>>   	 */
>>   	if (bytes < info->gtt_entry_size) {
>> -		if (ggtt_mm->ggtt_mm.last_partial_off == -1UL) {
>> -			/* the first partial part*/
>> -			ggtt_mm->ggtt_mm.last_partial_off = off;
>> -			ggtt_mm->ggtt_mm.last_partial_data = e.val64;
>> -			return 0;
>> -		} else if ((g_gtt_index ==
>> -				(ggtt_mm->ggtt_mm.last_partial_off >>
>> -				info->gtt_entry_size_shift)) &&
>> -			(off !=	ggtt_mm->ggtt_mm.last_partial_off)) {
>> -			/* the second partial part */
>> -
>> -			int last_off = ggtt_mm->ggtt_mm.last_partial_off &
>> -				(info->gtt_entry_size - 1);
>> -
>> -			memcpy((void *)&e.val64 + last_off,
>> -				(void *)&ggtt_mm->ggtt_mm.last_partial_data +
>> -				last_off, bytes);
>> -
>> -			ggtt_mm->ggtt_mm.last_partial_off = -1UL;
>> -		} else {
>> -			int last_offset;
>> -
>> -			gvt_vgpu_err("failed to populate guest ggtt entry: abnormal ggtt entry write sequence, last_partial_off=%lx, offset=%x, bytes=%d, ggtt entry size=%d\n",
>> -					ggtt_mm->ggtt_mm.last_partial_off, off,
>> -					bytes, info->gtt_entry_size);
>> -
>> -			/* set host ggtt entry to scratch page and clear
>> -			 * virtual ggtt entry as not present for last
>> -			 * partially write offset
>> -			 */
>> -			last_offset = ggtt_mm->ggtt_mm.last_partial_off &
>> -					(~(info->gtt_entry_size - 1));
>> -
>> -			ggtt_get_host_entry(ggtt_mm, &m, last_offset);
>> -			ggtt_invalidate_pte(vgpu, &m);
>> -			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
>> -			ops->clear_present(&m);
>> -			ggtt_set_host_entry(ggtt_mm, &m, last_offset);
>> -			ggtt_invalidate(gvt->dev_priv);
>> -
>> -			ggtt_get_guest_entry(ggtt_mm, &e, last_offset);
>> -			ops->clear_present(&e);
>> -			ggtt_set_guest_entry(ggtt_mm, &e, last_offset);
>> -
>> -			ggtt_mm->ggtt_mm.last_partial_off = off;
>> -			ggtt_mm->ggtt_mm.last_partial_data = e.val64;
>> +		bool found = false;
>> +
>> +		list_for_each_entry_safe(pos, n,
>> +				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
>> +			if (g_gtt_index == pos->offset >>
>> +					info->gtt_entry_size_shift) {
>> +				if (off != pos->offset) {
>> +					/* the second partial part*/
>> +
>> +					int last_off = pos->offset &
>> +						(info->gtt_entry_size - 1);
>> +
>> +					memcpy((void *)&e.val64 + last_off,
>> +						(void *)&pos->data + last_off,
>> +						bytes);
>> +
>> +					list_del(&pos->list);
>> +					kfree(pos);
>> +				} else {
>> +					/* update of the first partial part */
>> +
>> +					pos->data = e.val64;
>> +					ggtt_set_guest_entry(ggtt_mm, &e,
>> +						g_gtt_index);
>> +					return 0;
>> +				}
>> +				found = true;
>> +				break;
> 
> Move this two lines in above if (off != pos->offset) is easier to read.
Ok.

>> +			}
>> +		}
>>   
>> -			return 0;
>> +		if (!found) {
>> +			/* the first partial part */
>> +
>> +			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
>> +			if (!partial_pte)
>> +				return -ENOMEM;
>> +			partial_pte->offset = off;
>> +			partial_pte->data = e.val64;
>> +			list_add_tail(&partial_pte->list,
>> +				&ggtt_mm->ggtt_mm.partial_pte_list);
>> +			partial_update = true;
>>   		}
>>   	}
>>   
>> -	if (ops->test_present(&e)) {
>> +	if (!partial_update && (ops->test_present(&e))) {
>>   		gfn = ops->get_pfn(&e);
>>   		m = e;
>>   
>> @@ -2263,16 +2256,18 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
>>   		} else
>>   			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
>>   	} else {
>> -		ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index);
>> -		ggtt_invalidate_pte(vgpu, &m);
>>   		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
>>   		ops->clear_present(&m);
>>   	}
>>   
>>   out:
>> +	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
>> +
>> +	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
>> +	ggtt_invalidate_pte(vgpu, &e);
> 
> For normal 8b size update of ggtt entry, does this invalidate of old pte page
> always required? If yes, looks better split this as a separate fix instead.
Yes. If old PTE is present, it will be invalidated. I will split the 
patch. Thanks for the review.

>> +
>>   	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
>>   	ggtt_invalidate(gvt->dev_priv);
>> -	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
>>   	return 0;
>>   }
>>   
>> @@ -2430,6 +2425,8 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
>>   
>>   	intel_vgpu_reset_ggtt(vgpu, false);
>>   
>> +	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
>> +
>>   	return create_scratch_page_tree(vgpu);
>>   }
>>   
>> @@ -2454,6 +2451,14 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
>>   
>>   static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
>>   {
>> +	struct intel_gvt_partial_pte *pos;
>> +
>> +	list_for_each_entry(pos,
>> +			&vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, list) {
>> +		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
>> +			pos->offset, pos->data);
>> +		kfree(pos);
>> +	}
>>   	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
>>   	vgpu->gtt.ggtt_mm = NULL;
>>   }
>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
>> index 7a9b361..a11bfee 100644
>> --- a/drivers/gpu/drm/i915/gvt/gtt.h
>> +++ b/drivers/gpu/drm/i915/gvt/gtt.h
>> @@ -133,6 +133,12 @@ enum intel_gvt_mm_type {
>>   
>>   #define GVT_RING_CTX_NR_PDPS	GEN8_3LVL_PDPES
>>   
>> +struct intel_gvt_partial_pte {
>> +	unsigned long offset;
>> +	u64 data;
>> +	struct list_head list;
>> +};
>> +
>>   struct intel_vgpu_mm {
>>   	enum intel_gvt_mm_type type;
>>   	struct intel_vgpu *vgpu;
>> @@ -157,8 +163,7 @@ struct intel_vgpu_mm {
>>   		} ppgtt_mm;
>>   		struct {
>>   			void *virtual_ggtt;
>> -			unsigned long last_partial_off;
>> -			u64 last_partial_data;
>> +			struct list_head partial_pte_list;
>>   		} ggtt_mm;
>>   	};
>>   };
>> -- 
>> 2.7.4
>>
>