[PATCH 8/9] drm/i915/gvt: Manage shadow pages with radix tree

Zhi Wang zhi.a.wang at intel.com
Mon Dec 25 10:30:30 UTC 2017


Yes. Mostly I think it's because page cache has to use radix tree. Just 
like I said you only get benefit in large scalability case. Jike did a 
research and comparison during writting IOVA map cache and I did see 
some FS moved to use rb tree from radix tree because of the memory 
consumption.

With those considerations, I suggest you attach some data in the patch 
comments. Because it might be good to page cache, but might not be good 
to us.

Thanks,
Zhi.

On 12/25/17 18:16, Du, Changbin wrote:
> On Mon, Dec 25, 2017 at 05:35:05PM +0800, Zhi Wang wrote:
>> Better attach some investigation and performance data here.
>>
>> Radix tree is quite heavy and requires more memory than hash and rb tree. A
>> lot of Linux subsystems finally turn to rb tree from radix because of that.
>>
> Sure? then why page cache still use radix tree? :
> 
>> It only shows better performance in a very large scalability, compared to
>> hash and RB. So, usually, people tends to use RB tree instead of radix, if
>> the hash table is not satisfying.
>>
> Here we what we need is a map and radix is it. Most of the rb-tree use cases are
> to manage a data with range, like IOVA allocator, VMALLOC and so on.
> 
> Actually, here is not so sensitive. Bot rb-tree and radix tree are very quick,
> but a little slower than hasn table.
> 
>> Thanks,
>> Zhi.
>>
>> On 12/25/17 17:11, changbin.du at intel.com wrote:
>>> From: Changbin Du <changbin.du at intel.com>
>>>
>>> We don't know how many page tables will be shadowed. It varies
>>> considerably corresponding to guest load. So radix tree is the
>>> best choice for us.
>>>
>>> Signed-off-by: Changbin Du <changbin.du at intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/gvt/gtt.c | 48 ++++++++++++++++++++++--------------------
>>>    drivers/gpu/drm/i915/gvt/gtt.h |  4 +---
>>>    2 files changed, 26 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
>>> index 1866c2d..1ce6393 100644
>>> --- a/drivers/gpu/drm/i915/gvt/gtt.c
>>> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
>>> @@ -638,8 +638,8 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
>>>    	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
>>>    		       PCI_DMA_BIDIRECTIONAL);
>>> -	if (!hlist_unhashed(&spt->node))
>>> -		hash_del(&spt->node);
>>> +
>>> +	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
>>>    	if (spt->guest_page.oos_page)
>>>    		detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
>>> @@ -652,12 +652,14 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
>>>    static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
>>>    {
>>> -	struct hlist_node *n;
>>>    	struct intel_vgpu_ppgtt_spt *spt;
>>> -	int i;
>>> +	struct radix_tree_iter iter;
>>> +	void **slot;
>>> -	hash_for_each_safe(vgpu->gtt.spt_hash_table, i, n, spt, node)
>>> +	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
>>> +		spt = radix_tree_deref_slot(slot);
>>>    		ppgtt_free_spt(spt);
>>> +	}
>>>    }
>>>    static int ppgtt_handle_guest_write_page_table_bytes(
>>> @@ -695,16 +697,10 @@ static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
>>>    }
>>>    /* Find the spt by shadow page mfn. */
>>> -static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
>>> +static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
>>>    		struct intel_vgpu *vgpu, unsigned long mfn)
>>>    {
>>> -	struct intel_vgpu_ppgtt_spt *spt;
>>> -
>>> -	hash_for_each_possible(vgpu->gtt.spt_hash_table, spt, node, mfn) {
>>> -		if (spt->shadow_page.mfn == mfn)
>>> -			return spt;
>>> -	}
>>> -	return NULL;
>>> +	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
>>>    }
>>>    static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
>>> @@ -739,8 +735,8 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
>>>    			     0, 4096, PCI_DMA_BIDIRECTIONAL);
>>>    	if (dma_mapping_error(kdev, daddr)) {
>>>    		gvt_vgpu_err("fail to map dma addr\n");
>>> -		free_spt(spt);
>>> -		return ERR_PTR(-EINVAL);
>>> +		ret = -EINVAL;
>>> +		goto err_free_spt;
>>>    	}
>>>    	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
>>>    	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
>>> @@ -753,17 +749,23 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
>>>    	ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn,
>>>    					ppgtt_write_protection_handler, spt);
>>> -	if (ret) {
>>> -		free_spt(spt);
>>> -		dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
>>> -		return ERR_PTR(ret);
>>> -	}
>>> +	if (ret)
>>> +		goto err_unmap_dma;
>>> -	INIT_HLIST_NODE(&spt->node);
>>> -	hash_add(vgpu->gtt.spt_hash_table, &spt->node, spt->shadow_page.mfn);
>>> +	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
>>> +	if (ret)
>>> +		goto err_unreg_page_track;
>>>    	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
>>>    	return spt;
>>> +
>>> +err_unreg_page_track:
>>> +	intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn);
>>> +err_unmap_dma:
>>> +	dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
>>> +err_free_spt:
>>> +	free_spt(spt);
>>> +	return ERR_PTR(ret);
>>>    }
>>>    #define pt_entry_size_shift(spt) \
>>> @@ -1965,7 +1967,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
>>>    {
>>>    	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
>>> -	hash_init(gtt->spt_hash_table);
>>> +	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
>>>    	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
>>>    	INIT_LIST_HEAD(&gtt->oos_page_list_head);
>>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
>>> index 32504c0..c006cc9d 100644
>>> --- a/drivers/gpu/drm/i915/gvt/gtt.h
>>> +++ b/drivers/gpu/drm/i915/gvt/gtt.h
>>> @@ -39,7 +39,6 @@
>>>    struct intel_vgpu_mm;
>>> -#define INTEL_GVT_GTT_HASH_BITS 8
>>>    #define INTEL_GVT_INVALID_ADDR (~0UL)
>>>    struct intel_gvt_gtt_entry {
>>> @@ -179,7 +178,7 @@ struct intel_vgpu_gtt {
>>>    	struct intel_vgpu_mm *ggtt_mm;
>>>    	unsigned long active_ppgtt_mm_bitmap;
>>>    	struct list_head ppgtt_mm_list_head;
>>> -	DECLARE_HASHTABLE(spt_hash_table, INTEL_GVT_GTT_HASH_BITS);
>>> +	struct radix_tree_root spt_tree;
>>>    	struct list_head oos_page_list_head;
>>>    	struct list_head post_shadow_list_head;
>>>    	struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX];
>>> @@ -212,7 +211,6 @@ struct intel_vgpu_oos_page {
>>>    struct intel_vgpu_ppgtt_spt {
>>>    	atomic_t refcount;
>>>    	struct intel_vgpu *vgpu;
>>> -	struct hlist_node node;
>>>    	struct {
>>>    		intel_gvt_gtt_type_t type;
>>>
>> _______________________________________________
>> intel-gvt-dev mailing list
>> intel-gvt-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
> 


More information about the intel-gvt-dev mailing list