[Intel-gfx] [PATCH] drm/i915: Remove hole and padding from intel_shared_dpll

Lucas De Marchi lucas.demarchi at intel.com
Thu Mar 15 18:02:01 UTC 2018


On Thu, Mar 15, 2018 at 08:16:32AM -0300, Jani Nikula wrote:
> On Wed, 14 Mar 2018, Lucas De Marchi <lucas.demarchi at intel.com> wrote:
> > Reorder fields so we save 8 bytes per instance: this removes a 4-bytes
> > hole after enum intel_dpll_id and a 4-bytes padding.
> 
> Does GCC have anything like the Clang -Wpadded option to warn us about

Humn.. I never saw one.

> alignment holes and padding? Certainly it would be too noisy to run all
> the time anyway, but it seems a bit tedious to have to do this by
> hand. We could run it every once in a while to catch the worst
> offenders.

oh, no. I didn't do this manually. I used pahole
(https://git.kernel.org/pub/scm/devel/pahole/pahole.git/)
It has even a --reorganize option that should be used with care as some
times data locality is more important on big structs.

I always use it passing the struct I'm checking, but you can give it
just the .o/.ko to analyze all the structs there (possibly with the -H
option to filter by number of holes. Doing this on
drivers/gpu/drm/i915/i915_debugfs.o for example I find a big offender:

struct intel_dp {
...
	/* size: 2688, cachelines: 42, members: 55 */
	/* sum members: 2662, holes: 7, sum holes: 26 */
	/* paddings: 2, sum paddings: 8 */
};


Entire output of drm_i915_private for reference:

$ pahole -C drm_i915_private drivers/gpu/drm/i915/i915.ko

struct drm_i915_private {
	struct drm_device          drm;                  /*     0  1576 */
	/* --- cacheline 24 boundary (1536 bytes) was 40 bytes ago --- */
	struct kmem_cache *        objects;              /*  1576     8 */
	struct kmem_cache *        vmas;                 /*  1584     8 */
	struct kmem_cache *        luts;                 /*  1592     8 */
	/* --- cacheline 25 boundary (1600 bytes) --- */
	struct kmem_cache *        requests;             /*  1600     8 */
	struct kmem_cache *        dependencies;         /*  1608     8 */
	struct kmem_cache *        priorities;           /*  1616     8 */
	struct intel_device_infoconst info;              /*  1624   184 */
	/* --- cacheline 28 boundary (1792 bytes) was 16 bytes ago --- */
	struct intel_driver_caps   caps;                 /*  1808     4 */

	/* XXX 4 bytes hole, try to pack */

	struct resource            dsm;                  /*  1816    64 */
	/* --- cacheline 29 boundary (1856 bytes) was 24 bytes ago --- */
	struct resource            dsm_reserved;         /*  1880    64 */
	/* --- cacheline 30 boundary (1920 bytes) was 24 bytes ago --- */
	resource_size_t            stolen_usable_size;   /*  1944     8 */
	void *                     regs;                 /*  1952     8 */
	struct intel_uncore        uncore;               /*  1960   952 */
	/* --- cacheline 45 boundary (2880 bytes) was 32 bytes ago --- */
	struct i915_virtual_gpu    vgpu;                 /*  2912     8 */
	struct intel_gvt *         gvt;                  /*  2920     8 */
	struct intel_wopcm         wopcm;                /*  2928    12 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 46 boundary (2944 bytes) --- */
	struct intel_huc           huc;                  /*  2944    72 */
	/* --- cacheline 47 boundary (3008 bytes) was 8 bytes ago --- */
	struct intel_guc           guc;                  /*  3016   792 */
	/* --- cacheline 59 boundary (3776 bytes) was 32 bytes ago --- */
	struct intel_csr           csr;                  /*  3808   136 */
	/* --- cacheline 61 boundary (3904 bytes) was 40 bytes ago --- */
	struct intel_gmbus         gmbus[13];            /*  3944 14144 */
	/* --- cacheline 282 boundary (18048 bytes) was 40 bytes ago --- */
	struct mutex               gmbus_mutex;          /* 18088    32 */
	/* --- cacheline 283 boundary (18112 bytes) was 8 bytes ago --- */
	uint32_t                   gpio_mmio_base;       /* 18120     4 */
	uint32_t                   mipi_mmio_base;       /* 18124     4 */
	uint32_t                   psr_mmio_base;        /* 18128     4 */
	uint32_t                   pps_mmio_base;        /* 18132     4 */
	wait_queue_head_t          gmbus_wait_queue;     /* 18136    24 */
	struct pci_dev *           bridge_dev;           /* 18160     8 */
	struct intel_engine_cs *   engine[8];            /* 18168    64 */
	/* --- cacheline 284 boundary (18176 bytes) was 56 bytes ago --- */
	struct i915_gem_context *  kernel_context;       /* 18232     8 */
	/* --- cacheline 285 boundary (18240 bytes) --- */
	struct i915_gem_context *  preempt_context;      /* 18240     8 */
	struct intel_engine_cs *   engine_class[5][4];   /* 18248   160 */
	/* --- cacheline 287 boundary (18368 bytes) was 40 bytes ago --- */
	struct drm_dma_handle *    status_page_dmah;     /* 18408     8 */
	struct resource            mch_res;              /* 18416    64 */
	/* --- cacheline 288 boundary (18432 bytes) was 48 bytes ago --- */
	spinlock_t                 irq_lock;             /* 18480     4 */
	bool                       display_irqs_enabled; /* 18484     1 */

	/* XXX 3 bytes hole, try to pack */

	struct pm_qos_request      pm_qos;               /* 18488   136 */
	/* --- cacheline 291 boundary (18624 bytes) --- */
	struct mutex               sb_lock;              /* 18624    32 */
	union {
		u32                irq_mask;             /*           4 */
		u32                de_irq_mask[3];       /*          12 */
	};                                               /* 18656    12 */
	u32                        gt_irq_mask;          /* 18668     4 */
	u32                        pm_imr;               /* 18672     4 */
	u32                        pm_ier;               /* 18676     4 */
	u32                        pm_rps_events;        /* 18680     4 */
	u32                        pm_guc_events;        /* 18684     4 */
	/* --- cacheline 292 boundary (18688 bytes) --- */
	u32                        pipestat_irq_mask[3]; /* 18688    12 */

	/* XXX 4 bytes hole, try to pack */

	struct i915_hotplug        hotplug;              /* 18704   408 */
	/* --- cacheline 298 boundary (19072 bytes) was 40 bytes ago --- */
	struct intel_fbc           fbc;                  /* 19112   448 */
	/* --- cacheline 305 boundary (19520 bytes) was 40 bytes ago --- */
	struct i915_drrs           drrs;                 /* 19560   144 */
	/* --- cacheline 307 boundary (19648 bytes) was 56 bytes ago --- */
	struct intel_opregion      opregion;             /* 19704   112 */
	/* --- cacheline 309 boundary (19776 bytes) was 40 bytes ago --- */
	struct intel_vbt_data      vbt;                  /* 19816   384 */
	/* --- cacheline 315 boundary (20160 bytes) was 40 bytes ago --- */
	bool                       preserve_bios_swizzle; /* 20200     1 */

	/* XXX 7 bytes hole, try to pack */

	struct intel_overlay *     overlay;              /* 20208     8 */
	struct mutex               backlight_lock;       /* 20216    32 */
	/* --- cacheline 316 boundary (20224 bytes) was 24 bytes ago --- */
	bool                       no_aux_handshake;     /* 20248     1 */

	/* XXX 7 bytes hole, try to pack */

	struct mutex               pps_mutex;            /* 20256    32 */
	/* --- cacheline 317 boundary (20288 bytes) --- */
	struct drm_i915_fence_reg  fence_regs[32];       /* 20288  1536 */
	/* --- cacheline 341 boundary (21824 bytes) --- */
	int                        num_fence_regs;       /* 21824     4 */
	unsigned int               fsb_freq;             /* 21828     4 */
	unsigned int               mem_freq;             /* 21832     4 */
	unsigned int               is_ddr3;              /* 21836     4 */
	unsigned int               skl_preferred_vco_freq; /* 21840     4 */
	unsigned int               max_cdclk_freq;       /* 21844     4 */
	unsigned int               max_dotclk_freq;      /* 21848     4 */
	unsigned int               rawclk_freq;          /* 21852     4 */
	unsigned int               hpll_freq;            /* 21856     4 */
	unsigned int               fdi_pll_freq;         /* 21860     4 */
	unsigned int               czclk_freq;           /* 21864     4 */
	struct {
		struct intel_cdclk_state logical;        /* 21868    20 */
		struct intel_cdclk_state actual;         /* 21888    20 */
		struct intel_cdclk_state hw;             /* 21908    20 */
	} cdclk;                                         /* 21868    60 */
	/* --- cacheline 342 boundary (21888 bytes) was 40 bytes ago --- */
	struct workqueue_struct *  wq;                   /* 21928     8 */
	struct workqueue_struct *  modeset_wq;           /* 21936     8 */
	struct drm_i915_display_funcs display;           /* 21944   192 */
	/* --- cacheline 345 boundary (22080 bytes) was 56 bytes ago --- */
	enum intel_pch             pch_type;             /* 22136     4 */
	short unsigned int         pch_id;               /* 22140     2 */

	/* XXX 2 bytes hole, try to pack */

	/* --- cacheline 346 boundary (22144 bytes) --- */
	long unsigned int          quirks;               /* 22144     8 */
	enum modeset_restore       modeset_restore;      /* 22152     4 */

	/* XXX 4 bytes hole, try to pack */

	struct mutex               modeset_restore_lock; /* 22160    32 */
	struct drm_atomic_state *  modeset_restore_state; /* 22192     8 */
	struct drm_modeset_acquire_ctx reset_ctx;        /* 22200    56 */
	/* --- cacheline 347 boundary (22208 bytes) was 48 bytes ago --- */
	struct list_head           vm_list;              /* 22256    16 */
	/* --- cacheline 348 boundary (22272 bytes) --- */
	struct i915_ggtt           ggtt;                 /* 22272  1880 */
	/* --- cacheline 377 boundary (24128 bytes) was 24 bytes ago --- */
	struct i915_gem_mm         mm;                   /* 24152   680 */
	/* --- cacheline 388 boundary (24832 bytes) --- */
	struct hlist_head          mm_structs[128];      /* 24832  1024 */
	/* --- cacheline 404 boundary (25856 bytes) --- */
	struct mutex               mm_lock;              /* 25856    32 */
	struct intel_ppat          ppat;                 /* 25888   176 */
	/* --- cacheline 407 boundary (26048 bytes) was 16 bytes ago --- */
	struct intel_crtc *        plane_to_crtc_mapping[3]; /* 26064    24 */
	struct intel_crtc *        pipe_to_crtc_mapping[3]; /* 26088    24 */
	/* --- cacheline 408 boundary (26112 bytes) --- */
	struct intel_pipe_crc      pipe_crc[3];          /* 26112   192 */
	/* --- cacheline 411 boundary (26304 bytes) --- */
	int                        num_shared_dpll;      /* 26304     4 */

	/* XXX 4 bytes hole, try to pack */

	struct intel_shared_dpll   shared_dplls[6];      /* 26312   720 */
	/* --- cacheline 422 boundary (27008 bytes) was 24 bytes ago --- */
	const struct intel_dpll_mgr  * dpll_mgr;         /* 27032     8 */
	struct mutex               dpll_lock;            /* 27040    32 */
	/* --- cacheline 423 boundary (27072 bytes) --- */
	unsigned int               active_crtcs;         /* 27072     4 */
	int                        min_cdclk[3];         /* 27076    12 */
	u8                         min_voltage_level[3]; /* 27088     3 */

	/* XXX 1 byte hole, try to pack */

	int                        dpio_phy_iosf_port[2]; /* 27092     8 */
	struct i915_workarounds    workarounds;          /* 27100   228 */
	/* --- cacheline 427 boundary (27328 bytes) --- */
	struct i915_frontbuffer_tracking fb_tracking;    /* 27328    12 */

	/* XXX 4 bytes hole, try to pack */

	struct intel_atomic_helper atomic_helper;        /* 27344    40 */
	u16                        orig_clock;           /* 27384     2 */
	bool                       mchbar_need_disable;  /* 27386     1 */

	/* XXX 5 bytes hole, try to pack */

	/* --- cacheline 428 boundary (27392 bytes) --- */
	struct intel_l3_parity     l3_parity;            /* 27392    56 */
	u32                        edram_cap;            /* 27448     4 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 429 boundary (27456 bytes) --- */
	struct mutex               pcu_lock;             /* 27456    32 */
	struct intel_gen6_power_mgmt gt_pm;              /* 27488   176 */
	/* --- cacheline 432 boundary (27648 bytes) was 16 bytes ago --- */
	struct intel_ilk_power_mgmt ips;                 /* 27664    72 */
	/* --- cacheline 433 boundary (27712 bytes) was 24 bytes ago --- */
	struct i915_power_domains  power_domains;        /* 27736   208 */
	/* --- cacheline 436 boundary (27904 bytes) was 40 bytes ago --- */
	struct i915_psr            psr;                  /* 27944   200 */
	/* --- cacheline 439 boundary (28096 bytes) was 48 bytes ago --- */
	struct i915_gpu_error      gpu_error;            /* 28144   224 */
	/* --- cacheline 443 boundary (28352 bytes) was 16 bytes ago --- */
	struct drm_i915_gem_object * vlv_pctx;           /* 28368     8 */
	struct intel_fbdev *       fbdev;                /* 28376     8 */
	struct work_struct         fbdev_suspend_work;   /* 28384    32 */
	/* --- cacheline 444 boundary (28416 bytes) --- */
	struct drm_property *      broadcast_rgb_property; /* 28416     8 */
	struct drm_property *      force_audio_property; /* 28424     8 */
	struct i915_audio_component * audio_component;   /* 28432     8 */
	bool                       audio_component_registered; /* 28440     1 */

	/* XXX 7 bytes hole, try to pack */

	struct mutex               av_mutex;             /* 28448    32 */
	/* --- cacheline 445 boundary (28480 bytes) --- */
	struct {
		struct list_head   list;                 /* 28480    16 */
		struct llist_head  free_list;            /* 28496     8 */
		struct work_struct free_work;            /* 28504    32 */
		struct ida         hw_ida;               /* 28536    16 */
		/* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
	} contexts;                                      /* 28480    72 */
	/* --- cacheline 446 boundary (28544 bytes) was 8 bytes ago --- */
	u32                        fdi_rx_config;        /* 28552     4 */
	u32                        chv_phy_control;      /* 28556     4 */
	u32                        chv_dpll_md[3];       /* 28560    12 */
	u32                        bxt_phy_grc;          /* 28572     4 */
	u32                        suspend_count;        /* 28576     4 */
	bool                       suspended_to_idle;    /* 28580     1 */

	/* XXX 3 bytes hole, try to pack */

	struct i915_suspend_saved_registers regfile;     /* 28584   424 */
	/* --- cacheline 453 boundary (28992 bytes) was 16 bytes ago --- */
	struct vlv_s0ix_state      vlv_s0ix_state;       /* 29008   260 */
	/* --- cacheline 457 boundary (29248 bytes) was 20 bytes ago --- */
	enum {
		I915_SAGV_UNKNOWN = 0,
		I915_SAGV_DISABLED = 1,
		I915_SAGV_ENABLED = 2,
		I915_SAGV_NOT_CONTROLLED = 3,
	} sagv_status;                                   /* 29268     4 */
	struct {
		uint16_t           pri_latency[5];       /* 29272    10 */
		uint16_t           spr_latency[5];       /* 29282    10 */
		uint16_t           cur_latency[5];       /* 29292    10 */
		uint16_t           skl_latency[8];       /* 29302    16 */
		union {
			struct ilk_wm_values hw;         /*          56 */
			struct skl_wm_values skl_hw;     /*         124 */
			struct vlv_wm_values vlv;        /*          60 */
			struct g4x_wm_values g4x;        /*          40 */
		};                                       /* 29320   124 */
		/* --- cacheline 2 boundary (128 bytes) was 42 bytes ago --- */
		uint8_t            max_level;            /* 29444     1 */
		struct mutex       wm_mutex;             /* 29448    32 */
		/* --- cacheline 3 boundary (192 bytes) was 11 bytes ago --- */
		bool               distrust_bios_wm;     /* 29480     1 */
	} wm;                                            /* 29272   216 */
	/* --- cacheline 460 boundary (29440 bytes) was 48 bytes ago --- */
	struct i915_runtime_pm     runtime_pm;           /* 29488     8 */
	struct {
		bool               initialized;          /* 29496     1 */
		struct kobject *   metrics_kobj;         /* 29504     8 */
		struct ctl_table_header * sysctl_header; /* 29512     8 */
		struct mutex       metrics_lock;         /* 29520    32 */
		struct idr         metrics_idr;          /* 29552    24 */
		/* --- cacheline 1 boundary (64 bytes) was 9 bytes ago --- */
		struct mutex       lock;                 /* 29576    32 */
		struct list_head   streams;              /* 29608    16 */
		struct {
			struct i915_perf_stream * exclusive_stream; /* 29624     8 */
			u32        specific_ctx_id;      /* 29632     4 */
			struct hrtimer poll_check_timer; /* 29640    64 */
			/* --- cacheline 1 boundary (64 bytes) was 12 bytes ago --- */
			wait_queue_head_t poll_wq;       /* 29704    24 */
			bool       pollin;               /* 29728     1 */
			struct ratelimit_state spurious_report_rs; /* 29736    40 */
			/* --- cacheline 2 boundary (128 bytes) was 13 bytes ago --- */
			bool       periodic;             /* 29776     1 */
			int        period_exponent;      /* 29780     4 */
			struct i915_oa_config test_config; /* 29784   192 */
			/* --- cacheline 5 boundary (320 bytes) was 18 bytes ago --- */
			struct {
				struct i915_vma * vma;   /* 29976     8 */
				u8 * vaddr;              /* 29984     8 */
				u32 last_ctx_id;         /* 29992     4 */
				int format;              /* 29996     4 */
				int format_size;         /* 30000     4 */
				spinlock_t ptr_lock;     /* 30004     4 */
				struct {
					u32    offset;   /* 30008     4 */
				} tails[2]; /* 30008     8 */
				unsigned int aged_tail_idx; /* 30016     4 */
				u64 aging_timestamp;     /* 30024     8 */
				u32 head;                /* 30032     4 */
			} oa_buffer;                     /* 29976    64 */
			/* --- cacheline 6 boundary (384 bytes) was 18 bytes ago --- */
			u32        gen7_latched_oastatus1; /* 30040     4 */
			u32        ctx_oactxctrl_offset; /* 30044     4 */
			u32        ctx_flexeu0_offset;   /* 30048     4 */
			u32        gen8_valid_ctx_bit;   /* 30052     4 */
			struct i915_oa_ops ops;          /* 30056    80 */
			/* --- cacheline 7 boundary (448 bytes) was 50 bytes ago --- */
			const struct i915_oa_format  * oa_formats; /* 30136     8 */
		} oa;                                    /* 29624   520 */
		/* --- cacheline 10 boundary (640 bytes) was 1 bytes ago --- */
	} perf;                                          /* 29496   648 */
	/* --- cacheline 471 boundary (30144 bytes) --- */
	struct {
		void               (*resume)(struct drm_i915_private *); /* 30144     8 */
		void               (*cleanup_engine)(struct intel_engine_cs *); /* 30152     8 */
		struct list_head   timelines;            /* 30160    16 */
		struct i915_gem_timeline global_timeline; /* 30176   992 */
		/* --- cacheline 16 boundary (1024 bytes) --- */
		u32                active_requests;      /* 31168     4 */
		bool               awake;                /* 31172     1 */
		unsigned int       epoch;                /* 31176     4 */
		struct delayed_work retire_work;         /* 31184    88 */
		/* --- cacheline 17 boundary (1088 bytes) was 33 bytes ago --- */
		struct delayed_work idle_work;           /* 31272    88 */
		/* --- cacheline 18 boundary (1152 bytes) was 57 bytes ago --- */
		ktime_t            last_init_time;       /* 31360     8 */
		/* --- cacheline 19 boundary (1216 bytes) was 1 bytes ago --- */
	} gt;                                            /* 30144  1224 */
	/* --- cacheline 490 boundary (31360 bytes) was 8 bytes ago --- */
	bool                       chv_phy_assert[2];    /* 31368     2 */
	bool                       ipc_enabled;          /* 31370     1 */

	/* XXX 5 bytes hole, try to pack */

	struct intel_encoder *     av_enc_map[3];        /* 31376    24 */
	struct {
		struct platform_device * platdev;        /* 31400     8 */
		int                irq;                  /* 31408     4 */
	} lpe_audio;                                     /* 31400    16 */
	struct i915_pmu            pmu;                  /* 31416   496 */
	/* --- cacheline 498 boundary (31872 bytes) was 40 bytes ago --- */

	/* size: 31912, cachelines: 499, members: 135 */
	/* sum members: 31844, holes: 16, sum holes: 68 */
	/* last cacheline: 40 bytes */
};

Lucas De Marchi


More information about the Intel-gfx mailing list